Pakistan's First Oracle Blog
How to Create Urdu Hindi AI Model and Dataset from New Dataset
This video is a hands-on, step-by-step tutorial on creating a new dataset, fine-tuning an AI model on that dataset, and then pushing the model to Hugging Face.
Code:
%%capture
import torch
major_version, minor_version = torch.cuda.get_device_capability()
# Must install separately since Colab has torch 2.2.1, which breaks packages
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
if major_version >= 8:
    # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)
    !pip install --no-deps packaging ninja flash-attn xformers trl peft accelerate bitsandbytes
else:
    # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
    !pip install --no-deps xformers trl peft accelerate bitsandbytes
pass
!pip install einops
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gemma-7b-bnb-4bit", # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = " ", # use one if using gated models like meta-llama/Llama-2-7b-hf
)
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none", # Supports any, but = "none" is optimized
    use_gradient_checkpointing = True,
    random_state = 3407,
    use_rslora = False, # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)
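# The Urdu template below translates roughly to: "Below is an instruction that
# describes a movie name, along with an input that provides further context.
# Take a moment to write song lyrics that match the meaning of the movie name."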
alpaca_prompt = """ذیل میں ایک ہدایت ہے جو فلم کے نام کی وضاحت کرتی ہے، اس کے ساتھ ایک ان پٹ بھی ہے جو مزید دستاویزات فراہم کرتا ہے۔ گانے کے بول لکھنے کے لیے ایک لمحہ نکالیں جو فلم کے نام کے معنی سے میل کھاتا ہے۔
### Instruction:
{}
### Input:
{}
### Response:
{}"""
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    instructions = examples["urdu_instruction"]
    inputs = examples["urdu_input"]
    outputs = examples["urdu_output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass
from datasets import load_dataset
dataset = load_dataset("fahdmirzac/urdu_bollywood_songs_dataset", split = "train")
dataset = dataset.map(formatting_prompts_func, batched = True,)
from huggingface_hub import login
access_token = "<your Hugging Face token>" # never hard-code or publish a real token
login(token=access_token)
from trl import SFTTrainer
from transformers import TrainingArguments
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 100,
        learning_rate = 2e-4,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)
trainer_stats = trainer.train()
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        "دیے گئے فلم کے نام کے بارے میں ایک مختصر گیت کے بول لکھیں۔", # instruction ("Write short song lyrics about the given movie name.")
        "کیوں پیار ہو گیا", # input (the movie name "Kyun Pyar Ho Gaya")
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens = 200, use_cache = True)
tokenizer.batch_decode(outputs)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        "دیے گئے فلم کے نام کے بارے میں ایک مختصر گیت کے بول لکھیں۔", # instruction ("Write short song lyrics about the given movie name.")
        "رنگ", # input (the movie name "Rang")
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens = 200, use_cache = True)
tokenizer.batch_decode(outputs)
model.push_to_hub("fahdmirzac/Gemma_Urdu_Hindi_Bollywood_Songs", token = "<your Hugging Face token>")
Using Claude 3 Haiku Vision with Amazon Bedrock Locally
This video is a hands-on guide on how to use the vision features of Anthropic's Claude 3 Haiku AI model with Amazon Bedrock.
Code Used:
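The code itself was not included in the post; below is a minimal sketch of the idea, assuming boto3 with Bedrock Runtime access (the region, image filename, and prompt are placeholders):
import base64
import json
import boto3

bedrock_runtime = boto3.client(service_name="bedrock-runtime", region_name="us-east-1")

# Read and base64-encode a local image (placeholder filename)
with open("image.jpg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

# Anthropic Messages API payload with one image block and one text block
body = json.dumps({
    "anthropic_version": "bedrock-2023-05-31",
    "max_tokens": 500,
    "messages": [{
        "role": "user",
        "content": [
            {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": image_b64}},
            {"type": "text", "text": "Describe this image."},
        ],
    }],
})

response = bedrock_runtime.invoke_model(
    modelId="anthropic.claude-3-haiku-20240307-v1:0",
    body=body,
)
print(json.loads(response["body"].read())["content"][0]["text"])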
Create AI Agent in AWS with Boto3 Code
This video is a step-by-step tutorial with code on how to create Amazon Bedrock AI agents with boto3 in Python and integrate them with Lambda.
Code used: just attach any Lambda function of your choice, as in the sketch below.
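The agent-creation code was not included in the post; here is a minimal boto3 sketch of the flow the video describes. The role ARN, Lambda ARN, OpenAPI schema, and model ID are placeholders you must supply:
import boto3

bedrock_agent = boto3.client("bedrock-agent", region_name="us-east-1")

# Create the agent (role ARN, model ID, and instruction are placeholders)
response = bedrock_agent.create_agent(
    agentName="demo-agent",
    agentResourceRoleArn="arn:aws:iam::<account-id>:role/<bedrock-agent-role>",
    foundationModel="anthropic.claude-v2",
    instruction="You are a helpful agent that answers questions about customer orders.",
)
agent_id = response["agent"]["agentId"]

# Attach a Lambda-backed action group (any Lambda of your choice, per the post)
bedrock_agent.create_agent_action_group(
    agentId=agent_id,
    agentVersion="DRAFT",
    actionGroupName="demo-actions",
    actionGroupExecutor={"lambda": "arn:aws:lambda:us-east-1:<account-id>:function:<your-lambda>"},
    apiSchema={"payload": "<your OpenAPI schema as a JSON string>"},
)

# Prepare the draft agent so it can be tested
bedrock_agent.prepare_agent(agentId=agent_id)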
How AI Transcription Can Make Your Next Job Interview More Effective
If you are looking to use AI transcription during your job interview to get help from an LLM, this video will help you immensely.
In the rapidly evolving landscape of recruitment, the integration of Artificial Intelligence (AI) tools has been a game-changer, especially AI transcription services. These services, which convert speech into a written or electronic text document, are now being increasingly utilized during candidate interviews. This innovative approach is not just about efficiency; it's about transforming the interview process into a more insightful, fair, and strategic component of talent acquisition. Let's delve into the merits of using AI transcription in candidate interviews and how it is reshaping the future of hiring.
Enhanced Accuracy and Objectivity
One of the paramount benefits of using AI transcription is the enhanced accuracy and objectivity it brings to the recruitment process. Traditional note-taking methods during interviews are inherently flawed, prone to human error, and can be biased. AI transcription, on the other hand, captures every word as it is spoken, ensuring that nothing is missed or misinterpreted. This verbatim record allows hiring managers to review the interview with a fine-tooth comb, ensuring that decisions are made based on what was actually said, rather than on recollection or interpretation.
Improved Candidate Experience
The candidate experience is crucial in attracting top talent. AI transcription contributes to a more engaging and less intimidating interview environment. Candidates can speak freely, knowing that their responses are being accurately recorded. This level of precision in capturing responses allows candidates to feel heard and valued, improving their overall experience and perception of the company. Furthermore, the efficiency of AI transcription speeds up the interview process, enabling quicker feedback and follow-ups, which candidates appreciate.
Accessibility and Inclusivity
AI transcription makes interviews more accessible and inclusive, breaking down barriers for candidates who may be deaf or hard of hearing. By providing real-time transcription, these candidates can participate fully in the interview process, ensuring equal opportunities for all. This commitment to inclusivity not only enhances the company's reputation but also widens the talent pool by making it more diverse.
Streamlined Analysis and Decision Making
The comprehensive data captured by AI transcription allows for a more streamlined analysis of candidates' responses. Hiring teams can utilize this data to identify patterns, evaluate key competencies, and make more informed decisions. AI tools can also highlight specific skills and qualifications mentioned during the interview, making it easier to assess candidates against job requirements. This data-driven approach reduces the likelihood of cognitive biases affecting hiring decisions and helps in selecting the best fit for the role.
Enhanced Collaboration Among Hiring Teams
AI transcription facilitates better collaboration among hiring team members, especially in scenarios where not all members can attend every interview. The transcribed interviews can be easily shared, allowing team members to review and provide input at their convenience. This collaborative approach ensures a more comprehensive evaluation of candidates, drawing on diverse perspectives within the team.
Legal Compliance and Record-Keeping
In today's litigious environment, maintaining accurate records of candidate interviews can be crucial for legal compliance. AI transcription provides an exact record of the conversation, which can be invaluable in defending against claims of discrimination or bias. This level of documentation ensures that the hiring process is transparent and defensible, safeguarding the company against potential legal challenges.
Conclusion
The adoption of AI transcription in candidate interviews is more than just a nod to technological advancement; it's a strategic move towards making the recruitment process more accurate, fair, and inclusive. By enhancing the accuracy of interviews, improving candidate experience, and streamlining decision-making, AI transcription is setting a new standard in talent acquisition. As companies continue to seek competitive advantages in attracting top talent, the use of AI in recruitment processes, especially transcription, is likely to become not just common but essential.
Tutorial to Implement RAG with Gemma Model Locally
Unlock the power of Retrieval-Augmented Generation (RAG) locally using the Gemma model with our detailed step-by-step tutorial. Learn how to enhance your projects by integrating RAG for insightful document processing and AI-driven content generation. Perfect for developers, data scientists, and AI enthusiasts eager to leverage advanced NLP techniques on their own documents. No prior RAG experience required!
Commands Used:
pip install -U "transformers==4.38.1" --upgrade
pip install -q pypdf
pip install -q python-dotenv
pip install llama-index==0.10.12
pip install -q gradio
pip install einops
pip install accelerate
pip install llama-index-llms-huggingface
pip install llama-index-embeddings-fastembed
pip install fastembed
import logging
import sys
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core import Settings
documents = SimpleDirectoryReader("/home/ubuntu/pdfs").load_data()
from llama_index.embeddings.fastembed import FastEmbedEmbedding
embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.embed_model = embed_model
Settings.chunk_size = 512
from llama_index.core import PromptTemplate
system_prompt = "You are a Q&A assistant. Your goal is to answer questions as accurately as possible based on the instructions and context provided."
query_wrapper_prompt = PromptTemplate("<|USER|>{query_str}<|ASSISTANT|>")
from huggingface_hub.hf_api import HfFolder
HfFolder.save_token('<your huggingface token from huggingface.co>')
import torch
llm = HuggingFaceLLM(
    context_window=8192,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.7, "do_sample": False}, # note: temperature has no effect when do_sample=False
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="google/gemma-7b-it",
    model_name="google/gemma-7b-it",
    device_map="auto",
    tokenizer_kwargs={"max_length": 4096},
    model_kwargs={"torch_dtype": torch.float16}
)
Settings.llm = llm
Settings.chunk_size = 512
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()
def predict(input, history):
    response = query_engine.query(input)
    return str(response)
import gradio as gr
gr.ChatInterface(predict).launch(share=True)
Build Your Own Audio Transcription Tool with AI
In this video, you will learn how to deploy an LLM-based application into production by using Amazon Bedrock and the Amazon Transcribe ASR service to summarize audio files with the Titan model.
Code:
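The code was not included in the post; below is a minimal sketch of the described pipeline, assuming an audio file already uploaded to S3 (the bucket URI, job name, and region are placeholders):
import json
import time
import boto3
import requests

transcribe = boto3.client("transcribe", region_name="us-east-1")
bedrock_runtime = boto3.client("bedrock-runtime", region_name="us-east-1")

# Start a transcription job on an audio file already in S3 (URI is a placeholder)
transcribe.start_transcription_job(
    TranscriptionJobName="demo-transcription-job",
    Media={"MediaFileUri": "s3://<your-bucket>/audio.mp3"},
    MediaFormat="mp3",
    LanguageCode="en-US",
)

# Poll until the job finishes
while True:
    job = transcribe.get_transcription_job(TranscriptionJobName="demo-transcription-job")
    if job["TranscriptionJob"]["TranscriptionJobStatus"] in ("COMPLETED", "FAILED"):
        break
    time.sleep(10)

# Download the transcript JSON and pull out the text
uri = job["TranscriptionJob"]["Transcript"]["TranscriptFileUri"]
transcript = requests.get(uri).json()["results"]["transcripts"][0]["transcript"]

# Ask Titan on Bedrock for a summary
body = json.dumps({"inputText": "Summarize this transcript:\n" + transcript})
resp = bedrock_runtime.invoke_model(modelId="amazon.titan-text-express-v1", body=body)
print(json.loads(resp["body"].read())["results"][0]["outputText"])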
Manage and Run Gemma LLM with Keras Locally
This video shows how to install and manage Gemma LLM with Keras. Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models.
Code:
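The code was not included in the post; the snippet below is a minimal sketch of loading Gemma through KerasNLP, assuming Kaggle credentials for downloading the weights (the preset name and backend choice are assumptions):
import os
# Gemma weights are served via Kaggle; credentials are placeholders
os.environ["KAGGLE_USERNAME"] = "<your-kaggle-username>"
os.environ["KAGGLE_KEY"] = "<your-kaggle-key>"
os.environ["KERAS_BACKEND"] = "jax"  # or "tensorflow" / "torch"

import keras_nlp

# Load a Gemma preset and generate text
gemma_lm = keras_nlp.models.GemmaCausalLM.from_preset("gemma_2b_en")
print(gemma_lm.generate("What is Keras?", max_length=64))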
Step by Step Guide to Configure Amazon Bedrock with VPC Endpoints and PrivateLink
This video is a step-by-step tutorial on setting up AWS Bedrock with VPC endpoints and PrivateLink to build secure and private generative AI applications.
Steps:
Step 0: Make sure a private subnet exists with a private route table that has no route to the internet.
Step 1: Create 2 security groups: Bedrock-Endpoint-SG and Bedrock-Lambda-SG.
Step 2: In Bedrock-Lambda-SG, add Bedrock-Endpoint-SG for all traffic in INBOUND, and 0.0.0.0/0 in OUTBOUND.
Step 3: In Bedrock-Endpoint-SG, add Bedrock-Lambda-SG for all traffic in INBOUND and OUTBOUND.
Step 4: Create 2 endpoints (bedrock, bedrock-runtime) in the private subnet and attach Bedrock-Endpoint-SG to both.
Step 5: Create the Lambda function, set the timeout to 15 seconds, and attach Bedrock-Lambda-SG; the Lambda execution role should have Bedrock permissions.
Lambda Code:
import boto3
import json
def lambda_handler(event, context):
    bedrock = boto3.client(
        service_name='bedrock',
        region_name='us-east-1'
    )
    # Bedrock Runtime client used to invoke and question the models
    bedrock_runtime = boto3.client(
        service_name='bedrock-runtime',
        region_name='us-east-1'
    )
    models = bedrock.list_foundation_models().get('modelSummaries')
    for model in models:
        print(model['modelName'] + ', Input=' + '-'.join(model['inputModalities']) + ', Output=' + ''.join(model['outputModalities']) + ', Provider=' + model['providerName'])
    return {
        'statusCode': 200,
    }
How to Identify Oracle Database Orphan Sessions
In the world of database management, particularly with Oracle databases, "orphan sessions" are a common issue that can affect performance and resource utilization.
In Oracle databases, an orphan session, sometimes known as a "zombie session," is a session that remains in the database even though its corresponding client process has terminated. These sessions no longer have a user actively interacting with them, yet they consume system resources and can hold locks, leading to performance degradation and blocking issues.
Orphan sessions can occur for various reasons, such as the following (a query sketch for spotting them appears after the list):
- Network issues that disrupt the connection between the client and the server.
- Application or client crashes that terminate the session abnormally.
- Database bugs or misconfigurations.
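A common heuristic for spotting candidate orphan sessions is to look for user sessions that have sat idle for a long time in V$SESSION. Below is a minimal sketch using the python-oracledb driver; the connection details and the one-hour idle threshold are assumptions for illustration, and a long-idle session is only a candidate, not proof, of an orphan:
import oracledb  # assumes the python-oracledb driver and a user with access to V$SESSION

# Connection details are placeholders
conn = oracledb.connect(user="system", password="<password>", dsn="dbhost/orclpdb1")

# Heuristic: user sessions inactive for more than an hour (3600 seconds)
sql = """
    SELECT sid, serial#, username, status, last_call_et
    FROM v$session
    WHERE type = 'USER'
      AND status = 'INACTIVE'
      AND last_call_et > 3600
"""
for sid, serial, username, status, idle_secs in conn.cursor().execute(sql):
    print(f"Candidate orphan: SID={sid}, SERIAL#={serial}, USER={username}, idle {idle_secs}s")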
Oracle OCI's Generative AI Service: A New Era in Cloud Computing
The world of cloud computing is witnessing a revolutionary change with the introduction of Oracle Cloud Infrastructure's (OCI) Generative AI Service. This innovative offering from Oracle is a testament to the rapidly evolving field of artificial intelligence (AI), particularly in the realm of generative models. As businesses and developers seek more efficient and creative solutions, Oracle's new service stands out as a significant milestone.
What is Oracle OCI's Generative AI Service?
Oracle's OCI Generative AI Service is a cloud-based platform that provides users with access to powerful generative AI models. These models are capable of creating a wide range of content, including text, images, and possibly even audio or video in the future. The service is designed to integrate seamlessly with other OCI offerings, ensuring a cohesive and efficient cloud computing experience.
Key Features and Capabilities
Advanced AI Models
At the heart of OCI's Generative AI Service are state-of-the-art AI models that have been trained on vast datasets. These models can generate high-quality, original content based on user inputs, making them invaluable for a variety of applications.
Scalability and Performance
Oracle's robust cloud infrastructure ensures that the Generative AI Service can scale to meet the demands of any project, big or small. This scalability is crucial for handling large-scale AI tasks without compromising on performance or speed.
Integration with OCI Ecosystem
The service is designed to work seamlessly with other OCI products, such as data storage, analytics, and security services. This integration allows for a more streamlined workflow, as users can easily access and combine different OCI services.
Use Cases
The potential applications of Oracle OCI's Generative AI Service are vast and varied. Here are a few examples:
Content Creation
For marketers and content creators, the service can generate written content, images, and potentially other forms of media. This capability can significantly speed up the content creation process and inspire new ideas.
Business Intelligence
Businesses can leverage the AI's ability to analyze and synthesize information to gain insights from data. This can aid in decision-making, trend analysis, and strategy development.
Research and Development
In the R&D sector, the service can assist in generating hypotheses, modeling complex systems, and even predicting outcomes, thereby accelerating the pace of innovation.
Security and Ethics
Oracle recognizes the importance of ethical AI use and has implemented measures to ensure the responsible deployment of its Generative AI Service. This includes safeguards against generating harmful or biased content and maintaining user privacy and data security.
Getting Started with OCI Generative AI Service
To start using the service, users need to have an Oracle Cloud account. Oracle provides comprehensive documentation and support to help users integrate the AI service into their projects.
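As a rough illustration only, the snippet below sketches what invoking the service through the OCI Python SDK might look like; the client, model, and request class names here are assumptions based on the SDK's conventions, not verified code from Oracle's documentation:
import oci

# Loads ~/.oci/config; everything below is an assumption about the SDK surface
config = oci.config.from_file()
client = oci.generative_ai_inference.GenerativeAiInferenceClient(config)

response = client.generate_text(
    oci.generative_ai_inference.models.GenerateTextDetails(
        compartment_id="<your-compartment-ocid>",
        serving_mode=oci.generative_ai_inference.models.OnDemandServingMode(
            model_id="cohere.command"
        ),
        inference_request=oci.generative_ai_inference.models.CohereLlmInferenceRequest(
            prompt="Write a tagline for a cloud computing blog.",
            max_tokens=100,
        ),
    )
)
print(response.data)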
Conclusion
Oracle OCI's Generative AI Service is a groundbreaking addition to the cloud computing landscape. It offers immense potential for businesses, developers, and creators to harness the power of AI for generating content and gaining insights. As the technology continues to evolve, it will be exciting to see the innovative applications that emerge from this platform.
Oracle's commitment to integrating advanced AI capabilities into its cloud services is a clear indicator of the transformative impact AI is set to have across industries. The OCI Generative AI Service is not just a tool; it's a gateway to a future where AI and cloud computing work hand in hand to unlock new possibilities.
Top Code LLM in the World - Locally Install Stable Code 3B without GPU
This video walks through a step-by-step guide to locally install a top code AI model that can run on a CPU and is very small in size.
Code:
pip install transformers torch
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stable-code-3b", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "stabilityai/stable-code-3b",
    trust_remote_code=True,
    torch_dtype="auto",
)
# Per the title, no GPU is required; move the model to CUDA only if one is available
if torch.cuda.is_available():
    model.cuda()
inputs = tokenizer("write me a script in Java to reverse a list", return_tensors="pt").to(model.device)
tokens = model.generate(
    **inputs,
    max_new_tokens=500,
    temperature=0.2,
    do_sample=True,
)
print(tokenizer.decode(tokens[0], skip_special_tokens=True))
How to Install NVIDIA Drivers on AWS EC2 Instance Windows
This video shows how to install NVIDIA drivers for Windows on AWS EC2 G4DN and other instance types.
Commands Used:
msiexec.exe /i https://awscli.amazonaws.com/AWSCLIV2.msi
aws --version
In a new window, run "aws configure" and set your IAM user access key ID and secret access key.
Run the following in PowerShell as administrator:
Install-Module -Name AWS.Tools.Installer
$Bucket = "ec2-windows-nvidia-drivers"
$KeyPrefix = "latest"
$LocalPath = "$home\Desktop\NVIDIA"
$Objects = Get-S3Object -BucketName $Bucket -KeyPrefix $KeyPrefix -Region us-east-1
foreach ($Object in $Objects) {
    $LocalFileName = $Object.Key
    if ($LocalFileName -ne '' -and $Object.Size -ne 0) {
        $LocalFilePath = Join-Path $LocalPath $LocalFileName
        Copy-S3Object -BucketName $Bucket -Key $Object.Key -LocalFile $LocalFilePath -Region us-east-1
    }
}
Talk with Comics Using AI in Any Language
This video shows a step-by-step demo with code on how to analyze comics in any language and talk to them using LlamaIndex and ChatGPT.
Code Used:
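The code was not included in the post; as a rough sketch of the underlying idea, the snippet below sends a comic page to a vision-capable ChatGPT model via the OpenAI API directly (the video builds this with LlamaIndex; the filename, model name, and prompt here are placeholders):
import base64
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

# Base64-encode a comic page image (placeholder filename)
with open("comic_page.jpg", "rb") as f:
    b64 = base64.b64encode(f.read()).decode()

response = client.chat.completions.create(
    model="gpt-4-vision-preview",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this comic panel and translate any dialogue to English."},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
        ],
    }],
    max_tokens=300,
)
print(response.choices[0].message.content)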
Use AI to Query AWS RDS Database with LlamaIndex
This video shows a step-by-step guide with code on how to integrate LlamaIndex with an AWS RDS PostgreSQL database to query it in natural language. It's AI and LLMs at their best.
Commands Used:
sudo apt-get install libpq-dev
pip install llama-index sqlalchemy psycopg2
from sqlalchemy import create_engine, MetaData
from llama_index import SQLDatabase, VectorStoreIndex
from llama_index.indices.struct_store import SQLTableRetrieverQueryEngine
from llama_index.objects import SQLTableNodeMapping, ObjectIndex, SQLTableSchema
pg_uri = f"postgresql+psycopg2://postgres:test1234@<RDS Endpoint>:5432/testdb"
engine = create_engine(pg_uri)
metadata_obj = MetaData()
metadata_obj.reflect(engine)
sql_database = SQLDatabase(engine)
from llama_index.indices.struct_store import NLSQLTableQueryEngine
query_engine = NLSQLTableQueryEngine(
    sql_database=sql_database,
    tables=["companies", "contacts"],
)
query_str = "How many people work in GCP and what are their names?"
response = query_engine.query(query_str)
Train TinyLlama 1.1B Locally on Own Custom Dataset
This video is an easy and simple tutorial explaining how to train or fine-tune the TinyLlama model locally on your own data by using Unsloth.
Code Used:
import torch
major_version, minor_version = torch.cuda.get_device_capability()
!pip install "unsloth[colab] @ git+https://github.com/unslothai/unsloth.git"
from unsloth import FastLanguageModel
import torch
max_seq_length = 4096
dtype = None
load_in_4bit = True
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/tinyllama-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
model = FastLanguageModel.get_peft_model(
    model,
    r = 32, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 32,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = False,
    random_state = 3407,
    max_seq_length = max_seq_length,
)
from trl import SFTTrainer
from transformers import TrainingArguments
from transformers.utils import logging
from datasets import load_dataset
logging.set_verbosity_info()
# The post does not show how the training dataset was loaded; as a placeholder,
# load any dataset that has a "text" field (the file name is hypothetical):
dataset = load_dataset("json", data_files="your_data.jsonl", split="train")
trainer = SFTTrainer(
    model = model,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    packing = True,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_ratio = 0.1,
        num_train_epochs = 1,
        learning_rate = 2e-5,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.1,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)
trainer_stats = trainer.train()
How to Build RAG Pipeline with Mixtral 8x7B to Talk to Your Own Documents
This video shows a step-by-step process on how to locally build a RAG pipeline with Mixtral 8x7B to talk to local documents such as PDFs.
Commands Used:
#%%capture
!pip install farm-haystack[colab]
from getpass import getpass
HF_TOKEN = getpass("Hugging Face Token")
from haystack.nodes import PreProcessor,PromptModel, PromptTemplate, PromptNode
from google.colab import files
files.upload()
%%capture
!pip install PyPDF2
import PyPDF2
from haystack import Document
pdf_file_path = "e10897.pdf" # Replace with the path to your PDF file
def extract_text_from_pdf(pdf_path):
    text = ""
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        for page_num in range(len(pdf_reader.pages)):
            page = pdf_reader.pages[page_num]
            text += page.extract_text()
    return text
pdf_text = extract_text_from_pdf(pdf_file_path)
# Create the Haystack document
doc = Document(
    content=pdf_text,
    meta={"pdf_path": pdf_file_path}
)
docs = [doc]
processor = PreProcessor(
    clean_empty_lines=True,
    clean_whitespace=True,
    clean_header_footer=True,
    split_by="word",
    split_length=500,
    split_respect_sentence_boundary=True,
    split_overlap=0,
    language="it", # leftover from an Italian example; use "en" for English documents
)
preprocessed_docs = processor.process(docs)
from haystack.document_stores import InMemoryDocumentStore
document_store = InMemoryDocumentStore(use_bm25=True)
document_store.write_documents(preprocessed_docs)
from haystack import Pipeline
from haystack.nodes import BM25Retriever
retriever = BM25Retriever(document_store, top_k=2)
qa_template = PromptTemplate(prompt=
    """Using only the information contained in the context,
    answer only the question asked without adding suggestions of possible questions, and answer exclusively in English.
    If the answer cannot be deduced from the context, reply: "I don't know because it is not relevant to the Context."
    Context: {join(documents)};
    Question: {query}
    """)
prompt_node = PromptNode(
    model_name_or_path="mistralai/Mixtral-8x7B-Instruct-v0.1",
    api_key=HF_TOKEN,
    default_prompt_template=qa_template,
    max_length=500,
    model_kwargs={"model_max_length": 5000}
)
rag_pipeline = Pipeline()
rag_pipeline.add_node(component=retriever, name="retriever", inputs=["Query"])
rag_pipeline.add_node(component=prompt_node, name="prompt_node", inputs=["retriever"])
from pprint import pprint
print_answer = lambda out: pprint(out["results"][0].strip())
print_answer(rag_pipeline.run(query="What is Oracle DBA?"))
print_answer(rag_pipeline.run(query="Why Lion is king of jungle?"))
Mixtral 8X7B Local Installation - Step by Step
pip3 install --upgrade transformers optimum
pip3 uninstall -y auto-gptq
git clone https://github.com/PanQiWei/AutoGPTQ
cd AutoGPTQ
git checkout v0.5.1
pip3 install .
model_name_or_path = "TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ"
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, GPTQConfig
from auto_gptq import AutoGPTQForCausalLM
# To use a different branch, change revision
# For example: revision="gptq-4bit-32g-actorder_True"
model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
    model_basename="model",
    use_safetensors=True,
    trust_remote_code=False,
    device="cuda:0",
    use_triton=False,
    disable_exllama=False,
    disable_exllamav2=True,
    quantize_config=None)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True, trust_remote_code=False)
prompt = "Why Lion is King of Jungle?"
prompt_template=f'''<s>[INST] {prompt} [/INST]
'''
print("\n\n*** Generate:")
input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
print(tokenizer.decode(output[0]))
AWS PartyRock - Amazon Bedrock AI Playground
With PartyRock, you can easily build AI apps in seconds, for free, using the latest LLMs and without writing any code.
Beginner Tutorial to Fine-Tune an AI Model
This video steps through an easy tutorial to fine-tune a model on a custom dataset from scratch by using LlamaIndex and Gradient.
Dataset Used:
{"inputs": "<s>### Instruction:\nWho is Fahd Mirza?\n\n### Response:\nFahd Mirza is an AI Cloud Engineer based in Sydney Australia. He has also got a background in databases and devops plus infrastrucutre.</s>"}
{"inputs": "<s>### Instruction:\nWhat are hobbies of Fahd Mirza?\n\n### Response\nFahd Mirza loves to spend time on his youtube channel and reading about technology.</s>"}
{"inputs": "<s>### Instruction:\nWhat Fahd Mirza's favorite Color?\n\n### Response:\nFahd Mirza's favorite color varies from time to time. These days its blue.</s>"}
{"inputs": "<s>### Instruction:\nWhat does Fahd Mirza look like?\n\n### Response:\nFahd Mirza looks like a human.</s>"}
.env File:
GRADIENT_ACCESS_TOKEN='<>'
GRADIENT_WORKSPACE_ID='<>'
Commands Used:
!pip install llama-index gradientai -q
!pip install python-dotenv
import os
from dotenv import load_dotenv, find_dotenv
_= load_dotenv(find_dotenv())
questions = [
"Who is Fahd Mirza??",
"What is Fahd Mirza's favorite Color?",
"What are hobbies of Fahd Mirza?",
]
prompts = list(
    f"<s> ### Instruction:\n{q}\n\n### Response:\n" for q in questions
)
print(prompts)
import os
from llama_index.llms import GradientBaseModelLLM
from llama_index.finetuning.gradient.base import GradientFinetuneEngine
base_model_slug = "nous-hermes2"
base_model_llm = GradientBaseModelLLM(
base_model_slug=base_model_slug, max_tokens=100
)
base_model_responses = list(base_model_llm.complete(p).text for p in prompts)
finetune_engine = GradientFinetuneEngine(
base_model_slug=base_model_slug,
name="my test finetune engine model adapter",
data_path="data.jsonl",
)
epochs = 2
for i in range(epochs):
finetune_engine.finetune()
fine_tuned_model = finetune_engine.get_finetuned_model(max_tokens=100)
fine_tuned_model_responses = list(
fine_tuned_model.complete(p).text for p in prompts
)
fine_tuned_model._model.delete()
for i, q in enumerate(questions):
    print(f"Question: {q}")
    print(f"Base: {base_model_responses[i]}")
    print(f"Fine tuned: {fine_tuned_model_responses[i]}")
    print()
Setting Environment Variable in Google Colab
This video shows how to set environment variables and load them in a Google Colab notebook, AWS SageMaker notebook, or Jupyter notebook.
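The code was not shown in the post text; below is a minimal sketch of the two usual approaches in a Colab or Jupyter notebook (the variable name and .env path are placeholders):
import os
from dotenv import load_dotenv  # pip install python-dotenv

# Option 1: set the variable directly in the notebook process
os.environ["MY_API_KEY"] = "<value>"

# Option 2: load variables from a .env file (e.g. a line MY_API_KEY=<value>)
load_dotenv("/content/.env")  # path assumes Google Colab's default file area
print(os.getenv("MY_API_KEY"))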