Commit 58e5f8d3 authored by Atharva Jadhav

Add the trainer script and the corresponding SLURM job script

parent 193e473b
from datasets import load_dataset
from unsloth import FastLanguageModel
# Apply the tokenizer's chat template to every conversation and store the result in a "text" column.
def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]
    return { "text" : texts, }
pass

# Turn each (code, refined code, summary) triple into a two-turn user/assistant conversation.
def format_to_conversations(examples):
    conversations = []
    codes = examples["code"]
    refined_codes = examples["refined code"]
    summaries = examples["summary"]
    for i in range(len(refined_codes)):
        user_content = f'''Refine the C# code enclosed within tags [C#] and [/C#]. The refined code should be enclosed within tags [refined_C#] and [/refined_C#].
The summary of changes should be enclosed within tags [code_changes] and [/code_changes].
Do not do anything beyond what the user asks for.
Do not generate any additional text.
[C#]
{codes[i]}
[/C#]
'''
        assistant_content = f'''
[refined_C#]
{refined_codes[i]}
[/refined_C#]
[code_changes]
{summaries[i]}
[/code_changes]
'''
        conversation = []
        user_dict = {'content': user_content, 'role': 'user'}
        assistant_dict = {'content': assistant_content, 'role': 'assistant'}
        conversation.append(user_dict)
        conversation.append(assistant_dict)
        conversations.append(conversation)
    return { "conversations" : conversations }
pass
max_seq_length = 32768 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Qwen2.5-Coder-7B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
dataset = load_dataset("atharva2721/qwen-refined-code", split = "train")
dataset = dataset.map(format_to_conversations, batched = True,)
dataset = dataset.map(formatting_prompts_func, batched = True,)
\ No newline at end of file
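A minimal sanity check of the data preparation above, assuming the dataset and tokenizer objects created by that script; it simply prints one formatted example:

print(dataset[0]["conversations"][0]["role"])  # expected: "user"
print(dataset[0]["text"][:500])                # start of the chat-templated training text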
import torch
from unsloth import FastLanguageModel, is_bfloat16_supported
from unsloth.chat_templates import get_chat_template, train_on_responses_only
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
# Apply the tokenizer's chat template to every conversation and store the result in a "text" column.
def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]
    return { "text" : texts, }
pass

# Turn each (code, refined code, summary) triple into a two-turn user/assistant conversation.
def format_to_conversations(examples):
    conversations = []
    codes = examples["code"]
    refined_codes = examples["refined code"]
    summaries = examples["summary"]
    for i in range(len(refined_codes)):
        user_content = f'''Refine the C# code enclosed within tags [C#] and [/C#]. The refined code should be enclosed within tags [refined_C#] and [/refined_C#].
The summary of changes should be enclosed within tags [code_changes] and [/code_changes].
Do not do anything beyond what the user asks for.
Do not generate any additional text.
[C#]
{codes[i]}
[/C#]
'''
        assistant_content = f'''
[refined_C#]
{refined_codes[i]}
[/refined_C#]
[code_changes]
{summaries[i]}
[/code_changes]
'''
        conversation = []
        user_dict = {'content': user_content, 'role': 'user'}
        assistant_dict = {'content': assistant_content, 'role': 'assistant'}
        conversation.append(user_dict)
        conversation.append(assistant_dict)
        conversations.append(conversation)
    return { "conversations" : conversations }
pass
max_seq_length = 32768 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Qwen2.5-Coder-7B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
model = FastLanguageModel.get_peft_model(
    model,
    r = 128, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "qwen-2.5",
)
dataset = load_dataset("atharva2721/qwen-refined-code", split = "train")
dataset = dataset.map(format_to_conversations, batched = True,)
dataset = dataset.map(formatting_prompts_func, batched = True,)
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 4,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 1,
        gradient_accumulation_steps = 4, # Fixed major bug in latest Unsloth
        warmup_steps = 5,
        num_train_epochs = 1, # Set this for 1 full training run.
        #max_steps = 10,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "paged_adamw_8bit", # Save more memory
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|im_start|>user\n",
    response_part = "<|im_start|>assistant\n",
)
# Sanity check: decode one training example, then decode its labels with the masked (-100)
# positions replaced by spaces to confirm that only the assistant response is trained on.
print(tokenizer.decode(trainer.train_dataset[0]["input_ids"]))
space = tokenizer(" ", add_special_tokens = False).input_ids[0]
print(tokenizer.decode([space if x == -100 else x for x in trainer.train_dataset[0]["labels"]]))
# Snapshot GPU name and reserved memory before training so the post-training statistics below are meaningful.
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")
trainer_stats = trainer.train()
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(
    f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training."
)
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")
model.save_pretrained("models/first_finetuned_model") # Local saving
tokenizer.save_pretrained("models/first_finetuned_model")
# model.push_to_hub("your_name/lora_model", token = "...") # Online saving
# tokenizer.push_to_hub("your_name/lora_model", token = "...") # Online saving
\ No newline at end of file
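A minimal inference sketch for the adapters saved above, assuming they sit in models/first_finetuned_model as written by the trainer script; the user message shown is a placeholder, not a real dataset example:

from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "models/first_finetuned_model",  # local LoRA adapters saved above
    max_seq_length = 32768,
    dtype = None,
    load_in_4bit = True,
)
FastLanguageModel.for_inference(model)  # switch Unsloth into its faster inference mode

messages = [{"role": "user", "content": "Refine the C# code enclosed within tags [C#] and [/C#]. ..."}]
inputs = tokenizer.apply_chat_template(messages, tokenize = True, add_generation_prompt = True, return_tensors = "pt").to("cuda")
outputs = model.generate(input_ids = inputs, max_new_tokens = 1024, use_cache = True)
print(tokenizer.decode(outputs[0], skip_special_tokens = True))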
#!/usr/bin/zsh
### Add basic configuration for job
#SBATCH --job-name=fine_tuning
#SBATCH --output=logs/fine_tuning_%j.log
#SBATCH --error=logs/fine_tuning_error_%j.log
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=8
#SBATCH --gres=gpu:1
#SBATCH --time=2-15:00:00
###------------------------------------------------------------------------------------------------------------------------------
### Run the project in the work directory of the cluster (configure based on need!)
### RWTH File System : https://help.itc.rwth-aachen.de/en/service/rhr4fjjutttf/article/da307ec2c60940b29bd42ac483fc3ea7/
cd $HPCWORK
cd codebud/fine-tuning
###------------------------------------------------------------------------------------------------------------------------------
### JOB SCRIPT RUN
module load GCCcore/.13.2.0
module load Python/3.11.5
module load CUDA
source ../../venvs/codebud/bin/activate
echo $VIRTUAL_ENV
python --version
python main_fine_tuning.py
module unload CUDA
module unload Python/3.11.5
deactivate
echo "Script ran successfully"
\ No newline at end of file
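Typical usage on the cluster, assuming the job script above is saved as fine_tuning.sh (the actual filename is not shown on this page):

sbatch fine_tuning.sh                  # submit the job
squeue -u $USER                        # check its queue status
tail -f logs/fine_tuning_<jobid>.log   # follow the log file configured with --output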