Skip to content
Snippets Groups Projects
Commit bf928bca authored by Atharva Jadhav
Browse files

Adjust fine-tuning and other scripts

parent 88b90ea7
No related branches found
No related tags found
No related merge requests found
......@@ -2,4 +2,6 @@ data_generation/logs
fine-tuning/logs
fine-tuning/models
fine-tuning/outputs
fine-tuning/wandb
\ No newline at end of file
fine-tuning/wandb
data_standardization/logs
inference/logs
\ No newline at end of file
......@@ -12,10 +12,7 @@ def format_to_conversations(examples):
refined_codes = examples["refined code"]
summaries = examples["summary"]
for i in range(len(refined_codes)):
user_content = f'''Refine the C# code enclosed within tags [C#] and [/C#]. Return the refined code should be enclosed with tags [refined_C#] and [/refined_C#].
Summary of changes should be enclosed with [code_changes] and [/code_changes].
You do not do anything more than user asks you do it.
You do not generate any additional text.
user_content = f'''Refine the C# code enclosed within tags [C#] and [/C#].
[C#]
{codes[i]}
......@@ -52,10 +49,10 @@ model, tokenizer = FastLanguageModel.from_pretrained(
load_in_4bit = load_in_4bit,
)
dataset = load_dataset("atharva2721/qwen-refined-code", split = "train")
dataset = load_dataset("atharva2721/refined-test-aggregated", split = "train")
dataset = dataset.map(format_to_conversations, batched = True,)
dataset = dataset.map(formatting_prompts_func, batched = True,)
dataset.push_to_hub('csharp-qwen-standardized')
dataset.push_to_hub('standardized-refined-test-aggregated')
print('Dataset pushed to hub')
\ No newline at end of file
......@@ -2,9 +2,9 @@
### Add basic configuration for job
#SBATCH --job-name=fine_tuning
#SBATCH --output=logs/fine_tuning_%j.log
#SBATCH --error=logs/fine_tuning_error_%j.log
#SBATCH --job-name=dataset_standardization
#SBATCH --output=logs/dataset_standardization_%j.log
#SBATCH --error=logs/dataset_standardization_error_%j.log
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=5
......@@ -17,7 +17,7 @@
### Run the project in the work directory of the cluster (configure based on need!)
### RWTH File System : https://help.itc.rwth-aachen.de/en/service/rhr4fjjutttf/article/da307ec2c60940b29bd42ac483fc3ea7/
cd $HPCWORK
cd codebud/fine-tuning
cd codebud/data_standardization
###------------------------------------------------------------------------------------------------------------------------------
### JOB SCRIPT RUN
......@@ -32,7 +32,6 @@ python --version
python dataset_standardization.py
module unload CUDA
module unload Python/3.11.5
......
......@@ -22,10 +22,10 @@ print(f'Model loaded successfully at {datetime.datetime.now()}', flush=True)
model = FastLanguageModel.get_peft_model(
model,
r = 128, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
r = 64, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
"gate_proj", "up_proj", "down_proj",],
lora_alpha = 16,
lora_alpha = 64,
lora_dropout = 0, # Supports any, but = 0 is optimized
bias = "none", # Supports any, but = "none" is optimized
# [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
......@@ -40,8 +40,8 @@ tokenizer = get_chat_template(
chat_template = "qwen-2.5",
)
dataset = load_dataset("atharva2721/csharp-qwen-standardized", split = "train")
dataset = load_dataset("atharva2721/standardized-refined-train-aggregated", split = "train")
validation_dataset = load_dataset("atharva2721/standardized-refined-val-aggregated", split = "train")
wandb.init(project="codebud")
......@@ -49,6 +49,7 @@ trainer = SFTTrainer(
model = model,
tokenizer = tokenizer,
train_dataset = dataset,
eval_dataset=validation_dataset,
dataset_text_field = "text",
max_seq_length = max_seq_length,
data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
......@@ -57,19 +58,26 @@ trainer = SFTTrainer(
args = TrainingArguments(
per_device_train_batch_size = 1,
gradient_accumulation_steps = 4, # Fixed major bug in latest Unsloth
warmup_steps = 5,
num_train_epochs = 1, # Set this for 1 full training run.
warmup_ratio = 0.1,
num_train_epochs = 3, # Number of full passes over the training data.
#max_steps = 60,
learning_rate = 2e-4,
learning_rate = 2e-5,
fp16 = not is_bfloat16_supported(),
bf16 = is_bfloat16_supported(),
logging_steps = 1,
eval_strategy="steps",
eval_steps=656,
fp16_full_eval = not is_bfloat16_supported(),
bf16_full_eval = is_bfloat16_supported(),
logging_steps = 10,
save_steps = 656,
optim = "paged_adamw_8bit", # Save more memory
weight_decay = 0.01,
lr_scheduler_type = "linear",
lr_scheduler_type = "cosine",
seed = 3407,
remove_unused_columns=False,
output_dir = "outputs",
report_to = "wandb", # Use this for WandB etc
run_name = "run-name"
),
)
......@@ -93,7 +101,6 @@ print(f"{start_gpu_memory} GB of memory reserved.", flush=True)
print(f'Everything initialized. Starting the training at {datetime.datetime.now()}', flush=True)
trainer_stats = trainer.train()
print(f'Successfully completed training at {datetime.datetime.now()}', flush=True)
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
......@@ -110,10 +117,10 @@ print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")
print(f'Pushing model and tokenizer at {datetime.datetime.now()}', flush=True)
model.save_pretrained("models/first_finetuned_model_one_epochs") # Local saving
tokenizer.save_pretrained("models/first_finetuned_model_one_epochs")
model.push_to_hub("first_finetuned_model_one_epochs") # Online saving
tokenizer.push_to_hub("first_finetuned_model_one_epochs") # Online saving
model.save_pretrained("models/finetuned_model_with_eval") # Local saving
tokenizer.save_pretrained("models/finetuned_model_with_eval")
model.push_to_hub("finetuned_model_with_eval") # Online saving
tokenizer.push_to_hub("finetuned_model_with_eval") # Online saving
wandb.finish()
print(f'Run complete at {datetime.datetime.now()}', flush=True)
\ No newline at end of file
......@@ -9,7 +9,7 @@
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=5
#SBATCH --gres=gpu:1
#SBATCH --time=00:30:00
#SBATCH --time=12:00:00
###------------------------------------------------------------------------------------------------------------------------------
......@@ -29,9 +29,8 @@ source ../../venvs/codebud/bin/activate
echo $VIRTUAL_ENV
python --version
#python main_fine_tuning.py
#python dataset_standardization.py
python inference.py
python main_fine_tuning.py
module unload CUDA
module unload Python/3.11.5
......
......@@ -13,13 +13,10 @@ model, tokenizer = FastLanguageModel.from_pretrained(
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
code = """
using System; using System.IO; using System.ServiceProcess; using InEngine.Core; //using Mono.Unix; //using Mono.Unix.Native; namespace InEngine { class Program { public const string ServiceName = "InEngine.NET"; public static ServerHost ServerHost { get; set; } static void Main(string[] args) { /* * Set current working directory as services use the system directory by default. * Also, maybe run from the CLI from a different directory than the application root. */ Directory.SetCurrentDirectory(AppDomain.CurrentDomain.BaseDirectory); new ArgumentInterpreter().Interpret(args); } /// <summary> /// Start the server as a service or as a CLI program in the foreground. /// </summary> public static void RunServer() { var settings = InEngineSettings.Make(); ServerHost = new ServerHost() { MailSettings = settings.Mail, QueueSettings = settings.Queue, }; if (!Environment.UserInteractive && Type.GetType("Mono.Runtime") == null) { using (var service = new Service()) ServiceBase.Run(service); } else { ServerHost.Start(); Console.WriteLine("Press any key to exit..."); Console.ReadLine(); ServerHost.Dispose(); } } static void Start(string[] args) { ServerHost.Start(); } static void Stop() { ServerHost.Dispose(); } public class Service : ServiceBase { public Service() { ServiceName = Program.ServiceName; } protected override void OnStart(string[] args) { Start(args); } protected override void OnStop() { Stop(); } } } }
"""
content = f'''
Refine the C# code enclosed within tags [C#] and [/C#]. Return the refined code should be enclosed with tags [refined_C#] and [/refined_C#].
Summary of changes should be enclosed with [code_changes] and [/code_changes].
You do not do anything more than user asks you do it.
You do not generate any additional text.
Refine the C# code enclosed within tags [C#] and [/C#].
[C#]
{code}
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment