Compare revisions: atharvavjadhav21/codebud
#!/usr/bin/zsh
### Add basic configuration for job
#SBATCH --account=rwth1776
#SBATCH --job-name=prometheus_evaluation
#SBATCH --output=logs/prometheus_evaluation_%j.log
#SBATCH --error=logs/prometheus_evaluation_error_%j.log
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=5
#SBATCH --gres=gpu:1
#SBATCH --time=3:00:00
###------------------------------------------------------------------------------------------------------------------------------
### Run the project in the work directory of the cluster (configure based on need!)
### RWTH File System : https://help.itc.rwth-aachen.de/en/service/rhr4fjjutttf/article/da307ec2c60940b29bd42ac483fc3ea7/
cd $HPCWORK
cd codebud/evaluation
###------------------------------------------------------------------------------------------------------------------------------
### JOB SCRIPT RUN
module load GCCcore/.13.2.0
module load Python/3.11.5
module load CUDA
source ../../venvs/codebud/bin/activate
echo $VIRTUAL_ENV
python --version
python codebud_prometheus_absoulte_eval.py
module unload CUDA
module unload Python/3.11.5
deactivate
echo "Script ran successfully"
#!/usr/bin/zsh
### Add basic configuration for job
#SBATCH --job-name=prometheus_evaluation
#SBATCH --output=logs/prometheus_evaluation_analysis_%j.log
#SBATCH --error=logs/prometheus_evaluation_analysis_error_%j.log
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=1
#SBATCH --time=00:10:00
###------------------------------------------------------------------------------------------------------------------------------
### Run the project in the work directory of the cluster (configure based on need!)
### RWTH File System : https://help.itc.rwth-aachen.de/en/service/rhr4fjjutttf/article/da307ec2c60940b29bd42ac483fc3ea7/
cd $HPCWORK
cd codebud/evaluation
###------------------------------------------------------------------------------------------------------------------------------
### JOB SCRIPT RUN
module load GCCcore/.13.2.0
module load Python/3.11.5
module load CUDA
source ../../venvs/codebud/bin/activate
echo $VIRTUAL_ENV
python --version
python prometheus_evaluation_analysis.py
module unload CUDA
module unload Python/3.11.5
deactivate
echo "Script ran successfully"
from unsloth import FastLanguageModel, is_bfloat16_supported
from unsloth.chat_templates import get_chat_template, train_on_responses_only
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
import wandb
import torch
import datetime
print(f'Started the script at {datetime.datetime.now()}', flush=True)
max_seq_length = 32768 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
model, tokenizer = FastLanguageModel.from_pretrained(
model_name = "unsloth/Meta-Llama-3.1-8B-Instruct",
max_seq_length = max_seq_length,
dtype = dtype,
load_in_4bit = load_in_4bit,
)
print(f'Model loaded successfully at {datetime.datetime.now()}', flush=True)
model = FastLanguageModel.get_peft_model(
model,
r = 64, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
"gate_proj", "up_proj", "down_proj",],
lora_alpha = 64,
lora_dropout = 0, # Supports any, but = 0 is optimized
bias = "none", # Supports any, but = "none" is optimized
# [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
random_state = 3407,
use_rslora = False, # We support rank stabilized LoRA
loftq_config = None, # And LoftQ
)
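# Note: with load_in_4bit = True above, this is effectively a QLoRA setup: the 4-bit
# base weights stay frozen and only the LoRA adapter matrices (r = 64) are trained.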
tokenizer = get_chat_template(
tokenizer,
chat_template = "llama-3.1",
)
dataset = load_dataset("atharva2721/llama-standardized-refined-train-aggregated", split = "train")
validation_dataset = load_dataset("atharva2721/llama-standardized-refined-test-aggregated", split = "train")
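# The two splits above are expected to already ship a "text" column in the Llama 3.1
# chat format, since SFTTrainer below reads dataset_text_field = "text". A hypothetical
# sketch of how such a column could be built, assuming the raw rows carried a
# "conversations" list of {"role", "content"} dicts (not something these datasets are
# confirmed to contain):
#
# def formatting_prompts_func(examples):
#     texts = [
#         tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False)
#         for convo in examples["conversations"]
#     ]
#     return {"text": texts}
#
# dataset = dataset.map(formatting_prompts_func, batched = True)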
wandb.init(project="codebud")
trainer = SFTTrainer(
model = model,
tokenizer = tokenizer,
train_dataset = dataset,
eval_dataset=validation_dataset,
dataset_text_field = "text",
max_seq_length = max_seq_length,
data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
dataset_num_proc = 4,
packing = False, # Can make training 5x faster for short sequences.
args = TrainingArguments(
per_device_train_batch_size = 1,
gradient_accumulation_steps = 4, # Fixed major bug in latest Unsloth
warmup_ratio = 0.1,
num_train_epochs = 3, # Set this for 1 full training run.
#max_steps = 60,
learning_rate = 2e-5,
fp16 = not is_bfloat16_supported(),
bf16 = is_bfloat16_supported(),
eval_strategy="steps",
eval_steps=410,
per_device_eval_batch_size = 1,
fp16_full_eval = not is_bfloat16_supported(),
bf16_full_eval = is_bfloat16_supported(),
logging_steps = 10,
save_steps = 410,
optim = "paged_adamw_8bit", # Save more memory
weight_decay = 0.01,
lr_scheduler_type = "cosine",
seed = 3407,
output_dir = "outputs",
report_to = "wandb", # Use this for WandB etc
run_name = "run-name"
),
)
trainer = train_on_responses_only(
trainer,
instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
)
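# Sanity check: decode and print the first training example, then its labels with the
# masked (-100) positions rendered as spaces; only the assistant response should remain,
# confirming that train_on_responses_only masked out the prompt tokens.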
print(tokenizer.decode(trainer.train_dataset[0]["input_ids"]), flush=True)
space = tokenizer(" ", add_special_tokens = False).input_ids[0]
print(tokenizer.decode([space if x == -100 else x for x in trainer.train_dataset[0]["labels"]]), flush=True)
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.", flush=True)
print(f"{start_gpu_memory} GB of memory reserved.", flush=True)
print(f'Everything initialized. Starting the training at {datetime.datetime.now()}', flush=True)
trainer_stats = trainer.train()
print(f'Successfully completed training at {datetime.datetime.now()}', flush=True)
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(
f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training."
)
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")
print(f'Pushing model and tokenizer at {datetime.datetime.now()}', flush=True)
model.save_pretrained("models/llama_finetuned_model") # Local saving
tokenizer.save_pretrained("models/llama_finetuned_model")
model.push_to_hub("llama_finetuned_model") # Online saving
tokenizer.push_to_hub("llama_finetuned_model") # Online saving
wandb.finish()
print(f'Run complete at {datetime.datetime.now()}', flush=True)
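# Hedged smoke-test sketch (assumes the adapter directory saved above exists): reload
# the fine-tuned LoRA weights with the same Unsloth calls the inference scripts use.
#
# from unsloth import FastLanguageModel
# model, tokenizer = FastLanguageModel.from_pretrained(
#     model_name = "models/llama_finetuned_model",
#     max_seq_length = 32768,
#     load_in_4bit = True,
# )
# FastLanguageModel.for_inference(model)  # enable the faster inference path before generating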
from unsloth import FastLanguageModel, is_bfloat16_supported
from unsloth.chat_templates import get_chat_template, train_on_responses_only
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
import wandb
import torch
import datetime
print(f'Started the script at {datetime.datetime.now()}', flush=True)
max_seq_length = 32768 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
model, tokenizer = FastLanguageModel.from_pretrained(
model_name = "unsloth/Qwen2.5-Coder-7B-Instruct",
max_seq_length = max_seq_length,
dtype = dtype,
load_in_4bit = load_in_4bit,
)
print(f'Model loaded successfully at {datetime.datetime.now()}', flush=True)
model = FastLanguageModel.get_peft_model(
model,
r = 64, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
"gate_proj", "up_proj", "down_proj",],
lora_alpha = 64,
lora_dropout = 0, # Supports any, but = 0 is optimized
bias = "none", # Supports any, but = "none" is optimized
# [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
random_state = 3407,
use_rslora = False, # We support rank stabilized LoRA
loftq_config = None, # And LoftQ
)
tokenizer = get_chat_template(
tokenizer,
chat_template = "qwen-2.5",
)
dataset = load_dataset("atharva2721/standardized-refined-train-aggregated", split = "train")
validation_dataset = load_dataset("atharva2721/standardized-refined-val-test-aggregated", split = "train")
wandb.init(project="codebud")
trainer = SFTTrainer(
model = model,
tokenizer = tokenizer,
train_dataset = dataset,
eval_dataset=validation_dataset,
dataset_text_field = "text",
max_seq_length = max_seq_length,
data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
dataset_num_proc = 4,
packing = False, # Can make training 5x faster for short sequences.
args = TrainingArguments(
per_device_train_batch_size = 1,
gradient_accumulation_steps = 4, # Fixed major bug in latest Unsloth
warmup_ratio = 0.1,
num_train_epochs = 3, # Set this for 1 full training run.
#max_steps = 60,
learning_rate = 2e-5,
fp16 = not is_bfloat16_supported(),
bf16 = is_bfloat16_supported(),
eval_strategy="steps",
eval_steps=410,
per_device_eval_batch_size = 1,
fp16_full_eval = not is_bfloat16_supported(),
bf16_full_eval = is_bfloat16_supported(),
logging_steps = 10,
save_steps = 410,
optim = "paged_adamw_8bit", # Save more memory
weight_decay = 0.01,
lr_scheduler_type = "cosine",
seed = 3407,
output_dir = "outputs",
report_to = "wandb", # Use this for WandB etc
run_name = "run-name"
),
)
trainer = train_on_responses_only(
trainer,
instruction_part = "<|im_start|>user\n",
response_part = "<|im_start|>assistant\n",
)
print(tokenizer.decode(trainer.train_dataset[0]["input_ids"]), flush=True)
space = tokenizer(" ", add_special_tokens = False).input_ids[0]
print(tokenizer.decode([space if x == -100 else x for x in trainer.train_dataset[0]["labels"]]), flush=True)
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.", flush=True)
print(f"{start_gpu_memory} GB of memory reserved.", flush=True)
print(f'Everything initialized. Starting the training at {datetime.datetime.now()}', flush=True)
trainer_stats = trainer.train()
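# Hedged note: with save_steps = 410 and output_dir = "outputs", an interrupted run can
# usually be resumed from the newest checkpoint via the standard Trainer option, e.g.
# trainer_stats = trainer.train(resume_from_checkpoint = True)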
print(f'Successfully completed training at {datetime.datetime.now()}', flush=True)
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(
f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training."
)
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")
print(f'Pushing model and tokenizer at {datetime.datetime.now()}', flush=True)
model.save_pretrained("models/finetuned_model_with_three_epochs_eval") # Local saving
tokenizer.save_pretrained("models/finetuned_model_with_three_epochs_eval")
model.push_to_hub("finetuned_model_with_three_epochs_eval") # Online saving
tokenizer.push_to_hub("finetuned_model_with_three_epochs_eval") # Online saving
wandb.finish()
print(f'Run complete at {datetime.datetime.now()}', flush=True)
#!/usr/bin/zsh
### Add basic configuration for job
#SBATCH --job-name=fine_tuning
#SBATCH --output=logs/fine_tuning_%j.log
#SBATCH --error=logs/fine_tuning_error_%j.log
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=5
#SBATCH --gres=gpu:1
#SBATCH --time=22:00:00
###------------------------------------------------------------------------------------------------------------------------------
### Run the project in the work directory of the cluster (configure based on need!)
### RWTH File System : https://help.itc.rwth-aachen.de/en/service/rhr4fjjutttf/article/da307ec2c60940b29bd42ac483fc3ea7/
cd $HPCWORK
cd codebud/fine-tuning
###------------------------------------------------------------------------------------------------------------------------------
### JOB SCRIPT RUN
module load GCCcore/.13.2.0
module load Python/3.11.5
module load CUDA
source ../../venvs/codebud/bin/activate
echo $VIRTUAL_ENV
python --version
python main_llama_fine_tuning.py
module unload CUDA
module unload Python/3.11.5
deactivate
echo "Script ran successfully"
from datasets import load_dataset, Dataset
import re
from unsloth import FastLanguageModel
def data_generator(dataset):
    for row in dataset:
        yield row
max_seq_length = 32768 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
model, tokenizer = FastLanguageModel.from_pretrained(
#model_name = "atharva2721/llama_finetuned_model",
model_name = "unsloth/Meta-Llama-3.1-8B-Instruct",
max_seq_length = max_seq_length,
dtype = dtype,
load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
reference_dataset = load_dataset("atharva2721/llama_inference_output", split="train", trust_remote_code=True)
inference_output = []
code_no = 0
inferred_no = 0
failed_no = 0
for example in reference_dataset:
    code_no += 1
    content = f'''
Refine the C# code enclosed within tags [C#] and [/C#].
Provide the refined code enclosed within tags [refined_C#] and [/refined_C#].
The summary of changes must be enclosed within tags [code_changes] and [/code_changes].
[C#]
{example["code"]}
[/C#]
'''
    messages = [
        {"role": "user", "content": content},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize = True,
        add_generation_prompt = True, # Must add for generation
        return_tensors = "pt",
    ).to("cuda")
    should_retry = True
    retry_no = 0
    while should_retry:
        print(f'Trying {code_no} for {retry_no} time', flush=True)
        output_tensor = model.generate(
            input_ids = inputs,
            max_length = max_seq_length,
            temperature = 0.6,
            repetition_penalty = 1.1
        )
        decoded = tokenizer.batch_decode(output_tensor)
        output = ""
        for text in decoded:
            output += text
        output = output.split('<|start_header_id|>assistant<|end_header_id|>')
        if len(output) == 2:
            output = output[1]
            code_pattern = r'\[refined_C#\](.*?)\[/refined_C#\]'
            summary_pattern = r'\[code_changes\](.*?)\[/code_changes\]'
            code_matches = re.search(code_pattern, output, re.DOTALL)
            summary_matches = re.search(summary_pattern, output, re.DOTALL)
            if code_matches and summary_matches:
                refined_code = code_matches.group(1)
                summary = summary_matches.group(1)
                inference_output.append({'code': example["code"], 'base inference': refined_code, 'base summary': summary, 'finetuned inference': example["finetuned inference"], 'finetuned summary': example["finetuned summary"], 'reference inference': example["reference inference"], 'reference summary': example["reference summary"]})
                print(f'Code no. {code_no} refined successfully', flush=True)
                should_retry = False
                inferred_no += 1
        if should_retry and retry_no == 2:
            # Give up after three attempts and record the final (unparseable) output.
            should_retry = False
            print(f'Failed to refine code at {code_no}. Final try output: \n [failed_output]{output}[/failed_output]', flush=True)
            failed_no += 1
        retry_no += 1
new_dataset = Dataset.from_generator(data_generator, gen_kwargs={"dataset": inference_output})
new_dataset.push_to_hub('llama_inference_output_complete')
print(f'Created and pushed total of {inferred_no} examples from total of {code_no} codes. Total failed inferences are {failed_no}', flush=True)
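# Hedged follow-up check (assumes the push above succeeded and landed in the same
# atharva2721 namespace as the datasets loaded earlier):
#
# from datasets import load_dataset
# check = load_dataset("atharva2721/llama_inference_output_complete", split = "train")
# print(len(check), check.column_names)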
from unsloth import FastLanguageModel
max_seq_length = 32768 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
model, tokenizer = FastLanguageModel.from_pretrained(
#model_name = "atharva2721/llama_finetuned_model",
model_name = "unsloth/Meta-Llama-3.1-8B-Instruct",
max_seq_length = max_seq_length,
dtype = dtype,
load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
code1 = """
package com.thealgorithms.recursion;
// program to find power set of a string
import java.util.ArrayList;
import java.util.List;
public final class genSubsets {
private genSubsets() {
throw new UnsupportedOperationException("Utility class");
}
public static List<String> subsetRecursion(String str) {
return doRecurr("", str);
}
private static List<String> doRecurr(String p, String up) {
if (up.isEmpty()) {
List<String> list = new ArrayList<>();
list.add(p);
return list;
}
// Taking the character
char ch = up.charAt(0);
// Adding the character in the recursion
List<String> left = doRecurr(p + ch, up.substring(1));
// Not adding the character in the recursion
List<String> right = doRecurr(p, up.substring(1));
left.addAll(right);
return left;
}
}
"""
code2 = """
using System;
using System.Collections;
namespace hub
{
public class logicproxy
{
public logicproxy(juggle.Ichannel ch)
{
_hub_call_logic = new caller.hub_call_logic(ch);
}
public void reg_logic_sucess_and_notify_hub_nominate()
{
_hub_call_logic.reg_logic_sucess_and_notify_hub_nominate(hub.name);
}
public void call_logic(String module_name, String func_name, params object[] argvs)
{
ArrayList _argvs = new ArrayList();
foreach (var o in argvs)
{
_argvs.Add(o);
}
_hub_call_logic.hub_call_logic_mothed(module_name, func_name, _argvs);
}
private caller.hub_call_logic _hub_call_logic;
}
}
"""
code3 = """
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Max_Number
{
class Program
{
static void Main(string[] args)
{
int number = int.Parse(Console.ReadLine());
double max = double.Parse(Console.ReadLine());
for (int i = 1; i < number; i++)
{
double d = double.Parse(Console.ReadLine());
if (d > max)
max = d;
}
Console.WriteLine(max);
}
}
}
"""
code4 = """
using System.Collections.Generic;
using System.Linq;
namespace Mirage.Urbanization.Simulation
{
public class CityCategoryDefinition
{
public CityCategoryDefinition(string name, int minimumPopulation)
{
Name = name;
MinimumPopulation = minimumPopulation;
}
public string Name { get; }
public int MinimumPopulation { get; }
public static CityCategoryDefinition GetForPopulation(int population)
{
return Definitions
.Where(x => x.MinimumPopulation <= population)
.OrderByDescending(x => x.MinimumPopulation)
.First();
}
public static CityCategoryDefinition Village = new CityCategoryDefinition("Village", 0);
private static readonly IReadOnlyCollection<CityCategoryDefinition> Definitions = new[]
{
Village,
new CityCategoryDefinition("Town", 2000),
new CityCategoryDefinition("City", 10000),
new CityCategoryDefinition("Capital", 50000),
new CityCategoryDefinition("Metropolis", 100000)
}.ToList();
}
}
"""
code5 = """
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Drawing.Imaging;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace TagUIWordAddIn
{
public partial class SnapshotBar : Form
{
public SnapshotBar()
{
InitializeComponent();
}
private void button1_Click(object sender, EventArgs e)
{
this.Hide();
Form1 f1 = new Form1();
if (checkBoxDelay.Checked)
{
System.Threading.Thread.Sleep(5000);
Form2 f2 = new Form2();
f1.Owner = f2;
f2.Show();
f1.Show();
f1.Closed += (s, args) =>
{
this.Close();
f2.Close();
};
}
else
{
f1.Show();
f1.Closed += (s, args) =>
{
this.Close();
};
}
f1.Closed += (s, args) =>
{
this.Close();
};
}
}
}
"""
codes = [code1]
for code in codes:
    print("******************Start of Generation******************")
    content = f'''
Refine the java code enclosed within tags [java] and [/java].
Provide the refined code enclosed within tags [refined_java] and [/refined_java] and summary of changes enclosed within tags [code_changes] and [/code_changes].
[java]
{code}
[/java]
'''
    messages = [
        {"role": "user", "content": content},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize = True,
        add_generation_prompt = True, # Must add for generation
        return_tensors = "pt",
    ).to("cuda")
    from transformers import TextStreamer
    text_streamer = TextStreamer(tokenizer, skip_prompt = True)
    _ = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 10000, temperature = 0.6)
    print("******************End of Generation******************")
from datasets import load_dataset, Dataset
import re
from unsloth import FastLanguageModel
def data_generator(dataset):
    for row in dataset:
        yield row
max_seq_length = 32768
dtype = None
load_in_4bit = True
model, tokenizer = FastLanguageModel.from_pretrained(
#model_name = "atharva2721/qwen_finetuned_model",
model_name = "unsloth/Qwen2.5-Coder-7B-Instruct",
max_seq_length = max_seq_length,
dtype = dtype,
load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model)
reference_dataset = load_dataset("atharva2721/qwen_inference_output", split="train", trust_remote_code=True)
inference_output = []
code_no = 0
inferred_no = 0
failed_no = 0
for example in reference_dataset:
    code_no += 1
    content = f'''
Refine the C# code enclosed within tags [C#] and [/C#].
Provide the refined code enclosed within tags [refined_C#] and [/refined_C#].
The summary of changes must be enclosed within tags [code_changes] and [/code_changes].
[C#]
{example["code"]}
[/C#]
'''
    messages = [
        {"role": "user", "content": content},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize = True,
        add_generation_prompt = True, # Must add for generation
        return_tensors = "pt",
    ).to("cuda")
    should_retry = True
    retry_no = 0
    while should_retry:
        output_tensor = model.generate(
            input_ids = inputs,
            max_length = max_seq_length,
            temperature = 0.7
        )
        decoded = tokenizer.batch_decode(output_tensor)
        output = ""
        for text in decoded:
            output += text
        output = output.split('<|im_start|>assistant')
        if len(output) == 2:
            output = output[1]
            code_pattern = r'\[refined_C#\](.*?)\[/refined_C#\]'
            summary_pattern = r'\[code_changes\](.*?)\[/code_changes\]'
            code_matches = re.search(code_pattern, output, re.DOTALL)
            summary_matches = re.search(summary_pattern, output, re.DOTALL)
            if code_matches and summary_matches:
                refined_code = code_matches.group(1)
                summary = summary_matches.group(1)
                inference_output.append({'code': example["code"], 'base inference': refined_code, 'base summary': summary, 'finetuned inference': example["finetuned inference"], 'finetuned summary': example["finetuned summary"], 'reference inference': example["reference inference"], 'reference summary': example["reference summary"]})
                print(f'Code no. {code_no} refined successfully', flush=True)
                should_retry = False
                inferred_no += 1
        if should_retry and retry_no == 2:
            # Give up after three attempts and record the final (unparseable) output.
            should_retry = False
            print(f'Failed to refine code at {code_no}. Final try output: \n [failed_output]{output}[/failed_output]', flush=True)
            failed_no += 1
        retry_no += 1
new_dataset = Dataset.from_generator(data_generator, gen_kwargs={"dataset": inference_output})
new_dataset.push_to_hub('qwen_inference_output_complete')
print(f'Created and pushed total of {inferred_no} examples from total of {code_no} codes. Total failed inferences are {failed_no}', flush=True)
from unsloth import FastLanguageModel
max_seq_length = 32768 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
model, tokenizer = FastLanguageModel.from_pretrained(
#model_name = "atharva2721/qwen_finetuned_model",
model_name = "unsloth/Qwen2.5-Coder-7B-Instruct",
max_seq_length = max_seq_length,
dtype = dtype,
load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
code1 = """
package com.thealgorithms.recursion;
// program to find power set of a string
import java.util.ArrayList;
import java.util.List;
public final class genSubsets {
private genSubsets() {
throw new UnsupportedOperationException("Utility class");
}
public static List<String> subsetRecursion(String str) {
return doRecurr("", str);
}
private static List<String> doRecurr(String p, String up) {
if (up.isEmpty()) {
List<String> list = new ArrayList<>();
list.add(p);
return list;
}
// Taking the character
char ch = up.charAt(0);
// Adding the character in the recursion
List<String> left = doRecurr(p + ch, up.substring(1));
// Not adding the character in the recursion
List<String> right = doRecurr(p, up.substring(1));
left.addAll(right);
return left;
}
}
"""
code2 = """
using System;
using System.Collections;
namespace hub
{
public class logicproxy
{
public logicproxy(juggle.Ichannel ch)
{
_hub_call_logic = new caller.hub_call_logic(ch);
}
public void reg_logic_sucess_and_notify_hub_nominate()
{
_hub_call_logic.reg_logic_sucess_and_notify_hub_nominate(hub.name);
}
public void call_logic(String module_name, String func_name, params object[] argvs)
{
ArrayList _argvs = new ArrayList();
foreach (var o in argvs)
{
_argvs.Add(o);
}
_hub_call_logic.hub_call_logic_mothed(module_name, func_name, _argvs);
}
private caller.hub_call_logic _hub_call_logic;
}
}
"""
code3 = """
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Max_Number
{
class Program
{
static void Main(string[] args)
{
int number = int.Parse(Console.ReadLine());
double max = double.Parse(Console.ReadLine());
for (int i = 1; i < number; i++)
{
double d = double.Parse(Console.ReadLine());
if (d > max)
max = d;
}
Console.WriteLine(max);
}
}
}
"""
code4 = """
using System.Collections.Generic;
using System.Linq;
namespace Mirage.Urbanization.Simulation
{
public class CityCategoryDefinition
{
public CityCategoryDefinition(string name, int minimumPopulation)
{
Name = name;
MinimumPopulation = minimumPopulation;
}
public string Name { get; }
public int MinimumPopulation { get; }
public static CityCategoryDefinition GetForPopulation(int population)
{
return Definitions
.Where(x => x.MinimumPopulation <= population)
.OrderByDescending(x => x.MinimumPopulation)
.First();
}
public static CityCategoryDefinition Village = new CityCategoryDefinition("Village", 0);
private static readonly IReadOnlyCollection<CityCategoryDefinition> Definitions = new[]
{
Village,
new CityCategoryDefinition("Town", 2000),
new CityCategoryDefinition("City", 10000),
new CityCategoryDefinition("Capital", 50000),
new CityCategoryDefinition("Metropolis", 100000)
}.ToList();
}
}
"""
code5 = """
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Drawing.Imaging;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace TagUIWordAddIn
{
public partial class SnapshotBar : Form
{
public SnapshotBar()
{
InitializeComponent();
}
private void button1_Click(object sender, EventArgs e)
{
this.Hide();
Form1 f1 = new Form1();
if (checkBoxDelay.Checked)
{
System.Threading.Thread.Sleep(5000);
Form2 f2 = new Form2();
f1.Owner = f2;
f2.Show();
f1.Show();
f1.Closed += (s, args) =>
{
this.Close();
f2.Close();
};
}
else
{
f1.Show();
f1.Closed += (s, args) =>
{
this.Close();
};
}
f1.Closed += (s, args) =>
{
this.Close();
};
}
}
}
"""
codes = [code1]
for code in codes:
    print("******************Start of Generation******************")
    content = f'''
Refine the java code enclosed within tags [java] and [/java].
Provide the refined code enclosed within tags [refined_java] and [/refined_java] and summary of changes enclosed within tags [code_changes] and [/code_changes].
[java]
{code}
[/java]
'''
    messages = [
        {"role": "user", "content": content},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize = True,
        add_generation_prompt = True, # Must add for generation
        return_tensors = "pt",
    ).to("cuda")
    from transformers import TextStreamer
    text_streamer = TextStreamer(tokenizer, skip_prompt = True)
    _ = model.generate(
        input_ids = inputs,
        streamer = text_streamer,
        max_new_tokens = 10000,
        temperature = 0.7
    )
    print("******************End of Generation******************")
#!/usr/bin/zsh
### Add basic configuration for job
#SBATCH --account=rwth1776
#SBATCH --job-name=llama_inference
#SBATCH --output=logs/llama_inference_%j.log
#SBATCH --error=logs/llama_inference_error_%j.log
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=3
#SBATCH --gres=gpu:1
#SBATCH --time=20:00:00
###------------------------------------------------------------------------------------------------------------------------------
### Run the project in the work directory of the cluster (configure based on need!)
### RWTH File System : https://help.itc.rwth-aachen.de/en/service/rhr4fjjutttf/article/da307ec2c60940b29bd42ac483fc3ea7/
cd $HPCWORK
cd codebud/inference
###------------------------------------------------------------------------------------------------------------------------------
### JOB SCRIPT RUN
module load GCCcore/.13.2.0
module load Python/3.11.5
module load CUDA
source ../../venvs/codebud/bin/activate
echo $VIRTUAL_ENV
python --version
python llama_finetuned_inference.py
module unload CUDA
module unload Python/3.11.5
deactivate
echo "Script ran successfully"
#!/usr/bin/zsh
### Add basic configuration for job
#SBATCH --account=rwth1776
#SBATCH --job-name=qwen_inference
#SBATCH --output=logs/qwen_inference_%j.log
#SBATCH --error=logs/qwen_inference_error_%j.log
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=3
#SBATCH --gres=gpu:1
#SBATCH --time=05:00:00
###------------------------------------------------------------------------------------------------------------------------------
### Run the project in the work directory of the cluster (configure based on need!)
### RWTH File System : https://help.itc.rwth-aachen.de/en/service/rhr4fjjutttf/article/da307ec2c60940b29bd42ac483fc3ea7/
cd $HPCWORK
cd codebud/inference
###------------------------------------------------------------------------------------------------------------------------------
### JOB SCRIPT RUN
module load GCCcore/.13.2.0
module load Python/3.11.5
module load CUDA
source ../../venvs/codebud/bin/activate
echo $VIRTUAL_ENV
python --version
python qwen_finetuned_inference.py
module unload CUDA
module unload Python/3.11.5
deactivate
echo "Script ran successfully"