from unsloth import FastLanguageModel
from datasets import load_dataset, Dataset
import re

def data_generator(dataset):
    # Yield rows one at a time so Dataset.from_generator can rebuild a dataset.
    for row in dataset:
        yield row
max_seq_length = 32768 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "atharva2721/llama_finetuned_model",
    # model_name = "unsloth/Meta-Llama-3.1-8B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
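
# Optional sanity check (a sketch, not from the original script): render one
# templated prompt so the assistant marker used when parsing generations below
# can be verified against this tokenizer's actual chat template.
probe = tokenizer.apply_chat_template(
    [{"role": "user", "content": "ping"}],
    tokenize = False,
    add_generation_prompt = True,
)
print(probe, flush=True)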
reference_dataset = load_dataset("atharva2721/qwen_inference_output", split="train", trust_remote_code=True)
inference_output = []  # accumulated comparison rows
code_no = 0            # examples seen
inferred_no = 0        # successful refinements
failed_no = 0          # examples that failed all retries
for example in reference_dataset:
    code_no += 1
    content = f'''Refine the C# code enclosed within tags [C#] and [/C#].
Provide the refined code enclosed within tags [refined_C#] and [/refined_C#].
The summary of changes must be enclosed within tags [code_changes] and [/code_changes].
[C#]
{example["code"]}
[/C#]
'''
    messages = [
        {"role": "user", "content": content},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize = True,
        add_generation_prompt = True,  # Must add for generation
        return_tensors = "pt",
    ).to("cuda")
    should_retry = True
    retry_no = 0
    while should_retry:
        output_tensor = model.generate(
            input_ids = inputs,
            max_length = max_seq_length,
            do_sample = True,   # sampling must be enabled for temperature to apply
            temperature = 0.6,
        )
        decoded = tokenizer.batch_decode(output_tensor)
        output = "".join(decoded)
        print(output)
        # Split off the assistant turn. Note: this marker must match the
        # tokenizer's chat template ('<|im_start|>assistant' is the ChatML/Qwen
        # convention; stock Llama 3.1 templates instead emit
        # '<|start_header_id|>assistant<|end_header_id|>').
        parts = output.split('<|im_start|>assistant')
        if len(parts) == 2:
            output = parts[1]
            code_pattern = r'\[refined_C#\](.*?)\[/refined_C#\]'
            summary_pattern = r'\[code_changes\](.*?)\[/code_changes\]'
            code_matches = re.search(code_pattern, output, re.DOTALL)
            summary_matches = re.search(summary_pattern, output, re.DOTALL)
            if code_matches and summary_matches:
                refined_code = code_matches.group(1)
                summary = summary_matches.group(1)
                inference_output.append({
                    'code': example["code"],
                    'base inference': refined_code,
                    'base summary': summary,
                    'finetuned inference': example["finetuned inference"],
                    'finetuned summary': example["finetuned summary"],
                    'reference inference': example["reference inference"],
                    'reference summary': example["reference summary"],
                })
                print(f'Code no. {code_no} refined successfully', flush=True)
                should_retry = False
                inferred_no += 1
        if retry_no == 2 and should_retry:
            should_retry = False
            print(f'Failed to refine code at {code_no}. Final try output: \n[failed_output]{output}[/failed_output]', flush=True)
            failed_no += 1
        retry_no += 1
    if code_no == 5:
        break  # debug cap: process only the first five examples
# new_dataset = Dataset.from_generator(data_generator, gen_kwargs={"dataset": inference_output})
# new_dataset.push_to_hub('llama_inference_output')
# print(f'Created and pushed total of {inferred_no} examples from total of {code_no} codes. Total failed inferences are {failed_no}', flush=True)
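
# Fallback sketch (assumption: a local copy is useful while the Hub push above
# stays commented out): persist the collected rows to JSON on disk.
import json
with open("llama_inference_output.json", "w", encoding="utf-8") as f:
    json.dump(inference_output, f, indent=2, ensure_ascii=False)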