Skip to content
Snippets Groups Projects
Commit a2073f1e authored by Atharva Jadhav's avatar Atharva Jadhav
Browse files

Get better visuals for Prometheus Eval

parent ab7f20a0
No related branches found
No related tags found
No related merge requests found
No preview for this file type
File added
......@@ -3,10 +3,10 @@ import matplotlib.pyplot as plt
import glob
# Adjust the file pattern/path as needed
csv_files = sorted(glob.glob("eval_reports/qwen-base-responses-evaluation-pass*.csv"))[:10]
csv_files = sorted(glob.glob("eval_reports/qwen-finetuned-responses-evaluation-pass*.csv"))[:10]
print(csv_files)
# Create a subplot grid: 5 rows x 2 columns for 10 plots
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(10, 30))
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(30, 30))
axes = axes.flatten()
for i, csv_file in enumerate(csv_files):
......@@ -34,11 +34,12 @@ for i, csv_file in enumerate(csv_files):
ax = axes[i]
ax.bar(list(score_counts.keys()), list(score_counts.values()),
color='skyblue', edgecolor='black')
ax.set_title(f"{csv_file}\nMean: {mean_score:.2f}, Mode: {mode_str}")
ax.set_xlabel("Score")
ax.set_ylabel("Frequency")
ax.tick_params(axis='x', rotation=45)
ax.set_title(f"Mean: {mean_score:.2f}, Mode: {mode_str}", fontsize=40)
ax.set_xlabel("Score", fontsize=34)
ax.set_ylabel("Frequency", fontsize=34)
ax.tick_params(axis='x', rotation=45, labelsize=34)
ax.tick_params(axis='y', labelsize=34)
plt.tight_layout()
plt.savefig("Prometheus Evaluation Results - Base Model - spaced.pdf")
plt.savefig("Prometheus Evaluation Results - Finetuned Model - spaced.pdf")
plt.show()
......@@ -29,7 +29,7 @@ for example in reference_dataset:
code_no += 1
content = f'''
Refine the C# code enclosed within tags [C#] and [/C#].
Provide the refined code enclosed within tags [refined_C#] and [/refined_C#]
Provide the refined code must be enclosed within tags [refined_C#] and [/refined_C#]
The summary of changes must be enclosed within tags [code_changes] and [/code_changes].
[C#]
......@@ -65,7 +65,6 @@ for example in reference_dataset:
output = output.split('<|start_header_id|>assistant<|end_header_id|>')
if len(output) == 2:
output = output[1]
print(output)
code_pattern = r'\[refined_C#\](.*?)\[/refined_C#\]'
summary_pattern = r'\[code_changes\](.*?)\[/code_changes\]'
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment