Commit df560f8e authored by Nishtha Jain's avatar Nishtha Jain
Browse files

avg odds difference metric added

parent 0c616033
......@@ -2,13 +2,15 @@ from config import CLASS_GROUP, MASKED, TITLE
from model import model_prediction
from preprocessing import embedding_transform
import numpy as np
import matplotlib.pyplot as plt
# res.class.gender.tp|fp
def tpr_gender_gap(test, model, embedding, class_group, sampling, test_size, masking):
print("processing evaluation.tpr_gender_gap ...")
tpr = []
def tpr_fpr(test, model, embedding, class_group, sampling, test_size, masking):
print("processing evaluation.tpr_fpr ...")
res = {}
for c in CLASS_GROUP[class_group]:
res[c] = {}
for gender in ['M','F']:
mini_set = test.loc[(test['gender']==gender) & (test[TITLE]==c)]
......@@ -17,8 +19,97 @@ def tpr_gender_gap(test, model, embedding, class_group, sampling, test_size, mas
pred, acc = model_prediction(X_test=X_test, Y_test=mini_set[TITLE], model=model, embedding=embedding, class_group=class_group, sampling=sampling, test_size=test_size, masking=masking)
tp = np.sum(pred == mini_set[TITLE])
# print({'class':c, 'gender':gender, 'tp':tp, 'count':len(pred), 'tpr':float(tp)/len(pred)})
tpr.append({'class':c, 'gender':gender, 'tp':tp, 'count':len(pred), 'tpr':float(tp)/len(pred)})
return(tpr)
fp = np.sum(pred != mini_set[TITLE])
res[c][gender] = {
'count':len(pred),
'tp':tp,
'tpr':float(tp)/len(pred),
'fp':fp, 'count':len(pred),
'fpr':float(fp)/len(pred)}
scores = { 'count_males' = [],
'count_females' = [],
'tpr_males' = [],
'tpr_females' = []
}
for c in res:
scores['count_males'].append(res[c]['M']['count'])
scores['count_females'].append(res[c]['F']['count'])
scores['tpr_males'].append(res[c]['M']['tpr'])
scores['tpr_females'].append(res[c]['F']['tpr'])
scores['tp_males'].append(res[c]['M']['tp'])
scores['tp_females'].append(res[c]['F']['tp'])
scores['fpr_males'].append(res[c]['M']['fpr'])
scores['fpr_females'].append(res[c]['F']['fpr'])
scores['fp_males'].append(res[c]['M']['fp'])
scores['fp_females'].append(res[c]['F']['fp'])
return(scores)
def tpr_gender_gap(scores, test, model, embedding, class_group, sampling, test_size, masking):
    """Plot the per-class TPR gender gap against each gender's share of the class.

    Two scatter plots are shown, one from the male and one from the female
    point of view. Each point is one class, annotated with the class name
    taken from ``CLASS_GROUP[class_group]``.

    Args:
        scores: dict of per-class lists as returned by ``tpr_fpr`` (the keys
            ``count_males``, ``count_females``, ``tpr_males`` and
            ``tpr_females`` are read here). If falsy (e.g. ``None``), it is
            computed by calling ``tpr_fpr`` with the remaining arguments.
        test, model, embedding, sampling, test_size, masking: forwarded
            unchanged to ``tpr_fpr`` when ``scores`` has to be computed.
        class_group: key into ``CLASS_GROUP``; selects the class labels used
            to annotate the points (and is forwarded to ``tpr_fpr``).

    Returns:
        Tuple ``(scores, x_males, y_males, x_females, y_females)`` where
        ``x_*`` is the fraction of the class made up by that gender and
        ``y_*`` is that gender's TPR minus the other gender's TPR.
    """
    print("processing evaluation.tpr_gender_gap ...")
    if not scores:
        scores = tpr_fpr(test, model, embedding, class_group, sampling, test_size, masking)

    count_m = scores['count_males']
    count_f = scores['count_females']
    tpr_m = scores['tpr_males']
    tpr_f = scores['tpr_females']
    labels = CLASS_GROUP[class_group]

    # Derive the number of classes from the data instead of assuming 7, so the
    # function works for any class group.
    n_classes = len(count_m)
    # float() keeps this a true division even under integer-division
    # semantics, matching the float(...) divisions used elsewhere in the file.
    totals = [float(count_m[i] + count_f[i]) for i in range(n_classes)]

    x_males = [count_m[i] / totals[i] for i in range(n_classes)]
    y_males = [tpr_m[i] - tpr_f[i] for i in range(n_classes)]
    plt.scatter(x_males, y_males)
    plt.xlabel("% Male")
    plt.ylabel("TPR Gender Gap Male")
    # zip stops at the shortest input, so a label list that is longer than the
    # score lists cannot index past the computed points.
    for txt, x, y in zip(labels, x_males, y_males):
        plt.annotate(txt, (x, y))
    plt.show()

    x_females = [count_f[i] / totals[i] for i in range(n_classes)]
    y_females = [tpr_f[i] - tpr_m[i] for i in range(n_classes)]
    plt.scatter(x_females, y_females)
    plt.xlabel("% Female")
    plt.ylabel("TPR Gender Gap Female")
    for txt, x, y in zip(labels, x_females, y_females):
        plt.annotate(txt, (x, y))
    plt.show()

    return (scores, x_males, y_males, x_females, y_females)
def average_odds_difference(scores, test, model, embedding, class_group, sampling, test_size, masking):
    """Plot the per-class average odds difference against each gender's share.

    For every class the value plotted is the mean of the FPR difference and
    the TPR difference between the two genders, using the ``fpr_*`` and
    ``tpr_*`` lists in ``scores``. Two scatter plots are shown, one from the
    male and one from the female point of view, with each point annotated by
    the class name taken from ``CLASS_GROUP[class_group]``.

    Args:
        scores: dict of per-class lists as returned by ``tpr_fpr`` (the keys
            ``count_males``, ``count_females``, ``tpr_males``,
            ``tpr_females``, ``fpr_males`` and ``fpr_females`` are read
            here). If falsy (e.g. ``None``), it is computed by calling
            ``tpr_fpr`` with the remaining arguments.
        test, model, embedding, sampling, test_size, masking: forwarded
            unchanged to ``tpr_fpr`` when ``scores`` has to be computed.
        class_group: key into ``CLASS_GROUP``; selects the class labels used
            to annotate the points (and is forwarded to ``tpr_fpr``).

    Returns:
        Tuple ``(scores, x_males, y_males, x_females, y_females)`` where
        ``x_*`` is the fraction of the class made up by that gender and
        ``y_*`` is ``((fpr_own - fpr_other) + (tpr_own - tpr_other)) / 2``.
    """
    print("processing evaluation.average_odds_difference ...")
    if not scores:
        scores = tpr_fpr(test, model, embedding, class_group, sampling, test_size, masking)

    count_m = scores['count_males']
    count_f = scores['count_females']
    tpr_m = scores['tpr_males']
    tpr_f = scores['tpr_females']
    fpr_m = scores['fpr_males']
    fpr_f = scores['fpr_females']
    labels = CLASS_GROUP[class_group]

    # Derive the number of classes from the data instead of assuming 7, so the
    # function works for any class group.
    n_classes = len(count_m)
    # float() keeps this a true division even under integer-division
    # semantics, matching the float(...) divisions used elsewhere in the file.
    totals = [float(count_m[i] + count_f[i]) for i in range(n_classes)]

    x_males = [count_m[i] / totals[i] for i in range(n_classes)]
    y_males = [(fpr_m[i] - fpr_f[i] + tpr_m[i] - tpr_f[i]) / 2 for i in range(n_classes)]
    plt.scatter(x_males, y_males)
    plt.xlabel("% Male")
    plt.ylabel("Avg odds difference Male")
    # zip stops at the shortest input, so a label list that is longer than the
    # score lists cannot index past the computed points.
    for txt, x, y in zip(labels, x_males, y_males):
        plt.annotate(txt, (x, y))
    plt.show()

    x_females = [count_f[i] / totals[i] for i in range(n_classes)]
    y_females = [(fpr_f[i] - fpr_m[i] + tpr_f[i] - tpr_m[i]) / 2 for i in range(n_classes)]
    plt.scatter(x_females, y_females)
    plt.xlabel("% Female")
    # The female plot was previously labelled "Male" (copy-paste slip).
    plt.ylabel("Avg odds difference Female")
    for txt, x, y in zip(labels, x_females, y_females):
        plt.annotate(txt, (x, y))
    plt.show()

    return (scores, x_males, y_males, x_females, y_females)
......@@ -32,9 +32,9 @@ def main(load_data_from_saved, embedding_train, model_train, predict, evaluate,
# evaluation
if evaluate:
res = tpr_gender_gap(test_set, model, embedding, class_group, sampling, test_size, masking)
for item in res:
print(item)
scores,x_males,y_males,x_females,y_females = tpr_gender_gap(None,test_set, model, embedding, class_group, sampling, test_size, masking)
scores,x_males,y_males,x_females,y_females = average_odds_difference(scores,test_set, model, embedding, class_group, sampling, test_size, masking)
'''
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment