Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Sparsh Jauhari
Bias in Bio Lab CSSH
Commits
0399f688
Commit
0399f688
authored
Jul 12, 2021
by
Nishtha Jain
Browse files
ipnb cleaned
parent
65a259ba
Changes
2
Hide whitespace changes
Inline
Side-by-side
bios_bias.ipynb
View file @
0399f688
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
# pip installations
# pip installations
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
#!pip install pymongo
#!pip install pymongo
#!pip install dnspython==2.0.0
#!pip install dnspython==2.0.0
!
pip
install
gensim
!
pip
install
gensim
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
## Config
## Config
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
# Occupation titles (values of the `title` field) analysed in this notebook.
CLASSES = [
    'physician',
    'nurse',
    'psychologist',
    'dentist',
    'surgeon',
    'dietitian',
    'chiropractor',
]
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
## BIOS.pkl pickle insight
## BIOS.pkl pickle insight
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
import pickle

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load BIOS.pkl when it comes from a trusted source.
with open("datasets/BIOS.pkl", "rb") as file:
    data = pickle.load(file)

# Collect the distinct values of the categorical fields. These sets are
# printed below, so they have to be built here (they were previously
# commented out, which made the prints fail with NameError).
title = {x['title'] for x in data}
raw_title = {x['raw_title'] for x in data}
gender = {x['gender'] for x in data}

print("number data points: ", len(data))
print("structure of a data point [0] :\n", data[0])
print("types of gender:", gender)
print("types of title:", len(title))
print(title)
print("types of raw_title:", len(raw_title))
print(raw_title)
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
## Mongo Connection using pymongo
## Mongo Connection using pymongo
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
import os

import pymongo

# The connection string embeds the database user and password, so it must not
# be committed with the notebook. It is read from the MONGODB_URI environment
# variable instead.
# NOTE(review): the password that was previously hard-coded here is in the
# repository history and should be rotated.
MONGODB_URI = os.environ.get("MONGODB_URI")
if not MONGODB_URI:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to the MongoDB connection string."
    )

client = pymongo.MongoClient(MONGODB_URI)
# Collection holding the biographies queried by the cells below.
collection = client['biodb']['allbio']
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
# Query the distinct titles once and reuse the result for both prints,
# instead of issuing the same `distinct` query to the server twice.
distinct_titles = collection.distinct("title")
print(len(distinct_titles))
print(distinct_titles)
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
from
pandas
import
DataFrame
from
pandas
import
DataFrame
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
# Fetch every 'teacher' and 'professor' biography in a single query.
df = DataFrame(collection.find({'title': {'$in': ['teacher', 'professor']}}))

# Print the number of biographies per (title, gender) pair, males first:
# "Teacher male", "Professor male", "Teacher female", "Professor female".
for gender_code, gender_label in (('M', 'male'), ('F', 'female')):
    for title_value, title_label in (('teacher', 'Teacher'), ('professor', 'Professor')):
        matches = df.loc[(df['title'] == title_value) & (df['gender'] == gender_code)]
        print(f"{title_label} {gender_label}", len(matches))
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
# Fetch every 'surgeon' and 'nurse' biography in a single query.
df = DataFrame(collection.find({'title': {'$in': ['surgeon', 'nurse']}}))

# Print the number of biographies per (title, gender) pair, males first:
# "surgeon male", "nurse male", "surgeon female", "nurse female".
for gender_code, gender_label in (('M', 'male'), ('F', 'female')):
    for title_value in ('surgeon', 'nurse'):
        matches = df.loc[(df['title'] == title_value) & (df['gender'] == gender_code)]
        print(f"{title_value} {gender_label}", len(matches))
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
# Load every biography whose title is one of CLASSES.
# `$in` matches the same documents as an `$or` of per-title equality clauses,
# and unlike `$or` it also accepts an empty list.
df = DataFrame(collection.find({'title': {'$in': list(CLASSES)}}))
df
```
```
%%%% Output: execute_result
%%%% Output: execute_result
_id \
_id \
0 607b0639c53ee5775fe7758b
0 607b0639c53ee5775fe7758b
1 607b0639c53ee5775fe77590
1 607b0639c53ee5775fe77590
2 607b0639c53ee5775fe7759b
2 607b0639c53ee5775fe7759b
3 607b063ac53ee5775fe775b0
3 607b063ac53ee5775fe775b0
4 607b063ac53ee5775fe775b3
4 607b063ac53ee5775fe775b3
... ...
... ...
112458 607b1d5dc53ee5775fed7952
112458 607b1d5dc53ee5775fed7952
112459 607b1d5dc53ee5775fed795a
112459 607b1d5dc53ee5775fed795a
112460 607b1d5dc53ee5775fed795b
112460 607b1d5dc53ee5775fed795b
112461 607b1d5dc53ee5775fed7964
112461 607b1d5dc53ee5775fed7964
112462 607b1d5dc53ee5775fed7967
112462 607b1d5dc53ee5775fed7967
path \
path \
0 crawl-data/CC-MAIN-2016-44/segments/1476988720...
0 crawl-data/CC-MAIN-2016-44/segments/1476988720...
1 crawl-data/CC-MAIN-2014-41/segments/1410657132...
1 crawl-data/CC-MAIN-2014-41/segments/1410657132...
2 crawl-data/CC-MAIN-2013-20/segments/1368702127...
2 crawl-data/CC-MAIN-2013-20/segments/1368702127...
3 crawl-data/CC-MAIN-2014-41/segments/1410657120...
3 crawl-data/CC-MAIN-2014-41/segments/1410657120...
4 crawl-data/CC-MAIN-2013-20/segments/1368696381...
4 crawl-data/CC-MAIN-2013-20/segments/1368696381...
... ...
... ...
112458 crawl-data/CC-MAIN-2018-43/segments/1539583519...
112458 crawl-data/CC-MAIN-2018-43/segments/1539583519...
112459 crawl-data/CC-MAIN-2018-43/segments/1539583519...
112459 crawl-data/CC-MAIN-2018-43/segments/1539583519...
112460 crawl-data/CC-MAIN-2018-43/segments/1539583519...
112460 crawl-data/CC-MAIN-2018-43/segments/1539583519...
112461 crawl-data/CC-MAIN-2018-43/segments/1539583519...
112461 crawl-data/CC-MAIN-2018-43/segments/1539583519...
112462 crawl-data/CC-MAIN-2018-43/segments/1539583519...
112462 crawl-data/CC-MAIN-2018-43/segments/1539583519...
raw \
raw \
0 Edmund J. Bourne, PhD, is a psychologist in no...
0 Edmund J. Bourne, PhD, is a psychologist in no...
1 Abigail Mackey is a registered nurse. For more...
1 Abigail Mackey is a registered nurse. For more...
2 Dr. Constance Milbrath is a developmental psyc...
2 Dr. Constance Milbrath is a developmental psyc...
3 Dr. Andrew Gottlieb is a clinical psychologist...
3 Dr. Andrew Gottlieb is a clinical psychologist...
4 Milton Wolf is a physician practicing in Kansa...
4 Milton Wolf is a physician practicing in Kansa...
... ...
... ...
112458 Adrienne Lewis Adrienne is a registered nurse ...
112458 Adrienne Lewis Adrienne is a registered nurse ...
112459 Eric Haralson, PA-C is a physician assistant i...
112459 Eric Haralson, PA-C is a physician assistant i...
112460 Alice Sumo is a respected nurse in Liberia, wh...
112460 Alice Sumo is a respected nurse in Liberia, wh...
112461 Rachel Kelley Schulman, MS, PA-C is a board-ce...
112461 Rachel Kelley Schulman, MS, PA-C is a board-ce...
112462 Victor N. Hakim, MD is a practicing Orthopedic...
112462 Victor N. Hakim, MD is a practicing Orthopedic...
name raw_title gender start_pos \
name raw_title gender start_pos \
0 [Edmund, J, Bourne] psychologist M 136
0 [Edmund, J, Bourne] psychologist M 136
1 [Abigail, , Mackey] nurse F 37
1 [Abigail, , Mackey] nurse F 37
2 [Constance, , Milbrath] psychologist F 305
2 [Constance, , Milbrath] psychologist F 305
3 [Andrew, , Gottlieb] psychologist M 72
3 [Andrew, , Gottlieb] psychologist M 72
4 [Milton, , Wolf] physician M 107
4 [Milton, , Wolf] physician M 107
... ... ... ... ...
... ... ... ... ...
112458 [Adrienne, Lewis, Adrienne] nurse F 118
112458 [Adrienne, Lewis, Adrienne] nurse F 118
112459 [Eric, , Haralson] physician M 98
112459 [Eric, , Haralson] physician M 98
112460 [Alice, , Sumo] nurse F 98
112460 [Alice, , Sumo] nurse F 98
112461 [Rachel, Kelley, Schulman] physician F 74
112461 [Rachel, Kelley, Schulman] physician F 74
112462 [Victor, N, Hakim] Orthopedic Surgeon M 72
112462 [Victor, N, Hakim] Orthopedic Surgeon M 72
title URI \
title URI \
0 psychologist http://www.alibris.co.uk/search/books/author/E...
0 psychologist http://www.alibris.co.uk/search/books/author/E...
1 nurse http://observer-reporter.com/article/20130315/...
1 nurse http://observer-reporter.com/article/20130315/...
2 psychologist http://earlylearning.ubc.ca/people/
2 psychologist http://earlylearning.ubc.ca/people/
3 psychologist http://www.psychologylounge.com/tag/sexuality-2/
3 psychologist http://www.psychologylounge.com/tag/sexuality-2/
4 physician http://hotair.com/archives/2011/12/13/romney-i...
4 physician http://hotair.com/archives/2011/12/13/romney-i...
... ... ...
... ... ...
112458 nurse https://www.adansw.com.au/CPD/Courses/Geriatri...
112458 nurse https://www.adansw.com.au/CPD/Courses/Geriatri...
112459 physician https://www.healthgrades.com/providers/eric-ha...
112459 physician https://www.healthgrades.com/providers/eric-ha...
112460 nurse http://woman.ng/2018/10/women-love-midwife-ali...
112460 nurse http://woman.ng/2018/10/women-love-midwife-ali...
112461 physician https://lincolnparkaesthetics.com/ourstaff/
112461 physician https://lincolnparkaesthetics.com/ourstaff/
112462 surgeon https://www.sharecare.com/doctor/dr-victor-n-h...
112462 surgeon https://www.sharecare.com/doctor/dr-victor-n-h...
bio
bio
0 _ is author of several books, including the be...
0 _ is author of several books, including the be...
1 For more quips and tips, refer to _ blog, “The...
1 For more quips and tips, refer to _ blog, “The...
2 _ interests at HELP are in the ethno-cultural ...
2 _ interests at HELP are in the ethno-cultural ...
3 _ practice serves the greater Silicon Valley a...
3 _ practice serves the greater Silicon Valley a...
4 During the health care debates of 2010, Dr. _ ...
4 During the health care debates of 2010, Dr. _ ...
... ...
... ...
112458 _ has been successful in gaining two nationall...
112458 _ has been successful in gaining two nationall...
112459 _ graduated from Touro Center / College Of Ost...
112459 _ graduated from Touro Center / College Of Ost...
112460 In _ three-decade career, _ has seen two civil...
112460 In _ three-decade career, _ has seen two civil...
112461 _ has a Master of Science degree in Physician ...
112461 _ has a Master of Science degree in Physician ...
112462 _ completed a residency at Henry Ford Hospital...
112462 _ completed a residency at Henry Ford Hospital...
[112463 rows x 10 columns]
[112463 rows x 10 columns]
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
## TPR Graph mid ppt
## TPR Graph mid ppt
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
# Scores used for the mid-term presentation plots. All five lists are parallel:
# index i in every list refers to classes[i].
classes = [
    'physician',
    'nurse',
    'psychologist',
    'dentist',
    'surgeon',
    'dietitian',
    'chiropractor',
]

# TPR values per class for male and for female biographies.
tpr_males = [
    0.864,
    0.8128654970760234,
    0.8922631959508315,
    0.949435180204411,
    0.6972111553784861,
    0.6851851851851852,
    0.701058201058201,
]
tpr_females = [
    0.9056320400500626,
    0.8622613803230543,
    0.8844028899277518,
    0.9446064139941691,
    0.6285714285714286,
    0.8905608755129959,
    0.6625,
]

# Counts per class for male and for female biographies.
count_males = [4125, 342, 1383, 1859, 2259, 54, 378]
count_females = [3995, 3405, 2353, 1029, 420, 731, 160]
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
import matplotlib.pyplot as plt

# x: share of male biographies per class, as a fraction in [0, 1].
# y: TPR gap from the male point of view (male TPR minus female TPR).
# Iterating over the lists themselves avoids hard-coding the number of classes.
x = [males / (males + females) for males, females in zip(count_males, count_females)]
y = [tpr_m - tpr_f for tpr_m, tpr_f in zip(tpr_males, tpr_females)]

plt.scatter(x, y)
# NOTE(review): the axis label says "%" but x holds fractions, not percentages.
plt.xlabel("% Male")
plt.ylabel("TPR Gender Gap Male")

# Label every point with its class name.
for i, txt in enumerate(classes):
    plt.annotate(txt, (x[i], y[i]))

plt.show()
```
```
%%%% Output: display_data
%%%% Output: display_data


%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
# x: share of female biographies per class, as a fraction in [0, 1].
# y: TPR gap from the female point of view (female TPR minus male TPR).
# Iterating over the lists themselves avoids hard-coding the number of classes.
x = [females / (males + females) for males, females in zip(count_males, count_females)]
y = [tpr_f - tpr_m for tpr_f, tpr_m in zip(tpr_females, tpr_males)]

plt.scatter(x, y)
# NOTE(review): the axis label says "%" but x holds fractions, not percentages.
plt.xlabel("% Female")
plt.ylabel("TPR Gender Gap Female")

# Label every point with its class name.
for i, txt in enumerate(classes):
    plt.annotate(txt, (x[i], y[i]))

plt.show()
```
```
%%%% Output: display_data
%%%% Output: display_data


%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
import numpy as np

# Pair every class index with its count, then split back into positions (x)
# and heights (y). Enumerating the lists avoids hard-coding the class count.
m = [[i, count] for i, count in enumerate(count_males)]
f = [[i, count] for i, count in enumerate(count_females)]

x1, y1 = zip(*m)
x2, y2 = zip(*f)

plt.figure(figsize=(10, 5))

# Shift the two series by half a bar width in opposite directions so the male
# and female bars of one class sit side by side around the class tick.
plt.bar(np.array(x1) - 0.15, y1, width=0.3, label='males')
plt.bar(np.array(x2) + 0.15, y2, width=0.3, label='females')

# One tick per class, labelled with the class name.
plt.xticks(list(range(len(classes))), classes)
plt.legend()
plt.show()
```
```
%%%% Output: display_data
%%%% Output: display_data


%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
# Evaluations Final PPT
# Evaluations Final PPT
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
from
joblib
import
load
from
joblib
import
load
from
config
import
CLASS_GROUP
,
MASKED
,
EVALUATION_SCORES
from
config
import
CLASS_GROUP
,
MASKED
,
EVALUATION_SCORES
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
import
numpy
as
np
import
numpy
as
np
import
matplotlib.pyplot
as
plt
import
matplotlib.pyplot
as
plt
def tpr_gender_gap(scores, class_group='medical'):
    """Return the per-class gender share and TPR gap for both genders.

    Args:
        scores: Mapping with the keys 'count_males', 'count_females',
            'tpr_males' and 'tpr_females'; each value is indexed by integer
            class position.
        class_group: Key into CLASS_GROUP. Only the length of
            CLASS_GROUP[class_group] is used; it sets how many leading
            entries of each score sequence are read.

    Returns:
        A tuple (x_males, y_males, x_females, y_females) of lists:
        x_males[i] is count_males[i] / (count_males[i] + count_females[i]),
        y_males[i] is tpr_males[i] - tpr_females[i],
        x_females[i] is count_females[i] / (count_males[i] + count_females[i]),
        y_females[i] is tpr_females[i] - tpr_males[i].

    Raises:
        KeyError: If class_group or one of the score keys is missing.
        IndexError: If a score sequence is shorter than the class group.
        ZeroDivisionError: If both counts of a class are zero (for plain
            Python numbers).
    """
    # Hoisted so the class count and the lookups are evaluated once.
    positions = range(len(CLASS_GROUP[class_group]))
    count_m = scores['count_males']
    count_f = scores['count_females']
    tpr_m = scores['tpr_males']
    tpr_f = scores['tpr_females']

    # Total number of samples per class, shared by both gender shares.
    totals = [count_m[i] + count_f[i] for i in positions]

    x_males = [count_m[i] / totals[i] for i in positions]
    y_males = [tpr_m[i] - tpr_f[i] for i in positions]
    x_females = [count_f[i] / totals[i] for i in positions]
    y_females = [tpr_f[i] - tpr_m[i] for i in positions]
    return (x_males, y_males, x_females, y_females)
def average_odds_difference(scores, class_group='medical'):
    """Return the per-class gender share and average odds difference.

    Args:
        scores: Mapping with the keys 'count_males', 'count_females',
            'tpr_males', 'tpr_females', 'fpr_males' and 'fpr_females'; each
            value is indexed by integer class position.
        class_group: Key into CLASS_GROUP. Only the length of
            CLASS_GROUP[class_group] is used; it sets how many leading
            entries of each score sequence are read.

    Returns:
        A tuple (x_males, y_males, x_females, y_females) of lists:
        x_males[i] is count_males[i] / (count_males[i] + count_females[i]),
        y_males[i] is (fpr_males[i] - fpr_females[i]
                       + tpr_males[i] - tpr_females[i]) / 2,
        x_females[i] is count_females[i] / (count_males[i] + count_females[i]),
        y_females[i] is (fpr_females[i] - fpr_males[i]
                         + tpr_females[i] - tpr_males[i]) / 2.

    Raises:
        KeyError: If class_group or one of the score keys is missing.
        IndexError: If a score sequence is shorter than the class group.
        ZeroDivisionError: If both counts of a class are zero (for plain
            Python numbers).
    """
    # Hoisted so the class count and the lookups are evaluated once.
    positions = range(len(CLASS_GROUP[class_group]))
    count_m = scores['count_males']
    count_f = scores['count_females']
    tpr_m = scores['tpr_males']
    tpr_f = scores['tpr_females']
    fpr_m = scores['fpr_males']
    fpr_f = scores['fpr_females']

    # Total number of samples per class, shared by both gender shares.
    totals = [count_m[i] + count_f[i] for i in positions]

    x_males = [count_m[i] / totals[i] for i in positions]
    y_males = [(fpr_m[i] - fpr_f[i] + tpr_m[i] - tpr_f[i]) / 2 for i in positions]
    x_females = [count_f[i] / totals[i] for i in positions]
    y_females = [(fpr_f[i] - fpr_m[i] + tpr_f[i] - tpr_m[i]) / 2 for i in positions]
    return (x_males, y_males, x_females, y_females)
def
average_odds_error
(
scores
,
class_group
=
'medical'
):
def
average_odds_error
(
scores
,
class_group
=
'medical'
):