Commit 2f0b082a authored by Nishtha Jain
Browse files

decreasing verbose

parent 37612bd8
......@@ -29,4 +29,4 @@ train.py - contains the runnable flow of the project
python train.py --load_data_from_saved False --embedding_train True --model_train True --predict True --evaluate True --class_group medical --sampling random --embedding cv --model svm --test_size 0.2 --masking True
\ No newline at end of file
`python train.py --no-load_data_from_saved --embedding_train --model_train --predict --evaluate --masking --class_group medical --sampling random --embedding cv --model svm --test_size 0.2`
\ No newline at end of file
......@@ -17,7 +17,7 @@ def model_training(X_train, Y_train, model, embedding, class_group, sampling, te
def model_prediction(X_test, Y_test, model, embedding, class_group, sampling, test_size, masking):
print("processing model.model_prediction ...")
# print("processing model.model_prediction ...")
trained_model = load(DATASET_NAMES['model',model,embedding,class_group,sampling,test_size,MASKED[masking]]+'.joblib')
......@@ -43,8 +43,8 @@ def svm_train(X_train, Y_train, sampling):
class_weight = None
classifier = SVC(C=1, kernel = 'linear', gamma = 'auto', class_weight=class_weight,random_state=SEED)
print("shape of X_train",X_train.shape)
print("first instance of X_train", type(X_train[0]),X_train[0])
# print("shape of X_train",X_train.shape)
# print("first instance of X_train", type(X_train[0]),X_train[0])
classifier.fit(X_train, Y_train)
return classifier
......
......@@ -30,7 +30,7 @@ def embedding_fit_transform(x_list, embedding, class_group, sampling, test_size,
def embedding_transform(x_list, embedding, class_group, sampling, test_size, masking ):
print("processing preprocessing.embedding_transform ...")
# print("processing preprocessing.embedding_transform ...")
trained_embedding = load(DATASET_NAMES['embedding',embedding,class_group,sampling,test_size,MASKED[masking]]+'.joblib')
......
......@@ -9,9 +9,8 @@ from joblib import dump, load
def main(load_data_from_saved, embedding_train, model_train, predict, evaluate, class_group, sampling, embedding, model, test_size, masking):
print()
print({'load_data_from_saved':load_data_from_saved, 'embedding_train':embedding_train, 'model_train':model_train, 'predict':predict, 'evaluate':evaluate, 'class_group':class_group, 'sampling':sampling, 'embedding':embedding, 'model':model, 'test_size':test_size, 'masking':masking})
print("\nprocessing train.main ...")
print("processing train.main ...")
data = pd.DataFrame(load_data(class_group=class_group, from_saved=load_data_from_saved))
train_set,test_set = data_selection(data, class_group, sampling, test_size, masking)
......@@ -21,6 +20,7 @@ def main(load_data_from_saved, embedding_train, model_train, predict, evaluate,
if embedding_train:
X_train = embedding_fit_transform(train_set[MASKED[masking]], embedding, class_group, sampling, test_size, masking)
else:
print("processing preprocessing.embedding_transform ...")
X_train = embedding_transform(train_set[MASKED[masking]], embedding, class_group, sampling, test_size, masking)
Y_train = train_set[TITLE]
......@@ -30,13 +30,15 @@ def main(load_data_from_saved, embedding_train, model_train, predict, evaluate,
if predict:
X_test = embedding_transform(test_set[MASKED[masking]], embedding, class_group, sampling, test_size, masking)
Y_test = test_set[TITLE]
print("processing model.model_prediction ...")
pred, acc = model_prediction(X_test, Y_test, model, embedding, class_group, sampling, test_size, masking)
predicted_dataset = pd.DataFrame({MASKED[masking]:test_set[MASKED[masking]], TITLE:test_set[TITLE], 'predicted':pred})
print("\t saving file :",PREDICTED_DATASET[model, embedding, class_group, sampling, test_size, MASKED[masking]])
dump(predicted_dataset, PREDICTED_DATASET[model, embedding, class_group, sampling, test_size, MASKED[masking]] + '.joblib')
print("\nModel accuracy:", acc)
print("Model accuracy:", acc)
# evaluation
# TO-DO:
......@@ -70,17 +72,38 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser()
# Adding optional argument
parser.add_argument("--load_data_from_saved", required=True, help = "True if saved data to be used and False if new data to be taken")
parser.add_argument("--embedding_train", default=True, help = "True to train new embedding and False to use the saved one")
parser.add_argument("--model_train", default=True, help = "True to train new model and False to use the saved one")
parser.add_argument("--predict", default=True, help = "True to perform predictions on the test set and False otherwise")
parser.add_argument("--evaluate", default=True, help = "True to perform bias evaluations on the test set and False otherwise")
parser.add_argument('--feature', dest='feature', action='store_true')
parser.add_argument('--no-feature', dest='feature', action='store_false')
parser.set_defaults(feature=True)
parser.add_argument("--load_data_from_saved", dest = 'load_data_from_saved',action='store_true', help = "if saved data to be used ")
parser.add_argument("--no-load_data_from_saved", dest = 'load_data_from_saved',action='store_false', help = "if new data to be taken")
parser.set_defaults(load_data_from_saved=False)
parser.add_argument("--embedding_train", dest='embedding_train',action='store_true',help = "to train new embedding")
parser.add_argument("--no-embedding_train", dest='embedding_train',action='store_false', help = "to use the saved embedding")
parser.set_defaults(embedding_train=True)
parser.add_argument("--model_train", dest='model_train',action='store_true', help = "to train new model")
parser.add_argument("--no-model_train", dest='model_train',action='store_false', help = "to use the saved model")
parser.set_defaults(model_train=True)
parser.add_argument("--predict", dest='predict',action='store_true', help = "to perform predictions on the test set")
parser.add_argument("--no-predict", dest='predict',action='store_false', help = "otherwise")
parser.set_defaults(predict=True)
parser.add_argument("--evaluate", dest='evaluate',action='store_true', help = "to perform bias evaluations on the test set")
parser.add_argument("--no-evaluate", dest='evaluate',action='store_false', help = "otherwise")
parser.set_defaults(evaluate=True)
parser.add_argument("--masking", dest='masking',action='store_true', help = "for 'bio' data")
parser.add_argument("--no-masking", dest='masking',action='store_false', help = "for 'raw' data")
parser.add_argument("--class_group", default='medical', required=True, help = "choice of domain of occupations ('trial','medical')")
parser.add_argument("--sampling", required=True, help = "choice of sampling ('random', 'balanced')")
parser.add_argument("--embedding", required=True, help = "choice of embeddings to be used ('cv': count_vectorize(self-trained), 'w2v': word2vec_embedding(pre-trained), 'self_w2v':w2v(self-trained))")
parser.add_argument("--model", default= 'svm', required=True, help = "choice of models to be trained ('svm', 'rf', 'nn')")
parser.add_argument("--test_size", default = 0.2, required=True, help = "proportion of data to be used for tesing")
parser.add_argument("--masking", required=True, help = "True for 'bio' data and False for 'raw' data")
# Read arguments from command line
args = parser.parse_args()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment