Commit 6b13c8eb authored by Sparsh Jauhari's avatar Sparsh Jauhari 💬
Browse files

Added ELMo transformation

parent 9a8f02c2
import nltk
# from nltk.corpus import stopwords
import tensorflow as tf
from nltk.stem.snowball import SnowballStemmer
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer
......@@ -8,7 +9,14 @@ from config import WORD2VEC_PATH, DATASET_NAMES, MASKED
from gensim.models import KeyedVectors, Word2Vec
from joblib import dump, load
import numpy as np
import pandas as pd
import tensorflow_hub as hub
from config import CLASS_GROUP
from sampling import get_data_from_mongo
from sklearn.svm import SVC
from config import DATASET_NAMES, MASKED, SEED
import numpy as np
......@@ -69,7 +77,22 @@ def count_vectorize_fit_transform(x_list):
def count_vectorize_transform(vectorizer_binary,x_list):
    """Apply ``vectorizer_binary.transform`` to ``x_list`` and return the result.

    Args:
        vectorizer_binary: Object exposing a ``transform`` method; presumably
            the vectorizer fitted by ``count_vectorize_fit_transform`` --
            confirm against callers.
        x_list: Collection of documents handed to ``transform`` unchanged.

    Returns:
        Whatever ``vectorizer_binary.transform(x_list)`` returns.
    """
    return vectorizer_binary.transform(x_list)
def elmo_transform(x_list, *, module_url="https://tfhub.dev/google/elmo/3", output_key="default"):
    """Embed every text in ``x_list`` with a TensorFlow Hub ELMo module.

    Each text is tokenized with ``preProcessAndTokenize``, re-joined with
    single spaces and pushed through the module's ``"default"`` signature one
    sentence at a time. The selected output tensor is flattened and cast to
    ``float64`` so each entry of the result is a plain 1-D numpy array.

    Args:
        x_list: Iterable of raw text strings.
        module_url: TF Hub handle passed to ``hub.load``. An empty handle
            cannot be resolved; ELMo v3 is assumed here -- TODO confirm the
            intended module and version.
        output_key: Key of the signature's output dict to keep. Per the TF Hub
            ELMo documentation ``"default"`` is a mean-pooled sentence vector
            of fixed size, whereas ``"word_emb"`` is per-token, so its
            flattened length varies with the sentence -- confirm against the
            module actually used.

    Returns:
        A list with one 1-D ``float64`` numpy array per input text, in input
        order. Empty input yields an empty list.
    """
    # Load the module once per call and reuse the signature for all sentences.
    elmo = hub.load(module_url)
    embed = elmo.signatures["default"]

    embeddings_list = []
    for text in x_list:
        sentence = ' '.join(preProcessAndTokenize(text))
        outputs = embed(tf.constant([sentence]))
        # Flatten to 1-D and cast explicitly; the removed ``np.float`` alias
        # must not be used for this conversion.
        flat = outputs[output_key].numpy().reshape(-1).astype(np.float64)
        embeddings_list.append(flat)
    return embeddings_list
class MeanEmbeddingVectorizer(object):
def __init__(self, word2vec):
......@@ -121,6 +144,26 @@ def selftrained_word2vec_fit_transform(x_list):
return model , X
# Ad-hoc trial run: pull the 'trial' records, embed the first two bios with
# ELMo and print what came back.
data = get_data_from_mongo('trial')
data = pd.DataFrame(data)
s = data['bio'][0]
s2 = data['bio'][1]
# Embed exactly the two sample bios extracted above; they pair up with the two
# labels printed at the end.
X_train = elmo_transform([s, s2])
# NOTE(review): the classifier is constructed but never fitted in the visible
# code; the two-label list below suggests a fit call was intended -- confirm.
classifier = SVC(C=1, kernel = 'linear', gamma = 'auto', class_weight=None)
# Guard the first-element inspection so an empty embedding list does not raise
# IndexError.
if len(X_train) > 0:
    print("first instance of X_train\n", type(X_train[0]),X_train[0].shape, X_train[0].ndim, X_train[0].size)
print(X_train, ['1','0'])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment