Skip to content
Snippets Groups Projects
Commit 075a1733 authored by Rawel
Browse files

cleaned up train_model

parent 14e7c6ae
Branches
No related tags found
No related merge requests found
......@@ -24,6 +24,7 @@ class Svm:
self.preprocessing = load(f"Models/{model_name}_preprocessing.joblib")
def save_model(self, model_name):
    """Write the model and its preprocessing object to the ``Models`` directory.

    The directory is created when missing. Two joblib files are written,
    ``Models/<model_name>.joblib`` for ``self.model`` and
    ``Models/<model_name>_preprocessing.joblib`` for ``self.preprocessing``,
    after which a confirmation including the model's string form is printed.

    Args:
        model_name: Base name (without extension) used for both files.
    """
    os.makedirs("Models", exist_ok=True)

    model_file = f"Models/{model_name}.joblib"
    dump(self.model, model_file)

    preprocessing_file = f"Models/{model_name}_preprocessing.joblib"
    dump(self.preprocessing, preprocessing_file)

    summary = f"Saved model to {model_name}.joblib! Parameters:\n\n{str(self.model)}"
    print(summary)
......@@ -108,22 +109,9 @@ class Svm:
def train_model(self, vcc_feature_vectors_and_weights, unclassified_feature_vectors, fs=303, c=1, weight=1):
labels = []
weights = []
"""
vcc_feature_vectors = [x[0] for x in vcc_feature_vectors_and_weigts]
fit_vectors = [x.tocsc()[0, :303] for x in vcc_feature_vectors+unclassified_feature_vectors]
# preprocess fit
self.preprocess_fit(fit_vectors)
#dump(self.preprocessing, "Models/preprocessing.joblib")
"""
# self.preprocessing = load("Models/preprocessing.joblib")
"""
feature_vectors = self.preprocess(vcc_feature_vectors[0])
for feature_vector in vcc_feature_vectors[1:]+unclassified_feature_vectors:
feature_vectors = vstack((feature_vectors, self.preprocess(feature_vector)))
#dump(feature_vectors, "feature_vectors.joblib")
"""
feature_vectors = load("feature_vectors.joblib")
vcc_feature_vectors = [x[0] for x in vcc_feature_vectors_and_weights]
self.prepare_preprocessing(vcc_feature_vectors_and_weights, unclassified_feature_vectors)
feature_vectors = self.get_feature_vectors(vcc_feature_vectors, unclassified_feature_vectors)
for i, vector in enumerate(vcc_feature_vectors_and_weights):
labels.append(1)
......@@ -134,32 +122,39 @@ class Svm:
print("fitting...")
self.model = LinearSVC(C=c, max_iter=100000000)
# feature selection using k best
"""
print("feature selection...")
self.kbest = SelectKBest(chi2, k=int(fs))
feature_vectors_best = self.kbest.fit_transform(feature_vectors_scaled, labels)
self.model.fit(feature_vectors_best, labels, weights)
print("Done")
# feature selection using select from model
self.feature_select = SelectFromModel(self.model, threshold=float(fs))
self.feature_select.fit(feature_vectors_scaled, labels)
feature_selected_vectors = self.feature_select.transform(feature_vectors_scaled)
print(feature_selected_vectors.shape)
"""
"""
self.model.fit(feature_vectors, labels, weights)
print("Score:", str(self.model.score(feature_vectors, labels)))
print("Done")
self.confidences = {}
"""
k = 5
scores = cross_val_score(self.model, feature_vectors, labels, cv=k)
print(scores)
print("Average score:", str(sum(scores) / k))
def get_feature_vectors(self, vcc_feature_vectors, unclassified_feature_vectors, feature_vector_path=None):
    """Return the stacked, preprocessed feature matrix.

    When ``feature_vector_path`` is given, the matrix is loaded from that
    file and nothing else happens. Otherwise every vector in
    ``vcc_feature_vectors`` followed by every vector in
    ``unclassified_feature_vectors`` is passed through ``self.preprocess``,
    the results are stacked in that order, and the stacked matrix is written
    to ``Vectors/feature_vectors.joblib`` before being returned.

    Args:
        vcc_feature_vectors: Sequence of feature vectors (rows come first).
        unclassified_feature_vectors: Sequence of feature vectors appended
            after the ``vcc_feature_vectors`` rows.
        feature_vector_path: Optional path of a previously dumped matrix.

    Returns:
        The loaded or freshly stacked feature matrix.

    Raises:
        ValueError: If no path is given and both sequences are empty.
    """
    if feature_vector_path is not None:
        return load(feature_vector_path)

    all_vectors = list(vcc_feature_vectors) + list(unclassified_feature_vectors)
    if not all_vectors:
        raise ValueError("get_feature_vectors needs at least one feature vector to stack")

    # Preprocess everything first and stack once: calling vstack inside the
    # loop re-copies the growing matrix on every iteration.
    feature_vectors = vstack([self.preprocess(vector) for vector in all_vectors])

    os.makedirs("Vectors", exist_ok=True)
    dump(feature_vectors, "Vectors/feature_vectors.joblib")
    return feature_vectors
def prepare_preprocessing(self, vcc_feature_vectors, unclassified_feature_vectors, preprocessing_path=None,
                          feature_count=303):
    """Load or fit ``self.preprocessing``.

    When ``preprocessing_path`` is given, ``self.preprocessing`` is loaded
    from that file and the method returns. Otherwise the first item of each
    ``vcc_feature_vectors`` element is combined with
    ``unclassified_feature_vectors``, each vector is converted with
    ``tocsc()`` and cut down to its first row and first ``feature_count``
    columns, and the result is handed to ``self.preprocess_fit``. Finally
    ``self.preprocessing`` is dumped to ``Models/preprocessing.joblib``.

    Args:
        vcc_feature_vectors: Sequence whose elements are indexable; only
            element ``[0]`` of each is used. NOTE(review): ``train_model``
            passes its ``vcc_feature_vectors_and_weights`` argument here, so
            these are presumably (vector, weight) pairs - confirm.
        unclassified_feature_vectors: List of vectors exposing ``tocsc()``.
        preprocessing_path: Optional path of a previously dumped
            preprocessing object.
        feature_count: Number of leading columns kept for fitting. Defaults
            to 303, the value that used to be hard-coded here.
    """
    if preprocessing_path is not None:
        self.preprocessing = load(preprocessing_path)
        return

    # Keep the parameter untouched; use a separate name for the bare vectors.
    vcc_vectors = [entry[0] for entry in vcc_feature_vectors]
    fit_vectors = [
        vector.tocsc()[0, :feature_count]
        for vector in vcc_vectors + unclassified_feature_vectors
    ]

    # preprocess fit
    self.preprocess_fit(fit_vectors)

    os.makedirs("Models", exist_ok=True)
    dump(self.preprocessing, "Models/preprocessing.joblib")
def preprocess_fit(self, vectors):
fit_vecs = vectors[0]
for vec in vectors:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment