Skip to content
Snippets Groups Projects
Commit 45fa240f authored by Ann-Kathrin Margarete Edrich
Browse files

Fix spelling mistake

parent 698fa096
No related branches found
No related tags found
No related merge requests found
Pipeline #1620092 passed
......@@ -213,14 +213,14 @@ class comparison_training_prediction_dataset:
columns = self.pred.columns
# Regular expression to match "<feature>_<value>_encoded"
pattern = re.compile(r"^(.*?)(_?\d+)?_encoded$")
pattern = re.compile(r"^(.*?)(_?\d+)?_encode$")
encoded_features = {pattern.match(col).group(1) for col in columns if pattern.match(col)}
self.logger.info('Identified encoded features: ' + str(encoded_features))
count = 0
for feature in encoded_features:
feature_cols = [col for col in self.pred.columns if col.startswith(feature) and col.endswith("_encoded")]
feature_cols = [col for col in self.pred.columns if col.startswith(feature) and col.endswith("_encode")]
all_zero_rows = (self.pred[feature_cols] == 0).all(axis=1)
all_zero_rows = self.pred.index[all_zero_rows].tolist()
self.idx = list(set(self.idx + all_zero_rows))
......@@ -235,6 +235,9 @@ class comparison_training_prediction_dataset:
"""
self.pred = pd.concat([self.xy, self.pred], axis=1)
self.logger.info('Features in the prediction dataset: ' + str(self.pred.columns.tolist()))
pred = self.pred.to_numpy()
char_features = features_to_char(self.pred.columns)
......@@ -272,7 +275,7 @@ class comparison_training_prediction_dataset:
os.remove(outfile)
self.train = pd.concat([self.xy_train, self.train], axis=1)
self.logger.info('Features in the training dataset: ' + str(self.train.columns.tolist()))
# Save dataframe as csv
self.train.to_csv(outfile, sep=',', index=False)
self.logger.info('Training dataset saved')
......
......@@ -193,14 +193,14 @@ class comparison_training_prediction_dataset:
columns = self.pred.columns
# Regular expression to match "<feature>_<value>_encoded"
pattern = re.compile(r"^(.*?)(_?\d+)?_encoded$")
pattern = re.compile(r"^(.*?)(_?\d+)?_encode$")
encoded_features = {pattern.match(col).group(1) for col in columns if pattern.match(col)}
print(encoded_features)
self.logger.info('Identified encoded features: ' + str(encoded_features))
count = 0
for feature in encoded_features:
feature_cols = [col for col in self.pred.columns if col.startswith(feature) and col.endswith("_encoded")]
feature_cols = [col for col in self.pred.columns if col.startswith(feature) and col.endswith("_encode")]
all_zero_rows = (self.pred[feature_cols] == 0).all(axis=1)
all_zero_rows = self.pred.index[all_zero_rows].tolist()
self.idx = list(set(self.idx + all_zero_rows))
......@@ -215,6 +215,7 @@ class comparison_training_prediction_dataset:
"""
self.pred = pd.concat([self.xy, self.pred], axis=1)
self.logger.info('Features in the prediction dataset: ' + str(self.pred.columns.tolist()))
pred = self.pred.to_numpy()
char_features = features_to_char(self.pred.columns)
......@@ -252,6 +253,7 @@ class comparison_training_prediction_dataset:
os.remove(outfile)
self.train = pd.concat([self.xy_train, self.train], axis=1)
self.logger.info('Features in the training dataset: ' + str(self.train.columns.tolist()))
# Save dataframe as csv
self.train.to_csv(outfile, sep=',', index=False)
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment