Skip to content
Snippets Groups Projects

fixed kg_maker

Merged Jamal Rnjbal requested to merge 20-fix-group-meta-data into main
1 file
+ 23
19
Compare changes
  • Side-by-side
  • Inline
+ 23
19
@@ -5,6 +5,7 @@ import networkx as nx
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter
class KgMaker:
@@ -56,23 +57,25 @@ class KgMaker:
def group_meta_data(self) -> pd.DataFrame:
    """Aggregate the metadata columns of ``self.df`` per activity.

    The timestamp and case-id columns are dropped, the remaining frame is
    grouped by the activity column, and every other column is reduced to a
    single value per activity:

    * numeric columns -> the mean of the group,
    * string columns  -> a list of the (up to) three most frequent values,
      most frequent first,
    * anything else   -> a plain list of all values in the group.

    The actual column names are looked up in ``self.main_columns_dict``
    under the keys ``"timestamp"``, ``"activity"`` and ``"case_id"``.

    Returns:
        pd.DataFrame: one row per activity, with the activity column first
        (restored from the group index) followed by the aggregated
        metadata columns.
    """
    timestamp = self.main_columns_dict["timestamp"]
    activity = self.main_columns_dict["activity"]
    case_id = self.main_columns_dict["case_id"]

    def custom_agg(x):
        # Called once per (group, column); ``x`` holds that group's values.
        if pd.api.types.is_numeric_dtype(x):
            return x.mean()
        if pd.api.types.is_string_dtype(x):
            # Keep only the values, not their counts.
            return [item for item, _count in Counter(x).most_common(3)]
        return list(x)

    # Timestamp and case id identify individual events, not the activity,
    # so they are removed before aggregating.
    meta_df = self.df.drop(columns=[timestamp, case_id])
    grouped_df = meta_df.groupby(activity).agg(custom_agg).reset_index()
    return grouped_df
def df_to_kg(self,attributes_df, graph_df) -> nx:
"""
Create a knowledge graph from given dataframes and return the graph object.
@@ -83,13 +86,14 @@ class KgMaker:
Returns:
- G: NetworkX graph object
"""
activity= self.main_columns_dict["activity"]
G = nx.DiGraph()
for index, row in attributes_df.iterrows():
activity = row['Activity']
attributes = row.drop('Activity').to_dict()
G.add_node(activity, **attributes)
activity_temp = row[activity]
attributes = row.drop(activity).to_dict()
G.add_node(activity_temp, **attributes)
for index, row in graph_df.iterrows():
source = row['Source']
Loading