Skip to content
Snippets Groups Projects

fixed kg_maker

Merged Jamal Rnjbal requested to merge 20-fix-group-meta-data into main
1 file
+ 23
19
Compare changes
  • Side-by-side
  • Inline
+ 23
19
@@ -5,6 +5,7 @@ import networkx as nx
@@ -5,6 +5,7 @@ import networkx as nx
import pandas as pd
import pandas as pd
import networkx as nx
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
 
from collections import Counter
class KgMaker:
class KgMaker:
@@ -56,23 +57,25 @@ class KgMaker:
@@ -56,23 +57,25 @@ class KgMaker:
def group_meta_data(self)->pd.DataFrame:
def group_meta_data(self)->pd.DataFrame:
"""Groups meta data by activity
"""Groups meta data by activity
returns: Dataframe
Returns:
pd.DataFrame
"""
"""
columns_all=self.df.columns
timestamp = self.main_columns_dict["timestamp"]
meta_data=[]
activity= self.main_columns_dict["activity"]
for col in columns_all:
case_id = self.main_columns_dict["case_id"]
if col not in ["timestamp","case id","activity"]:
meta_data.append(col)
def custom_agg(x):
if pd.api.types.is_numeric_dtype(x):
columns_all=["activity"] + meta_data
return x.mean()
df_new=self.df[columns_all].copy()
elif pd.api.types.is_string_dtype(x):
aggregated_df_first = df_new.groupby("activity").first().reset_index()
most_common = Counter(x).most_common(3)
return [item for item, count in most_common]
return aggregated_df_first
else:
@staticmethod
return list(x)
def df_to_kg(attributes_df, graph_df) -> nx:
 
grouped_df = self.df.drop(columns=[timestamp,case_id]).groupby(activity).agg(custom_agg).reset_index()
 
return grouped_df
 
 
def df_to_kg(self,attributes_df, graph_df) -> nx:
"""
"""
Create a knowledge graph from given dataframes and return the graph object.
Create a knowledge graph from given dataframes and return the graph object.
@@ -83,13 +86,14 @@ class KgMaker:
@@ -83,13 +86,14 @@ class KgMaker:
Returns:
Returns:
- G: NetworkX graph object
- G: NetworkX graph object
"""
"""
 
activity= self.main_columns_dict["activity"]
G = nx.DiGraph()
G = nx.DiGraph()
for index, row in attributes_df.iterrows():
for index, row in attributes_df.iterrows():
activity = row['Activity']
activity_temp = row[activity]
attributes = row.drop('Activity').to_dict()
attributes = row.drop(activity).to_dict()
G.add_node(activity, **attributes)
G.add_node(activity_temp, **attributes)
for index, row in graph_df.iterrows():
for index, row in graph_df.iterrows():
source = row['Source']
source = row['Source']
Loading