Skip to content
Snippets Groups Projects

Resolve "Frequency and average time Calculator"

Merged Jamal Rnjbal requested to merge 14-frequency-and-average-time-calculator into main
2 files
+ 122
8
Compare changes
  • Side-by-side
  • Inline
Files
2
+ 39
4
import pandas as pd
from loguru import logger
from collections import defaultdict
class KgMaker:
    """Pairs a DataFrame with a dictionary describing its main columns."""

    def __init__(self, df: pd.DataFrame, main_columns_dict: dict) -> None:
        """Keep references to the inputs; nothing is copied or validated.

        Args:
            df: The data the instance works on.
            main_columns_dict: Lookup table for the frame's main columns.
                Code elsewhere in this file reads its "timestamp",
                "activity" and "case_id" entries.
        """
        self.main_columns_dict = main_columns_dict
        self.df = df
def calculate_metrics(self) -> pd.DataFrame:
    """Count activity-to-activity transitions and average their duration.

    The rows of ``self.df`` are ordered by case and timestamp; inside each
    case every pair of consecutive activities is one transition. The
    column names are looked up in ``self.main_columns_dict`` under the keys
    ``"timestamp"``, ``"activity"`` and ``"case_id"``.

    Timestamp values must support subtraction yielding an object with
    ``total_seconds()`` (e.g. pandas ``Timestamp``).

    Side effects:
        ``self.df`` is replaced by its sorted version.

    Returns:
        One row per distinct transition, in order of first appearance,
        with the columns ``Frequency``, ``Source``, ``Target`` and
        ``Average Time`` (in hours). When no case holds two or more events
        the frame is empty but still carries these four columns.

    Raises:
        KeyError: If one of the three keys is missing from
            ``self.main_columns_dict`` or names a column absent from
            ``self.df``.
    """
    timestamp_col = self.main_columns_dict["timestamp"]
    activity_col = self.main_columns_dict["activity"]
    case_col = self.main_columns_dict["case_id"]

    # The sorted frame is stored back on the instance, as callers may rely
    # on that ordering after this method has run.
    self.df = self.df.sort_values(by=[case_col, timestamp_col])

    # transition -> list of durations in hours; the list length doubles as
    # the transition's frequency.
    durations = defaultdict(list)
    for _, events in self.df.groupby(case_col):
        activities = list(events[activity_col])
        timestamps = list(events[timestamp_col])
        consecutive = zip(
            zip(activities, activities[1:]),
            zip(timestamps, timestamps[1:]),
        )
        for transition, (start, end) in consecutive:
            durations[transition].append((end - start).total_seconds() / 3600)

    # Building the result in one step also covers the "no transitions"
    # case, which previously failed while splitting an empty tuple column.
    rows = [
        (len(hours), source, target, sum(hours) / len(hours))
        for (source, target), hours in durations.items()
    ]
    return pd.DataFrame(
        rows, columns=["Frequency", "Source", "Target", "Average Time"]
    )
def group_meta_data(self)->pd.DataFrame:
columns_all=self.df.columns
meta_data=[]
@@ -15,8 +52,6 @@ class KgMaker:
columns_all=["activity"] + meta_data
df_new=self.df[columns_all].copy()
aggregated_df_first = df_new.groupby("activity").first().reset_index()
return aggregated_df_first
\ No newline at end of file
Loading