from git import Repo
import os
import json
import matplotlib.pyplot as plt
from datetime import datetime, timedelta, timezone
import pandas as pd

# Parameters
REPO_URL = "https://git.rwth-aachen.de/coscine/reporting/reporting-database.git"
LOCAL_REPO_PATH = "local_repo"
FILE_PATH = "General/users.json"

# Clone the Git repository if it's not already cloned
if not os.path.isdir(LOCAL_REPO_PATH):
    Repo.clone_from(REPO_URL, LOCAL_REPO_PATH)

# Open the local repository
repo = Repo(LOCAL_REPO_PATH)

# Fetch all commits (reachable from the current HEAD) that touched the file
commits = list(repo.iter_commits(paths=FILE_PATH))


def get_previous_day(date_str):
    """Return the day before *date_str*.

    Args:
        date_str: A date formatted as ``YYYY-MM-DD``.

    Returns:
        The preceding calendar day, formatted as ``YYYY-MM-DD``.

    Raises:
        ValueError: If *date_str* does not match ``YYYY-MM-DD``.
    """
    date = datetime.strptime(date_str, '%Y-%m-%d')
    prev_day = date - timedelta(days=1)
    return prev_day.strftime('%Y-%m-%d')


# (commit date, number of matching users) pairs, one per readable commit
date_counts = []
commitCount = len(commits)
iteration = 0  # number of commits processed successfully so far

try:
    # Iterate over each commit
    for commit in commits:
        # Put this commit's version of the file into the working tree
        repo.git.checkout(commit)

        # Only the file read is guarded, so unrelated errors are not mistaken
        # for a missing or malformed file.
        try:
            # Decode as UTF-8 explicitly rather than with the locale's default
            # codec, so the result does not depend on the machine's locale.
            with open(os.path.join(LOCAL_REPO_PATH, FILE_PATH), 'r',
                      encoding='utf-8') as f:
                users = json.load(f)
        except FileNotFoundError:
            print(f"The file {FILE_PATH} does not exist in commit {commit.hexsha}")
            continue
        except (json.JSONDecodeError, UnicodeDecodeError):
            print(f"JSON Decode Error for the file in commit {commit.hexsha}")
            continue

        # Commit date in UTC (YYYY-MM-DD); timezone-aware replacement for the
        # deprecated datetime.utcfromtimestamp.
        commit_date = datetime.fromtimestamp(
            commit.committed_date, tz=timezone.utc
        ).strftime('%Y-%m-%d')
        recent_days = (commit_date, get_previous_day(commit_date))

        # Count the users whose LatestActivity string starts with the commit
        # date or the day before (str.startswith accepts a tuple of prefixes).
        count = sum(
            1
            for user in users
            if user.get("LatestActivity")
            and user["LatestActivity"].startswith(recent_days)
        )

        # Store the date and count
        date_counts.append((commit_date, count))
        iteration += 1
        print(f"{iteration}/{commitCount} Commits")
finally:
    # Always return to the main branch, even if the loop fails part-way.
    # Otherwise the clone stays on a detached historical commit and the next
    # run would enumerate commits from that point only.
    repo.git.checkout('main')

# Sort the results by date
date_counts.sort(key=lambda x: datetime.strptime(x[0], '%Y-%m-%d'))

# With no data points, zip(*date_counts) below would fail with an opaque
# unpacking error; stop here with a clear message (and a non-zero exit status).
if not date_counts:
    raise SystemExit("No activity data could be collected; nothing to export or plot.")

# Plotting the results
dates, counts = zip(*date_counts)  # Unzip the date-count pairs
dates = [datetime.strptime(date, '%Y-%m-%d') for date in dates]  # Convert strings to datetime

# Export the raw (date string, count) pairs as CSV
df = pd.DataFrame(date_counts, columns=['Commit Date', 'Matching Activity Count'])
csv_file_path = 'activity_counts.csv'
df.to_csv(csv_file_path, index=False)
print(f"The data has been saved to {csv_file_path}")

# Plot
# Select the style before the figure is created: style settings only affect
# artists created after they are applied.
plt.style.use('ggplot')
plt.figure(figsize=(10, 5))
plt.plot(dates, counts, marker='o')
plt.xlabel('Date of Commit')
plt.ylabel('Number of Matching "LatestActivity" Objects')
plt.title('Activity Counts Per Commit Date and the Day Before')
plt.xticks(rotation=45)
plt.grid(True)
plt.legend(['Activity Count'])
# Run the layout pass last so it accounts for the rotated ticks and the legend
plt.tight_layout()

# Save the plot to a file
image_file_path = 'activity_plot.png'
plt.savefig(image_file_path)
print(f"The plot has been saved to {image_file_path}")
plt.show()