Plotting Coscine User Activity
The snippet can be accessed without any authentication.
Authored by
Benedikt Heinrichs
Edited
plotActivity.py 3.17 KiB
import json
import os
from datetime import datetime, timedelta, timezone

import matplotlib.pyplot as plt
import pandas as pd
from git import Repo
# --- Configuration -----------------------------------------------------------
# Remote repository to analyse, where its local clone lives, and the tracked
# file (path relative to the repository root).
REPO_URL = "https://git.rwth-aachen.de/coscine/reporting/reporting-database.git"
LOCAL_REPO_PATH = "local_repo"
FILE_PATH = "General/users.json"

# --- Repository handle -------------------------------------------------------
# An existing directory is reused as-is; otherwise the remote is cloned first.
# Either way the handle is opened from the local path.
if os.path.isdir(LOCAL_REPO_PATH):
    repo = Repo(LOCAL_REPO_PATH)
else:
    Repo.clone_from(REPO_URL, LOCAL_REPO_PATH)
    repo = Repo(LOCAL_REPO_PATH)

# Materialise every commit that touched FILE_PATH so the total is known
# before processing starts.
commits = [*repo.iter_commits(paths=FILE_PATH)]
def get_previous_day(date_str: str, date_format: str = '%Y-%m-%d') -> str:
    """Return the calendar day before ``date_str``, in the same format.

    Args:
        date_str: A date rendered according to ``date_format``.
        date_format: ``strptime``/``strftime`` pattern used both to parse the
            input and to render the result. Defaults to ISO ``YYYY-MM-DD``.

    Returns:
        The preceding day as a string in ``date_format``.

    Raises:
        ValueError: If ``date_str`` does not match ``date_format``.

    >>> get_previous_day('2024-03-01')
    '2024-02-29'
    """
    date = datetime.strptime(date_str, date_format)
    prev_day = date - timedelta(days=1)
    return prev_day.strftime(date_format)
# Per-commit results: (commit date as 'YYYY-MM-DD', number of matching users)
date_counts = []
commitCount = len(commits)
iteration = 0

# Walk every commit that touched FILE_PATH. The file is read directly from the
# commit's tree instead of checking the commit out, so HEAD and the working
# tree are not moved while iterating and an unexpected error cannot leave the
# clone on a detached HEAD.
for commit in commits:
    try:
        # The tree lookup raises KeyError when the path is absent from this
        # commit (e.g. a commit that removed the file).
        blob = commit.tree / FILE_PATH
        # Parse the raw bytes: json.loads detects the UTF encoding itself, so
        # the result does not depend on the platform's default text encoding.
        users = json.loads(blob.data_stream.read())
    except KeyError:
        print(f"The file {FILE_PATH} does not exist in commit {commit.hexsha}")
        continue
    except json.JSONDecodeError:
        print(f"JSON Decode Error for the file in commit {commit.hexsha}")
        continue

    # Commit date in UTC (YYYY-MM-DD). The timezone-aware call replaces
    # datetime.utcfromtimestamp, which is deprecated since Python 3.12.
    commit_date = datetime.fromtimestamp(
        commit.committed_date, tz=timezone.utc
    ).strftime('%Y-%m-%d')
    prev_day = get_previous_day(commit_date)

    # Count users whose LatestActivity starts with the commit date or the day
    # before; entries with a missing or empty LatestActivity are skipped.
    recent_prefixes = (commit_date, prev_day)
    count = sum(
        1
        for user in users
        if user.get("LatestActivity")
        and user["LatestActivity"].startswith(recent_prefixes)
    )

    date_counts.append((commit_date, count))
    # Progress counts successfully processed commits only.
    iteration += 1
    print(f"{iteration}/{commitCount} Commits")

# Leave the working tree on the main branch when finished
repo.git.checkout('main')
# Without any collected data there is nothing to export or plot; unpacking an
# empty zip() further down would otherwise fail with an opaque ValueError.
if not date_counts:
    raise SystemExit("No activity data was collected; nothing to save or plot.")

# Sort the results chronologically (dates are 'YYYY-MM-DD' strings)
date_counts.sort(key=lambda x: datetime.strptime(x[0], '%Y-%m-%d'))

# Split into parallel sequences and convert the date strings to datetime
# objects so matplotlib treats the x axis as a time axis.
dates, counts = zip(*date_counts)
dates = [datetime.strptime(date, '%Y-%m-%d') for date in dates]

# Export the raw numbers alongside the plot
df = pd.DataFrame(date_counts, columns=['Commit Date', 'Matching Activity Count'])
csv_file_path = 'activity_counts.csv'
df.to_csv(csv_file_path, index=False)
print(f"The data has been saved to {csv_file_path}")

# Plot
# The style sheet must be selected before the figure is created: it only
# changes defaults for artists created afterwards, so applying it after
# plotting leaves the existing figure unstyled.
plt.style.use('ggplot')
plt.figure(figsize=(10, 5))
plt.plot(dates, counts, marker='o', label='Activity Count')
plt.xlabel('Date of Commit')
plt.ylabel('Number of Matching "LatestActivity" Objects')
plt.title('Activity Counts Per Commit Date and the Day Before')
plt.xticks(rotation=45)
plt.grid(True)
plt.legend()
# Fit the layout last, once all labels, ticks and the legend exist
plt.tight_layout()

# Save the plot to a file
image_file_path = 'activity_plot.png'
plt.savefig(image_file_path)
print(f"The plot has been saved to {image_file_path}")
plt.show()
Please register or sign in to comment