Skip to content
Snippets Groups Projects

Plotting Coscine User Activity

  • Clone with SSH
  • Clone with HTTPS
  • Embed
  • Share
    The snippet can be accessed without any authentication.
    Authored by Benedikt Heinrichs
    Edited
    plotActivity.py 3.17 KiB
    import json
    import os
    from datetime import datetime, timedelta, timezone

    import matplotlib.pyplot as plt
    import pandas as pd
    from git import Repo
    
    # Configuration: remote to mirror, where the clone lives locally, and the
    # path (relative to the repository root) of the file whose history is read.
    REPO_URL = "https://git.rwth-aachen.de/coscine/reporting/reporting-database.git"
    LOCAL_REPO_PATH = "local_repo"
    FILE_PATH = "General/users.json"

    # Reuse an existing local clone when the directory is present; otherwise
    # clone the remote first. Either way `repo` ends up bound to the local clone.
    repo = (
        Repo(LOCAL_REPO_PATH)
        if os.path.isdir(LOCAL_REPO_PATH)
        else Repo.clone_from(REPO_URL, LOCAL_REPO_PATH)
    )

    # Every commit reachable from HEAD that touched FILE_PATH, in the order
    # GitPython yields them.
    commits = [*repo.iter_commits(paths=FILE_PATH)]
    
    # Helper function to get the previous day's date as a string
    def get_previous_day(date_str):
        """Return the day before ``date_str``.

        Both the argument and the result are strings in ``YYYY-MM-DD`` format.
        Raises ``ValueError`` (from ``datetime.strptime``) when ``date_str``
        does not match that format.
        """
        day_format = '%Y-%m-%d'
        one_day = timedelta(days=1)
        parsed = datetime.strptime(date_str, day_format)
        return (parsed - one_day).strftime(day_format)
    
    # Data structure to hold commit date and count of matching objects
    date_counts = []

    commitCount = len(commits)
    iteration = 0

    # Iterate over each commit.
    # The file is read directly from the commit's tree instead of checking the
    # commit out. The working tree and HEAD are never modified, so a dirty
    # working tree cannot block the run and an interrupted run cannot leave the
    # clone on a detached HEAD (which would shorten the history the next run
    # sees, because the commit list is taken from HEAD).
    for iteration, commit in enumerate(commits, start=1):
        try:
            # `tree / path` raises KeyError when the path is absent in this commit
            blob = commit.tree / FILE_PATH
            # json.loads accepts the raw bytes and detects the encoding itself,
            # so the result does not depend on the platform's locale encoding.
            users = json.loads(blob.data_stream.read())
        except KeyError:
            print(f"The file {FILE_PATH} does not exist in commit {commit.hexsha}")
        except (json.JSONDecodeError, UnicodeDecodeError):
            print(f"JSON Decode Error for the file in commit {commit.hexsha}")
        else:
            # Extract the commit date (YYYY-MM-DD) in UTC
            commit_date = datetime.fromtimestamp(
                commit.committed_date, tz=timezone.utc
            ).strftime('%Y-%m-%d')
            prev_day = get_previous_day(commit_date)

            # Count the users whose LatestActivity starts with the commit date
            # or the day before; entries with a missing/empty value are skipped.
            day_prefixes = (commit_date, prev_day)
            count = sum(
                1 for user in users
                if user.get("LatestActivity")
                and user["LatestActivity"].startswith(day_prefixes)
            )

            # Store the date and count
            date_counts.append((commit_date, count))

        # Progress counts every processed commit (skipped ones included), so
        # the last line printed is always N/N.
        print(f"{iteration}/{commitCount} Commits")

    # Checkout the main branch after the operation. The loop above no longer
    # moves HEAD; this is kept so the clone still ends up on 'main', e.g. when
    # an earlier, interrupted run left it on a detached HEAD.
    repo.git.checkout('main')
    
    # Sort the results by date. The dates are 'YYYY-MM-DD' strings, which
    # order chronologically when compared as plain text.
    date_counts.sort(key=lambda x: x[0])

    # Save the raw numbers as CSV (header only when nothing was collected)
    df = pd.DataFrame(date_counts, columns=['Commit Date', 'Matching Activity Count'])
    csv_file_path = 'activity_counts.csv'
    df.to_csv(csv_file_path, index=False)
    print(f"The data has been saved to {csv_file_path}")

    if not date_counts:
        print("No activity data was collected; the CSV and the plot will be empty")

    # Unzip the date-count pairs. zip(*[]) yields nothing and cannot be
    # unpacked into two names, so fall back to two empty sequences.
    dates, counts = zip(*date_counts) if date_counts else ((), ())
    dates = [datetime.strptime(date, '%Y-%m-%d') for date in dates]  # Convert strings to datetime

    # Plot
    # The style sheet only affects figures created after it is selected, so it
    # has to be chosen before plt.figure().
    plt.style.use('ggplot')  # Use the 'ggplot' style for a fancier plot
    plt.figure(figsize=(10, 5))
    plt.plot(dates, counts, marker='o', label='Activity Count')
    plt.xlabel('Date of Commit')
    plt.ylabel('Number of Matching "LatestActivity" Objects')
    plt.title('Activity Counts Per Commit Date and the Day Before')
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.legend()
    # Run the layout pass last so the rotated tick labels and the legend are
    # taken into account.
    plt.tight_layout()
    # Save the plot to a file
    image_file_path = 'activity_plot.png'
    plt.savefig(image_file_path)
    print(f"The plot has been saved to {image_file_path}")
    plt.show()
    0% Loading. Try again or attach a new file.
    You are about to add 0 people to the discussion. Proceed with caution.
    Please register or sign in to comment