import json
import os
from datetime import datetime, timedelta, timezone

import matplotlib.pyplot as plt
import pandas as pd
from git import Repo

# Configuration: remote repository, location of the local clone, and the
# repository-relative path of the file whose history is analysed
REPO_URL = "https://git.rwth-aachen.de/coscine/reporting/reporting-database.git"
LOCAL_REPO_PATH = "local_repo"
FILE_PATH = "General/users.json"

# An existing directory at LOCAL_REPO_PATH is reused as-is; a clone is made
# only when that directory is missing
needs_clone = not os.path.isdir(LOCAL_REPO_PATH)
if needs_clone:
    Repo.clone_from(url=REPO_URL, to_path=LOCAL_REPO_PATH)

# Handle on the local clone
repo = Repo(LOCAL_REPO_PATH)

# Every commit that touched FILE_PATH, materialised so it can be counted
commits = [*repo.iter_commits(paths=FILE_PATH)]

def get_previous_day(date_str):
    """Return the day before *date_str* as a 'YYYY-MM-DD' string.

    *date_str* must itself be parseable with the '%Y-%m-%d' format;
    ``datetime.strptime`` raises ``ValueError`` otherwise.
    """
    day_format = '%Y-%m-%d'
    one_day = timedelta(days=1)
    return (datetime.strptime(date_str, day_format) - one_day).strftime(day_format)

# (commit date, matching-user count) pairs, one per commit whose copy of the
# file could be read and parsed
date_counts = []

commitCount = len(commits)
iteration = 0

# Walk every commit that touched the file. The file content is read straight
# from the commit's tree instead of checking the commit out: HEAD and the
# working tree are never moved, so an interrupted run cannot leave the clone
# on a detached HEAD (which would truncate the history the next run sees),
# and nothing depends on the default branch being called 'main'.
for commit in commits:
    try:
        # Tree lookup raises KeyError when the path is absent from this commit
        blob = commit.tree / FILE_PATH
        # json.loads accepts bytes and detects the UTF encoding itself
        users = json.loads(blob.data_stream.read())
    except KeyError:
        print(f"The file {FILE_PATH} does not exist in commit {commit.hexsha}")
        continue
    except json.JSONDecodeError:
        print(f"JSON Decode Error for the file in commit {commit.hexsha}")
        continue

    # Commit day in UTC (YYYY-MM-DD) and the day before it
    commit_date = datetime.fromtimestamp(
        commit.committed_date, tz=timezone.utc
    ).strftime('%Y-%m-%d')
    prev_day = get_previous_day(commit_date)

    # Count users whose LatestActivity starts with either of the two days;
    # entries with a missing or empty LatestActivity are ignored
    active_days = (commit_date, prev_day)
    count = sum(
        1 for user in users
        if user.get("LatestActivity") and user["LatestActivity"].startswith(active_days)
    )

    date_counts.append((commit_date, count))

    # Progress counts only the commits that were processed successfully
    iteration += 1
    print(f"{iteration}/{commitCount} Commits")

# Without any data point there is nothing to export or plot; stop with a
# clear message instead of failing on the unpacking further down
if not date_counts:
    raise SystemExit(
        f"No readable version of {FILE_PATH} was found in the commit history; "
        "nothing to export or plot."
    )

# Sort chronologically; zero-padded ISO dates order correctly as plain strings
date_counts.sort(key=lambda pair: pair[0])

# Save the raw numbers as CSV (dates stay in their YYYY-MM-DD string form)
df = pd.DataFrame(date_counts, columns=['Commit Date', 'Matching Activity Count'])
csv_file_path = 'activity_counts.csv'
df.to_csv(csv_file_path, index=False)
print(f"The data has been saved to {csv_file_path}")

# Split the pairs into two series and convert the dates for a real time axis
dates, counts = zip(*date_counts)
dates = [datetime.strptime(date, '%Y-%m-%d') for date in dates]

# The style sheet only affects artists created after it is applied, so it
# has to be selected before the figure is created
plt.style.use('ggplot')

plt.figure(figsize=(10, 5))
plt.plot(dates, counts, marker='o', label='Activity Count')
plt.xlabel('Date of Commit')
plt.ylabel('Number of Matching "LatestActivity" Objects')
plt.title('Activity Counts Per Commit Date and the Day Before')
plt.xticks(rotation=45)
plt.grid(True)
plt.legend()
# Run the layout pass last so the rotated tick labels and legend are included
plt.tight_layout()

# Save the plot to a file, then show it interactively
image_file_path = 'activity_plot.png'
plt.savefig(image_file_path)
print(f"The plot has been saved to {image_file_path}")
plt.show()