File size: 7,004 Bytes
d129378
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import streamlit as st
from huggingface_hub import HfApi
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed

# Page title and wide layout are set here, ahead of every other Streamlit
# call in this script.
st.set_page_config(page_title="HF Contributions", layout="wide")
# Single module-level Hub client, created with default arguments and shared by
# the fetch helpers and the main page code below.
api = HfApi()

# Function to fetch commits for a repository (optimized)
def fetch_commits_for_repo(repo_id, repo_type, username, selected_year):
    """Return the dates of ``username``'s commits in a single public repo.

    Private and gated repositories are skipped. The function is best-effort:
    any error while talking to the Hub yields an empty list instead of
    propagating, so one bad repo cannot abort a whole scan.

    Args:
        repo_id: Repository identifier forwarded to ``api.repo_info`` and
            ``api.list_repo_commits``.
        repo_type: Repository type forwarded to the same two calls.
        username: Author to look for; compared case-insensitively.
        selected_year: Calendar year to keep. ``None`` disables the year
            filter and keeps commits from every year.

    Returns:
        A list with one ``datetime.date`` per matching commit (timezone
        information is dropped before the date is taken). Empty when the repo
        is private/gated, has no matching commits, or could not be read.
    """
    import logging  # local import so this block does not depend on file-level imports

    try:
        wanted = username.lower()

        def _is_wanted(author):
            # Authors are handled both as plain strings and as dicts that
            # carry the name under "user"; anything else never matches.
            if isinstance(author, str):
                return author.lower() == wanted
            if isinstance(author, dict):
                user = author.get("user")
                return isinstance(user, str) and user.lower() == wanted
            return False

        # Skip private/gated repos upfront
        repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type)
        if repo_info.private or getattr(repo_info, "gated", False):
            return []

        commit_dates = []
        for commit in api.list_repo_commits(repo_id=repo_id, repo_type=repo_type):
            if not any(_is_wanted(author) for author in (commit.authors or ())):
                continue
            # Parse the timestamp once and reuse it for both the year filter
            # and the resulting date.
            created = pd.to_datetime(commit.created_at)
            if selected_year is not None and created.year != selected_year:
                continue
            commit_dates.append(created.tz_localize(None).date())
        return commit_dates
    except Exception:
        # Deliberately best-effort: inaccessible or errored repos are skipped,
        # but leave a trace for debugging instead of failing silently.
        logging.getLogger(__name__).debug(
            "Skipping %s %r while collecting commits", repo_type, repo_id, exc_info=True
        )
        return []

# Function to get commit events for a user
def get_commit_events(username, kind=None, selected_year=None):
    """Gather commit dates and repository listings for ``username``.

    For each requested repo kind the author's repositories are listed through
    the module-level ``api`` client and their commits are fetched concurrently
    with ``fetch_commits_for_repo``. A failure while handling one kind is
    reported with ``st.warning`` and the remaining kinds are still processed.

    Args:
        username: Hub user or organisation whose repositories are scanned.
        kind: ``"model"``, ``"dataset"`` or ``"space"`` to scan a single kind;
            a falsy value scans all three. Any other value yields no repos.
        selected_year: Year forwarded unchanged to ``fetch_commits_for_repo``.

    Returns:
        A ``(DataFrame, list)`` tuple: the frame has a single ``"date"``
        column with one row per collected commit, and the list holds
        ``(item, kind)`` pairs for every repository that was listed.
    """
    # Maps each supported kind to the name of the client method that lists it.
    listers = (
        ("model", "list_models"),
        ("dataset", "list_datasets"),
        ("space", "list_spaces"),
    )
    requested_kinds = [kind] if kind else ["model", "dataset", "space"]

    collected_dates = []
    typed_items = []

    for repo_kind in requested_kinds:
        try:
            lister_name = None
            for known_kind, method_name in listers:
                if repo_kind == known_kind:
                    lister_name = method_name
                    break

            if lister_name is None:
                repos = []
            else:
                repos = list(getattr(api, lister_name)(author=username))

            for repo in repos:
                typed_items.append((repo, repo_kind))
            ids = [repo.id for repo in repos]

            # Fan the per-repo commit lookups out over a small thread pool and
            # merge the results in whatever order they finish.
            with ThreadPoolExecutor(max_workers=10) as pool:
                pending = [
                    pool.submit(fetch_commits_for_repo, rid, repo_kind, username, selected_year)
                    for rid in ids
                ]
                for finished in as_completed(pending):
                    collected_dates.extend(finished.result())
        except Exception as err:
            st.warning(f"Error fetching {repo_kind}s for {username}: {str(err)}")

    events = pd.DataFrame(collected_dates, columns=["date"])
    return events, typed_items

# Calendar heatmap function
def make_calendar_heatmap(df, title, year, color_palette="Greens"):
    """Draw a calendar heatmap of daily commit counts in Streamlit.

    Rows are weekdays (Monday first) and columns are Monday-aligned calendar
    weeks of ``year``. Cells of the first and last column that fall outside
    the year are masked and therefore left blank.

    Args:
        df: Frame with a ``"date"`` column holding one entry per commit. It is
            not modified. Dates outside ``year`` are ignored.
        title: Heading for the chart; also used (lower-cased) in the
            "nothing found" message.
        year: Calendar year to lay out.
        color_palette: Colormap name passed to ``sns.heatmap`` as ``cmap``.

    Returns:
        None. The figure is rendered with ``st.pyplot``; when ``df`` is empty
        an ``st.info`` message is shown instead.
    """
    if df.empty:
        st.info(f"No {title.lower()} found for {year}.")
        return

    start = pd.Timestamp(f"{year}-01-01")
    end = pd.Timestamp(f"{year}-12-31")
    all_days = pd.date_range(start=start, end=end)

    # Count commits per day on a derived Series so the caller's frame is left
    # untouched, then align to every day of the year. Reindexing fills idle
    # days with 0 and silently drops dates that are outside ``year``.
    daily_counts = pd.to_datetime(df["date"]).value_counts()
    daily_counts = daily_counts.reindex(all_days, fill_value=0)

    # Shift by the weekday of Jan 1 so that each column is a real Monday-to-
    # Sunday week rather than an arbitrary 7-day chunk counted from Jan 1.
    first_dow = start.dayofweek
    heatmap_data = pd.DataFrame({
        "count": daily_counts.to_numpy(),
        "dow": all_days.dayofweek,
        "week": ((all_days - start).days + first_dow) // 7,
    })
    pivot = heatmap_data.pivot(index="dow", columns="week", values="count")
    outside_year = pivot.isna()  # cells before Jan 1 / after Dec 31
    pivot = pivot.fillna(0)

    month_starts = pd.date_range(start, end, freq="MS")
    month_labels = [d.strftime("%b") for d in month_starts]
    month_positions = [((d - start).days + first_dow) // 7 for d in month_starts]

    fig, ax = plt.subplots(figsize=(12, 1.2))
    try:
        sns.heatmap(pivot, ax=ax, mask=outside_year, cmap=color_palette, linewidths=0.5,
                    linecolor="white", square=True, cbar=False,
                    yticklabels=["M", "T", "W", "T", "F", "S", "S"])
        ax.set_title(f"{title} ({year})", fontsize=12, pad=10)
        ax.set_xlabel("")
        ax.set_ylabel("")
        ax.set_xticks(month_positions)
        ax.set_xticklabels(month_labels, fontsize=8)
        ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=8)
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; without this the
        # figures pile up across Streamlit reruns.
        plt.close(fig)

# Sidebar
# Sidebar controls: choose the account (preset or free text) and the year.
# Leaves ``username`` and ``selected_year`` defined for the main page below.
with st.sidebar:
    st.title("👀 Contributor")
    username = st.selectbox(
        "Select or type a username",
        options=["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"],
        index=0
    )
    st.markdown("<div style='text-align: center; margin: 10px 0;'>OR</div>", unsafe_allow_html=True)
    # Give the input a real label and hide it visually instead of passing an
    # empty string, which Streamlit flags as an accessibility problem.
    custom = st.text_input(
        "Custom username",
        placeholder="Enter custom username/org",
        label_visibility="collapsed",
    )
    # A non-blank custom entry takes precedence over the preset selection.
    custom_username = custom.strip()
    if custom_username:
        username = custom_username
    # Newest year first, going back to 2018.
    year_options = list(range(datetime.now().year, 2017, -1))
    selected_year = st.selectbox("🗓️ Year", options=year_options)

# Main Content
# Main page: overall activity first, then one column per repository kind.
st.title("🤗 Hugging Face Contributions")
if username:
    with st.spinner("Fetching commit data..."):
        # Query the Hub once per repo kind and reuse the results for both the
        # combined view and the per-kind columns; this avoids fetching every
        # repository's commits twice and re-listing the repos for the totals.
        kind_results = {
            repo_kind: get_commit_events(username, kind=repo_kind, selected_year=selected_year)
            for repo_kind in ("model", "dataset", "space")
        }
        all_df = pd.DataFrame(
            [day for kind_df, _ in kind_results.values() for day in kind_df["date"]],
            columns=["date"],
        )
        all_items = [pair for _, kind_items in kind_results.values() for pair in kind_items]

        st.subheader(f"{username}'s Activity in {selected_year}")
        st.metric("Total Commits", len(all_df))
        make_calendar_heatmap(all_df, "All Commits", selected_year)

        # Updated Color Scheme Legend with five shades
        st.markdown("""
        <div style='text-align: center; margin-top: -10px; margin-bottom: 20px;'>
            <span style='font-size: 12px; margin-right: 10px;'>Less</span>
            <span style='display: inline-block; width: 15px; height: 15px; background-color: #f0f7f0; border: 1px solid #ccc;'></span>
            <span style='display: inline-block; width: 15px; height: 15px; background-color: #c6e0c6; border: 1px solid #ccc;'></span>
            <span style='display: inline-block; width: 15px; height: 15px; background-color: #77b577; border: 1px solid #ccc;'></span>
            <span style='display: inline-block; width: 15px; height: 15px; background-color: #2e6b2e; border: 1px solid #ccc;'></span>
            <span style='display: inline-block; width: 15px; height: 15px; background-color: #1a3c1a; border: 1px solid #ccc;'></span>
            <span style='font-size: 12px; margin-left: 10px;'>More</span>
        </div>
        """, unsafe_allow_html=True)

        # Metrics and heatmaps for each type
        col1, col2, col3 = st.columns(3)
        for col, kind, emoji, label in [
            (col1, "model", "🧠", "Models"),
            (col2, "dataset", "📦", "Datasets"),
            (col3, "space", "🚀", "Spaces")
        ]:
            with col:
                df_kind, kind_items = kind_results[kind]
                # The listing already fetched for this kind gives the repo
                # count; it is empty (0) when that listing failed.
                total = len(kind_items)
                st.metric(f"{emoji} {label}", total)
                st.metric(f"Commits in {selected_year}", len(df_kind))
                make_calendar_heatmap(df_kind, f"{label} Commits", selected_year)