# File size: 7,004 Bytes
# d129378
import streamlit as st
from huggingface_hub import HfApi
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
# Configure the page (browser tab title and wide layout) ahead of every other
# st.* call in this file.
st.set_page_config(page_title="HF Contributions", layout="wide")
# Single module-level Hub client shared by the fetch helpers and the page
# script below.
api = HfApi()
# Function to fetch commits for a repository (optimized)
def _author_name(author):
    """Return the lowercased author name for a commit author entry, or None.

    An entry is either a plain string or a dict carrying the name under the
    "user" key; anything else is ignored.
    """
    if isinstance(author, str):
        return author.lower()
    if isinstance(author, dict) and "user" in author:
        return author["user"].lower()
    return None


def fetch_commits_for_repo(repo_id, repo_type, username, selected_year):
    """Return the dates of one user's commits in a single Hub repository.

    Args:
        repo_id: Repository identifier passed to ``api.repo_info`` and
            ``api.list_repo_commits``.
        repo_type: Repository type forwarded to the same two calls.
        username: Author to look for; compared case-insensitively against
            every entry of ``commit.authors``.
        selected_year: Year a commit must fall in to be kept. ``None``
            disables the year filter (previously ``None`` matched nothing,
            so every commit was silently dropped).

    Returns:
        A list of ``datetime.date`` objects, one per matching commit. The
        list is empty when the repository is private or gated, or when any
        call fails -- this is a deliberate best-effort so one bad repository
        does not abort the whole scan.
    """
    try:
        # Skip private/gated repos upfront
        repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type)
        if repo_info.private or getattr(repo_info, "gated", False):
            return []

        wanted = username.lower()  # hoisted: invariant across all commits
        commit_dates = []
        for commit in api.list_repo_commits(repo_id=repo_id, repo_type=repo_type):
            # Parse the timestamp once; both the year test and the returned
            # date use the timestamp's own wall-clock time.
            created = pd.to_datetime(commit.created_at)
            if selected_year is not None and created.year != selected_year:
                continue
            if any(_author_name(author) == wanted for author in commit.authors):
                commit_dates.append(created.tz_localize(None).date())
        return commit_dates
    except Exception:
        return []  # Silently skip inaccessible or errored repos
# Function to get commit events for a user
def get_commit_events(username, kind=None, selected_year=None):
    """Collect a user's commit dates across their Hub repositories.

    Args:
        username: Account whose repositories are listed (as ``author``) and
            whose commits are counted.
        kind: "model", "dataset" or "space" to scan a single repository
            type; a falsy value scans all three in that order. Any other
            value yields no repositories.
        selected_year: Forwarded unchanged to ``fetch_commits_for_repo``.

    Returns:
        A ``(frame, items)`` tuple: ``frame`` is a DataFrame with a single
        "date" column holding one row per commit, and ``items`` is a list of
        ``(repo_item, kind)`` pairs for every repository that was listed.
        A failure while handling one kind is reported with ``st.warning``
        and does not stop the remaining kinds.
    """
    # Name of the HfApi listing method for each supported kind
    listing_methods = (
        ("model", "list_models"),
        ("dataset", "list_datasets"),
        ("space", "list_spaces"),
    )
    requested = [kind] if kind else ["model", "dataset", "space"]

    dates = []
    typed_items = []
    for k in requested:
        try:
            method_name = next(
                (name for key, name in listing_methods if k == key), None
            )
            if method_name is None:
                items = []
            else:
                items = list(getattr(api, method_name)(author=username))

            typed_items.extend((item, k) for item in items)

            # Parallel fetch commits
            with ThreadPoolExecutor(max_workers=10) as executor:
                pending = [
                    executor.submit(
                        fetch_commits_for_repo, item.id, k, username, selected_year
                    )
                    for item in items
                ]
                for finished in as_completed(pending):
                    dates.extend(finished.result())
        except Exception as e:
            st.warning(f"Error fetching {k}s for {username}: {str(e)}")

    frame = pd.DataFrame(dates, columns=["date"])
    return frame, typed_items
# Calendar heatmap function
def _calendar_grid(days, start, end):
    """Lay per-day commit counts out as a weekday-by-week grid.

    Args:
        days: Series of midnight-normalized timestamps, one entry per
            commit, all within ``[start, end]``.
        start: First day shown (January 1st of the displayed year).
        end: Last day shown (December 31st of the displayed year).

    Returns:
        ``(grid, month_positions, month_labels)``. ``grid`` has one row per
        weekday (0 = Monday) and one column per Monday-aligned week; cells
        that fall outside ``[start, end]`` are NaN. ``month_positions`` are
        the week columns containing the first day of each month and
        ``month_labels`` the matching abbreviated month names.
    """
    all_days = pd.date_range(start=start, end=end)
    # reindex fills days without commits with 0 and cannot raise on gaps
    counts = days.value_counts().reindex(all_days, fill_value=0)

    # Shift by the weekday of Jan 1 so that every column is one real
    # Monday-to-Sunday week instead of a 7-day run starting on Jan 1.
    lead = start.dayofweek
    week = ((all_days - start).days + lead) // 7

    grid = pd.DataFrame(
        {
            "dow": all_days.dayofweek.to_numpy(),
            "week": week.to_numpy(),
            "count": counts.to_numpy(),
        }
    ).pivot(index="dow", columns="week", values="count")

    month_starts = pd.date_range(start, end, freq="MS")
    month_positions = (((month_starts - start).days + lead) // 7).tolist()
    month_labels = [d.strftime("%b") for d in month_starts]
    return grid, month_positions, month_labels


def make_calendar_heatmap(df, title, year, color_palette="Greens"):
    """Render a one-year, GitHub-style commit calendar with Streamlit.

    Args:
        df: DataFrame with a "date" column holding one row per commit. It is
            read only; no column is added to the caller's frame.
        title: Heading for the chart; also used (lowercased) in the
            "nothing found" message.
        year: Year to display.
        color_palette: Colormap name passed to ``sns.heatmap``.

    Returns:
        None. Shows an info message instead of a chart when ``df`` is empty
        or holds no date inside ``year``.
    """
    if df.empty:
        st.info(f"No {title.lower()} found for {year}.")
        return

    start = pd.Timestamp(f"{year}-01-01")
    end = pd.Timestamp(f"{year}-12-31")

    # Work on a derived Series so the caller's DataFrame is left untouched,
    # and drop dates outside the displayed year.
    days = pd.to_datetime(df["date"]).dt.normalize()
    days = days[days.between(start, end)]
    if days.empty:
        st.info(f"No {title.lower()} found for {year}.")
        return

    grid, month_positions, month_labels = _calendar_grid(days, start, end)

    fig, ax = plt.subplots(figsize=(12, 1.2))
    try:
        # NaN cells (padding before Jan 1 / after Dec 31) are left blank by
        # seaborn, so they are not mistaken for zero-commit days.
        sns.heatmap(grid, ax=ax, cmap=color_palette, linewidths=0.5, linecolor="white", square=True, cbar=False,
                    yticklabels=["M", "T", "W", "T", "F", "S", "S"])
        ax.set_title(f"{title} ({year})", fontsize=12, pad=10)
        ax.set_xlabel("")
        ax.set_ylabel("")
        ax.set_xticks(month_positions)
        ax.set_xticklabels(month_labels, fontsize=8)
        ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=8)
        st.pyplot(fig)
    finally:
        # Release the figure; otherwise figures accumulate on every rerun
        plt.close(fig)
# Sidebar
# Sidebar controls: pick a preset account or type a custom one, then a year.
# Defines the module-level names `username` and `selected_year` read by the
# main page below.
with st.sidebar:
    st.title("🤗 Contributor")
    username = st.selectbox(
        "Select or type a username",
        options=["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"],
        index=0
    )
    st.markdown("<div style='text-align: center; margin: 10px 0;'>OR</div>", unsafe_allow_html=True)
    # A real (hidden) label replaces the empty string for accessibility.
    custom = st.text_input(
        "Custom username or organization",
        placeholder="Enter custom username/org",
        label_visibility="collapsed",
    )
    # A non-blank custom entry overrides the preset selection
    custom_name = custom.strip()
    if custom_name:
        username = custom_name
    # Current year first, counting down to 2018
    year_options = list(range(datetime.now().year, 2017, -1))
    # NOTE(review): emoji restored from mis-decoded bytes; confirm the
    # calendar glyph is the intended one.
    selected_year = st.selectbox("🗓️ Year", options=year_options)
# Main Content
# Page body: overall commit calendar for the chosen user and year, followed by
# one column per repository type with its repo count, commit count and calendar.
st.title("🤗 Hugging Face Contributions")
if username:
    # (kind, emoji, label) for each repository type, in display order.
    # NOTE(review): emoji restored from mis-decoded bytes; the rocket for
    # Spaces is a best guess -- confirm.
    sections = [
        ("model", "🧠", "Models"),
        ("dataset", "📦", "Datasets"),
        ("space", "🚀", "Spaces"),
    ]
    with st.spinner("Fetching commit data..."):
        # Fetch each repository type exactly once. The combined view and the
        # per-type totals below reuse these results instead of listing the
        # repos and their commits again.
        fetched = {
            kind: get_commit_events(username, kind=kind, selected_year=selected_year)
            for kind, _, _ in sections
        }
    all_items = [pair for _, items in fetched.values() for pair in items]
    all_df = pd.DataFrame(
        [day for df_part, _ in fetched.values() for day in df_part["date"]],
        columns=["date"],
    )
    st.subheader(f"{username}'s Activity in {selected_year}")
    st.metric("Total Commits", len(all_df))
    make_calendar_heatmap(all_df, "All Commits", selected_year)
    # Updated Color Scheme Legend with five shades
    st.markdown("""
<div style='text-align: center; margin-top: -10px; margin-bottom: 20px;'>
<span style='font-size: 12px; margin-right: 10px;'>Less</span>
<span style='display: inline-block; width: 15px; height: 15px; background-color: #f0f7f0; border: 1px solid #ccc;'></span>
<span style='display: inline-block; width: 15px; height: 15px; background-color: #c6e0c6; border: 1px solid #ccc;'></span>
<span style='display: inline-block; width: 15px; height: 15px; background-color: #77b577; border: 1px solid #ccc;'></span>
<span style='display: inline-block; width: 15px; height: 15px; background-color: #2e6b2e; border: 1px solid #ccc;'></span>
<span style='display: inline-block; width: 15px; height: 15px; background-color: #1a3c1a; border: 1px solid #ccc;'></span>
<span style='font-size: 12px; margin-left: 10px;'>More</span>
</div>
""", unsafe_allow_html=True)
    # Metrics and heatmaps for each type
    col1, col2, col3 = st.columns(3)
    for col, (kind, emoji, label) in zip((col1, col2, col3), sections):
        with col:
            df_kind, items_kind = fetched[kind]
            # Repo count comes from the listing already made during the fetch
            st.metric(f"{emoji} {label}", len(items_kind))
            st.metric(f"Commits in {selected_year}", len(df_kind))
            make_calendar_heatmap(df_kind, f"{label} Commits", selected_year)