Commit · 77f290b
1 Parent(s): cd14e4d
What did I do before then?
Files changed:
- .env +1 -0
- .gitignore +2 -0
- data/fetch_arxiv.py +84 -0
- data/fetch_miccai.py +60 -0
- data/fetch_nature.py +68 -0
- data/fetch_processed.py +30 -0
- evaluations/__pycache__/documentation.cpython-310.pyc +0 -0
- evaluations/__pycache__/license.cpython-310.pyc +0 -0
- evaluations/__pycache__/repo_evaluations.cpython-310.pyc +0 -0
- evaluations/__pycache__/requirements.cpython-310.pyc +0 -0
- evaluations/__pycache__/training.cpython-310.pyc +0 -0
- evaluations/__pycache__/utils.cpython-310.pyc +0 -0
- evaluations/__pycache__/validating.cpython-310.pyc +0 -0
- evaluations/__pycache__/weights.cpython-310.pyc +0 -0
- evaluations/documentation.py +113 -0
- evaluations/license.py +29 -0
- evaluations/repo_evaluations.py +150 -0
- evaluations/requirements.py +24 -0
- evaluations/training.py +35 -0
- evaluations/utils.py +131 -0
- evaluations/validating.py +36 -0
- evaluations/weights.py +52 -0
- midl.py +10 -0
- plotting/paper_plots.py +89 -0
- plotting/result_plots.py +141 -0
.env
ADDED
@@ -0,0 +1 @@
githubToken="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"
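For context: this token is read back via python-dotenv in midl.py below; a minimal sketch of that pattern (not part of the commit):

import os
from dotenv import load_dotenv

load_dotenv()                      # reads key=value pairs from .env into the environment
token = os.getenv("githubToken")   # the key defined above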
.gitignore
ADDED
@@ -0,0 +1,2 @@
data/*.csv
data/*.zip
data/fetch_arxiv.py
ADDED
@@ -0,0 +1,84 @@
import pandas as pd
import numpy as np
import requests
import pdfplumber
import re
from multiprocessing import Pool, cpu_count
from functools import partial
import os

# Function to process each arXiv article URL
def process_arxiv_paper(article_link):
    try:
        article_text = requests.get(article_link).text
        title_pattern = r'<h1 class="title mathjax"><span class="descriptor">Title:</span>(.*?)</h1>'
        title = re.findall(title_pattern, article_text, re.DOTALL)[0]
        year_pattern = r'\[Submitted on(?:.*?(\d{1,2} \w+ \d{4}))(?:.*?)]'
        year = re.findall(year_pattern, article_text)[0].split(" ")[-1]

        article_id = article_link.split("/")[-1]
        pdf_url = f'https://arxiv.org/pdf/{article_id}'
        response = requests.get(pdf_url)
        if response.status_code == 200:
            with open(f"{article_id}.pdf", 'wb') as file:
                file.write(response.content)
        if (response.status_code == 404):
            print("Failed to fetch pdf")
            return None

        urls = []
        link_pattern = r'(https?://(?:www\.)?github\.com[^\s]+)'
        with pdfplumber.open(f"{article_id}.pdf") as pdf:
            # Loop through all pages
            for page_num, page in enumerate(pdf.pages):
                # Extract text from the page
                text = page.extract_text()

                # Search for GitHub links in the page text
                found_urls = re.findall(link_pattern, text)
                urls.extend(found_urls)
        os.remove(f"{article_id}.pdf")
        # Keep only links that point at a specific repository (not framework pages)
        urls = [url for url in urls if ("pytorch" not in url) & ("fchollet" not in url) & (len(url.split("github.com")[1].split("/")) >= 3)]
        print(urls)
        url = urls[0] if len(urls) > 0 else ""

        # Return a dictionary of the results
        return {"venue": "arXiv", "title": title, "url": url, "year": year}

    except Exception as e:
        print(f"Error processing {article_link}: {e}")
        return None

# Set debug mode
debug = False
# Fetch all article links for each results page
all_year_urls = []

page_size = 50
search_queries = ['https://arxiv.org/search/advanced?advanced=1&terms-0-operator=AND&terms-0-term=deep+learning&terms-0-field=abstract&terms-1-operator=AND&terms-1-term=cancer&terms-1-field=abstract&classification-physics_archives=all&classification-include_cross_list=include&date-year=&date-filter_by=date_range&date-from_date=2018&date-to_date=2024&date-date_type=submitted_date&abstracts=show&size=50&order=-announced_date_first&start=']
articles = []
for search_query in search_queries:
    page = 0
    while (page <= 100):
        start_idx = page_size * page
        url = f"{search_query}{start_idx}"
        current_page = requests.get(url).text
        pattern = r'<p class="list-title is-inline-block">.*?<a href="([^"]+)"'
        matches = re.findall(pattern, current_page)
        if (len(matches) == 0):
            break
        else:
            page += 1

        articles += matches
articles = np.unique(articles)

# Parallel processing using Pool
if __name__ == "__main__":
    with Pool(processes=4) as pool:
        results = pool.starmap(process_arxiv_paper, [[article] for article in articles])

    # Filter out any None results due to errors
    results = [result for result in results if result is not None]

    # Convert the list of dictionaries to a DataFrame
    arxiv = pd.DataFrame(results)
    arxiv.to_csv('arxiv.csv')
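A quick sanity check of the GitHub-link filter above; a minimal sketch with made-up sample text:

import re

link_pattern = r'(https?://(?:www\.)?github\.com[^\s]+)'
sample = "Code: https://github.com/user/repo and https://github.com/pytorch/pytorch"
found = re.findall(link_pattern, sample)
# The post-filter drops framework links and links without a user/repo path
kept = [u for u in found if ("pytorch" not in u) & (len(u.split("github.com")[1].split("/")) >= 3)]
print(kept)  # ['https://github.com/user/repo']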
data/fetch_miccai.py
ADDED
@@ -0,0 +1,60 @@
import pandas as pd
import requests
import re
from multiprocessing import Pool, cpu_count
from functools import partial

# Function to process each URL
def process_paper(year, url):
    try:
        paper_page = requests.get(url).text

        # Find title
        title_pattern = r'<title>(.*?)\s*</title>'
        title_match = re.search(title_pattern, paper_page, re.DOTALL)
        title = title_match.group(1)

        # Find the code repository link
        code_repo_pattern = r'<h1 id="code-id">.*?</h1>\s*<p><a href="(.*?)">'
        code_repo_match = re.search(code_repo_pattern, paper_page, re.DOTALL)
        code_repo_link = code_repo_match.group(1) if code_repo_match else ""

        # Find the dataset information
        dataset_pattern = r'<h1 id="dataset-id">.*?</h1>\s*<p>(.*?)\s*<br />'
        dataset_match = re.search(dataset_pattern, paper_page, re.DOTALL)
        dataset_info = "Yes" if dataset_match else "No"

        # Return a dictionary of the results
        return {"title": title, "url": code_repo_link, "year": year, "public": dataset_info}

    except Exception as e:
        print(f"Error processing {url}: {e}")
        return None

current_year = 2024  # Update with the current year
MICCAI_pages = ["https://miccai2021.org/openaccess/paperlinks/", "https://conferences.miccai.org/2022/papers/", "https://conferences.miccai.org/2023/papers/"]
MICCAI_root = ["https://miccai2021.org/openaccess/paperlinks/", "https://conferences.miccai.org", "https://conferences.miccai.org"]
years = [2021, 2022, 2023]
# Set debug mode
debug = False

# Fetch all URLs for each year
all_year_urls = []
for i in range(len(MICCAI_pages)):
    year_page = requests.get(MICCAI_pages[i]).text
    print(year_page)
    urls = [MICCAI_root[i] + line.split('href="')[1].split('"')[0] for line in year_page.split('\n') if "&bullet" in line]
    all_year_urls.extend([(years[i], url) for url in urls])

print(all_year_urls)
# Parallel processing using Pool
# if __name__ == "__main__":
#     with Pool(processes=12) as pool:  # Use 12 processes
#         results = pool.starmap(process_paper, all_year_urls)

#     # Filter out any None results due to errors
#     results = [result for result in results if result is not None]

#     miccai = pd.DataFrame(results)
#     # miccai = pd.DataFrame( OrderedDict( { 'title': pd.Series(a), 'b': pd.Series(b), 'c': pd.Series(c) } ) )
#     miccai.to_csv('miccai.csv')
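To illustrate the href extraction in the list comprehension above (the HTML line is made up):

line = '&bullet; <a href="/2023/papers/0001-Paper.html">Example Paper</a>'
root = "https://conferences.miccai.org"
url = root + line.split('href="')[1].split('"')[0]
print(url)  # https://conferences.miccai.org/2023/papers/0001-Paper.html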
data/fetch_nature.py
ADDED
@@ -0,0 +1,68 @@
import pandas as pd
import numpy as np
import requests
import re
from multiprocessing import Pool, cpu_count
from functools import partial

# Function to process each article
def process_nature_paper(article_link):
    try:
        url = f'https://www.nature.com/articles/{article_link}'
        article_text = requests.get(url).text

        pattern = r'Code availability.*?<a href="([^"]+)"'
        matches = re.findall(pattern, article_text, re.DOTALL)
        urls = [link for link in matches if "github" in link]
        url = urls[0] if len(urls) > 0 else (matches[0] if len(matches) > 0 else "")

        year = re.findall(r'datetime="(\d{4})', article_text)[0]
        # Find title
        title_pattern = r'<title>(.*?)\s*</title>'
        title = re.findall(title_pattern, article_text, re.DOTALL)[0]

        pattern = r'Data availability.*?<a href="([^"]+)"'
        matches = re.findall(pattern, article_text, re.DOTALL)
        dataset_info = "Yes" if (len(matches) > 0) else "No"

        # Return a dictionary of the results
        return {"title": title, "url": url, "year": year, "public": dataset_info, "pdf": ""}

    except Exception as e:
        print(f"Error processing {article_link}: {e}")
        return None

# Set debug mode
debug = False

# Fetch all article IDs from each search results page
all_year_urls = []
search_queries = ["https://www.nature.com/search?q=deep+learning&order=relevance&journal=commsmed%2Cnm&page=", "https://www.nature.com/search?q=AI&order=relevance&journal=commsmed%2Cnm&page="]
articles = []
for search_query in search_queries:
    page = 1
    while (page <= 100):
        url = f"{search_query}{page}"
        current_page = requests.get(url).text
        pattern = r'href="/articles/([^"]+)"'
        matches = re.findall(pattern, current_page)
        if (len(matches) == 0):
            break
        else:
            page += 1

        articles += matches
articles = np.unique(articles)

# Parallel processing using Pool
if __name__ == "__main__":
    with Pool(processes=12) as pool:
        results = pool.starmap(process_nature_paper, [[article] for article in articles])

    # Filter out any None results due to errors
    results = [result for result in results if result is not None]

    # Convert the list of dictionaries to a DataFrame
    nature = pd.DataFrame(results)
    nature = nature[['title', 'year', 'pdf', 'url', 'public']]
    nature.to_csv('nature.csv')
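Assuming the script ran to completion, the output can be inspected like this (a sketch, not part of the commit):

import pandas as pd

nature = pd.read_csv('nature.csv', index_col=0)
print(nature.columns.tolist())  # ['title', 'year', 'pdf', 'url', 'public']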
data/fetch_processed.py
ADDED
@@ -0,0 +1,30 @@
import csv
import numpy as np
import pandas as pd
import re

current_year = 2024
MIDL_years = range(2018, current_year + 1, 1)
custom_order = ["MICCAI", "MIDL", "Nature", "arXiv"]

for venue in custom_order:
    df = pd.read_excel("https://docs.google.com/spreadsheets/d/e/2PACX-1vQjpsSYcEcYUVB-88bCQ01UfQf0z9m16ax7p1ft03G68Nr-DdXHpPt-xOFSrXFj1N49AjK5nYhmKBfo/pub?output=xlsx", sheet_name=venue)
    df.to_csv(f'data/{venue}.csv', sep="\t")

# Store all evaluations here
paper_dump = pd.DataFrame()
# Official color codes for conferences
MIDL_colors = ["#506775", "#4E7268", "#5170B1", "#004B5A", "#268BCC", "#B18630", "#AA0000"]

for venue in custom_order:
    with open(f'data/{venue}.csv') as file:
        tsv_file = csv.reader(file, delimiter="\t")
        for row in tsv_file:
            if (row[0] == ""):
                continue

            if (row[1] == ""):
                continue

            paper_dump = pd.concat([paper_dump, pd.DataFrame({"venue": venue, "title": [row[1]], "year": [row[2]], "pdf": [row[3]], "url": [row[4]], "public": [row[5]], "dependencies": [row[6]], "training": [row[7]], "evaluation": [row[8]], "weights": [row[9]], "readme": [row[10]], "license": [row[11]]})], ignore_index=True)
paper_dump.to_csv('data/dump.csv', sep="\t")
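The resulting dump is what the evaluation scripts load; a minimal sketch:

import pandas as pd

dump = pd.read_csv('data/dump.csv', sep="\t")
print(dump[["venue", "title", "year", "url"]].head())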
evaluations/__pycache__/documentation.cpython-310.pyc
ADDED
Binary file (3.11 kB)
evaluations/__pycache__/license.cpython-310.pyc
ADDED
Binary file (1.27 kB)
evaluations/__pycache__/repo_evaluations.cpython-310.pyc
ADDED
Binary file (4.63 kB)
evaluations/__pycache__/requirements.cpython-310.pyc
ADDED
Binary file (1.56 kB)
evaluations/__pycache__/training.cpython-310.pyc
ADDED
Binary file (1.17 kB)
evaluations/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (4.01 kB)
evaluations/__pycache__/validating.cpython-310.pyc
ADDED
Binary file (1.23 kB)
evaluations/__pycache__/weights.cpython-310.pyc
ADDED
Binary file (2.41 kB)
evaluations/documentation.py
ADDED
@@ -0,0 +1,113 @@
from .utils import log
import re
import numpy as np

def evaluate(verbose, llm, zip, readme):
    log(verbose, "LOG", "\nEvaluating code documentation...")
    overall = "No"

    code_to_comment_ratio = get_code_to_comment_ratio(zip)
    log(verbose, "LOG", f"Your python scripts have a comment-to-code ratio of {np.round(code_to_comment_ratio, 2)}%.")

    if (readme):
        non_empty_rows = [row for row in readme.split("\n") if row != ""]
        if (len(non_empty_rows) < 5):
            log(verbose, "WARNING", "Readme file has very few lines")
            return overall

        if (count_code_lines(non_empty_rows) > 5):
            log(verbose, "LOG", "Readme file contains python examples.")
            overall = "Yes"
            return overall

        if (llm):
            # NOTE: the prompt is only constructed here; no LLM call is made in this version.
            prompt = f'{readme}\n \
                Is this README file enough to find what \
                package dependencies you need to install and how to train \
                and evaluate the proposed model? Please strictly \
                answer yes or no.\n\nA:'

        manual_fail = False
        if (len(re.findall("train", readme, re.IGNORECASE)) == 0):
            log(verbose, "ERROR", "Readme file missing training information")
            overall = "No"
        if ((len(re.findall("demo", readme, re.IGNORECASE)) == 0) | (len(re.findall("evaluat", readme, re.IGNORECASE)) == 0)):
            log(verbose, "ERROR", "Readme file missing testing information")
            overall = "No"

        if (len(re.findall("example", readme, re.IGNORECASE)) > 0):
            log(verbose, "LOG", "Readme file contains links to examples")
            overall = "Yes"

        if ((len(re.findall("package", readme, re.IGNORECASE)) == 0) & \
            (len(re.findall("dependenc", readme, re.IGNORECASE)) == 0) & \
            (len(re.findall("requirement", readme, re.IGNORECASE)) == 0)):
            log(verbose, "ERROR", "Readme file missing information about package dependencies")
            overall = "No"

    if (overall == "No"):
        log(verbose, "ERROR", "Found no useful information in README file.")
    return overall

def count_comment_lines(lines):
    # Initialize counters
    single_line_comments = 0
    multi_line_comments = 0
    in_multiline_comment = False

    for line in lines:
        stripped_line = line.strip()

        # Check for single-line comments
        if stripped_line.startswith('#'):
            single_line_comments += 1

        # Check for multi-line comment (docstring) start or end
        if stripped_line.startswith('"""') or stripped_line.startswith("'''"):
            if not in_multiline_comment:
                # Starting a new multi-line comment
                in_multiline_comment = True
                multi_line_comments += 1  # Count the start line itself
            else:
                # Ending an existing multi-line comment
                in_multiline_comment = False
                multi_line_comments += 1  # Count the end line itself
        elif in_multiline_comment:
            # Continue counting lines within a multi-line comment
            multi_line_comments += 1

    return single_line_comments, multi_line_comments

def get_code_to_comment_ratio(zip):
    python_files = [file_path for file_path in zip.namelist() if (file_path.endswith(".py") | file_path.endswith(".ipynb"))]
    code_line_count = 0
    comment_line_count = 0
    for file in python_files:
        file_lines = zip.open(file).read().decode("utf-8").split('\n')
        sl_comm, ml_comm = count_comment_lines(file_lines)
        comment_line_count += sl_comm + ml_comm
        code_line_count += len(file_lines) - (sl_comm + ml_comm)
    # Guard against division by zero for repositories without Python code
    if (code_line_count == 0):
        return 0.0
    code_to_comment_ratio = 100 * comment_line_count / code_line_count

    return code_to_comment_ratio

def count_code_lines(lines):
    is_code_snippet = False
    code_line_count = 0

    for line in lines:
        stripped_line = line.strip()

        if stripped_line.startswith('```'):
            if not is_code_snippet:
                is_code_snippet = True
                code_line_count += 1
            else:
                is_code_snippet = False
                code_line_count += 1
        elif is_code_snippet:
            code_line_count += 1

    return int(code_line_count / 2)
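A toy check of count_code_lines (README content made up): the two fences plus the two command lines give four counted lines, which the final halving reduces to 2.

toy_readme = [
    "# Demo",
    "```",
    "pip install -r requirements.txt",
    "python train.py",
    "```",
]
print(count_code_lines(toy_readme))  # 2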
evaluations/license.py
ADDED
@@ -0,0 +1,29 @@
from .utils import log, model_predict
import re

def evaluate(verbose, llm, zip, readme):
    log(verbose, "LOG", "\nEvaluating repository licensing...")
    overall = "No"
    license_files = [license for license in zip.namelist() if ((("LICENSE" in license) | ("license" in license)) & (len(license.split("/")) == 2))]
    if (len(license_files) > 0):
        license = zip.open(license_files[0]).read().decode("utf-8")
        ans = [row for row in license.split("\n") if row != ""]

        if (llm):
            license = license[:50]
            # NOTE: model_predict expects the inference client as its first argument (see utils.py)
            prompt = f"Q: {license}. This was an excerpt from a license \
                file. Do you know the name of this license?"
            ans = model_predict(prompt)
            log(verbose, "LOG", f"Found license: {ans}")
        else:
            log(verbose, "LOG", f"Found license file: {license_files[0]}")

        overall = "Yes"

    if (readme):
        if ("License" in readme):
            log(verbose, "LOG", "License found in README.")
            overall = "Yes"

    if (overall == "No"):
        log(verbose, "ERROR", "LICENSE file not found.")
    return overall
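The top-level filter above behaves like this (archive names are made up):

names = ["repo-main/", "repo-main/LICENSE", "repo-main/src/license.txt"]
print([n for n in names if ((("LICENSE" in n) | ("license" in n)) & (len(n.split("/")) == 2))])
# ['repo-main/LICENSE']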
evaluations/repo_evaluations.py
ADDED
@@ -0,0 +1,150 @@
import pandas as pd
import os
from evaluations import documentation, requirements, training, validating, license, weights
from evaluations.utils import *
import zipfile
import numpy as np
from huggingface_hub import InferenceClient

API_URL = "https://api-inference.huggingface.co/models/openlm-research/open_llama_3b_v2"
headers = {"Authorization": "Bearer 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", "x-wait-for-model": "true"}

client = InferenceClient(
    "meta-llama/Llama-3.1-8B-Instruct",
    token="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",
)

def init_llm(verbose):
    log(verbose, "LOG", "Initializing LLM...")

def evaluate(llm, verbose, repo_url, title=None, year=None):
    repo_name = "data/repo.zip"
    token = os.getenv("githubToken")
    # token = userdata.get('githubToken')

    if (llm):
        init_llm(verbose)
    else:
        log(verbose, "LOG", "No LLM will be used for the evaluation.")

    results = {"pred_live": "Yes", "pred_dependencies": None, "pred_training": None, "pred_evaluation": None, "pred_weights": None, "pred_readme": None, "pred_license": None, "pred_stars": None, "pred_citations": None, "pred_valid": False}

    try:
        if (get_api_link(repo_url) != ""):
            results["pred_valid"] = True
            # Keep the zip path in repo_name; use a separate variable for the GitHub repository name
            username, gh_repo_name = decompose_url(repo_url)
            log(verbose, "LOG", f"Fetching github repository: https://github.com/{username}/{gh_repo_name}")

        fetch_repo(verbose, repo_url, repo_name, token)

        if ((title != None) & (year != None) & (title != "") & (year != "")):
            res = fetch_openalex(verbose, title, year)
            if (res != None):
                res = res["results"]
                if (len(res) > 0):
                    res = res[0]
                    results["pred_citations"] = res["cited_by_count"]

        if (not(os.path.exists(repo_name))):
            results["pred_live"] = "No"
            return results

        zip = zipfile.ZipFile(repo_name)
        readme = fetch_readme(zip)
        results["pred_stars"] = fetch_repo_stars(verbose, repo_url, token)

        if (len(zip.namelist()) <= 2):
            log(verbose, "LOG", "Empty repository")
            results["pred_live"] = "No"
            results["pred_training"] = "No"
            results["pred_evaluation"] = "No"
            results["pred_weights"] = "No"
            results["pred_dependencies"] = "No"
        else:
            results["pred_dependencies"] = requirements.evaluate(verbose, llm, zip, readme)
            results["pred_training"] = training.evaluate(verbose, llm, zip, readme)
            results["pred_evaluation"] = validating.evaluate(verbose, llm, zip, readme)
            results["pred_weights"] = weights.evaluate(verbose, llm, zip, readme)
            results["pred_readme"] = documentation.evaluate(verbose, llm, zip, readme)
            results["pred_codetocomment"] = documentation.get_code_to_comment_ratio(zip)
            results["pred_license"] = license.evaluate(verbose, llm, zip, readme)

        return results
    except Exception as e:
        log(verbose, "ERROR", "Evaluating repository failed: " + str(e))
        results["pred_live"] = "No"
        return results

def full_evaluations():
    paper_dump = pd.read_csv("data/dump.csv", sep="\t")
    full_results = []

    nth = 1
    for idx, row in paper_dump.iterrows():
        if (idx % nth != 0):
            continue

        if (row["url"] == ""):
            continue

        print(str(int(100 * idx / paper_dump["title"].count())) + "% done")
        result = evaluate(None, False, row["url"], row["title"], row["year"])
        for column in result.keys():
            row[column] = result[column]

        full_results.append(row)

def midl_evaluations():
    compare_to_gt = True
    paper_dump = pd.read_csv("data/dump.csv", sep="\t")
    verbose = 1

    eval_readme = []
    eval_training = []
    eval_evaluating = []
    eval_licensing = []
    eval_weights = []
    eval_dependencies = []
    full_results = []
    for idx, row in paper_dump.iterrows():
        if (row["venue"] != "MIDL"):
            continue

        if (row["year"] == 2024):
            continue

        if (row["url"] == ""):
            continue

        print(f"\nEvaluating {idx+1} out of {len(paper_dump.index)} papers...")
        print(f'Paper title - "{row["title"]}" ({row["year"]})')
        print(f'Repository link - {row["url"]}')
        result = evaluate(None, verbose, row["url"])
        for column in result.keys():
            row[column] = result[column]
        full_results.append(row)
        if (compare_to_gt):
            print("\nSummary:")
            if ((row["pred_dependencies"] is not None) & (row["dependencies"] != "")):
                eval_dependencies.append(row["pred_dependencies"] == row["dependencies"])
                print(f"Dependencies acc. - {row['pred_dependencies']} (GT:{row['dependencies']}) / {int(100 * np.mean(eval_dependencies))}%")
            if ((row["pred_training"] is not None) & (row["training"] != "")):
                eval_training.append(row["training"] == row["pred_training"])
                print(f"Training acc. - {row['pred_training']} (GT:{row['training']}) / {int(100 * np.mean(eval_training))}%")
            if ((row["pred_evaluation"] is not None) & (row["evaluation"] != "")):
                eval_evaluating.append(row["evaluation"] == row["pred_evaluation"])
                print(f"Evaluating acc. - {row['pred_evaluation']} (GT:{row['evaluation']}) / {int(100 * np.mean(eval_evaluating))}%")
            if ((row["pred_weights"] is not None) & (row["weights"] != "")):
                eval_weights.append(row["weights"] == row["pred_weights"])
                print(f"Weights acc. - {row['pred_weights']} (GT:{row['weights']}) / {int(100 * np.mean(eval_weights))}%")
            if ((row["pred_readme"] is not None) & (row["readme"] != "")):
                eval_readme.append(row["readme"] == row["pred_readme"])
                print(f"README acc. - {row['pred_readme']} (GT:{row['readme']}) / {int(100 * np.mean(eval_readme))}%")
            if ((row["pred_license"] is not None) & (row["license"] != "")):
                eval_licensing.append(("No" if row["license"] == "No" else "Yes") == row["pred_license"])
                print(f"LICENSE acc. - {row['pred_license']} (GT:{row['license']}) / {int(100 * np.mean(eval_licensing))}%")
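A hedged usage sketch of evaluate (the repository URL is made up; a githubToken is expected in the environment):

results = evaluate(llm=None, verbose=1, repo_url="https://github.com/user/repo")
print(results["pred_valid"], results["pred_dependencies"], results["pred_license"])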
evaluations/requirements.py
ADDED
@@ -0,0 +1,24 @@
from .utils import log

def evaluate(verbose, llm, zip, readme):
    log(verbose, "LOG", "\nLooking for package dependencies for running the code...")
    overall = "No"

    scripts = [file_path for file_path in zip.namelist() if (file_path.endswith(".py") | file_path.endswith(".ipynb"))]

    files = [file_path for file_path in zip.namelist() if (file_path.endswith(".yml") | file_path.endswith("setup.py") | file_path.endswith("requirements.txt") | ("requirement" in file_path) | ("package" in file_path))]
    files = [file_path for file_path in files if len(file_path.split("/")) == 2]
    for file in files:
        log(verbose, "LOG", f"Found requirements file: {file}")
        requirements = zip.open(file).read().decode("utf-8")
        overall = "Yes"
        if (len(requirements.split("\n")) < 5):
            log(verbose, "WARNING", "Requirements file contains too few lines.")
            overall = "No"

    if (readme):
        if (("requirement" in readme) | ("Requirement" in readme) | ("Dependenc" in readme) | ("dependenc" in readme) | (len([row for row in readme.split("\n") if (("#" in row) & (("environment" in row) | ("Environment" in row)))]) > 0)):
            log(verbose, "LOG", "Found dependencies in README file")
            overall = "Yes"

    return overall
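The two-stage filter above keeps only top-level dependency files; for example (names made up):

names = ["repo/requirements.txt", "repo/docs/requirements.txt", "repo/environment.yml"]
files = [f for f in names if (f.endswith(".yml") | f.endswith("requirements.txt"))]
print([f for f in files if len(f.split("/")) == 2])
# ['repo/requirements.txt', 'repo/environment.yml']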
evaluations/training.py
ADDED
@@ -0,0 +1,35 @@
from .utils import log
import re

def evaluate(verbose, llm, zip, readme):
    log(verbose, "LOG", "\nLooking for code to train the model...")
    overall = "No"

    patterns = {
        'tensorflow': [
            r'model\.(fit|compile|train_on_batch)',
            r'tf\.GradientTape'
        ],
        'pytorch': [
            r'model\.(train|forward)',
            r'loss\.backward',
            r'optimizer\.step',
        ]
    }
    files = [file_path for file_path in zip.namelist() if (file_path.endswith(".py") | file_path.endswith(".ipynb"))]
    for file_path in files:
        code = zip.open(file_path).read().decode("utf-8")
        for framework, regex_list in patterns.items():
            for pattern in regex_list:
                if re.search(pattern, code):
                    log(verbose, "LOG", f"Found code for training a model in {framework} framework in file: {file_path}")
                    overall = "Yes"

    if (readme):
        if ("train" in readme):
            log(verbose, "LOG", "Found something about training in README file")
            overall = "Yes"

    return overall
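The PyTorch patterns above fire on a typical training loop; a minimal check:

import re

code = "loss.backward()\noptimizer.step()"
print(bool(re.search(r'loss\.backward', code)))   # True
print(bool(re.search(r'optimizer\.step', code)))  # True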
evaluations/utils.py
ADDED
@@ -0,0 +1,131 @@
import time
import requests
import os
import json
import streamlit as st

def model_predict(client, prompt):
    # Collect the streamed chat completion into a single answer string
    answer = ""
    for message in client.chat_completion(
        messages=[{"role": "system", "content": "You are a chatbot evaluating github repositories, their python codes and corresponding readme files. Strictly answer the questions with Yes or No."}, {"role": "user", "content": prompt}],
        max_tokens=500,
        stream=True,
    ):
        answer += message.choices[0].delta.content or ""

    return answer

def get_api_link(url):
    username, repo_name = decompose_url(url)
    if (username == None):
        return ""
    return f"https://api.github.com/repos/{username}/{repo_name}/zipball/"

def decompose_url(url):
    try:
        url = url.split("github.com")[1]
        url = url.strip(".")
        url = url.split(".git")[0]
        url = url.strip("/")
        parts = url.split("/")
        username = parts[0]
        repo_name = parts[1]
        return username, repo_name
    except:
        # Return a pair so callers can always unpack the result
        return None, None

def fetch_repo_stars(verbose, repo_url, token):
    headers = {"Authorization": f"token {token}"}
    api_url = get_api_link(repo_url)
    api_url = api_url.replace("/zipball/", "")

    # Sending GET request to GitHub API
    response = requests.get(api_url, headers=headers)

    if response.status_code == 200:
        return json.loads(response.content)["stargazers_count"]
    if (response.status_code == 404):
        log(verbose, "ERROR", "Repository private.")

def fetch_repo(verbose, repo_url, repo_name, token):
    if (os.path.exists(repo_name)):
        os.remove(repo_name)

    if ("github.com" not in repo_url):
        log(verbose, "ERROR", f"URL not for github repo, please evaluate manually ({repo_url}).")
        return

    headers = {"Authorization": f"token {token}"}
    api_url = get_api_link(repo_url)

    if (api_url == ""):
        log(verbose, "ERROR", f"Failed to parse the URL, please evaluate manually ({repo_url}).")
        return

    # Sending GET request to GitHub API
    response = requests.get(api_url, headers=headers)

    if response.status_code == 200:
        with open(repo_name, 'wb') as file:
            file.write(response.content)

        log(verbose, "LOG", "Repository downloaded successfully")
    if (response.status_code == 404):
        log(verbose, "ERROR", "Repository private.")

def fetch_readme(zip):
    readme_files = [readme for readme in zip.namelist() if ((readme.endswith("README.MD") | readme.endswith("README.md") | readme.endswith("readme.md")) & (len(readme.split("/")) == 2))]
    readme = ""
    for readme_file in readme_files:
        readme += zip.open(readme_file).read().decode("utf-8") + "\n\n"
    return readme

def fetch_license(zip):
    license_files = [license for license in zip.namelist() if (("LICENSE" in license) & (len(license.split("/")) == 2))]
    license = None
    if (len(license_files) > 0):
        license = zip.open(license_files[0]).read().decode("utf-8")
    return license

def fetch_openalex(verbose, paper_name, year):
    api_url = f"https://api.openalex.org/works?filter=default.search:{paper_name},publication_year:{year}"

    response = requests.get(api_url)

    if response.status_code == 200:
        return response.json()
    else:
        log(verbose, "WARNING", "Could not find OpenAlex information for paper.")

def log(verbose, log_type, log_text, hf=False):
    if (verbose == 0):
        return

    if (log_type == "LOG"):
        log_text = f"LOG: {log_text}"
    if (log_type == "ERROR"):
        log_text = f"ERROR: {log_text}"
    if (log_type == "WARNING"):
        log_text = f"WARNING: {log_text}"

    # Align line-break
    if (log_text.startswith("\n")):
        print("\n")
        log_text = log_text.lstrip('\n')

    if (verbose == 1):
        print(log_text)
        return

    if (verbose == 2):
        st.write(log_text)
        return

    raise Exception(log_text)
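decompose_url and get_api_link tolerate common URL variants; for example (user/repo are made up):

print(decompose_url("https://github.com/user/repo"))       # ('user', 'repo')
print(decompose_url("https://github.com/user/repo.git/"))  # ('user', 'repo')
print(get_api_link("https://github.com/user/repo"))
# https://api.github.com/repos/user/repo/zipball/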
evaluations/validating.py
ADDED
@@ -0,0 +1,36 @@
from .utils import log
import re

def evaluate(verbose, llm, zip, readme):
    log(verbose, "LOG", "\nLooking for examples for running the model...")
    overall = "No"
    patterns = {
        'tensorflow': [
            r'tf\.keras\.models\.load_model',  # TensorFlow model loading
            r'tf\.saved_model\.load',
            r'model\.predict',  # Running inference
            r'model\(.+\)'  # Direct model invocation for inference
        ],
        'pytorch': [
            r'torch\.load',  # PyTorch model loading
            r'torch\.jit\.load',  # PyTorch JIT model loading
            r'model\.eval',  # Running inference
            r'model\(.+\)'  # Direct model invocation for inference
        ]
    }

    files = [file_path for file_path in zip.namelist() if (file_path.endswith(".py") | file_path.endswith(".ipynb"))]
    for file_path in files:
        code = zip.open(file_path).read().decode("utf-8")
        for framework, regex_list in patterns.items():
            for pattern in regex_list:
                if re.search(pattern, code):
                    log(verbose, "LOG", f"Found code for evaluating a model in {framework} framework in file: {file_path}")
                    overall = "Yes"

    if (readme):
        if (len(re.findall("testing", readme)) > 0):
            log(verbose, "LOG", "Found information about evaluations in readme")
            overall = "Yes"

    return overall
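The inference patterns above match typical evaluation code; a minimal check:

import re

code = "model = torch.load('weights.pth')\nmodel.eval()"
print(bool(re.search(r'torch\.load', code)))  # True
print(bool(re.search(r'model\.eval', code)))  # True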
evaluations/weights.py
ADDED
@@ -0,0 +1,52 @@
from .utils import log, model_predict
import re

def evaluate(verbose, llm, zip, readme):
    log(verbose, "LOG", "\nLooking for pre-trained model weights...")
    overall = "No"
    files = [file_path for file_path in zip.namelist() if (file_path.endswith(".h5") | file_path.endswith(".pth") | file_path.endswith(".torch") | file_path.endswith(".pt") | file_path.endswith(".tar.gz") | file_path.endswith("checkpoint.pt") | ("weights" in file_path) | file_path.endswith("ckpt"))]
    if (len(files) > 0):
        log(verbose, "LOG", f"Found model weights: {files}")
        overall = "Yes"
        return overall

    if (readme):
        url_pattern = r'(https?://[^\s]+)'
        urls = re.findall(url_pattern, readme)
        if (len([url for url in urls if "pth" in url]) > 0):
            log(verbose, "LOG", "Found a link to pre-trained weights in readme")
            overall = "Yes"
            return overall

        readme_lines = readme.split("\n")
        if (len([row for row in readme_lines if ((len(re.findall("pretrained", row, re.IGNORECASE)) > 0) & (len(re.findall("http", row, re.IGNORECASE)) > 0))]) > 0):
            log(verbose, "LOG", "Found a link for 'pretrained' something in readme")
            overall = "Yes"
            return overall

        if (len([row for row in readme_lines if ((len(re.findall("pre-trained", row, re.IGNORECASE)) > 0) & (len(re.findall("http", row, re.IGNORECASE)) > 0))]) > 0):
            log(verbose, "LOG", "Found a link for 'pre-trained' something in readme")
            overall = "Yes"
            return overall

        if (len([row for row in readme_lines if ((len(re.findall("weight", row, re.IGNORECASE)) > 0) & (len(re.findall("http", row, re.IGNORECASE)) > 0))]) > 0):
            log(verbose, "LOG", "Found a link for 'weight' something in readme")
            overall = "Yes"
            return overall

        if (len([row for row in readme_lines if ((len(re.findall("download", row, re.IGNORECASE)) > 0) & (len(re.findall("model", row, re.IGNORECASE)) > 0) & (len(re.findall("http", row, re.IGNORECASE)) > 0))]) > 0):
            log(verbose, "LOG", "Found a link for 'model' something in readme")
            overall = "Yes"
            return overall

        if (llm):
            # NOTE: model_predict expects the inference client as its first argument (see utils.py)
            prompt = f"{readme}\nQ: Does this text contain a download link for the model pre-trained weights?"
            ans = model_predict(prompt)
            if (("Yes" in ans) & ("No" not in ans)):
                log(verbose, "LOG", "The LLM found signs for accessing the pre-trained weights from the readme")
                overall = "Yes"
                return overall

    log(verbose, "ERROR", "Found no pre-trained model weights.")
    return overall
midl.py
ADDED
@@ -0,0 +1,10 @@
from evaluations.repo_evaluations import midl_evaluations
# importing os module for environment variables
import os
# importing necessary functions from dotenv library
from dotenv import load_dotenv, dotenv_values
# loading variables from .env file
load_dotenv()
token = os.getenv("githubToken")

midl_evaluations()
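Entry point: running python midl.py from the repository root (with the .env file and data/dump.csv in place) triggers the MIDL ground-truth comparison defined in evaluations/repo_evaluations.py.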
plotting/paper_plots.py
ADDED
@@ -0,0 +1,89 @@
import plotly.express as px
import pandas as pd
import numpy as np

paper_dump = pd.read_csv('data/dump.csv', sep="\t")
# Calculate total number of papers per year and venue
custom_order = ["MICCAI", "MIDL", "Nature", "arXiv"]
total_titles_per_venue = paper_dump.groupby(['year', 'venue']).size().reset_index(name='total_titles')

# Calculate the number of papers with a repository URL per year and venue
total_url_per_venue = paper_dump[paper_dump["url"] != ""].groupby(['year', 'venue']).size().reset_index(name='total_urls')

# Merge the DataFrames to calculate the repository rate
merged_df = pd.merge(total_titles_per_venue, total_url_per_venue, on=['year', 'venue'], how='left')
merged_df['repo_rate'] = merged_df['total_urls'] / merged_df['total_titles']

# Plot the number of papers using Plotly, with year on x-axis and color by venue
fig = px.bar(
    merged_df,
    x='year',
    y='total_titles',
    color='venue',
    barmode='group',
    title='Number of papers per venue',
    labels={'total_titles': 'Number of papers', 'year': 'Year'},
    category_orders={'venue': custom_order}
)

fig.update_xaxes(range=[2018, 2024])
fig.show()

# Plot the number of repository URLs per venue and year
fig = px.bar(
    merged_df,
    x='year',
    y='total_urls',
    color='venue',
    barmode='group',
    title='Number of repositories per venue',
    labels={'total_urls': 'Number of repositories', 'year': 'Year'},
    category_orders={'venue': custom_order}
)

fig.update_xaxes(range=[2018, 2024])
fig.show()

# Plot the share of papers with a repository per venue and year
fig = px.bar(
    merged_df,
    x='year',
    y='repo_rate',
    color='venue',
    barmode='group',
    title='Share of papers with a repository per venue',
    labels={'repo_rate': 'Repository rate', 'year': 'Year'},
    category_orders={'venue': custom_order}
)
fig.update_xaxes(range=[2018, 2024])
fig.update_yaxes(range=[0, 1])

fig.show()
plotting/result_plots.py
ADDED
@@ -0,0 +1,141 @@
import plotly.express as px
import pandas as pd

df = pd.read_csv('data/results.csv', sep="\t")
custom_order = ["MICCAI", "MIDL", "Nature", "arXiv"]

# Calculate total number of URLs per year and venue
total_urls_per_year_venue = df.groupby(['year', 'venue']).size().reset_index(name='total_urls')

# Calculate the number of URLs that parsed as valid per year and venue
errors_per_year_venue = df[df["pred_valid"] != False].groupby(['year', 'venue']).size().reset_index(name='errors')

# Merge the DataFrames to calculate the success rate
error_rate_df = pd.merge(total_urls_per_year_venue, errors_per_year_venue, on=['year', 'venue'], how='left')
error_rate_df['errors'] = error_rate_df['errors'].fillna(0)  # Replace NaN with 0 for venues with no valid URLs
error_rate_df['error_rate'] = error_rate_df['errors'] / error_rate_df['total_urls']

# Plot the success rates using Plotly, with year on x-axis and color by venue
fig = px.bar(
    error_rate_df,
    x='year',
    y='error_rate',
    color='venue',
    barmode='group',
    title='Success Rate per Venue and Year for "valid_url"',
    labels={'error_rate': 'Success Rate', 'year': 'Year'},
    category_orders={'venue': custom_order}
)

fig.update_yaxes(range=[0, 1])
fig.update_xaxes(range=[2017.5, 2024.5])
fig.show()

for topic in ["pred_live", "pred_dependencies", "pred_training", "pred_evaluation", "pred_weights", "pred_readme", "pred_license"]:
    # Calculate total number of valid URLs per year and venue
    total_valid_urls_per_year_venue = df[df["pred_valid"] == True].groupby(['year', 'venue']).size().reset_index(name='total_urls')

    # Calculate the number of passes per year and venue
    passes_per_year_venue = df[df[topic] != "No"].groupby(['year', 'venue']).size().reset_index(name='successes')

    # Merge the DataFrames to calculate the success rate (relative to valid URLs)
    success_rate_df = pd.merge(total_valid_urls_per_year_venue, passes_per_year_venue, on=['year', 'venue'], how='left')
    success_rate_df['successes'] = success_rate_df['successes'].fillna(0)  # Replace NaN with 0 for venues with no passes
    success_rate_df['success_rate'] = success_rate_df['successes'] / success_rate_df['total_urls']

    # Plot the success rates using Plotly, with year on x-axis and color by venue
    fig = px.bar(
        success_rate_df,
        x='year',
        y='success_rate',
        color='venue',
        barmode='group',
        title=f'Success Rate per Venue and Year for "{topic}"',
        labels={'success_rate': 'Success Rate', 'year': 'Year'},
        category_orders={'venue': custom_order}
    )

    fig.update_yaxes(range=[0, 1])
    fig.update_xaxes(range=[2017.5, 2024.5])
    fig.show()

# List of columns that feed the reproducibility score
columns_to_check = ["pred_dependencies", "pred_training", "pred_evaluation", "pred_weights", "pred_readme", "pred_license"]

# Step 1: Count the non-"No" answers per row for the specified columns (the reproducibility score)
df['no_count'] = df[columns_to_check].apply(lambda row: (row != 'No').sum(), axis=1)

# Step 2: Create scatter plot with pred_citations on x-axis and the score on y-axis, color-coded by venue
fig = px.scatter(
    df,
    x='pred_citations',
    y='no_count',
    color='venue',
    title='Automated Reproducibility Score vs Citations, Color Coded by Venue',
    labels={'pred_citations': 'Citations', 'no_count': 'Automated Reproducibility score (0-6)'},
    category_orders={'venue': custom_order},  # Ensure custom order for venue if necessary
    log_x=True
)

# Step 3: Display the scatter plot
fig.show()

# Step 1 (repeated for clarity): the per-row score over the same columns
df['no_count'] = df[columns_to_check].apply(lambda row: (row != 'No').sum(), axis=1)

# Step 2: Create a strip plot (scatter-like) with jitter to show individual scores
fig = px.strip(
    df,
    x='venue',
    y='no_count',
    color='venue',
    title='Individual Reproducibility Scores with Jitter per Venue',
    labels={'no_count': 'Automated Reproducibility Score (0-6)', 'venue': 'Venue'},
    category_orders={'venue': custom_order},  # Ensure custom order for venues
    stripmode='overlay'  # Allows all individual points to overlay each other
)

# Step 3: Add some jitter to the x-axis so points don't overlap
fig.update_traces(jitter=0.3, marker={'size': 8}, selector=dict(mode='markers'))

# Step 4: Overlay a box plot to show median and spread
fig.add_trace(px.box(
    df,
    x='venue',
    y='no_count',
    category_orders={'venue': custom_order}
).data[0])  # We add the first trace of the box plot to overlay

# Step 5: Show the plot
fig.show()

for topic in ["pred_live", "pred_dependencies", "pred_training", "pred_evaluation", "pred_weights", "pred_readme", "pred_license"]:
    # Calculate total number of URLs per venue
    total_urls_per_venue = df.groupby('venue').size().reset_index(name='total_urls')

    # Calculate the number of passes per venue
    errors_per_venue = df[df[topic] != "No"].groupby('venue').size().reset_index(name='errors')

    # Merge the DataFrames to calculate the success rate
    error_rate_df = pd.merge(total_urls_per_venue, errors_per_venue, on='venue', how='left')
    error_rate_df['errors'] = error_rate_df['errors'].fillna(0)  # Replace NaN with 0 for venues with no passes
    error_rate_df['error_rate'] = error_rate_df['errors'] / error_rate_df['total_urls']

    # Plot the success rates using Plotly, with venue on x-axis
    fig = px.bar(
        error_rate_df,
        x='venue',
        y='error_rate',
        color='venue',
        title=f'Success Rate per Venue for "{topic}"',
        labels={'error_rate': 'Success Rate', 'venue': 'Venue'},
        category_orders={'venue': custom_order}
    )

    fig.update_yaxes(range=[0, 1])
    fig.show()