Commit · 77f290b
1 Parent(s): cd14e4d
What did I do before then?
Files changed:
- .env +1 -0
- .gitignore +2 -0
- data/fetch_arxiv.py +84 -0
- data/fetch_miccai.py +60 -0
- data/fetch_nature.py +68 -0
- data/fetch_processed.py +30 -0
- evaluations/__pycache__/documentation.cpython-310.pyc +0 -0
- evaluations/__pycache__/license.cpython-310.pyc +0 -0
- evaluations/__pycache__/repo_evaluations.cpython-310.pyc +0 -0
- evaluations/__pycache__/requirements.cpython-310.pyc +0 -0
- evaluations/__pycache__/training.cpython-310.pyc +0 -0
- evaluations/__pycache__/utils.cpython-310.pyc +0 -0
- evaluations/__pycache__/validating.cpython-310.pyc +0 -0
- evaluations/__pycache__/weights.cpython-310.pyc +0 -0
- evaluations/documentation.py +113 -0
- evaluations/license.py +29 -0
- evaluations/repo_evaluations.py +150 -0
- evaluations/requirements.py +24 -0
- evaluations/training.py +35 -0
- evaluations/utils.py +131 -0
- evaluations/validating.py +36 -0
- evaluations/weights.py +52 -0
- midl.py +10 -0
- plotting/paper_plots.py +89 -0
- plotting/result_plots.py +141 -0
.env
ADDED
@@ -0,0 +1 @@
githubToken="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"
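For context: this token is read back via python-dotenv in midl.py below; a minimal sketch of that pattern (not part of the commit):

import os
from dotenv import load_dotenv

load_dotenv()                      # reads key=value pairs from .env into the environment
token = os.getenv("githubToken")   # the key defined above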
.gitignore
ADDED
@@ -0,0 +1,2 @@
data/*.csv
data/*.zip
data/fetch_arxiv.py
ADDED
@@ -0,0 +1,84 @@
import pandas as pd
import numpy as np
import requests
import pdfplumber
import re
from multiprocessing import Pool, cpu_count
from functools import partial
import os

# Function to process each arXiv article URL
def process_arxiv_paper(article_link):
    try:
        article_text = requests.get(article_link).text
        title_pattern = r'<h1 class="title mathjax"><span class="descriptor">Title:</span>(.*?)</h1>'
        title = re.findall(title_pattern, article_text, re.DOTALL)[0]
        year_pattern = r'\[Submitted on(?:.*?(\d{1,2} \w+ \d{4}))(?:.*?)]'
        year = re.findall(year_pattern, article_text)[0].split(" ")[-1]

        article_id = article_link.split("/")[-1]
        pdf_url = f'https://arxiv.org/pdf/{article_id}'
        response = requests.get(pdf_url)
        if response.status_code == 200:
            with open(f"{article_id}.pdf", 'wb') as file:
                file.write(response.content)
        if (response.status_code == 404):
            print("Failed to fetch pdf")
            return None

        urls = []
        link_pattern = r'(https?://(?:www\.)?github\.com[^\s]+)'
        with pdfplumber.open(f"{article_id}.pdf") as pdf:
            # Loop through all pages
            for page_num, page in enumerate(pdf.pages):
                # Extract text from the page
                text = page.extract_text()

                # Search for GitHub links in the page text
                found_urls = re.findall(link_pattern, text)
                urls.extend(found_urls)
        os.remove(f"{article_id}.pdf")
        # Keep only links that point at a specific repository (not framework pages)
        urls = [url for url in urls if ("pytorch" not in url) & ("fchollet" not in url) & (len(url.split("github.com")[1].split("/")) >= 3)]
        print(urls)
        url = urls[0] if len(urls) > 0 else ""

        # Return a dictionary of the results
        return {"venue": "arXiv", "title": title, "url": url, "year": year}

    except Exception as e:
        print(f"Error processing {article_link}: {e}")
        return None

# Set debug mode
debug = False
# Fetch all article links for each results page
all_year_urls = []

page_size = 50
search_queries = ['https://arxiv.org/search/advanced?advanced=1&terms-0-operator=AND&terms-0-term=deep+learning&terms-0-field=abstract&terms-1-operator=AND&terms-1-term=cancer&terms-1-field=abstract&classification-physics_archives=all&classification-include_cross_list=include&date-year=&date-filter_by=date_range&date-from_date=2018&date-to_date=2024&date-date_type=submitted_date&abstracts=show&size=50&order=-announced_date_first&start=']
articles = []
for search_query in search_queries:
    page = 0
    while (page <= 100):
        start_idx = page_size * page
        url = f"{search_query}{start_idx}"
        current_page = requests.get(url).text
        pattern = r'<p class="list-title is-inline-block">.*?<a href="([^"]+)"'
        matches = re.findall(pattern, current_page)
        if (len(matches) == 0):
            break
        else:
            page += 1

        articles += matches
articles = np.unique(articles)

# Parallel processing using Pool
if __name__ == "__main__":
    with Pool(processes=4) as pool:
        results = pool.starmap(process_arxiv_paper, [[article] for article in articles])

    # Filter out any None results due to errors
    results = [result for result in results if result is not None]

    # Convert the list of dictionaries to a DataFrame
    arxiv = pd.DataFrame(results)
    arxiv.to_csv('arxiv.csv')
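A quick sanity check of the GitHub-link filter above; a minimal sketch with made-up sample text:

import re

link_pattern = r'(https?://(?:www\.)?github\.com[^\s]+)'
sample = "Code: https://github.com/user/repo and https://github.com/pytorch/pytorch"
found = re.findall(link_pattern, sample)
# The post-filter drops framework links and links without a user/repo path
kept = [u for u in found if ("pytorch" not in u) & (len(u.split("github.com")[1].split("/")) >= 3)]
print(kept)  # ['https://github.com/user/repo']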
data/fetch_miccai.py
ADDED
@@ -0,0 +1,60 @@
import pandas as pd
import requests
import re
from multiprocessing import Pool, cpu_count
from functools import partial

# Function to process each URL
def process_paper(year, url):
    try:
        paper_page = requests.get(url).text

        # Find title
        title_pattern = r'<title>(.*?)\s*</title>'
        title_match = re.search(title_pattern, paper_page, re.DOTALL)
        title = title_match.group(1)

        # Find the code repository link
        code_repo_pattern = r'<h1 id="code-id">.*?</h1>\s*<p><a href="(.*?)">'
        code_repo_match = re.search(code_repo_pattern, paper_page, re.DOTALL)
        code_repo_link = code_repo_match.group(1) if code_repo_match else ""

        # Find the dataset information
        dataset_pattern = r'<h1 id="dataset-id">.*?</h1>\s*<p>(.*?)\s*<br />'
        dataset_match = re.search(dataset_pattern, paper_page, re.DOTALL)
        dataset_info = "Yes" if dataset_match else "No"

        # Return a dictionary of the results
        return {"title": title, "url": code_repo_link, "year": year, "public": dataset_info}

    except Exception as e:
        print(f"Error processing {url}: {e}")
        return None

current_year = 2024  # Update with the current year
MICCAI_pages = ["https://miccai2021.org/openaccess/paperlinks/", "https://conferences.miccai.org/2022/papers/", "https://conferences.miccai.org/2023/papers/"]
MICCAI_root = ["https://miccai2021.org/openaccess/paperlinks/", "https://conferences.miccai.org", "https://conferences.miccai.org"]
years = [2021, 2022, 2023]
# Set debug mode
debug = False

# Fetch all URLs for each year
all_year_urls = []
for i in range(len(MICCAI_pages)):
    year_page = requests.get(MICCAI_pages[i]).text
    print(year_page)
    urls = [MICCAI_root[i] + line.split('href="')[1].split('"')[0] for line in year_page.split('\n') if "&bullet" in line]
    all_year_urls.extend([(years[i], url) for url in urls])

print(all_year_urls)
# Parallel processing using Pool
# if __name__ == "__main__":
#     with Pool(processes=12) as pool:  # Use 12 processes
#         results = pool.starmap(process_paper, all_year_urls)

#     # Filter out any None results due to errors
#     results = [result for result in results if result is not None]

#     miccai = pd.DataFrame(results)
#     # miccai = pd.DataFrame( OrderedDict( { 'title': pd.Series(a), 'b': pd.Series(b), 'c': pd.Series(c) } ) )
#     miccai.to_csv('miccai.csv')
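To illustrate the href extraction in the list comprehension above (the HTML line is made up):

line = '&bullet; <a href="/2023/papers/0001-Paper.html">Example Paper</a>'
root = "https://conferences.miccai.org"
url = root + line.split('href="')[1].split('"')[0]
print(url)  # https://conferences.miccai.org/2023/papers/0001-Paper.html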
data/fetch_nature.py
ADDED
@@ -0,0 +1,68 @@
import pandas as pd
import numpy as np
import requests
import re
from multiprocessing import Pool, cpu_count
from functools import partial

# Function to process each article
def process_nature_paper(article_link):
    try:
        url = f'https://www.nature.com/articles/{article_link}'
        article_text = requests.get(url).text

        pattern = r'Code availability.*?<a href="([^"]+)"'
        matches = re.findall(pattern, article_text, re.DOTALL)
        urls = [link for link in matches if "github" in link]
        url = urls[0] if len(urls) > 0 else (matches[0] if len(matches) > 0 else "")

        year = re.findall(r'datetime="(\d{4})', article_text)[0]
        # Find title
        title_pattern = r'<title>(.*?)\s*</title>'
        title = re.findall(title_pattern, article_text, re.DOTALL)[0]

        pattern = r'Data availability.*?<a href="([^"]+)"'
        matches = re.findall(pattern, article_text, re.DOTALL)
        dataset_info = "Yes" if (len(matches) > 0) else "No"

        # Return a dictionary of the results
        return {"title": title, "url": url, "year": year, "public": dataset_info, "pdf": ""}

    except Exception as e:
        print(f"Error processing {article_link}: {e}")
        return None

# Set debug mode
debug = False

# Fetch all article IDs from each search results page
all_year_urls = []
search_queries = ["https://www.nature.com/search?q=deep+learning&order=relevance&journal=commsmed%2Cnm&page=", "https://www.nature.com/search?q=AI&order=relevance&journal=commsmed%2Cnm&page="]
articles = []
for search_query in search_queries:
    page = 1
    while (page <= 100):
        url = f"{search_query}{page}"
        current_page = requests.get(url).text
        pattern = r'href="/articles/([^"]+)"'
        matches = re.findall(pattern, current_page)
        if (len(matches) == 0):
            break
        else:
            page += 1

        articles += matches
articles = np.unique(articles)

# Parallel processing using Pool
if __name__ == "__main__":
    with Pool(processes=12) as pool:
        results = pool.starmap(process_nature_paper, [[article] for article in articles])

    # Filter out any None results due to errors
    results = [result for result in results if result is not None]

    # Convert the list of dictionaries to a DataFrame
    nature = pd.DataFrame(results)
    nature = nature[['title', 'year', 'pdf', 'url', 'public']]
    nature.to_csv('nature.csv')
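Assuming the script ran to completion, the output can be inspected like this (a sketch, not part of the commit):

import pandas as pd

nature = pd.read_csv('nature.csv', index_col=0)
print(nature.columns.tolist())  # ['title', 'year', 'pdf', 'url', 'public']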
data/fetch_processed.py
ADDED
@@ -0,0 +1,30 @@
import csv
import numpy as np
import pandas as pd
import re

current_year = 2024
MIDL_years = range(2018, current_year + 1, 1)
custom_order = ["MICCAI", "MIDL", "Nature", "arXiv"]

for venue in custom_order:
    df = pd.read_excel("https://docs.google.com/spreadsheets/d/e/2PACX-1vQjpsSYcEcYUVB-88bCQ01UfQf0z9m16ax7p1ft03G68Nr-DdXHpPt-xOFSrXFj1N49AjK5nYhmKBfo/pub?output=xlsx", sheet_name=venue)
    df.to_csv(f'data/{venue}.csv', sep="\t")

# Store all evaluations here
paper_dump = pd.DataFrame()
# Official color codes for conferences
MIDL_colors = ["#506775", "#4E7268", "#5170B1", "#004B5A", "#268BCC", "#B18630", "#AA0000"]

for venue in custom_order:
    with open(f'data/{venue}.csv') as file:
        tsv_file = csv.reader(file, delimiter="\t")
        for row in tsv_file:
            if (row[0] == ""):
                continue

            if (row[1] == ""):
                continue

            paper_dump = pd.concat([paper_dump, pd.DataFrame({"venue": venue, "title": [row[1]], "year": [row[2]], "pdf": [row[3]], "url": [row[4]], "public": [row[5]], "dependencies": [row[6]], "training": [row[7]], "evaluation": [row[8]], "weights": [row[9]], "readme": [row[10]], "license": [row[11]]})], ignore_index=True)
paper_dump.to_csv('data/dump.csv', sep="\t")
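The resulting dump is what the evaluation scripts load; a minimal sketch:

import pandas as pd

dump = pd.read_csv('data/dump.csv', sep="\t")
print(dump[["venue", "title", "year", "url"]].head())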
evaluations/__pycache__/documentation.cpython-310.pyc
ADDED
Binary file (3.11 kB)
evaluations/__pycache__/license.cpython-310.pyc
ADDED
Binary file (1.27 kB)
evaluations/__pycache__/repo_evaluations.cpython-310.pyc
ADDED
Binary file (4.63 kB)
evaluations/__pycache__/requirements.cpython-310.pyc
ADDED
Binary file (1.56 kB)
evaluations/__pycache__/training.cpython-310.pyc
ADDED
Binary file (1.17 kB)
evaluations/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (4.01 kB)
evaluations/__pycache__/validating.cpython-310.pyc
ADDED
Binary file (1.23 kB)
evaluations/__pycache__/weights.cpython-310.pyc
ADDED
Binary file (2.41 kB)
evaluations/documentation.py
ADDED
@@ -0,0 +1,113 @@
from .utils import log
import re
import numpy as np

def evaluate(verbose, llm, zip, readme):
    log(verbose, "LOG", "\nEvaluating code documentation...")
    overall = "No"

    code_to_comment_ratio = get_code_to_comment_ratio(zip)
    log(verbose, "LOG", f"Your python scripts have a comment-to-code ratio of {np.round(code_to_comment_ratio, 2)}%.")

    if (readme):
        non_empty_rows = [row for row in readme.split("\n") if row != ""]
        if (len(non_empty_rows) < 5):
            log(verbose, "WARNING", "Readme file has very few lines")
            return overall

        if (count_code_lines(non_empty_rows) > 5):
            log(verbose, "LOG", "Readme file contains python examples.")
            overall = "Yes"
            return overall

        if (llm):
            # NOTE: the prompt is only constructed here; no LLM call is made in this version.
            prompt = f'{readme}\n \
                Is this README file enough to find what \
                package dependencies you need to install and how to train \
                and evaluate the proposed model? Please strictly \
                answer yes or no.\n\nA:'

        manual_fail = False
        if (len(re.findall("train", readme, re.IGNORECASE)) == 0):
            log(verbose, "ERROR", "Readme file missing training information")
            overall = "No"
        if ((len(re.findall("demo", readme, re.IGNORECASE)) == 0) | (len(re.findall("evaluat", readme, re.IGNORECASE)) == 0)):
            log(verbose, "ERROR", "Readme file missing testing information")
            overall = "No"

        if (len(re.findall("example", readme, re.IGNORECASE)) > 0):
            log(verbose, "LOG", "Readme file contains links to examples")
            overall = "Yes"

        if ((len(re.findall("package", readme, re.IGNORECASE)) == 0) & \
            (len(re.findall("dependenc", readme, re.IGNORECASE)) == 0) & \
            (len(re.findall("requirement", readme, re.IGNORECASE)) == 0)):
            log(verbose, "ERROR", "Readme file missing information about package dependencies")
            overall = "No"

    if (overall == "No"):
        log(verbose, "ERROR", "Found no useful information in README file.")
    return overall

def count_comment_lines(lines):
    # Initialize counters
    single_line_comments = 0
    multi_line_comments = 0
    in_multiline_comment = False

    for line in lines:
        stripped_line = line.strip()

        # Check for single-line comments
        if stripped_line.startswith('#'):
            single_line_comments += 1

        # Check for multi-line comment (docstring) start or end
        if stripped_line.startswith('"""') or stripped_line.startswith("'''"):
            if not in_multiline_comment:
                # Starting a new multi-line comment
                in_multiline_comment = True
                multi_line_comments += 1  # Count the start line itself
            else:
                # Ending an existing multi-line comment
                in_multiline_comment = False
                multi_line_comments += 1  # Count the end line itself
        elif in_multiline_comment:
            # Continue counting lines within a multi-line comment
            multi_line_comments += 1

    return single_line_comments, multi_line_comments

def get_code_to_comment_ratio(zip):
    python_files = [file_path for file_path in zip.namelist() if (file_path.endswith(".py") | file_path.endswith(".ipynb"))]
    code_line_count = 0
    comment_line_count = 0
    for file in python_files:
        file_lines = zip.open(file).read().decode("utf-8").split('\n')
        sl_comm, ml_comm = count_comment_lines(file_lines)
        comment_line_count += sl_comm + ml_comm
        code_line_count += len(file_lines) - (sl_comm + ml_comm)
    # Guard against division by zero for repositories without Python code
    if (code_line_count == 0):
        return 0.0
    code_to_comment_ratio = 100 * comment_line_count / code_line_count

    return code_to_comment_ratio

def count_code_lines(lines):
    is_code_snippet = False
    code_line_count = 0

    for line in lines:
        stripped_line = line.strip()

        if stripped_line.startswith('```'):
            if not is_code_snippet:
                is_code_snippet = True
                code_line_count += 1
            else:
                is_code_snippet = False
                code_line_count += 1
        elif is_code_snippet:
            code_line_count += 1

    return int(code_line_count / 2)
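A toy check of count_code_lines (README content made up): the two fences plus the two command lines give four counted lines, which the final halving reduces to 2.

toy_readme = [
    "# Demo",
    "```",
    "pip install -r requirements.txt",
    "python train.py",
    "```",
]
print(count_code_lines(toy_readme))  # 2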
evaluations/license.py
ADDED
@@ -0,0 +1,29 @@
from .utils import log, model_predict
import re

def evaluate(verbose, llm, zip, readme):
    log(verbose, "LOG", "\nEvaluating repository licensing...")
    overall = "No"
    license_files = [license for license in zip.namelist() if ((("LICENSE" in license) | ("license" in license)) & (len(license.split("/")) == 2))]
    if (len(license_files) > 0):
        license = zip.open(license_files[0]).read().decode("utf-8")
        ans = [row for row in license.split("\n") if row != ""]

        if (llm):
            license = license[:50]
            # NOTE: model_predict expects the inference client as its first argument (see utils.py)
            prompt = f"Q: {license}. This was an excerpt from a license \
                file. Do you know the name of this license?"
            ans = model_predict(prompt)
            log(verbose, "LOG", f"Found license: {ans}")
        else:
            log(verbose, "LOG", f"Found license file: {license_files[0]}")

        overall = "Yes"

    if (readme):
        if ("License" in readme):
            log(verbose, "LOG", "License found in README.")
            overall = "Yes"

    if (overall == "No"):
        log(verbose, "ERROR", "LICENSE file not found.")
    return overall
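The top-level filter above behaves like this (archive names are made up):

names = ["repo-main/", "repo-main/LICENSE", "repo-main/src/license.txt"]
print([n for n in names if ((("LICENSE" in n) | ("license" in n)) & (len(n.split("/")) == 2))])
# ['repo-main/LICENSE']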
evaluations/repo_evaluations.py
ADDED
@@ -0,0 +1,150 @@
import pandas as pd
import os
from evaluations import documentation, requirements, training, validating, license, weights
from evaluations.utils import *
import zipfile
import numpy as np
from huggingface_hub import InferenceClient

API_URL = "https://api-inference.huggingface.co/models/openlm-research/open_llama_3b_v2"
headers = {"Authorization": "Bearer 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", "x-wait-for-model": "true"}

client = InferenceClient(
    "meta-llama/Llama-3.1-8B-Instruct",
    token="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",
)

def init_llm(verbose):
    log(verbose, "LOG", "Initializing LLM...")

def evaluate(llm, verbose, repo_url, title=None, year=None):
    repo_name = "data/repo.zip"
    token = os.getenv("githubToken")
    # token = userdata.get('githubToken')

    if (llm):
        init_llm(verbose)
    else:
        log(verbose, "LOG", "No LLM will be used for the evaluation.")

    results = {"pred_live": "Yes", "pred_dependencies": None, "pred_training": None, "pred_evaluation": None, "pred_weights": None, "pred_readme": None, "pred_license": None, "pred_stars": None, "pred_citations": None, "pred_valid": False}

    try:
        if (get_api_link(repo_url) != ""):
            results["pred_valid"] = True
            # Keep the zip path in repo_name; use a separate variable for the GitHub repository name
            username, gh_repo_name = decompose_url(repo_url)
            log(verbose, "LOG", f"Fetching github repository: https://github.com/{username}/{gh_repo_name}")

        fetch_repo(verbose, repo_url, repo_name, token)

        if ((title != None) & (year != None) & (title != "") & (year != "")):
            res = fetch_openalex(verbose, title, year)
            if (res != None):
                res = res["results"]
                if (len(res) > 0):
                    res = res[0]
                    results["pred_citations"] = res["cited_by_count"]

        if (not(os.path.exists(repo_name))):
            results["pred_live"] = "No"
            return results

        zip = zipfile.ZipFile(repo_name)
        readme = fetch_readme(zip)
        results["pred_stars"] = fetch_repo_stars(verbose, repo_url, token)

        if (len(zip.namelist()) <= 2):
            log(verbose, "LOG", "Empty repository")
            results["pred_live"] = "No"
            results["pred_training"] = "No"
            results["pred_evaluation"] = "No"
            results["pred_weights"] = "No"
            results["pred_dependencies"] = "No"
        else:
            results["pred_dependencies"] = requirements.evaluate(verbose, llm, zip, readme)
            results["pred_training"] = training.evaluate(verbose, llm, zip, readme)
            results["pred_evaluation"] = validating.evaluate(verbose, llm, zip, readme)
            results["pred_weights"] = weights.evaluate(verbose, llm, zip, readme)
            results["pred_readme"] = documentation.evaluate(verbose, llm, zip, readme)
            results["pred_codetocomment"] = documentation.get_code_to_comment_ratio(zip)
            results["pred_license"] = license.evaluate(verbose, llm, zip, readme)

        return results
    except Exception as e:
        log(verbose, "ERROR", "Evaluating repository failed: " + str(e))
        results["pred_live"] = "No"
        return results

def full_evaluations():
    paper_dump = pd.read_csv("data/dump.csv", sep="\t")
    full_results = []

    nth = 1
    for idx, row in paper_dump.iterrows():
        if (idx % nth != 0):
            continue

        if (row["url"] == ""):
            continue

        print(str(int(100 * idx / paper_dump["title"].count())) + "% done")
        result = evaluate(None, False, row["url"], row["title"], row["year"])
        for column in result.keys():
            row[column] = result[column]

        full_results.append(row)

def midl_evaluations():
    compare_to_gt = True
    paper_dump = pd.read_csv("data/dump.csv", sep="\t")
    verbose = 1

    eval_readme = []
    eval_training = []
    eval_evaluating = []
    eval_licensing = []
    eval_weights = []
    eval_dependencies = []
    full_results = []
    for idx, row in paper_dump.iterrows():
        if (row["venue"] != "MIDL"):
            continue

        if (row["year"] == 2024):
            continue

        if (row["url"] == ""):
            continue

        print(f"\nEvaluating {idx+1} out of {len(paper_dump.index)} papers...")
        print(f'Paper title - "{row["title"]}" ({row["year"]})')
        print(f'Repository link - {row["url"]}')
        result = evaluate(None, verbose, row["url"])
        for column in result.keys():
            row[column] = result[column]
        full_results.append(row)
        if (compare_to_gt):
            print("\nSummary:")
            if ((row["pred_dependencies"] is not None) & (row["dependencies"] != "")):
                eval_dependencies.append(row["pred_dependencies"] == row["dependencies"])
                print(f"Dependencies acc. - {row['pred_dependencies']} (GT:{row['dependencies']}) / {int(100 * np.mean(eval_dependencies))}%")
            if ((row["pred_training"] is not None) & (row["training"] != "")):
                eval_training.append(row["training"] == row["pred_training"])
                print(f"Training acc. - {row['pred_training']} (GT:{row['training']}) / {int(100 * np.mean(eval_training))}%")
            if ((row["pred_evaluation"] is not None) & (row["evaluation"] != "")):
                eval_evaluating.append(row["evaluation"] == row["pred_evaluation"])
                print(f"Evaluating acc. - {row['pred_evaluation']} (GT:{row['evaluation']}) / {int(100 * np.mean(eval_evaluating))}%")
            if ((row["pred_weights"] is not None) & (row["weights"] != "")):
                eval_weights.append(row["weights"] == row["pred_weights"])
                print(f"Weights acc. - {row['pred_weights']} (GT:{row['weights']}) / {int(100 * np.mean(eval_weights))}%")
            if ((row["pred_readme"] is not None) & (row["readme"] != "")):
                eval_readme.append(row["readme"] == row["pred_readme"])
                print(f"README acc. - {row['pred_readme']} (GT:{row['readme']}) / {int(100 * np.mean(eval_readme))}%")
            if ((row["pred_license"] is not None) & (row["license"] != "")):
                eval_licensing.append(("No" if row["license"] == "No" else "Yes") == row["pred_license"])
                print(f"LICENSE acc. - {row['pred_license']} (GT:{row['license']}) / {int(100 * np.mean(eval_licensing))}%")
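A hedged usage sketch of evaluate (the repository URL is made up; a githubToken is expected in the environment):

results = evaluate(llm=None, verbose=1, repo_url="https://github.com/user/repo")
print(results["pred_valid"], results["pred_dependencies"], results["pred_license"])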
evaluations/requirements.py
ADDED
@@ -0,0 +1,24 @@
from .utils import log

def evaluate(verbose, llm, zip, readme):
    log(verbose, "LOG", "\nLooking for package dependencies for running the code...")
    overall = "No"

    scripts = [file_path for file_path in zip.namelist() if (file_path.endswith(".py") | file_path.endswith(".ipynb"))]

    files = [file_path for file_path in zip.namelist() if (file_path.endswith(".yml") | file_path.endswith("setup.py") | file_path.endswith("requirements.txt") | ("requirement" in file_path) | ("package" in file_path))]
    files = [file_path for file_path in files if len(file_path.split("/")) == 2]
    for file in files:
        log(verbose, "LOG", f"Found requirements file: {file}")
        requirements = zip.open(file).read().decode("utf-8")
        overall = "Yes"
        if (len(requirements.split("\n")) < 5):
            log(verbose, "WARNING", "Requirements file contains too few lines.")
            overall = "No"

    if (readme):
        if (("requirement" in readme) | ("Requirement" in readme) | ("Dependenc" in readme) | ("dependenc" in readme) | (len([row for row in readme.split("\n") if (("#" in row) & (("environment" in row) | ("Environment" in row)))]) > 0)):
            log(verbose, "LOG", "Found dependencies in README file")
            overall = "Yes"

    return overall
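The two-stage filter above keeps only top-level dependency files; for example (names made up):

names = ["repo/requirements.txt", "repo/docs/requirements.txt", "repo/environment.yml"]
files = [f for f in names if (f.endswith(".yml") | f.endswith("requirements.txt"))]
print([f for f in files if len(f.split("/")) == 2])
# ['repo/requirements.txt', 'repo/environment.yml']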
evaluations/training.py
ADDED
@@ -0,0 +1,35 @@
from .utils import log
import re

def evaluate(verbose, llm, zip, readme):
    log(verbose, "LOG", "\nLooking for code to train the model...")
    overall = "No"

    patterns = {
        'tensorflow': [
            r'model\.(fit|compile|train_on_batch)',
            r'tf\.GradientTape'
        ],
        'pytorch': [
            r'model\.(train|forward)',
            r'loss\.backward',
            r'optimizer\.step',
        ]
    }
    files = [file_path for file_path in zip.namelist() if (file_path.endswith(".py") | file_path.endswith(".ipynb"))]
    for file_path in files:
        code = zip.open(file_path).read().decode("utf-8")
        for framework, regex_list in patterns.items():
            for pattern in regex_list:
                if re.search(pattern, code):
                    log(verbose, "LOG", f"Found code for training a model in {framework} framework in file: {file_path}")
                    overall = "Yes"

    if (readme):
        if ("train" in readme):
            log(verbose, "LOG", "Found something about training in README file")
            overall = "Yes"

    return overall
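The PyTorch patterns above fire on a typical training loop; a minimal check:

import re

code = "loss.backward()\noptimizer.step()"
print(bool(re.search(r'loss\.backward', code)))   # True
print(bool(re.search(r'optimizer\.step', code)))  # True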
evaluations/utils.py
ADDED
@@ -0,0 +1,131 @@
import time
import requests
import os
import json
import streamlit as st

def model_predict(client, prompt):
    # Collect the streamed chat completion into a single answer string
    answer = ""
    for message in client.chat_completion(
        messages=[{"role": "system", "content": "You are a chatbot evaluating github repositories, their python codes and corresponding readme files. Strictly answer the questions with Yes or No."}, {"role": "user", "content": prompt}],
        max_tokens=500,
        stream=True,
    ):
        answer += message.choices[0].delta.content or ""

    return answer

def get_api_link(url):
    username, repo_name = decompose_url(url)
    if (username == None):
        return ""
    return f"https://api.github.com/repos/{username}/{repo_name}/zipball/"

def decompose_url(url):
    try:
        url = url.split("github.com")[1]
        url = url.strip(".")
        url = url.split(".git")[0]
        url = url.strip("/")
        parts = url.split("/")
        username = parts[0]
        repo_name = parts[1]
        return username, repo_name
    except:
        # Return a pair so callers can always unpack the result
        return None, None

def fetch_repo_stars(verbose, repo_url, token):
    headers = {"Authorization": f"token {token}"}
    api_url = get_api_link(repo_url)
    api_url = api_url.replace("/zipball/", "")

    # Sending GET request to GitHub API
    response = requests.get(api_url, headers=headers)

    if response.status_code == 200:
        return json.loads(response.content)["stargazers_count"]
    if (response.status_code == 404):
        log(verbose, "ERROR", "Repository private.")

def fetch_repo(verbose, repo_url, repo_name, token):
    if (os.path.exists(repo_name)):
        os.remove(repo_name)

    if ("github.com" not in repo_url):
        log(verbose, "ERROR", f"URL not for github repo, please evaluate manually ({repo_url}).")
        return

    headers = {"Authorization": f"token {token}"}
    api_url = get_api_link(repo_url)

    if (api_url == ""):
        log(verbose, "ERROR", f"Failed to parse the URL, please evaluate manually ({repo_url}).")
        return

    # Sending GET request to GitHub API
    response = requests.get(api_url, headers=headers)

    if response.status_code == 200:
        with open(repo_name, 'wb') as file:
            file.write(response.content)

        log(verbose, "LOG", "Repository downloaded successfully")
    if (response.status_code == 404):
        log(verbose, "ERROR", "Repository private.")

def fetch_readme(zip):
    readme_files = [readme for readme in zip.namelist() if ((readme.endswith("README.MD") | readme.endswith("README.md") | readme.endswith("readme.md")) & (len(readme.split("/")) == 2))]
    readme = ""
    for readme_file in readme_files:
        readme += zip.open(readme_file).read().decode("utf-8") + "\n\n"
    return readme

def fetch_license(zip):
    license_files = [license for license in zip.namelist() if (("LICENSE" in license) & (len(license.split("/")) == 2))]
    license = None
    if (len(license_files) > 0):
        license = zip.open(license_files[0]).read().decode("utf-8")
    return license

def fetch_openalex(verbose, paper_name, year):
    api_url = f"https://api.openalex.org/works?filter=default.search:{paper_name},publication_year:{year}"

    response = requests.get(api_url)

    if response.status_code == 200:
        return response.json()
    else:
        log(verbose, "WARNING", "Could not find OpenAlex information for paper.")

def log(verbose, log_type, log_text, hf=False):
    if (verbose == 0):
        return

    if (log_type == "LOG"):
        log_text = f"LOG: {log_text}"
    if (log_type == "ERROR"):
        log_text = f"ERROR: {log_text}"
    if (log_type == "WARNING"):
        log_text = f"WARNING: {log_text}"

    # Align line-break
    if (log_text.startswith("\n")):
        print("\n")
        log_text = log_text.lstrip('\n')

    if (verbose == 1):
        print(log_text)
        return

    if (verbose == 2):
        st.write(log_text)
        return

    raise Exception(log_text)
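decompose_url and get_api_link tolerate common URL variants; for example (user/repo are made up):

print(decompose_url("https://github.com/user/repo"))       # ('user', 'repo')
print(decompose_url("https://github.com/user/repo.git/"))  # ('user', 'repo')
print(get_api_link("https://github.com/user/repo"))
# https://api.github.com/repos/user/repo/zipball/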
evaluations/validating.py
ADDED
@@ -0,0 +1,36 @@
from .utils import log
import re

def evaluate(verbose, llm, zip, readme):
    log(verbose, "LOG", "\nLooking for examples for running the model...")
    overall = "No"
    patterns = {
        'tensorflow': [
            r'tf\.keras\.models\.load_model',  # TensorFlow model loading
            r'tf\.saved_model\.load',
            r'model\.predict',  # Running inference
            r'model\(.+\)'  # Direct model invocation for inference
        ],
        'pytorch': [
            r'torch\.load',  # PyTorch model loading
            r'torch\.jit\.load',  # PyTorch JIT model loading
            r'model\.eval',  # Running inference
            r'model\(.+\)'  # Direct model invocation for inference
        ]
    }

    files = [file_path for file_path in zip.namelist() if (file_path.endswith(".py") | file_path.endswith(".ipynb"))]
    for file_path in files:
        code = zip.open(file_path).read().decode("utf-8")
        for framework, regex_list in patterns.items():
            for pattern in regex_list:
                if re.search(pattern, code):
                    log(verbose, "LOG", f"Found code for evaluating a model in {framework} framework in file: {file_path}")
                    overall = "Yes"

    if (readme):
        if (len(re.findall("testing", readme)) > 0):
            log(verbose, "LOG", "Found information about evaluations in readme")
            overall = "Yes"

    return overall
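The inference patterns above match typical evaluation code; a minimal check:

import re

code = "model = torch.load('weights.pth')\nmodel.eval()"
print(bool(re.search(r'torch\.load', code)))  # True
print(bool(re.search(r'model\.eval', code)))  # True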
evaluations/weights.py
ADDED
@@ -0,0 +1,52 @@
from .utils import log, model_predict
import re

def evaluate(verbose, llm, zip, readme):
    log(verbose, "LOG", "\nLooking for pre-trained model weights...")
    overall = "No"
    files = [file_path for file_path in zip.namelist() if (file_path.endswith(".h5") | file_path.endswith(".pth") | file_path.endswith(".torch") | file_path.endswith(".pt") | file_path.endswith(".tar.gz") | file_path.endswith("checkpoint.pt") | ("weights" in file_path) | file_path.endswith("ckpt"))]
    if (len(files) > 0):
        log(verbose, "LOG", f"Found model weights: {files}")
        overall = "Yes"
        return overall

    if (readme):
        url_pattern = r'(https?://[^\s]+)'
        urls = re.findall(url_pattern, readme)
        if (len([url for url in urls if "pth" in url]) > 0):
            log(verbose, "LOG", "Found a link to pre-trained weights in readme")
            overall = "Yes"
            return overall

        readme_lines = readme.split("\n")
        if (len([row for row in readme_lines if ((len(re.findall("pretrained", row, re.IGNORECASE)) > 0) & (len(re.findall("http", row, re.IGNORECASE)) > 0))]) > 0):
            log(verbose, "LOG", "Found a link for 'pretrained' something in readme")
            overall = "Yes"
            return overall

        if (len([row for row in readme_lines if ((len(re.findall("pre-trained", row, re.IGNORECASE)) > 0) & (len(re.findall("http", row, re.IGNORECASE)) > 0))]) > 0):
            log(verbose, "LOG", "Found a link for 'pre-trained' something in readme")
            overall = "Yes"
            return overall

        if (len([row for row in readme_lines if ((len(re.findall("weight", row, re.IGNORECASE)) > 0) & (len(re.findall("http", row, re.IGNORECASE)) > 0))]) > 0):
            log(verbose, "LOG", "Found a link for 'weight' something in readme")
            overall = "Yes"
            return overall

        if (len([row for row in readme_lines if ((len(re.findall("download", row, re.IGNORECASE)) > 0) & (len(re.findall("model", row, re.IGNORECASE)) > 0) & (len(re.findall("http", row, re.IGNORECASE)) > 0))]) > 0):
            log(verbose, "LOG", "Found a link for 'model' something in readme")
            overall = "Yes"
            return overall

        if (llm):
            # NOTE: model_predict expects the inference client as its first argument (see utils.py)
            prompt = f"{readme}\nQ: Does this text contain a download link for the model pre-trained weights?"
            ans = model_predict(prompt)
            if (("Yes" in ans) & ("No" not in ans)):
                log(verbose, "LOG", "The LLM found signs for accessing the pre-trained weights from the readme")
                overall = "Yes"
                return overall

    log(verbose, "ERROR", "Found no pre-trained model weights.")
    return overall
midl.py
ADDED
@@ -0,0 +1,10 @@
from evaluations.repo_evaluations import midl_evaluations
# importing os module for environment variables
import os
# importing necessary functions from dotenv library
from dotenv import load_dotenv, dotenv_values
# loading variables from .env file
load_dotenv()
token = os.getenv("githubToken")

midl_evaluations()
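Entry point: running python midl.py from the repository root (with the .env file and data/dump.csv in place) triggers the MIDL ground-truth comparison defined in evaluations/repo_evaluations.py.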
plotting/paper_plots.py
ADDED
@@ -0,0 +1,89 @@
import plotly.express as px
import pandas as pd
import numpy as np

paper_dump = pd.read_csv('data/dump.csv', sep="\t")
# Calculate total number of papers per year and venue
custom_order = ["MICCAI", "MIDL", "Nature", "arXiv"]
total_titles_per_venue = paper_dump.groupby(['year', 'venue']).size().reset_index(name='total_titles')

# Calculate the number of papers with a repository URL per year and venue
total_url_per_venue = paper_dump[paper_dump["url"] != ""].groupby(['year', 'venue']).size().reset_index(name='total_urls')

# Merge the DataFrames to calculate the repository rate
merged_df = pd.merge(total_titles_per_venue, total_url_per_venue, on=['year', 'venue'], how='left')
merged_df['repo_rate'] = merged_df['total_urls'] / merged_df['total_titles']

# Plot the number of papers using Plotly, with year on x-axis and color by venue
fig = px.bar(
    merged_df,
    x='year',
    y='total_titles',
    color='venue',
    barmode='group',
    title='Number of papers per venue',
    labels={'total_titles': 'Number of papers', 'year': 'Year'},
    category_orders={'venue': custom_order}
)

fig.update_xaxes(range=[2018, 2024])
fig.show()

# Plot the number of repository URLs per venue and year
fig = px.bar(
    merged_df,
    x='year',
    y='total_urls',
    color='venue',
    barmode='group',
    title='Number of repositories per venue',
    labels={'total_urls': 'Number of repositories', 'year': 'Year'},
    category_orders={'venue': custom_order}
)

fig.update_xaxes(range=[2018, 2024])
fig.show()

# Plot the share of papers with a repository per venue and year
fig = px.bar(
    merged_df,
    x='year',
    y='repo_rate',
    color='venue',
    barmode='group',
    title='Share of papers with a repository per venue',
    labels={'repo_rate': 'Repository rate', 'year': 'Year'},
    category_orders={'venue': custom_order}
)
fig.update_xaxes(range=[2018, 2024])
fig.update_yaxes(range=[0, 1])

fig.show()
plotting/result_plots.py
ADDED
@@ -0,0 +1,141 @@
import plotly.express as px
import pandas as pd

df = pd.read_csv('data/results.csv', sep="\t")
custom_order = ["MICCAI", "MIDL", "Nature", "arXiv"]

# Calculate total number of URLs per year and venue
total_urls_per_year_venue = df.groupby(['year', 'venue']).size().reset_index(name='total_urls')

# Calculate the number of URLs that parsed as valid per year and venue
errors_per_year_venue = df[df["pred_valid"] != False].groupby(['year', 'venue']).size().reset_index(name='errors')

# Merge the DataFrames to calculate the success rate
error_rate_df = pd.merge(total_urls_per_year_venue, errors_per_year_venue, on=['year', 'venue'], how='left')
error_rate_df['errors'] = error_rate_df['errors'].fillna(0)  # Replace NaN with 0 for venues with no valid URLs
error_rate_df['error_rate'] = error_rate_df['errors'] / error_rate_df['total_urls']

# Plot the success rates using Plotly, with year on x-axis and color by venue
fig = px.bar(
    error_rate_df,
    x='year',
    y='error_rate',
    color='venue',
    barmode='group',
    title='Success Rate per Venue and Year for "valid_url"',
    labels={'error_rate': 'Success Rate', 'year': 'Year'},
    category_orders={'venue': custom_order}
)

fig.update_yaxes(range=[0, 1])
fig.update_xaxes(range=[2017.5, 2024.5])
fig.show()

for topic in ["pred_live", "pred_dependencies", "pred_training", "pred_evaluation", "pred_weights", "pred_readme", "pred_license"]:
    # Calculate total number of valid URLs per year and venue
    total_valid_urls_per_year_venue = df[df["pred_valid"] == True].groupby(['year', 'venue']).size().reset_index(name='total_urls')

    # Calculate the number of passes per year and venue
    passes_per_year_venue = df[df[topic] != "No"].groupby(['year', 'venue']).size().reset_index(name='successes')

    # Merge the DataFrames to calculate the success rate (relative to valid URLs)
    success_rate_df = pd.merge(total_valid_urls_per_year_venue, passes_per_year_venue, on=['year', 'venue'], how='left')
    success_rate_df['successes'] = success_rate_df['successes'].fillna(0)  # Replace NaN with 0 for venues with no passes
    success_rate_df['success_rate'] = success_rate_df['successes'] / success_rate_df['total_urls']

    # Plot the success rates using Plotly, with year on x-axis and color by venue
    fig = px.bar(
        success_rate_df,
        x='year',
        y='success_rate',
        color='venue',
        barmode='group',
        title=f'Success Rate per Venue and Year for "{topic}"',
        labels={'success_rate': 'Success Rate', 'year': 'Year'},
        category_orders={'venue': custom_order}
    )

    fig.update_yaxes(range=[0, 1])
    fig.update_xaxes(range=[2017.5, 2024.5])
    fig.show()

# List of columns that feed the reproducibility score
columns_to_check = ["pred_dependencies", "pred_training", "pred_evaluation", "pred_weights", "pred_readme", "pred_license"]

# Step 1: Count the non-"No" answers per row for the specified columns (the reproducibility score)
df['no_count'] = df[columns_to_check].apply(lambda row: (row != 'No').sum(), axis=1)

# Step 2: Create scatter plot with pred_citations on x-axis and the score on y-axis, color-coded by venue
fig = px.scatter(
    df,
    x='pred_citations',
    y='no_count',
    color='venue',
    title='Automated Reproducibility Score vs Citations, Color Coded by Venue',
    labels={'pred_citations': 'Citations', 'no_count': 'Automated Reproducibility score (0-6)'},
    category_orders={'venue': custom_order},  # Ensure custom order for venue if necessary
    log_x=True
)

# Step 3: Display the scatter plot
fig.show()

# Step 1 (repeated for clarity): the per-row score over the same columns
df['no_count'] = df[columns_to_check].apply(lambda row: (row != 'No').sum(), axis=1)

# Step 2: Create a strip plot (scatter-like) with jitter to show individual scores
fig = px.strip(
    df,
    x='venue',
    y='no_count',
    color='venue',
    title='Individual Reproducibility Scores with Jitter per Venue',
    labels={'no_count': 'Automated Reproducibility Score (0-6)', 'venue': 'Venue'},
    category_orders={'venue': custom_order},  # Ensure custom order for venues
    stripmode='overlay'  # Allows all individual points to overlay each other
)

# Step 3: Add some jitter to the x-axis so points don't overlap
fig.update_traces(jitter=0.3, marker={'size': 8}, selector=dict(mode='markers'))

# Step 4: Overlay a box plot to show median and spread
fig.add_trace(px.box(
    df,
    x='venue',
    y='no_count',
    category_orders={'venue': custom_order}
).data[0])  # We add the first trace of the box plot to overlay

# Step 5: Show the plot
fig.show()

for topic in ["pred_live", "pred_dependencies", "pred_training", "pred_evaluation", "pred_weights", "pred_readme", "pred_license"]:
    # Calculate total number of URLs per venue
    total_urls_per_venue = df.groupby('venue').size().reset_index(name='total_urls')

    # Calculate the number of passes per venue
    errors_per_venue = df[df[topic] != "No"].groupby('venue').size().reset_index(name='errors')

    # Merge the DataFrames to calculate the success rate
    error_rate_df = pd.merge(total_urls_per_venue, errors_per_venue, on='venue', how='left')
    error_rate_df['errors'] = error_rate_df['errors'].fillna(0)  # Replace NaN with 0 for venues with no passes
    error_rate_df['error_rate'] = error_rate_df['errors'] / error_rate_df['total_urls']

    # Plot the success rates using Plotly, with venue on x-axis
    fig = px.bar(
        error_rate_df,
        x='venue',
        y='error_rate',
        color='venue',
        title=f'Success Rate per Venue for "{topic}"',
        labels={'error_rate': 'Success Rate', 'venue': 'Venue'},
        category_orders={'venue': custom_order}
    )

    fig.update_yaxes(range=[0, 1])
    fig.show()