attilasimko committed
Commit ccf0698 · 1 Parent(s): febd197

new files, possible model

app.py CHANGED
@@ -1,7 +1,10 @@
import streamlit as st
from evaluations.repo_evaluations import evaluate
+ from evaluations.models import LocalLLM
import requests

+ model = LocalLLM("codellama/CodeLlama-7b-Instruct-hf")
+
st.write("\n")
st.write("Welcome to the online reproducibility evaluation tool!")
st.write("We follow guidelines provided by Simkó et al. (2022) (https://arxiv.org/abs/2210.11146)")
@@ -11,7 +14,7 @@ repo_link = st.text_input("Github repository link:", value="", type="default", h

if (repo_link):
    verbose = 4 if checkbox else 3
-     evaluate(llm=None, verbose=verbose, repo_url=repo_link)
+     evaluate(llm=model, verbose=verbose, repo_url=repo_link)

with st.form("my_form"):
    st.write("Notice something wrong? Please tell us so we can improve.")

evaluations/documentation.py CHANGED
@@ -28,6 +28,7 @@ def evaluate(verbose, llm, zip, readme):
        package dependencies you need to install and how to train \
        and evaluate the proposed model? Please strictly \
        answer yes or no.\n\nA:'
+         llm.predict("HELP", prompt)


    manual_fail = False
@@ -47,7 +48,7 @@ def evaluate(verbose, llm, zip, readme):
        (len(re.findall("requirement", readme, re.IGNORECASE)) == 0)):
        log(verbose, "ERROR", "Readme file missing information about package dependencies")
        overall = "No"
-
+
    return overall

def count_comment_lines(lines):

evaluations/license.py CHANGED
@@ -1,4 +1,4 @@
- from .utils import log, model_predict
+ from .utils import log
import re

def evaluate(verbose, llm, zip, readme):
@@ -10,10 +10,9 @@ def evaluate(verbose, llm, zip, readme):
    ans = [row for row in license.split("\n") if row != ""]

    if (llm):
-         license = license[:50]
-         prompt = f"Q: {license}. This was an excerpt from a license \
-             file. Do you know the name of this license?"
-         ans = model_predict(prompt)
+         license = license
+         prompt = f"{license}. Please describe this type of license, what it allows and what it doesn't."
+         ans = llm.predict("HELP", prompt)
        log(verbose, "LOG", f"Found license: {ans}")
    else:
        log(verbose, "LOG", f"Found license file: {license_files[0]}")

evaluations/models.py ADDED
@@ -0,0 +1,47 @@
+ from transformers import pipeline
+ from huggingface_hub import InferenceClient
+ import os
+
+ system_messages = { "STRICT": "You are a chatbot evaluating github repositories, their python codes and corresponding readme files. Strictly answer the questions with Yes or No.",
+                     "HELP": "You are a chatbot evaluating github repositories, their python codes and corresponding readme files. Please help me answer the following question." }
+
+ class LocalLLM():
+     def __init__(self, model_name):
+         self.pipe = pipeline("text-generation", model=model_name, max_new_tokens=1000, device_map={0: 0})
+
+     def predict(self, response_type, prompt):
+         messages = [
+             {"role": "system", "content": system_messages[response_type]},
+             {"role": "user", "content": prompt},
+         ]
+         res = self.pipe(messages)
+         res = res[0]["generated_text"]
+
+         res = [response for response in res if response["role"] == "assistant"][0]["content"]
+         res = res.strip()
+
+         return res
+
+ class RemoteLLM():
+     def __init__(self):
+         token = os.getenv("hfToken")
+         API_URL = "https://api-inference.huggingface.co/models/openlm-research/open_llama_3b_v2"
+         headers = {"Authorization": f"Bearer {token}", "x-wait-for-model": "true"}
+
+         self.client = InferenceClient(
+             "meta-llama/Llama-3.1-8B-Instruct",
+             token=token,
+         )
+
+
+     def predict(self, response_type, prompt):
+         for message in self.client.chat_completion(
+             messages=[{"role": "system", "content": system_messages[response_type]},
+                       {"role": "user", "content": prompt}],
+             max_tokens=500,
+             stream=True,
+         ):
+             return message.choices[0].delta.content
+
+         return ""
+

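Note: a minimal usage sketch of the two wrappers above, based only on how they are instantiated and called elsewhere in this commit (app.py, midl.py, license.py); the example prompt is a hypothetical placeholder:

    from evaluations.models import LocalLLM, RemoteLLM

    # Local CodeLlama pipeline, as loaded in app.py and midl.py
    model = LocalLLM("codellama/CodeLlama-7b-Instruct-hf")
    # model = RemoteLLM()  # alternative: hosted Inference API client, reads the hfToken environment variable

    # "STRICT" asks for a Yes/No answer, "HELP" allows a free-form answer (see system_messages above)
    answer = model.predict("STRICT", "Does this readme explain how to install the dependencies?")
    print(answer)
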
evaluations/pitfalls.py ADDED
@@ -0,0 +1,6 @@
+ from .utils import log, model_predict
+ import re
+
+ def evaluate(verbose, llm, zip, readme):
+     log(verbose, "TITLE", "\nLooking for common pitfalls...")
+

evaluations/repo_evaluations.py CHANGED
@@ -7,22 +7,11 @@ import os
import numpy as np
from huggingface_hub import InferenceClient

- API_URL = "https://api-inference.huggingface.co/models/openlm-research/open_llama_3b_v2"
- headers = {"Authorization": "Bearer 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", "x-wait-for-model": "true"}
-
- client = InferenceClient(
-     "meta-llama/Llama-3.1-8B-Instruct",
-     token="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",
- )
-
def evaluate(llm, verbose, repo_url, title=None, year=None):
    repository_zip_name = "data/repo.zip"
    token = os.getenv("githubToken")
-     # token = userdata.get('githubToken')

-     if (llm):
-         init_llm(verbose)
-     else:
+     if (not(llm)):
        log(verbose, "LOG", "No LLM will be used for the evaluation.")

    results = { "pred_live": "Yes", "pred_dependencies": None, "pred_training": None, "pred_evaluation": None, "pred_weights": None, "pred_readme": None, "pred_license": None, "pred_stars": None, "pred_citations": None, "pred_valid": False}
@@ -54,9 +43,9 @@ def evaluate(llm, verbose, repo_url, title=None, year=None):
        readme = fetch_readme(zip)
        results["pred_stars"] = fetch_repo_stars(verbose, repo_url, token)

-
+         results["pred_license"] = license.evaluate(verbose, llm, zip, readme)
        if (len(zip.namelist()) <= 2):
-             log(verbose, "LOG", "Empty repository")
+             log(verbose, "LOG", "The repository is empty.")
            results["pred_live"] = "No"
            results["pred_training"] = "No"
            results["pred_evaluation"] = "No"
@@ -69,7 +58,6 @@ def evaluate(llm, verbose, repo_url, title=None, year=None):
        results["pred_weights"] = weights.evaluate(verbose, llm, zip, readme)
        results["pred_readme"] = documentation.evaluate(verbose, llm, zip, readme)
        results["pred_codetocomment"] = documentation.get_code_to_comment_ratio(zip)
-         results["pred_license"] = license.evaluate(verbose, llm, zip, readme)

        return results
    except Exception as e:
@@ -94,7 +82,7 @@ def full_evaluation():
            full_results.append(row)
    return pd.DataFrame(full_results)

- def midl_evaluations():
+ def midl_evaluations(model):
    compare_to_gt = True
    paper_dump = pd.read_csv("data/dump.csv", sep="\t")
    verbose = 1
@@ -120,7 +108,7 @@ def midl_evaluations():
            print(f"\nEvaluating {idx+1} out of {len(paper_dump.index)} papers...")
            print(f'Paper title - "{row["title"]}" ({row["year"]})')
            print(f'Repository link - {row["url"]}')
-             result = evaluate(None, verbose, row["url"])
+             result = evaluate(model, verbose, row["url"])
            for column in result.keys():
                row[column] = result[column]
            full_results.append(row)

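Note: an end-to-end sketch of how evaluate() is now meant to be driven with a model, mirroring app.py and the results dictionary initialised above; the repository URL below is a hypothetical placeholder:

    from evaluations.models import LocalLLM
    from evaluations.repo_evaluations import evaluate

    model = LocalLLM("codellama/CodeLlama-7b-Instruct-hf")
    # verbose=3 matches what the Streamlit app passes when its checkbox is unticked
    results = evaluate(llm=model, verbose=3, repo_url="https://github.com/example/some-repo")

    # results carries the per-criterion predictions initialised in evaluate(), e.g.:
    print(results["pred_license"], results["pred_readme"], results["pred_training"])
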
evaluations/utils.py CHANGED
@@ -6,16 +6,6 @@ import json
import streamlit as st


- def model_predict(client, prompt):
-     for message in client.chat_completion(
-         messages=[{"role": "system", "content": "You are a chatbot evaluating github repositories, their python codes and corresponding readme files. Strictly answer the questions with Yes or No."}, {"role": "user", "content": prompt}],
-         max_tokens=500,
-         stream=True,
-     ):
-         return message.choices[0].delta.content
-
-     return ""
-

def get_api_link(url):
    username, repo_name = decompose_url(url)
@@ -72,10 +62,8 @@ def fetch_repo(verbose, repo_url, repo_name, token):
    if response.status_code == 200:
        with open(repo_name, 'wb') as file:
            file.write(response.content)
-
-         log(verbose, "LOG", "Repository downloaded successfully")
    if (response.status_code == 404):
-         log(verbose, "ERROR", "Repository private.")
+         log(verbose, "ERROR", "Repository private / Link broken.")

def fetch_readme(zip):
    readme_files = [readme for readme in zip.namelist() if ((readme.endswith("README.MD") | readme.endswith("README.md") | readme.endswith("readme.md")) & (len(readme.split("/")) == 2))]

full_eval.py CHANGED
@@ -8,4 +8,4 @@ load_dotenv()
token = os.getenv("githubToken")

res = full_evaluation()
- res.to_csv("results.csv", sep="\t", index=False)
+ res.to_csv("data/results.csv", sep="\t", index=False)

midl.py CHANGED
@@ -1,11 +1,13 @@
from evaluations.repo_evaluations import midl_evaluations
- # importing os module for environment variables
+ from evaluations.models import LocalLLM
import os
- # importing necessary functions from dotenv library
- from dotenv import load_dotenv
- # loading variables from .env file
+ from dotenv import load_dotenv
load_dotenv()
token = os.getenv("githubToken")

- res = midl_evaluations()
+
+ # Load model directly
+
+ model = LocalLLM("codellama/CodeLlama-7b-Instruct-hf")
+ res = midl_evaluations(model)
res.to_csv("results_midl.csv", sep="\t", index=False)