attilasimko committed
Commit 6e3c928 · 1 Parent(s): f6e3be7

Push new changes

evaluations/repo_evaluations.py CHANGED
@@ -10,15 +10,11 @@ from huggingface_hub import InferenceClient
 API_URL = "https://api-inference.huggingface.co/models/openlm-research/open_llama_3b_v2"
 headers = {"Authorization": "Bearer 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", "x-wait-for-model": "true"}
 
-
 client = InferenceClient(
     "meta-llama/Llama-3.1-8B-Instruct",
     token="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",
 )
 
-def init_llm(verbose):
-    log(verbose, "LOG", "Initializing LLM...")
-
 def evaluate(llm, verbose, repo_url, title=None, year=None):
     repository_zip_name = "data/repo.zip"
     token = os.getenv("githubToken")
@@ -35,11 +31,6 @@ def evaluate(llm, verbose, repo_url, title=None, year=None):
     if (get_api_link(repo_url) != ""):
         results["pred_valid"] = True
     else:
-        results["pred_live"] = "No"
-        results["pred_training"] = "No"
-        results["pred_evaluation"] = "No"
-        results["pred_weights"] = "No"
-        results["pred_packages"] = "No"
         return results
 
     username, repo_name = decompose_url(repo_url)
@@ -86,17 +77,13 @@ def evaluate(llm, verbose, repo_url, title=None, year=None):
         results["pred_live"] = "No"
         return results
 
-def full_evaluations():
+def full_evaluation():
     paper_dump = pd.read_csv("data/dump.csv", sep="\t")
-    repro = evaluate(None, False)
     full_results = []
 
-    nth = 1
     for idx, row in paper_dump.iterrows():
-        if (idx % nth != 0):
-            continue
 
-        if (row["url"] == ""):
+        if (pd.isna(row["url"]) | (row["url"] == "")):
             continue
 
         print(str(int(100 * idx / paper_dump["title"].count())) + "% done")
@@ -105,6 +92,7 @@ def full_evaluations():
             row[column] = result[column]
 
         full_results.append(row)
+    return pd.DataFrame(full_results)
 
 
 def midl_evaluations():
@@ -138,21 +126,23 @@ def midl_evaluations():
         full_results.append(row)
         if (compare_to_gt):
             print("\nSummary:")
-            if ((row["pred_dependencies"] is not None) & (row["dependencies"] != "")):
+            if ((~pd.isna(row["dependencies"])) & (row["pred_dependencies"] is not None)):
                 eval_dependencies.append(row["pred_dependencies"] == row["dependencies"])
                 print(f"Dependencies acc. - {row['pred_dependencies']} (GT:{row['dependencies']}) / {int(100 * np.mean(eval_dependencies))}%")
-            if ((row["pred_training"] is not None) & (row["training"] != "")):
+            if ((~pd.isna(row["training"])) & (row["pred_dependencies"] is not None)):
                 eval_training.append(row["training"] == row["pred_training"])
                 print(f"Training acc. -{row['pred_training']} (GT:{row['training']}) / {int(100 * np.mean(eval_training))}%")
-            if ((row["pred_evaluation"] is not None) & (row["evaluation"] != "")):
+            if ((~pd.isna(row["evaluation"])) & (row["pred_dependencies"] is not None)):
                 eval_evaluating.append(row["evaluation"] == row["pred_evaluation"])
                 print(f"Evaluating acc. - {row['pred_evaluation']} (GT:{row['evaluation']}) / {int(100 * np.mean(eval_evaluating))}%")
-            if ((row["pred_weights"] is not None) & (row["weights"] != "")):
+            if ((~pd.isna(row["weights"])) & (row["pred_dependencies"] is not None)):
                 eval_weights.append(row["weights"] == row["pred_weights"])
                 print(f"Weights acc. - {row['pred_weights']} (GT:{row['weights']}) / {int(100 * np.mean(eval_weights))}%")
-            if ((row["pred_readme"] is not None) & (row["readme"] != "")):
+            if ((~pd.isna(row["readme"])) & (row["pred_dependencies"] is not None)):
                 eval_readme.append(row["readme"] == row["pred_readme"])
                 print(f"README acc. - {row['pred_readme']} (GT:{row['readme']}) / {int(100 * np.mean(eval_readme))}%")
-            if ((row["pred_license"] is not None) & (row["license"] != "")):
+            if ((~pd.isna(row["license"])) & (row["pred_dependencies"] is not None)):
                 eval_licensing.append(("No" if row["license"] == "No" else "Yes") == row["pred_license"])
-            print(f"LICENSE acc. - {row['pred_license']} (GT:{row['license']}) / {int(100 * np.mean(eval_licensing))}%")
+                print(f"LICENSE acc. - {row['pred_license']} (GT:{row['license']}) / {int(100 * np.mean(eval_licensing))}%")
+
+    return pd.DataFrame(full_results)
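The switch to a `pd.isna` guard in `full_evaluation` matters because pandas parses empty cells of a tab-separated dump as NaN rather than as empty strings, so the old `row["url"] == ""` test let those rows through. A minimal sketch of the difference (the inline data below is made up for illustration):

import io
import pandas as pd

# An empty "url" cell in a tab-separated dump comes back as NaN, not as "".
tsv = "title\turl\npaper A\thttps://github.com/user/repo\npaper B\t\n"
dump = pd.read_csv(io.StringIO(tsv), sep="\t")

row = dump.iloc[1]                               # the row with the missing url
print(row["url"] == "")                          # False: NaN is not equal to ""
print(pd.isna(row["url"]) | (row["url"] == ""))  # True: the NaN-aware guard skips this row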
evaluations/utils.py CHANGED
@@ -108,7 +108,10 @@ def log(verbose, log_type, log_text, hf=False):
 
     # Align line-break
     if (log_text.startswith("\n")):
-        print("\n")
+        if (verbose == 1):
+            print("\n")
+        if (verbose == 2):
+            st.write("\n")
     log_text = log_text.lstrip('\n')
 
     if (log_type == "LOG"):
@@ -127,4 +130,7 @@ def log(verbose, log_type, log_text, hf=False):
         st.write(log_text)
         return
 
-    raise Exception(log_text)
+    raise Exception(log_text)
+
+def init_llm(verbose):
+    log(verbose, "LOG", "Initializing LLM...")
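A quick usage sketch of the convention these hunks encode, with verbose 1 meaning console output and verbose 2 meaning Streamlit output, using `log` and the relocated `init_llm` from this file; the message strings are placeholders and the exact behaviour for other log types is inferred from context rather than shown in this diff:

from evaluations.utils import init_llm, log

init_llm(1)                                   # logs "Initializing LLM..." with console verbosity
log(1, "LOG", "\nDownloading repository...")  # leading newline now printed only when verbose == 1
log(2, "LOG", "Evaluating repository...")     # verbose == 2 routes the message to the Streamlit page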
evaluations/validating.py CHANGED
@@ -8,14 +8,12 @@ def evaluate(verbose, llm, zip, readme):
         'tensorflow': [
             r'tf\.keras\.models\.load_model', # TensorFlow model loading
             r'tf\.saved_model\.load',
-            r'model\.predict', # Running inference
-            r'model\(.+\)' # Direct model invocation for inference
+            r'\.predict', # Running inference
         ],
         'pytorch': [
             r'torch\.load', # PyTorch model loading
             r'torch\.jit\.load', # PyTorch JIT model loading
-            r'model\.eval', # Running inference
-            r'model\(.+\)' # Direct model invocation for inference
+            r'\.eval', # Running inference
         ]
     }
 
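Dropping the hard-coded `model` prefix (and the broad `model\(.+\)` call pattern) means the inference check now keys on the method name alone, so repositories whose networks are not literally named `model` are still detected. A small illustration, assuming the patterns are applied with `re.search` over repository source text:

import re

source = "preds = classifier.predict(test_images)"

print(bool(re.search(r'model\.predict', source)))  # False: old pattern required the literal name "model"
print(bool(re.search(r'\.predict', source)))       # True: relaxed pattern matches any object's .predict call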
full_eval.py ADDED
@@ -0,0 +1,11 @@
+from evaluations.repo_evaluations import full_evaluation
+# importing os module for environment variables
+import os
+# importing necessary functions from dotenv library
+from dotenv import load_dotenv
+# loading variables from .env file
+load_dotenv()
+token = os.getenv("githubToken")
+
+res = full_evaluation()
+res.to_csv("results.csv", sep="\t", index=False)
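The new driver assumes a `.env` file alongside it that supplies the GitHub token read via `os.getenv("githubToken")`, i.e. a single line of the form below (placeholder value):

githubToken=<your-github-token>

Running `python full_eval.py` then writes the per-paper predictions to results.csv as tab-separated values.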
midl.py CHANGED
@@ -2,9 +2,10 @@ from evaluations.repo_evaluations import midl_evaluations
 # importing os module for environment variables
 import os
 # importing necessary functions from dotenv library
-from dotenv import load_dotenv, dotenv_values
+from dotenv import load_dotenv
 # loading variables from .env file
 load_dotenv()
 token = os.getenv("githubToken")
 
-midl_evaluations()
+res = midl_evaluations()
+res.to_csv("results_midl.csv", sep="\t", index=False)