Commit 6e3c928
Parent(s): f6e3be7
Push new changes
- evaluations/repo_evaluations.py +12 -22
- evaluations/utils.py +8 -2
- evaluations/validating.py +2 -4
- full_eval.py +11 -0
- midl.py +3 -2
evaluations/repo_evaluations.py
CHANGED
@@ -10,15 +10,11 @@ from huggingface_hub import InferenceClient
 API_URL = "https://api-inference.huggingface.co/models/openlm-research/open_llama_3b_v2"
 headers = {"Authorization": "Bearer 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", "x-wait-for-model": "true"}
 
-
 client = InferenceClient(
     "meta-llama/Llama-3.1-8B-Instruct",
     token="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",
 )
 
-def init_llm(verbose):
-    log(verbose, "LOG", "Initializing LLM...")
-
 def evaluate(llm, verbose, repo_url, title=None, year=None):
     repository_zip_name = "data/repo.zip"
     token = os.getenv("githubToken")
@@ -35,11 +31,6 @@ def evaluate(llm, verbose, repo_url, title=None, year=None):
     if (get_api_link(repo_url) != ""):
         results["pred_valid"] = True
     else:
-        results["pred_live"] = "No"
-        results["pred_training"] = "No"
-        results["pred_evaluation"] = "No"
-        results["pred_weights"] = "No"
-        results["pred_packages"] = "No"
         return results
 
     username, repo_name = decompose_url(repo_url)
@@ -86,17 +77,13 @@ def evaluate(llm, verbose, repo_url, title=None, year=None):
         results["pred_live"] = "No"
     return results
 
-def full_evaluations():
+def full_evaluation():
     paper_dump = pd.read_csv("data/dump.csv", sep="\t")
-    repro = evaluate(None, False)
     full_results = []
 
-    nth = 1
     for idx, row in paper_dump.iterrows():
-        if (idx % nth != 0):
-            continue
 
-        if (row["url"] == ""):
+        if (pd.isna(row["url"]) | (row["url"] == "")):
             continue
 
         print(str(int(100 * idx / paper_dump["title"].count())) + "% done")
@@ -105,6 +92,7 @@ def full_evaluations():
             row[column] = result[column]
 
         full_results.append(row)
+    return pd.dataFrame(full_results)
 
 def midl_evaluations():
     compare_to_gt = True
@@ -138,21 +126,23 @@ def midl_evaluations():
         full_results.append(row)
         if (compare_to_gt):
             print("\nSummary:")
-            if ((row["
+            if ((~pd.isna(row["dependencies"])) & (row["pred_dependencies"] is not None)):
                 eval_dependencies.append(row["pred_dependencies"] == row["dependencies"])
                 print(f"Dependencies acc. - {row['pred_dependencies']} (GT:{row['dependencies']}) / {int(100 * np.mean(eval_dependencies))}%")
-            if ((row["
+            if ((~pd.isna(row["training"])) & (row["pred_dependencies"] is not None)):
                 eval_training.append(row["training"] == row["pred_training"])
                 print(f"Training acc. -{row['pred_training']} (GT:{row['training']}) / {int(100 * np.mean(eval_training))}%")
-            if ((row["
+            if ((~pd.isna(row["evaluation"])) & (row["pred_dependencies"] is not None)):
                 eval_evaluating.append(row["evaluation"] == row["pred_evaluation"])
                 print(f"Evaluating acc. - {row['pred_evaluation']} (GT:{row['evaluation']}) / {int(100 * np.mean(eval_evaluating))}%")
-            if ((row["
+            if ((~pd.isna(row["weights"])) & (row["pred_dependencies"] is not None)):
                 eval_weights.append(row["weights"] == row["pred_weights"])
                 print(f"Weights acc. - {row['pred_weights']} (GT:{row['weights']}) / {int(100 * np.mean(eval_weights))}%")
-            if ((row["
+            if ((~pd.isna(row["readme"])) & (row["pred_dependencies"] is not None)):
                 eval_readme.append(row["readme"] == row["pred_readme"])
                 print(f"README acc. - {row['pred_readme']} (GT:{row['readme']}) / {int(100 * np.mean(eval_readme))}%")
-            if ((row["
+            if ((~pd.isna(row["license"])) & (row["pred_dependencies"] is not None)):
                 eval_licensing.append(("No" if row["license"] == "No" else "Yes") == row["pred_license"])
-            print(f"LICENSE acc. - {row['pred_license']} (GT:{row['license']}) / {int(100 * np.mean(eval_licensing))}%")
+                print(f"LICENSE acc. - {row['pred_license']} (GT:{row['license']}) / {int(100 * np.mean(eval_licensing))}%")
+
+    return pd.dataFrame(full_results)
evaluations/utils.py
CHANGED
@@ -108,7 +108,10 @@ def log(verbose, log_type, log_text, hf=False):
 
     # Align line-break
     if (log_text.startswith("\n")):
-
+        if (verbose == 1):
+            print("\n")
+        if (verbose == 2):
+            st.write("\n")
         log_text = log_text.lstrip('\n')
 
     if (log_type == "LOG"):
@@ -127,4 +130,7 @@ def log(verbose, log_type, log_text, hf=False):
         st.write(log_text)
         return
 
-    raise Exception(log_text)
+    raise Exception(log_text)
+
+def init_llm(verbose):
+    log(verbose, "LOG", "Initializing LLM...")
evaluations/validating.py
CHANGED
@@ -8,14 +8,12 @@ def evaluate(verbose, llm, zip, readme):
         'tensorflow': [
             r'tf\.keras\.models\.load_model', # TensorFlow model loading
             r'tf\.saved_model\.load',
-            r'
-            r'model\(.+\)' # Direct model invocation for inference
+            r'\.predict', # Running inference
         ],
         'pytorch': [
             r'torch\.load', # PyTorch model loading
             r'torch\.jit\.load', # PyTorch JIT model loading
-            r'
-            r'model\(.+\)' # Direct model invocation for inference
+            r'\.eval', # Running inference
         ]
     }
 
full_eval.py
ADDED
@@ -0,0 +1,11 @@
+from evaluations.repo_evaluations import full_evaluation
+# importing os module for environment variables
+import os
+# importing necessary functions from dotenv library
+from dotenv import load_dotenv
+# loading variables from .env file
+load_dotenv()
+token = os.getenv("githubToken")
+
+res = full_evaluation()
+res.to_csv("results.csv", sep="\t", index=False)
midl.py
CHANGED
@@ -2,9 +2,10 @@ from evaluations.repo_evaluations import midl_evaluations
 # importing os module for environment variables
 import os
 # importing necessary functions from dotenv library
-from dotenv import load_dotenv
+from dotenv import load_dotenv
 # loading variables from .env file
 load_dotenv()
 token = os.getenv("githubToken")
 
-midl_evaluations()
+res = midl_evaluations()
+res.to_csv("results_midl.csv", sep="\t", index=False)