Spaces:
Sleeping
Sleeping
from .utils import log | |
import re | |
import numpy as np | |
def _readme_mentions(readme, keyword):
    """Return True when ``keyword`` occurs anywhere in ``readme``, ignoring case."""
    return re.search(keyword, readme, re.IGNORECASE) is not None


def evaluate(verbose, llm, zip, readme):
    """Decide whether the repository's code documentation is sufficient.

    First logs the comment-to-code ratio computed from ``zip``, then applies
    keyword heuristics to the README text.

    Args:
        verbose: Forwarded unchanged as the first argument of ``log``.
        llm: Currently unused by this function; kept in the signature so
            existing callers keep working.
        zip: Archive object handed to ``get_code_to_comment_ratio``.
        readme: README contents as a string; a falsy value means there is
            no README to inspect.

    Returns:
        The string "Yes" or "No".
    """
    log(verbose, "LOG", "\nEvaluating code documentation...")
    overall = "No"

    code_to_comment_ratio = get_code_to_comment_ratio(zip)
    log(verbose, "LOG", f"Your python scripts have a comment-to-code ratio of {np.round(code_to_comment_ratio, 2)}%.")

    if readme:
        non_empty_rows = [row for row in readme.split("\n") if row != ""]
        if len(non_empty_rows) < 5:
            log(verbose, "WARNING", "Readme file has very few lines")
            return overall

        # Enough fenced example code is accepted on its own.
        if count_code_lines(non_empty_rows) > 5:
            log(verbose, "LOG", "Readme file contains python examples.")
            return "Yes"

        # The order of the checks below matters: a later check overrides
        # the verdict set by an earlier one.
        if not _readme_mentions(readme, "train"):
            log(verbose, "ERROR", "Readme file missing training information")
            overall = "No"

        # NOTE(review): this fires unless BOTH "demo" and "evaluat" appear,
        # whereas the dependency check below only fires when ALL of its
        # keywords are absent -- confirm which of the two is intended.
        if not _readme_mentions(readme, "demo") or not _readme_mentions(readme, "evaluat"):
            log(verbose, "ERROR", "Readme file missing testing information")
            overall = "No"

        if _readme_mentions(readme, "example"):
            log(verbose, "LOG", "Readme file contains links to examples")
            overall = "Yes"

        if not (
            _readme_mentions(readme, "package")
            or _readme_mentions(readme, "dependenc")
            or _readme_mentions(readme, "requirement")
        ):
            log(verbose, "ERROR", "Readme file missing information about package dependencies")
            overall = "No"

    # Only report a useless README when the verdict really is negative;
    # emitting this error next to a "Yes" verdict would be contradictory.
    if overall == "No":
        log(verbose, "ERROR", "Found no useful information in README file.")
    return overall
def count_comment_lines(lines):
    """Count comment lines and docstring lines in Python source lines.

    This is a line-based heuristic, not a tokenizer:

    * a line whose first non-blank character is ``#`` is a single-line
      comment (unless it sits inside an open triple-quoted block);
    * a line that starts with ``\"\"\"`` or ``'''`` opens a triple-quoted
      block, which runs until the same delimiter appears again -- on the
      same line (one-line docstring) or anywhere on a later line.

    Triple-quoted strings that do not start their line (for example
    ``x = \"\"\"...``) and prefixed literals (``r\"\"\"...``) are not recognised.

    Args:
        lines: Iterable of source lines as strings.

    Returns:
        A tuple ``(single_line_comments, multi_line_comments)``. Every input
        line is counted at most once across the two counters.
    """
    single_line_comments = 0
    multi_line_comments = 0
    open_delimiter = None  # the delimiter that opened the current block, if any

    for line in lines:
        stripped_line = line.strip()

        if open_delimiter is not None:
            # Everything inside the block counts, including '#' lines, which
            # are plain text here rather than comments.
            multi_line_comments += 1
            if open_delimiter in stripped_line:
                open_delimiter = None
            continue

        if stripped_line.startswith('#'):
            single_line_comments += 1
            continue

        for delimiter in ('"""', "'''"):
            if stripped_line.startswith(delimiter):
                multi_line_comments += 1
                # A one-line docstring closes on its opening line; only stay
                # in block mode when no closing delimiter follows.
                if delimiter not in stripped_line[len(delimiter):]:
                    open_delimiter = delimiter
                break

    return single_line_comments, multi_line_comments
def get_code_to_comment_ratio(zip):
    """Return the comment lines as a percentage of the code lines in an archive.

    Every member whose name ends in ``.py`` or ``.ipynb`` is decoded as UTF-8
    text and split on newlines. Lines reported by ``count_comment_lines`` are
    comments; every other line (blank lines included) counts as code.
    Notebook files are treated as raw text like any other member.

    Args:
        zip: Archive object providing ``namelist()`` and ``open(name)``.

    Returns:
        ``100 * comment_lines / code_lines``, or ``0.0`` when there are no
        code lines to compare against (for example when the archive holds no
        matching files).
    """
    source_files = [
        file_path
        for file_path in zip.namelist()
        if file_path.endswith((".py", ".ipynb"))
    ]

    code_line_count = 0
    comment_line_count = 0
    for file in source_files:
        # Close each member handle promptly instead of leaking it.
        with zip.open(file) as member:
            # Undecodable bytes must not abort the whole evaluation; they do
            # not change the number of lines.
            file_lines = member.read().decode("utf-8", errors="replace").split('\n')
        sl_comm, ml_comm = count_comment_lines(file_lines)
        comment_line_count += sl_comm + ml_comm
        code_line_count += len(file_lines) - (sl_comm + ml_comm)

    if code_line_count == 0:
        # Avoid ZeroDivisionError when there is nothing to measure against.
        return 0.0
    return 100 * comment_line_count / code_line_count
def count_code_lines(lines):
    """Return half the number of lines that belong to fenced code snippets.

    A line whose stripped text starts with three backticks toggles the
    "inside a snippet" state. Both fence lines and every line between them
    are tallied, and the tally is halved (rounded down) before returning.

    Args:
        lines: Iterable of text lines as strings.

    Returns:
        The halved tally as an int.
    """
    inside_fence = False
    fenced_total = 0

    for raw_line in lines:
        is_fence = raw_line.strip().startswith('```')
        # Fence markers count themselves, on opening and on closing.
        if is_fence or inside_fence:
            fenced_total += 1
        if is_fence:
            inside_fence = not inside_fence

    return fenced_total // 2