Spaces:
Sleeping
Sleeping
File size: 4,175 Bytes
77f290b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
from .utils import log
import re
import numpy as np
def evaluate(verbose, llm, zip, readme):
    """Judge whether the README documents how to use the code.

    Logs the comment-to-code percentage of the archive's Python sources and
    then applies a sequence of heuristics to the README text.

    NOTE(review): the nesting below was reconstructed from statement order
    because the original indentation was not available; confirm it against
    the upstream file.

    Args:
        verbose: Forwarded unchanged as the first argument of ``log``.
        llm: Currently unused; kept so existing callers keep working.
        zip: Archive object handed to ``get_code_to_comment_ratio``.
        readme: README contents as a string. A falsy value skips every
            README check.

    Returns:
        ``"Yes"`` when the README has more than five (halved) fenced code
        lines, or when it mentions "example" and at least one of "package",
        "dependenc" or "requirement". ``"No"`` otherwise.
    """
    log(verbose, "LOG", "\nEvaluating code documentation...")
    overall = "No"

    code_to_comment_ratio = get_code_to_comment_ratio(zip)
    log(verbose, "LOG", f"Your python scripts have a comment-to-code ratio of {np.round(code_to_comment_ratio, 2)}%.")

    if readme:
        non_empty_rows = [row for row in readme.split("\n") if row != ""]
        if len(non_empty_rows) < 5:
            log(verbose, "WARNING", "Readme file has very few lines")
            return overall

        if count_code_lines(non_empty_rows) > 5:
            log(verbose, "LOG", "Readme file contains python examples.")
            return "Yes"

        def mentions(keyword):
            # Case-insensitive substring/regex probe of the README text.
            return re.search(keyword, readme, re.IGNORECASE) is not None

        # The checks run in this fixed order; a later check overrides the
        # verdict of an earlier one.
        if not mentions("train"):
            log(verbose, "ERROR", "Readme file missing training information")
            overall = "No"

        # Reported when either keyword is absent (not only when both are).
        if not mentions("demo") or not mentions("evaluat"):
            log(verbose, "ERROR", "Readme file missing testing information")
            overall = "No"

        if mentions("example"):
            log(verbose, "LOG", "Readme file contains links to examples")
            overall = "Yes"

        if not (mentions("package") or mentions("dependenc") or mentions("requirement")):
            log(verbose, "ERROR", "Readme file missing information about package dependencies")
            overall = "No"

    # Only report a failure when the verdict really is negative; previously
    # this error was emitted even after a positive verdict.
    if overall == "No":
        log(verbose, "ERROR", "Found no useful information in README file.")
    return overall
def count_comment_lines(lines):
    """Count ``#`` comment lines and triple-quoted block lines.

    A block is recognised only when a stripped line *starts* with ``\"\"\"``
    or ``'''``. It ends on the first line that contains the same delimiter
    again, which may be the opening line itself (one-line docstring). The
    opening and closing lines are counted as part of the block.

    Known limitation: a triple-quoted string opened in the middle of a line
    (for example ``x = \"\"\"``) is not seen as an opening, so a closing line
    that starts with the delimiter is treated as the start of a new block.

    Args:
        lines: Iterable of source lines (strings).

    Returns:
        A ``(single_line_comments, multi_line_comments)`` tuple of ints.
    """
    single_line_comments = 0
    multi_line_comments = 0
    open_delimiter = None  # delimiter of the block we are inside, if any

    for line in lines:
        stripped_line = line.strip()

        if open_delimiter is not None:
            # Every line inside a block counts exactly once, even when it
            # starts with '#'.
            multi_line_comments += 1
            if open_delimiter in stripped_line:
                open_delimiter = None
            continue

        if stripped_line.startswith('#'):
            single_line_comments += 1
            continue

        for delimiter in ('"""', "'''"):
            if stripped_line.startswith(delimiter):
                multi_line_comments += 1
                # Stay open only if the delimiter is not repeated later on
                # the same line (i.e. this is not a one-line docstring).
                if delimiter not in stripped_line[len(delimiter):]:
                    open_delimiter = delimiter
                break

    return single_line_comments, multi_line_comments
def get_code_to_comment_ratio(zip):
    """Return comment lines as a percentage of non-comment lines.

    Despite the name, the value is ``100 * comment_lines / other_lines``,
    accumulated over every ``.py`` and ``.ipynb`` entry of the archive.
    "Other lines" are all lines not counted as comments, blank lines
    included.

    Args:
        zip: Archive object exposing ``namelist()`` and ``open(name)``
            (presumably a ``zipfile.ZipFile`` - confirm against the caller).

    Returns:
        The percentage as a number, or ``0.0`` when there are no
        non-comment lines to compare against (for example when the archive
        holds no Python sources).
    """
    source_names = [
        name for name in zip.namelist() if name.endswith((".py", ".ipynb"))
    ]

    code_line_count = 0
    comment_line_count = 0
    for name in source_names:
        # Close each member handle promptly instead of leaking it.
        with zip.open(name) as handle:
            # Undecodable bytes must not abort the whole evaluation.
            text = handle.read().decode("utf-8", errors="replace")
        file_lines = text.split('\n')
        sl_comm, ml_comm = count_comment_lines(file_lines)
        comment_line_count += sl_comm + ml_comm
        code_line_count += len(file_lines) - (sl_comm + ml_comm)

    # Avoid ZeroDivisionError (and a meaningless negative ratio).
    if code_line_count <= 0:
        return 0.0
    return 100 * comment_line_count / code_line_count
def count_code_lines(lines):
    """Return half the number of lines that belong to fenced code blocks.

    A line whose stripped form starts with three backticks toggles the
    "inside a fence" state. Fence lines themselves and every line between
    an opening fence and its closing fence (or the end of input, if the
    fence is never closed) are counted, and the total is halved with floor
    division.

    NOTE(review): the reason for halving is not evident from this file;
    the behaviour is kept as-is.

    Args:
        lines: Iterable of README lines (strings).

    Returns:
        ``fenced_line_count // 2`` as an int.
    """
    inside_fence = False
    fenced_line_count = 0

    for line in lines:
        if line.strip().startswith('```'):
            # Opening and closing fences are both counted.
            inside_fence = not inside_fence
            fenced_line_count += 1
        elif inside_fence:
            fenced_line_count += 1

    return fenced_line_count // 2
|