"""Heuristic checks for how well a submitted code archive is documented."""

import re

import numpy as np

from .utils import log

# Delimiters of triple-quoted strings, which are counted as block comments.
_DOCSTRING_DELIMITERS = ('"""', "'''")

# Archive members with these suffixes are scanned for comments. Notebooks are
# read as raw text, so the line-based heuristic is only approximate for them.
_PYTHON_SUFFIXES = (".py", ".ipynb")


def _mentions(text, pattern):
    """Return True when *pattern* occurs in *text*, ignoring case."""
    return re.search(pattern, text, re.IGNORECASE) is not None


def evaluate(verbose, llm, zip, readme):
    """Assess the documentation of a submission and return "Yes" or "No".

    The comment-to-code ratio of the archive's Python files is logged for
    information only; the verdict is derived from the README alone:

    * fewer than five non-empty README lines -> "No";
    * a README whose fenced code blocks score above five in
      ``count_code_lines`` -> "Yes";
    * otherwise "Yes" only when the README mentions "example" and also
      mentions at least one of "package", "dependenc" or "requirement".

    Args:
        verbose: Forwarded unchanged to ``log``.
        llm: Currently unused -- no LLM query is issued, only the keyword
            heuristics run. Kept so existing callers keep working.
        zip: Archive object exposing ``namelist()`` and ``open(name)``
            (presumably a ``zipfile.ZipFile`` -- confirm against callers).
        readme: README contents as a string; falsy when there is none.

    Returns:
        The string "Yes" or "No".
    """
    log(verbose, "LOG", "\nEvaluating code documentation...")
    overall = "No"

    code_to_comment_ratio = get_code_to_comment_ratio(zip)
    log(verbose, "LOG", f"Your python scripts have a comment-to-code ratio of {np.round(code_to_comment_ratio, 2)}%.")

    if readme:
        non_empty_rows = [row for row in readme.split("\n") if row != ""]
        if len(non_empty_rows) < 5:
            log(verbose, "WARNING", "Readme file has very few lines")
            return overall

        if count_code_lines(non_empty_rows) > 5:
            log(verbose, "LOG", "Readme file contains python examples.")
            return "Yes"

        # The training/testing checks below only report problems: the verdict
        # is still "No" at this point, so they cannot change it.
        if not _mentions(readme, "train"):
            log(verbose, "ERROR", "Readme file missing training information")

        # NOTE(review): this fires when EITHER keyword is absent; the
        # dependency check below requires ALL keywords to be absent. Confirm
        # which of the two behaviours is intended here.
        if not _mentions(readme, "demo") or not _mentions(readme, "evaluat"):
            log(verbose, "ERROR", "Readme file missing testing information")

        if _mentions(readme, "example"):
            log(verbose, "LOG", "Readme file contains links to examples")
            overall = "Yes"

        # Missing dependency information overrides a positive verdict.
        if not any(
            _mentions(readme, keyword)
            for keyword in ("package", "dependenc", "requirement")
        ):
            log(verbose, "ERROR", "Readme file missing information about package dependencies")
            overall = "No"

    # Only report a useless README when that is actually the verdict.
    if overall == "No":
        log(verbose, "ERROR", "Found no useful information in README file.")
    return overall


def count_comment_lines(lines):
    """Count comment lines in a sequence of Python source lines.

    A line is a single-line comment when it starts with ``#`` (after
    stripping whitespace) and is not inside a triple-quoted block. A
    triple-quoted block starts on a line that begins with three quotes and
    ends on the first line containing the same delimiter again, which may be
    the opening line itself. Every line of such a block is counted, including
    the opening and closing lines.

    Args:
        lines: Iterable of source lines (strings).

    Returns:
        A ``(single_line_comments, multi_line_comments)`` tuple of ints. Each
        input line contributes to at most one of the two counters.
    """
    single_line_comments = 0
    multi_line_comments = 0
    open_delimiter = None  # delimiter of the block we are inside, if any

    for line in lines:
        stripped_line = line.strip()

        if open_delimiter is not None:
            # Inside a block: everything counts, '#' lines included, and only
            # the matching delimiter can close it (anywhere on the line).
            multi_line_comments += 1
            if open_delimiter in stripped_line:
                open_delimiter = None
            continue

        if stripped_line.startswith("#"):
            single_line_comments += 1
            continue

        for delimiter in _DOCSTRING_DELIMITERS:
            if stripped_line.startswith(delimiter):
                multi_line_comments += 1
                # A one-line docstring closes on its opening line; only stay
                # "open" when no closing delimiter follows the opening one.
                if delimiter not in stripped_line[len(delimiter):]:
                    open_delimiter = delimiter
                break

    return single_line_comments, multi_line_comments


def get_code_to_comment_ratio(zip):
    """Return the number of comment lines per 100 non-comment lines.

    Despite the function name, the value is ``100 * comments / code``. Every
    line that ``count_comment_lines`` does not classify as a comment counts
    as code, blank lines included.

    Args:
        zip: Archive object exposing ``namelist()`` and ``open(name)``
            (presumably a ``zipfile.ZipFile`` -- confirm against callers).

    Returns:
        The ratio as a float, or ``0.0`` when there are no non-comment lines
        to compare against (for example, no Python files in the archive).
    """
    python_files = [
        file_path
        for file_path in zip.namelist()
        if file_path.endswith(_PYTHON_SUFFIXES)
    ]

    code_line_count = 0
    comment_line_count = 0
    for file_path in python_files:
        # Close each member promptly, and do not let one badly encoded file
        # abort the whole evaluation.
        with zip.open(file_path) as handle:
            text = handle.read().decode("utf-8", errors="replace")
        file_lines = text.split("\n")

        sl_comm, ml_comm = count_comment_lines(file_lines)
        comment_lines = sl_comm + ml_comm
        comment_line_count += comment_lines
        code_line_count += len(file_lines) - comment_lines

    if code_line_count == 0:
        # The ratio is undefined without code lines; avoid ZeroDivisionError.
        return 0.0
    return 100 * comment_line_count / code_line_count


def count_code_lines(lines):
    """Score how much fenced code a README contains.

    Counts every line that is a Markdown code fence (starts with three
    backticks after stripping) or lies between an opening and a closing
    fence, then returns half of that count, rounded down.

    Args:
        lines: Iterable of README lines (strings).

    Returns:
        Half the number of fence and fenced lines, as an int.
    """
    is_code_snippet = False
    code_line_count = 0

    for line in lines:
        if line.strip().startswith("```"):
            # Fence lines are counted themselves and toggle the state.
            is_code_snippet = not is_code_snippet
            code_line_count += 1
        elif is_code_snippet:
            code_line_count += 1

    return code_line_count // 2