Spaces:

attilasimko
/

reproduce

Sleeping

App Files Files Community

reproduce / evaluations /documentation.py

attilasimko

What did I do before then?

77f290b 7 months ago

raw

history blame

4.18 kB

	from .utils import log
	import re
	import numpy as np

	def evaluate(verbose, llm, zip, readme):
	    """Decide whether the repository documentation looks sufficient.

	    The comment-to-code ratio of the archived sources is computed and
	    logged, then the README text is judged with simple heuristics: its
	    length, the amount of fenced code it contains, and the presence of a
	    few keywords.

	    Args:
	        verbose: Forwarded unchanged as the first argument of every
	            ``log`` call.
	        llm: Accepted for interface compatibility; this function issues no
	            LLM query.
	        zip: Archive object forwarded to ``get_code_to_comment_ratio``.
	        readme: README contents as a string, or a falsy value if there is
	            no README.

	    Returns:
	        ``"Yes"`` when the README passes the heuristics, otherwise
	        ``"No"``.
	    """
	    log(verbose, "LOG", "\nEvaluating code documentation...")

	    code_to_comment_ratio = get_code_to_comment_ratio(zip)
	    log(
	        verbose,
	        "LOG",
	        f"Your python scripts have a comment-to-code ratio of {np.round(code_to_comment_ratio, 2)}%.",
	    )

	    if not readme:
	        log(verbose, "ERROR", "Found no useful information in README file.")
	        return "No"

	    non_empty_rows = [row for row in readme.split("\n") if row != ""]
	    if len(non_empty_rows) < 5:
	        log(verbose, "WARNING", "Readme file has very few lines")
	        return "No"

	    if count_code_lines(non_empty_rows) > 5:
	        log(verbose, "LOG", "Readme file contains python examples.")
	        return "Yes"

	    def _mentions(keyword):
	        # Case-insensitive substring/regex probe of the README text.
	        return re.search(keyword, readme, re.IGNORECASE) is not None

	    overall = "No"

	    if not _mentions("train"):
	        log(verbose, "ERROR", "Readme file missing training information")

	    # "demo" and "evaluat" are alternative hints for testing instructions;
	    # only complain when neither of them appears.
	    if not (_mentions("demo") or _mentions("evaluat")):
	        log(verbose, "ERROR", "Readme file missing testing information")

	    if _mentions("example"):
	        log(verbose, "LOG", "Readme file contains links to examples")
	        overall = "Yes"

	    # Missing dependency information vetoes a positive verdict.
	    if not any(_mentions(word) for word in ("package", "dependenc", "requirement")):
	        log(verbose, "ERROR", "Readme file missing information about package dependencies")
	        overall = "No"

	    # Only report the summary failure when the verdict really is negative.
	    if overall == "No":
	        log(verbose, "ERROR", "Found no useful information in README file.")
	    return overall

	def count_comment_lines(lines):
	    """Count comment lines in a sequence of Python source lines.

	    A line whose stripped text starts with ``#`` is a single-line comment.
	    A line whose stripped text starts with a triple quote opens a
	    multi-line comment (docstring); every line up to and including the one
	    containing the matching closing delimiter is a multi-line comment
	    line. Lines inside such a block are counted once, as multi-line, even
	    if they start with ``#``.

	    This is a line-prefix heuristic, not a parser: a triple-quoted string
	    that starts in the middle of a line is not recognised.

	    Args:
	        lines: Iterable of source lines (strings).

	    Returns:
	        Tuple ``(single_line_comments, multi_line_comments)``.
	    """
	    single_line_comments = 0
	    multi_line_comments = 0
	    open_delimiter = None  # delimiter of the block we are inside, if any

	    for line in lines:
	        stripped = line.strip()

	        if open_delimiter is not None:
	            multi_line_comments += 1
	            # The block ends on whichever line contains its own delimiter,
	            # whether or not text precedes it on that line.
	            if open_delimiter in stripped:
	                open_delimiter = None
	            continue

	        if stripped.startswith("#"):
	            single_line_comments += 1
	            continue

	        for delimiter in ('"""', "'''"):
	            if stripped.startswith(delimiter):
	                multi_line_comments += 1
	                # A one-line docstring closes on the line that opens it.
	                if delimiter not in stripped[len(delimiter):]:
	                    open_delimiter = delimiter
	                break

	    return single_line_comments, multi_line_comments

	def get_code_to_comment_ratio(zip):
	    """Return comment lines as a percentage of the remaining lines.

	    Every archive member whose name ends in ``.py`` or ``.ipynb`` is read,
	    split on newlines and passed to ``count_comment_lines``. All lines that
	    are not counted as comments (blank lines included) count as code.

	    Args:
	        zip: Archive object providing ``namelist()`` and ``open(name)``.

	    Returns:
	        ``100 * comment_lines / code_lines``, or ``0.0`` when there are no
	        code lines to relate the comments to (for example when the archive
	        holds no matching files).
	    """
	    source_suffixes = (".py", ".ipynb")
	    code_line_count = 0
	    comment_line_count = 0

	    for file_path in zip.namelist():
	        if not file_path.endswith(source_suffixes):
	            continue
	        # Close each member handle as soon as its content has been read.
	        with zip.open(file_path) as handle:
	            raw = handle.read()
	        # Undecodable bytes are replaced rather than aborting the whole
	        # evaluation; only line prefixes matter for the counts.
	        file_lines = raw.decode("utf-8", errors="replace").split("\n")
	        sl_comm, ml_comm = count_comment_lines(file_lines)
	        comment_line_count += sl_comm + ml_comm
	        code_line_count += len(file_lines) - (sl_comm + ml_comm)

	    # Avoid dividing by an empty (or otherwise unusable) denominator.
	    if code_line_count <= 0:
	        return 0.0
	    return 100 * comment_line_count / code_line_count

	def count_code_lines(lines):
	    """Estimate how much fenced code a Markdown text contains.

	    Lines whose stripped text starts with three backticks toggle the
	    "inside a code fence" state. The fence lines themselves and every line
	    between an opening and a closing fence are tallied, and half of that
	    tally (rounded down) is returned.

	    NOTE(review): the reason for halving the tally is not evident from
	    this file; confirm with the caller's threshold before changing it.

	    Args:
	        lines: Iterable of text lines (strings).

	    Returns:
	        Integer: the tally described above, floor-divided by two.
	    """
	    inside_fence = False
	    tally = 0

	    for line in lines:
	        if line.strip().startswith("```"):
	            # Opening and closing fences are both counted.
	            inside_fence = not inside_fence
	            tally += 1
	        elif inside_fence:
	            tally += 1

	    return tally // 2