File size: 4,175 Bytes
77f290b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from .utils import log
import re
import numpy as np

def evaluate(verbose, llm, zip, readme):
    """Judge whether a repository's README documents the code well enough.

    The comment ratio of the Python sources in the archive is logged for
    information only; it does not influence the verdict. The verdict itself
    is derived from the README text through simple keyword heuristics.

    Args:
        verbose: Forwarded unchanged to ``log`` with every message.
        llm: Accepted for interface compatibility; this function does not
            consult it. NOTE(review): an LLM prompt used to be built here
            but was never sent anywhere - confirm whether it should be.
        zip: Archive object handed to ``get_code_to_comment_ratio``
            (presumably a ``zipfile.ZipFile`` - confirm against the caller).
        readme: README contents as a string, or a falsy value if missing.

    Returns:
        The string ``"Yes"`` or ``"No"``.
    """
    log(verbose, "LOG", "\nEvaluating code documentation...")
    overall = "No"

    code_to_comment_ratio = get_code_to_comment_ratio(zip)
    log(verbose, "LOG", f"Your python scripts have a comment-to-code ratio of {np.round(code_to_comment_ratio, 2)}%.")

    if readme:
        non_empty_rows = [row for row in readme.split("\n") if row != ""]
        if len(non_empty_rows) < 5:
            log(verbose, "WARNING", "Readme file has very few lines")
            return overall

        # Enough fenced code in the README is accepted on its own.
        if count_code_lines(non_empty_rows) > 5:
            log(verbose, "LOG", "Readme file contains python examples.")
            overall = "Yes"
            return overall

        # Keyword heuristics. The order matters: a later check overrides
        # the verdict set by an earlier one.
        if not re.search("train", readme, re.IGNORECASE):
            log(verbose, "ERROR", "Readme file missing training information")
            overall = "No"

        # NOTE(review): this flags the README when EITHER keyword is absent;
        # confirm whether one of the two should be sufficient.
        if (not re.search("demo", readme, re.IGNORECASE)
                or not re.search("evaluat", readme, re.IGNORECASE)):
            log(verbose, "ERROR", "Readme file missing testing information")
            overall = "No"

        if re.search("example", readme, re.IGNORECASE):
            log(verbose, "LOG", "Readme file contains links to examples")
            overall = "Yes"

        if (not re.search("package", readme, re.IGNORECASE)
                and not re.search("dependenc", readme, re.IGNORECASE)
                and not re.search("requirement", readme, re.IGNORECASE)):
            log(verbose, "ERROR", "Readme file missing information about package dependencies")
            overall = "No"

    # Only report a useless README when that is the verdict; previously this
    # error was also logged right before returning "Yes".
    if overall == "No":
        log(verbose, "ERROR", "Found no useful information in README file.")
    return overall

def count_comment_lines(lines):
    """Count comment lines in Python source given as a list of lines.

    A line is a single-line comment when, after stripping whitespace, it
    starts with ``#``. A multi-line comment is a triple-quoted block whose
    opening delimiter is the first thing on its line; every line from the
    opening through the closing delimiter is counted. Each line is counted
    at most once.

    Args:
        lines: Iterable of source lines (strings).

    Returns:
        A tuple ``(single_line_comments, multi_line_comments)``.
    """
    single_line_comments = 0
    multi_line_comments = 0
    # Delimiter of the triple-quoted block currently open, or None.
    open_delimiter = None

    for line in lines:
        stripped_line = line.strip()

        if open_delimiter is not None:
            # Inside a block: the line belongs to it whatever it looks like,
            # so a '#' here is not counted a second time as a comment.
            multi_line_comments += 1
            # The block may close anywhere on the line, e.g. 'text."""'.
            if open_delimiter in stripped_line:
                open_delimiter = None
            continue

        if stripped_line.startswith('#'):
            single_line_comments += 1
            continue

        for delimiter in ('"""', "'''"):
            if stripped_line.startswith(delimiter):
                multi_line_comments += 1
                # A one-line docstring closes on the line that opens it;
                # only stay "open" if no closing delimiter follows.
                if delimiter not in stripped_line[len(delimiter):]:
                    open_delimiter = delimiter
                break

    return single_line_comments, multi_line_comments

def get_code_to_comment_ratio(zip):
    """Return comment lines as a percentage of code lines in an archive.

    Every member whose name ends in ``.py`` or ``.ipynb`` is read, decoded
    as UTF-8 and split into lines. Lines reported by ``count_comment_lines``
    count as comments; all remaining lines (blank ones included) count as
    code.

    Args:
        zip: Archive object providing ``namelist()`` and ``open(name)``
            (presumably a ``zipfile.ZipFile`` - confirm against the caller).

    Returns:
        ``100 * comment_lines / code_lines``, or ``0.0`` when there are no
        code lines to compare against (for example, no matching files).
    """
    python_files = [
        file_path for file_path in zip.namelist()
        if file_path.endswith((".py", ".ipynb"))
    ]
    code_line_count = 0
    comment_line_count = 0
    for file in python_files:
        # Close each member handle as soon as its content has been read.
        with zip.open(file) as member:
            file_lines = member.read().decode("utf-8").split('\n')
        sl_comm, ml_comm = count_comment_lines(file_lines)
        comment_line_count += sl_comm + ml_comm
        code_line_count += len(file_lines) - (sl_comm + ml_comm)

    # Without code lines the ratio is undefined; report 0 instead of
    # raising ZeroDivisionError.
    if code_line_count <= 0:
        return 0.0

    return 100 * comment_line_count / code_line_count

def count_code_lines(lines):
    """Return half the number of lines that belong to fenced code blocks.

    A line whose stripped text starts with three backticks toggles the
    fenced state. Fence lines and the lines between an opening and a
    closing fence are tallied, and the tally is halved, rounding down.

    Args:
        lines: Iterable of text lines (strings).

    Returns:
        The halved tally as an int.
    """
    inside_fence = False
    fenced_total = 0

    for raw_line in lines:
        is_fence = raw_line.strip().startswith('```')
        if is_fence:
            inside_fence = not inside_fence
        # Fence markers count as well as the lines they enclose.
        if is_fence or inside_fence:
            fenced_total += 1

    return int(fenced_total / 2)