import yaml
from utils import set_type, set_operator_role_and_location, set_eu_market_status, check_within_scope

# Create some variables we will use throughout our analysis

project_variables = {
    "ai_project_type": {
        "ai_system": False,
        "gpai_model": False,
        "high_risk_ai_system": False,
        "gpai_model_systematic_risk": False
    },
    "operator_role": {
        "provider": False,
        "deployer": False,
        "importer": False,
        "distributor": False,
        "product_manufacturer": False,
        "eu_located": False
    },
    "eu_market_status": {
        "placed_on_market": False,
        "put_into_service": False,
        "output_used": False
    }
}

project_intended_purpose = None 

# TO-DO: A thesis of this paper is that we cannot declare a project compliant without looking at all of its component models and datasets. 
# What that means in practical terms is that we need to check all model and data CCs in addition to the project CC to render a decision of compliance.
# There are two ways we can go about this: 
# (1) We can have an orchestrator function that sits on top of run_compliance_analysis_on_project(), run_compliance_analysis_on_data(), and 
# run_compliance_analysis_on_model() and orchestrates them. In particular, it will have to first run run_compliance_analysis_on_project() to set 
# the values of some "dispositive characteristic" variables, which it can then pass into run_compliance_analysis_on_data() and run_compliance_analysis_on_model()
# to make sure that the analysis done there is dynamically appropriate. Importantly, it will also have to run run_compliance_analysis_on_data(), and 
# run_compliance_analysis_on_model() for each and every data and model CC in the folder, passing in those "dispositive characteristic" variables 
# as arguments to ensure the analysis is appropriate. 
# (2) We could treat run_compliance_analysis_on_project() as the orchestrator function. This would mean this function would first need to set all of the
# "dispositive characteristic" variables and then, after doing that, call compliance_analysis_on_data() and run_compliance_analysis_on_model() for all
# of the model and data CCs in the folder, passing in the "dispositive characteristic" variables as arguments.
#
# I slightly prefer option (1), so here is some pseudo-code for a potential orchestrator function:
# 
# def orchestrator():
#
#   this might be a good time to check that there is at least one Project CC, and to do some administrative setup
#   to make your life easier, like gathering all the files in the folder into a list, etc.
#
#   Call run_compliance_analysis_on_project, passing in the sole Project CC as the argument
#       -This must set the "dispositive" variables (i.e., the project_variables above) by parsing them from the Project CC. It already does this as-is.
#       -This must also check to see if the project is out of scope. It does this as-is.
#       -This must also check for prohibited practices. This has been commented out, but the functionality is there as-is.
#       -Last but not least, this must run the internal check of the project CC based on the project_variables it has set. It is only partially doing this as-is. To finish the job, we must:
#           -Run the check for other types of models and systems: AI systems without high risk, GPAI without systemic risk, GPAI with systemic risk. It is only doing high-risk AI systems at the moment.
#           -Where the operator is a provider, ensure any additional requirements for providers are met (see the Project CC template for details)
#           -Where the operator is a deployer, ensure any additional requirements for deployers are met (see the Project CC template for details)
#
#   Call run_compliance_analysis_on_model() *for all model CCs in the folder*, passing in the ai_project_type variable and maybe project_intended_purpose 
#       -This should include a "cross comparison" of the intended uses listed in the model CC and the project_intended_purpose parsed from the Project CC, something that is not yet integrated 
#       -This function must check if GPAI requirements are met, if that value for ai_project_type is passed in -- it does not yet do this   
#
#   Call run_compliance_analysis_on_data() *for all data CCs in the folder*, passing in the ai_project_type variable and maybe project_intended_purpose
#       -This should include a "cross comparison" of the intended uses listed in the data CC and the project_intended_purpose parsed from the Project CC, something that is not yet integrated  
#       -This function must check if GPAI requirements are met, if that value for ai_project_type is passed in -- it does not yet do this 
#
#   This function could also more gracefully handle the internal exits/reports and generate a single, digestible compliance report that
#   tells the user where the compliance analysis failed. If we wanted to get really fancy, we could include error messages for each individual
# entry in the yaml files, possibly citing the part of the Act that they need to reference (currently in comments that the user does not see).
# A rough sketch of option (1) follows below.
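
# --------------------------------------------------------------------------------------------------------
# Hedged sketch of the option (1) orchestrator described above. This is NOT part of the current pipeline:
# the function name orchestrate_compliance_analysis(), the cc_folder argument, and the "project_cc"
# filename convention are illustrative assumptions (the "data_cc.md" / "model_cc.md" conventions mirror the
# commented-out code at the bottom of this file). The per-CC functions are called with their current
# one-argument signatures.
# Example usage (assumption): reports = orchestrate_compliance_analysis("compliance_cards/")
# --------------------------------------------------------------------------------------------------------

import os  # used only by the sketch below


def orchestrate_compliance_analysis(cc_folder):
    """Sketch: run the project-level analysis first, then every model and data CC in the folder."""
    reports = []
    filenames = os.listdir(cc_folder)

    # There should be exactly one Project CC, since it sets the "dispositive characteristic" variables.
    project_ccs = [f for f in filenames if "project_cc" in f.lower()]
    if len(project_ccs) != 1:
        return [f"Expected exactly one Project CC in the folder; found {len(project_ccs)}."]

    with open(os.path.join(cc_folder, project_ccs[0]), 'r') as file:
        project_cc_yaml = yaml.safe_load(file)
    reports.append(run_compliance_analysis_on_project(project_cc_yaml))

    # Then check every model CC and data CC in the folder.
    for filename in filenames:
        path = os.path.join(cc_folder, filename)
        if "model_cc.md" in filename.lower():
            with open(path, 'r') as file:
                reports.append(run_compliance_analysis_on_model(yaml.safe_load(file)))
        elif "data_cc.md" in filename.lower():
            with open(path, 'r') as file:
                reports.append(run_compliance_analysis_on_data(yaml.safe_load(file)))

    return reports
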


def run_compliance_analysis_on_project(project_cc_yaml): 

    # Determine project type (AI system vs. GPAI model) as well as operator type. We will use these for different things.
    project_type = set_type(project_variables, project_cc_yaml)
    set_operator_role_and_location(project_variables, project_cc_yaml)
    set_eu_market_status(project_variables, project_cc_yaml)

    # Check if the project is within scope of the Act. If it's not, the analysis is over.
    if check_within_scope(project_variables, project_cc_yaml):
        msg = ("Project is within the scope of the Act. Let's continue...")
    else:
        msg = ("Project is not within the scope of what is regulated by the Act.")
        return msg

    # TO-DO: reactivate the prohibited practices check below 

    # # Check for prohibited practices. If any exist, the analysis is over.
    # if check_prohibited(project_cc_yaml) == True: 
    #     print("Project contains prohibited practices and is therefore non-compliant.")
    #     msg = ("Project is non-compliant due to a prohibited practice.")
    # else: 
    #     print("Project does not contain prohibited practies. Let's continue...")

    # If the project is a high-risk AI system, check that it has met all the requirements for such systems:

    if project_type == "high_risk_ai_system":

        # Do this by examining the Project CC

        for key, value in project_cc_yaml['risk_management_system'].items():
            if not value:
                msg = ("Because of project-level characteristics, this high-risk AI system fails the risk management requirements under Article 9.")
        for key, value in project_cc_yaml['technical_documentation'].items():
            if not value:
                msg = ("Because of project-level characteristics, this high-risk AI system fails the technical documentation requirements under Article 11.")
        for key, value in project_cc_yaml['record_keeping'].items():
            if not value:
                msg = ("Because of project-level characteristics, this high-risk AI system fails the record-keeping requirements under Article 12.")
        for key, value in project_cc_yaml['transparency_and_provision_of_information_to_deployers'].items():
            if not value:
                msg = ("Because of project-level characteristics, this high-risk AI system fails the transparency requirements under Article 13.")
        for key, value in project_cc_yaml['human_oversight'].items():
            if not value:
                msg = ("Because of project-level characteristics, this high-risk AI system fails the human oversight requirements under Article 14.")
        for key, value in project_cc_yaml['accuracy_robustness_cybersecurity'].items():
            if not value:
                msg = ("Because of project-level characteristics, this high-risk AI system fails the accuracy, robustness, and cybersecurity requirements under Article 15.")
        for key, value in project_cc_yaml['quality_management_system'].items():
            if not value:
                msg = ("Because of project-level characteristics, this high-risk AI system fails the quality management system requirements under Article 17.")

    # TO-DO: No matter where we land with an orchestrator function, this function must also check the values it has set for
    # GPAI models with and without systemic risk and then check whether the relevant requirements have been met if either of those values applies.
    # This will look a lot like what is happening above for high-risk AI systems. A commented sketch follows.
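    # Hedged sketch (assumption, not active code): the GPAI checks would mirror the high-risk loop above,
    # reusing the section names from the commented-out block at the bottom of this file, and assuming
    # set_type() returns the keys of project_variables['ai_project_type']:
    #
    # if project_type == "gpai_model":
    #     for key, value in project_cc_yaml['gpai_model_provider_obligations'].items():
    #         if not value:
    #             msg = ("GPAI model fails the transparency requirements under Article 53.")
    #
    # if project_type == "gpai_model_systematic_risk":
    #     for key, value in project_cc_yaml['gpai_obligations_for_systemic_risk_models'].items():
    #         if not value:
    #             msg = ("GPAI model with systemic risk fails the requirements under Article 55.")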
    
    return msg

def run_compliance_analysis_on_data(data_cc_yaml): # TO-DO: we probably have to pass ai_project_type and project_intended_purpose into this function
    
    msg = ""  # default return value when no requirement fails

    for key, value in data_cc_yaml['data_and_data_governance'].items():
        if not value:
            msg = (f"Because of the dataset represented by , this high-risk AI system fails the data and data governance requirements under Article 10.")
    for key, value in data_cc_yaml['technical_documentation'].items():
        if not value:
            msg = (f"Because of the dataset represented by , this high-risk AI system fails the technical documentation requirements under Article 11.")
    for key, value in data_cc_yaml['transparency_and_provision_of_information_to_deployers'].items():
        if not value:
            msg = (f"Because of the dataset represented by , this high-risk AI system fails the transparency requirements under Article 13.")
    for key, value in data_cc_yaml['quality_management_system'].items():
        if not value:
            msg = (f"Because of the dataset represented by , this high-risk AI system fails the quality management requirements under Article 17.")

    # TO-DO: No matter where we land with an orchestrator function, this function must also check the values that have been set for
    # GPAI models with and without systemic risk and then check whether the relevant requirements have been met if either of those values applies.
    # Right now it is only checking high-risk AI system requirements. Another thing we likely have to add here is the cross-comparison of
    # intended uses. A commented sketch follows.
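    # Hedged sketch (assumption, not active code): with ai_project_type and project_intended_purpose passed in,
    # the extra checks could look roughly like this (section names are taken from the commented-out block at the
    # bottom of this file; project_intended_purpose is assumed to be a list of purposes parsed from the Project CC):
    #
    # if ai_project_type in ("gpai_model", "gpai_model_systematic_risk"):
    #     for key, value in data_cc_yaml['gpai_requirements']['gpai_requirements'].items():
    #         if not value:
    #             msg = ("Because of this dataset, the GPAI model fails the transparency requirements under Article 53.")
    #
    # dataset_purposes = [key for key, entry in data_cc_yaml['high_risk_ai_system'].items() if entry['value']]
    # for purpose in project_intended_purpose:
    #     if purpose not in dataset_purposes:
    #         msg = (f"This dataset does not cover the project's intended purpose: {purpose}.")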

    return msg
    
def run_compliance_analysis_on_model(model_cc_yaml):  # TO-DO: we probably have to pass ai_project_type and project_intended_purpose into this function
    
    msg = ""  # default return value when no requirement fails

    for key, value in model_cc_yaml['risk_management_system'].items():
        if not value:
            msg = (f"Because of the model represented by , this high-risk AI system fails the risk management requirements under Article 9.")
    for key, value in model_cc_yaml['technical_documentation'].items():
        if not value:
            msg = (f"Because of the model represented by , this high-risk AI system fails the technical documentation requirements under Article 11.")
    for key, value in model_cc_yaml['transparency_and_provision_of_information_to_deployers'].items():
        if not value:
            msg = (f"Because of the model represented by , this high-risk AI system fails the transparency requirements under Article 13.")
    for key, value in model_cc_yaml['accuracy_robustness_cybersecurity'].items():
        if not value:
            msg = (f"Because of the model represented by , this high-risk AI system fails the accuracy, robustness, and cybersecurity requirements under Article 15.")
    for key, value in model_cc_yaml['quality_management_system'].items():
        if not value:
            msg = (f"Because of the model represented by , this high-risk AI system fails the quality management requirements under Article 17.")
   
    # TO-DO: No matter where we land with an orchestrator function, this function must also check the values that have been set for
    # GPAI models with and without systemic risk and then check whether the relevant requirements have been met if either of those values applies.
    # Right now it is only checking high-risk AI system requirements. Another thing we likely have to add here is the cross-comparison of
    # intended uses (analogous to the sketch in run_compliance_analysis_on_data above).
    
    return msg

def check_intended_purpose(project_cc_yaml, data_cc_yaml, model_cc_yaml):
    
    # We want to run this function for everything classified as a high_risk_ai_system
    # We also need to run it for all data and model CCs
    # Add any of the intended purposes of the overall project to a list of intended purposes
    
    # intended_purpose = ['safety_component',
    #                     "product_regulated_machinery",
    #                     "product_regulated_toy",
    #                     "product_regulated_watercraft",
    #                     "biometric_categorization",
    #                     "emotion_recognition",
    #                     "critical_infrastructure",
    #                     "admission",
    #                     "recruitment",
    #                     "public_assistance",
    #                     "victim_assessment",
    #                     "polygraph",
    #                     "judicial"]
    
    project_intended_purposes = []
    for key in project_cc_yaml['high_risk_ai_system']:
        if project_cc_yaml['high_risk_ai_system'][key]['value']:
            project_intended_purposes.append(key)
    
    # For each Data CC, put the intended uses in a set and then make sure the Project's intended use is in the set

    msg = ''
    dataset_intended_purposes = []
    for key in data_cc_yaml['high_risk_ai_system']:
        if data_cc_yaml['high_risk_ai_system'][key]['value']:
            dataset_intended_purposes.append(key)

    for purpose in project_intended_purposes:
        if purpose not in dataset_intended_purposes:
            msg = f"You are not compliant because the project's intended purpose '{purpose}' is not among the dataset's intended purposes."

    # Now do the exact same thing for all models

    model_intended_purposes = []
    for key in model_cc_yaml['high_risk_ai_system']:
        if model_cc_yaml['high_risk_ai_system'][key]['value']:
            model_intended_purposes.append(key)

    for purpose in project_intended_purposes:
        if purpose not in model_intended_purposes:
            msg = f"You are not compliant because the project's intended purpose '{purpose}' is not among the model's intended purposes."

    return msg



    # # If the project is a GPAI model, check that it has met all the requirements for such systems: 

    # if gpai_model:

    # # Do this by examining the Project CC

    #     for key, value in project_cc_yaml['gpai_model_provider_obligations'].items():
    #         if not value:
    #             msg = ("GPAI model fails the transparency requirements under Article 53.")

    # # Do this by examining any and all Data CCs too

    #     for filename in os.listdir(folder_path):
    #         # Check if the search word is in the filename
    #         if "data_cc.md" in filename.lower():

    #             # If it is, load the yaml

    #             with open(folder_path + filename, 'r') as file:
    #                 data_cc_yaml = yaml.safe_load(file)

    #             for key, value in data_cc_yaml['gpai_requirements']['gpai_requirements'].items():
    #                 if not value:
    #                     msg = (f"Because of the dataset represented by {filename}, this GPAI fails the transparency requirements under Article 53.")

    # # Do this by examining any and all Model CCs too
    
    #     for filename in os.listdir(folder_path):
    #         # Check if the search word is in the filename
    #         if "model_cc.md" in filename.lower():

    #             # If it is, load the yaml

    #             with open(folder_path + filename, 'r') as file:
    #                 model_cc_yaml = yaml.safe_load(file)

    #             for key, value in model_cc_yaml['obligations_for_providers_of_gpai_models'].items():
    #                 if not value:
    #                     msg = (f"Because of the model represented by {filename}, this GPAI fails the transparency requirements under Article 53.")

    # # If the project is a GPAI model with systemic risk, check that it has additionally met all the requirements for such systems: 

    # if gpai_model_systematic_risk:

    # # Do this by examining the Project CC

    #     for key, value in project_cc_yaml['gpai_obligations_for_systemic_risk_models'].items():
    #         if not value:
    #             msg = ("GPAI model with systematic risk fails the transparency requirements under Article 55.")

    # # Do this by examining any and all Model CCs too

    #     for filename in os.listdir(folder_path):
    #         # Check if the search word is in the filename
    #         if "model_cc.md" in filename.lower():

    #             # If it is, load the yaml

    #             with open(folder_path + filename, 'r') as file:
    #                 model_cc_yaml = yaml.safe_load(file)

    #             for key, value in model_cc_yaml['obligations_for_providers_of_gpai_models_with_systemic_risk'].items():
    #                 if not value:
    #                     msg = (f"Because of the model represented by {filename}, this GPAI model with systematic risk fails the transparency requirements under Article 55.")