#%% import pandas as pd import tomllib value_mapping = { 'his_SEX': {'female': 0, 'male': 1}, 'his_HISPANIC': {'no': 0, 'yes': 1}, 'his_NACCNIHR': {'whi': 0, 'blk': 1, 'asi': 2, 'ind': 3, 'haw': 4, 'mul': 5}, 'his_RACE': {'whi': 0, 'blk': 1, 'asi': 2, 'ind': 3, 'haw': 4, 'oth': 5}, 'his_RACESEC': {'whi': 0, 'blk': 1, 'asi': 2, 'ind': 3, 'haw': 4, 'oth': 5}, 'his_RACETER': {'whi': 0, 'blk': 1, 'asi': 2, 'ind': 3, 'haw': 4, 'oth': 5}, } label_names = ['NC', 'MCI', 'DE', 'AD', 'LBD', 'VD', 'PRD', 'FTD', 'NPH', 'SEF', 'PSY', 'TBI', 'ODE'] class CSVDataset: def __init__(self, dat_file, cnf_file): ''' ... ''' # load data csv df = pd.read_csv(dat_file) # value mapping for col, mapping in value_mapping.items(): df[col] = df[col].replace(mapping) # load toml file to get feature names with open(cnf_file, 'rb') as file: feature_names = tomllib.load(file)['feature'].keys() self.df = df self.df_features = df[feature_names] self.df_labels = df[label_names] def __len__(self): ''' ... ''' return len(self.df) def __getitem__(self, idx): ''' ... ''' row = self.df_features.iloc[idx] clean_row = row.dropna() feature_dict = clean_row.to_dict() row = self.df_labels.iloc[idx] clean_row = row.dropna() label_dict = clean_row.to_dict() return feature_dict, label_dict if __name__ == '__main__': # load dataset dset = CSVDataset('./nacc_test_with_np_cli.csv', './default_conf_new.toml') print(dset[1]) # %%