import pandas as pd from sklearn.preprocessing import StandardScaler import joblib import os script_path=os.path.abspath(__file__) script_dir=os.path.dirname(script_path) os.chdir(script_dir) pca_model_path = 'pca_model.pkl' loaded_pca = joblib.load(pca_model_path) file_path = 'TCGA-LGG.methylation450.tsv' new_data = pd.read_csv(file_path, sep='\t', index_col=0) new_data.dropna(inplace=True) scaler = StandardScaler() scaled_new_data = scaler.fit_transform(new_data.T) new_principal_components = loaded_pca.transform(scaled_new_data) sample_ids = new_data.columns new_principal_df = pd.DataFrame(data=new_principal_components, columns=[f'Principal Component {i+1}' for i in range(loaded_pca.n_components_)], index=sample_ids) print(new_principal_df) output_file_path = 'pca_principal_components.csv' new_principal_df.to_csv(output_file_path)