# MethSurvPredictor / use_pre_pca.py
# csycsycsy's picture
# Upload 10 files
# 84db192 verified
# raw
# history blame
# 899 Bytes
import pandas as pd
from sklearn.preprocessing import StandardScaler
import joblib
import os
# Project a methylation matrix onto a previously fitted PCA model and write
# the resulting component scores to a CSV file.

# Work from the directory that holds this script so the relative file names
# used below resolve next to it, wherever the script was launched from.
script_path = os.path.abspath(__file__)
script_dir = os.path.dirname(script_path)
os.chdir(script_dir)

# Restore the PCA object that was persisted with joblib.
pca_model_path = 'pca_model.pkl'
loaded_pca = joblib.load(pca_model_path)

# Read the tab-separated table (first column becomes the row index) and
# discard every row that contains at least one missing value.
# NOTE(review): dropping rows changes the feature set after the transpose
# below; confirm it still matches what the saved PCA model expects.
file_path = 'TCGA-LGG.methylation450.tsv'
new_data = pd.read_csv(file_path, sep='\t', index_col=0).dropna()

# Transpose so that the table's columns (used as sample ids below) become
# rows, then standardise each resulting column.
# NOTE(review): the scaler is fitted on this new data rather than reloaded
# from disk - confirm this is consistent with how the PCA model was trained.
samples_as_rows = new_data.transpose()
scaler = StandardScaler()
scaled_new_data = scaler.fit_transform(samples_as_rows)

# Apply the already-fitted PCA projection.
new_principal_components = loaded_pca.transform(scaled_new_data)

# Label the output: one row per original column, one column per component.
sample_ids = new_data.columns
component_labels = [
    f'Principal Component {number}'
    for number in range(1, loaded_pca.n_components_ + 1)
]
new_principal_df = pd.DataFrame(
    data=new_principal_components,
    columns=component_labels,
    index=sample_ids,
)
print(new_principal_df)

# Persist the component scores alongside the script.
output_file_path = 'pca_principal_components.csv'
new_principal_df.to_csv(output_file_path)