|
import pandas as pd
|
|
from sklearn.preprocessing import StandardScaler
|
|
import joblib
|
|
import os
|
|
# Project the samples of a methylation table onto a previously fitted PCA
# model and save the resulting component scores as CSV.

# Make the script's own directory the working directory so that the relative
# file names below resolve no matter where the interpreter was started.
script_path = os.path.abspath(__file__)
script_dir = os.path.dirname(script_path)
os.chdir(script_dir)

# Load the fitted PCA model from disk.
# NOTE(review): joblib.load unpickles arbitrary objects -- only load model
# files that come from a trusted source.
pca_model_path = 'pca_model.pkl'
loaded_pca = joblib.load(pca_model_path)

# Read the tab-separated table; its first column becomes the row index.
# The table is transposed before scaling (see below), so rows are treated as
# features and columns as samples.
file_path = 'TCGA-LGG.methylation450.tsv'
new_data = pd.read_csv(file_path, sep='\t', index_col=0)

# Drop every row (feature) that has a missing value in at least one sample.
new_data.dropna(inplace=True)

# Dropping rows can change the number of features. Check it against what the
# model was fitted on *before* the expensive scaling step, so a mismatch is
# reported with an actionable message. The attribute is looked up defensively
# because not every estimator/version exposes it; the check is skipped then.
expected_features = getattr(loaded_pca, 'n_features_in_', None)
if expected_features is not None and new_data.shape[0] != expected_features:
    raise ValueError(
        f"Feature count mismatch: '{file_path}' has {new_data.shape[0]} "
        f"complete rows after dropna(), but the model in '{pca_model_path}' "
        f"expects {expected_features} features."
    )

# Standardise each feature across the samples (samples become rows after .T).
# NOTE(review): the scaler is fitted on the new data itself. If the PCA was
# trained on data scaled with a different (training-set) scaler, the
# projection will not be comparable -- presumably that scaler should be
# persisted and reused here; confirm against the training script.
scaler = StandardScaler()
scaled_new_data = scaler.fit_transform(new_data.T)

# Project the standardised samples onto the loaded components.
new_principal_components = loaded_pca.transform(scaled_new_data)

# Label the result: one row per sample (original column labels), one column
# per principal component.
sample_ids = new_data.columns
component_names = [
    f'Principal Component {i+1}' for i in range(loaded_pca.n_components_)
]
new_principal_df = pd.DataFrame(
    data=new_principal_components,
    columns=component_names,
    index=sample_ids,
)

print(new_principal_df)

# Write the component scores next to the script (cwd was set above).
output_file_path = 'pca_principal_components.csv'
new_principal_df.to_csv(output_file_path)