-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcal_pca.py
51 lines (36 loc) · 1.09 KB
/
cal_pca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 16 21:40:08 2018
@author: YudongCai
@Email: [email protected]
"""
import click
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
def load_data(infile):
    """
    Read a purely numeric, tab-separated input matrix.

    The file is read without a header row; each row is one individual and
    each column is one feature.

    Parameters
    ----------
    infile : str, path object or file-like object
        Passed straight to ``pandas.read_csv``.

    Returns
    -------
    numpy.ndarray
        The contents of the table as a 2-D array.
    """
    df = pd.read_csv(infile, sep='\t', header=None)
    # ``to_numpy()`` is the accessor pandas recommends over ``.values``.
    return df.to_numpy()
def pca(X, n_components=10):
    """
    Fit a PCA on ``X`` and project ``X`` onto the fitted components.

    Parameters
    ----------
    X : array-like
        Numeric matrix with one individual per row and one feature per
        column (the layout ``load_data`` documents).
    n_components : int, optional
        Number of components passed to ``sklearn.decomposition.PCA``.
        Defaults to 10, the value that was previously hard-coded.
        scikit-learn rejects values larger than
        ``min(n_samples, n_features)``, so pass a smaller number for
        small matrices.

    Returns
    -------
    components_ay : numpy.ndarray
        ``X`` transformed into principal-component space.
    explained_variance_ratio : numpy.ndarray
        The fitted model's ``explained_variance_ratio_``, one entry per
        component.
    """
    # Named ``model`` so the local does not shadow this function's own name.
    model = PCA(n_components=n_components)
    # Fit and project in a single call instead of fit() followed by
    # transform() on the same data.
    components_ay = model.fit_transform(X)
    explained_variance_ratio = model.explained_variance_ratio_
    return components_ay, explained_variance_ratio
@click.command()
@click.argument('infile')
@click.argument('outprefix')
def main(infile, outprefix):
    """
    Run PCA on INFILE and write the results under OUTPREFIX.

    INFILE is a purely numeric, tab-separated matrix with one individual
    per row and one feature per column.

    Two files are written: OUTPREFIX.pcs (tab-separated principal
    component scores, one row per individual) and
    OUTPREFIX.explained_variance_ratio (one ratio per line).
    """
    # NOTE: click displays the docstring above as the command's --help text.
    X = load_data(infile)
    pcs, ratios = pca(X)

    # header=False is the documented way to omit the column-label row;
    # index=False drops the row labels so only the scores are written.
    pd.DataFrame(pcs).to_csv(f'{outprefix}.pcs', sep='\t', header=False, index=False)

    # A non-empty ``sep`` makes ndarray.tofile write text rather than raw
    # bytes: the ratios are joined by newlines with no trailing newline.
    ratios.tofile(f'{outprefix}.explained_variance_ratio', sep='\n')
if __name__ == '__main__':
    # Invoke the click command only when run as a script, not on import.
    main()