导入数据
Python 读取 txt 文件为 DataFrame
import pandas as pd
import numpy as np
# Read the tab-separated release table into a DataFrame; the first row of the
# file supplies the column names (pandas default).
df = pd.read_csv(
    'C:/Users/Alexia Lee/R_study/v0.1.release.txt',
    sep='\t',
)
# Preview the first five rows.
df.head()
#结果
chr start end reads symbol sample release
0 chr7 139715931 139717015 220 HIPK2 VCaP-capt-SI_8045-C4D5HACXX v0.0
1 chr21 36247516 36248568 175 DOPEY2 VCaP-capt-SI_8045-C4D5HACXX v0.0
2 chr5 137985256 137988315 156 FAM13B VCaP-capt-SI_8045-C4D5HACXX v0.0
3 chr9 135881632 135883078 154 CAMSAP1 VCaP-capt-SI_8045-C4D5HACXX v0.0
4 chr1 117402185 117420649 139 MAN1A2 VCaP-capt-SI_8045-C4D5HACXX v0.0
# Merge columns: build a "chr:start|end" identifier for each circRNA.
# The separators carry no surrounding spaces, so the values look like
# "chr7:139715931" and "chr7:139715931|139717015".
# `start` and `end` are cast to str first because str.cat only joins strings.
df['name1'] = df['chr'].str.cat(df['start'].astype(str), sep=':')
df['circRNA'] = df['name1'].str.cat(df['end'].astype(str), sep='|')
df.head()
#结果
chr start end reads ... sample release name1 circRNA
0 chr7 139715931 139717015 220 ... VCaP-capt-SI_8045-C4D5HACXX v0.0 chr7:139715931 chr7:139715931|139717015
1 chr21 36247516 36248568 175 ... VCaP-capt-SI_8045-C4D5HACXX v0.0 chr21:36247516 chr21:36247516|36248568
2 chr5 137985256 137988315 156 ... VCaP-capt-SI_8045-C4D5HACXX v0.0 chr5:137985256 chr5:137985256|137988315
3 chr9 135881632 135883078 154 ... VCaP-capt-SI_8045-C4D5HACXX v0.0 chr9:135881632 chr9:135881632|135883078
4 chr1 117402185 117420649 139 ... VCaP-capt-SI_8045-C4D5HACXX v0.0 chr1:117402185 chr1:117402185|117420649
处理数据
pivot函数
# Reshape the long table to wide format: one row per circRNA, one column per
# sample, with read counts as the cell values.
data = df.pivot(index='circRNA', columns='sample', values='reads')
# Same reshape, restricted to release "v0.0" (this replaces the table above).
# `release` is the second index level, so it is selected with
# xs(level='release'); a plain .loc["v0.0", :] would look the label up in the
# first level (circRNA) and raise KeyError.
data = (
    df.pivot(index=['circRNA', 'release'], columns='sample', values='reads')
    .xs('v0.0', level='release')
)
数据的输出
# Save the table as CSV (the row index is written as the first column).
# NOTE(review): this exports `df`, the long-format table, not the pivoted
# `data` — confirm which one is meant to be saved.
df.to_csv(
    r'C:/Users/Alexia Lee/R_study/circRNA_reads.csv',
)