实际研究中,Fst为0~0.05:群体间遗传分化很小,可以不考虑;
Fst为0.05~0.15,群体间存在中等程度的遗传分化;
Fst为0.15~0.25,群体间遗传分化较大;
Fst为0.25以上,群体间有很大的遗传分化。
大召唤术
# encoding:UTF-8
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
from matplotlib import rcParams
import math
import time
plt.rcParams['font.family']=['cmex10']
使用pca去除品系混合的个体,提取这些个体再做fst
pca_result_fst_法=pca_result_2[pca_result_2[3]>0.0352]
pca_result_fst_丹=pca_result_2[pca_result_2[2]>0.01]
pca_result_fst_加=pca_result_2[(pca_result_2[3]<0.0078)&(pca_result_2[2]<-0.0069)]
pca_result_fst=pd.concat([pca_result_fst_法,pca_result_fst_丹,pca_result_fst_加])
pca_result_fst[['0_x',1,"breed"]].to_csv(r'E:\新希望六和\系谱校正\长白亲缘关系\LL_2304id_3pinxi.txt',index=None,header=None,sep='\t')
使用plink质控,使用gcta计算fst
cd .\长白亲缘关系
plink --bfile all_LL_2423id --keep LL_2304id.txt --make-bed --out LL_2304id
gcta64 --bfile LL_2304id --fst --autosome-num 36 --sub-popu LL_2304id_3pinxi.txt --out LL_2304id_3pinxi
柱状FST,较慢
def fst_bar(filein):
data0 = pd.read_csv(open(filein),sep='\s+',header=0)
# 剔除fst为空、染色体位置未知、性染色体上的位点
data1=data0[data0['Fst']!='-nan(ind)']
data2=data1[((data1['Chr']!=0)&(data1['Chr']!=23)&(data1['Chr']!=24))]
datasort=data2.groupby('Chr',sort = True).apply(lambda x:x.sort_values('bp',ascending=True)).reset_index(drop=True)
datasort['Fst']=datasort['Fst'].astype(float)
datasort['Chr']=datasort['Chr'].astype(str)
# 设置x轴染色体名称
datasort=datasort.reset_index(drop=True)
datasort["i"]=datasort.index
chrom_df = datasort.groupby("Chr")["i"].median()
# 设置每条染色体颜色
colordict={'1':'#F08080','2':'#F4A460','3':'#FFD700','4':'#8FBC8F','5':'#228B22','6':'#3CB371','7':'#40E0D0','8':'#008B8B','9':'#AFEEEE',
'10':'#5F9EA0','11':'#87CEEB','12':'#4682B4','13':'#B0C4DE','14':'#6A5ACD','15':'#9370DB','16':'#E7BBDC','17':'#FFCEDA','18':'#F6A8A5'}
def func(x):
return colordict[x['Chr']]
colorlist = datasort.apply(func,axis=1)
# 横轴显示染色体编号
chr_list=[1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 2, 3, 4, 5, 6, 7, 8, 9]
print(filein)
print("Fst均值:",datasort['Fst'].mean())
a=time.time()
plt.figure(figsize=(20,5),dpi=80)
plt.ylim(0,1)
# 箱线图
plt.bar(np.arange(len(datasort)),datasort['Fst'],1,color=colorlist)
plt.xticks(chrom_df,labels=chr_list)
plt.show()
b=time.time()
print(b-a)
return datasort
filein=r".\长白亲缘关系\LL_2304id_3pinxi.fst"
fst_bar(filein)
image.png
散点图,较快
def fst_scatter(filein):
data0 = pd.read_csv(open(filein),sep='\s+',header=0)
# 剔除fst为空、染色体位置未知、性染色体上的位点
data1=data0[data0['Fst']!='-nan(ind)']
data2=data1[((data1['Chr']!=0)&(data1['Chr']!=23)&(data1['Chr']!=24))]
datasort=data2.groupby('Chr',sort = True).apply(lambda x:x.sort_values('bp',ascending=True)).reset_index(drop=True)
datasort['Fst']=datasort['Fst'].astype(float)
datasort['Chr']=datasort['Chr'].astype(str)
# 设置x轴染色体名称
datasort=datasort.reset_index(drop=True)
datasort["i"]=datasort.index
chrom_df = datasort.groupby("Chr")["i"].median()
# 设置每条染色体颜色
colordict={'1':'#F08080','2':'#F4A460','3':'#FFD700','4':'#8FBC8F','5':'#228B22','6':'#3CB371','7':'#40E0D0','8':'#008B8B','9':'#AFEEEE',
'10':'#5F9EA0','11':'#87CEEB','12':'#4682B4','13':'#B0C4DE','14':'#6A5ACD','15':'#9370DB','16':'#E7BBDC','17':'#FFCEDA','18':'#F6A8A5'}
def func(x):
return colordict[x['Chr']]
colorlist = datasort.apply(func,axis=1)
# 横轴显示染色体编号
chr_list=[1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 2, 3, 4, 5, 6, 7, 8, 9]
print(filein)
print("Fst均值:",datasort['Fst'].mean())
a=time.time()
plt.figure(figsize=(20,5),dpi=80)
plt.ylim(0,1)
# 箱线图
plt.scatter(np.arange(len(datasort)),datasort['Fst'],1,color=colorlist)
plt.xticks(chrom_df,labels=chr_list)
plt.show()
b=time.time()
print(b-a)
return datasort
filein=r".\长白亲缘关系\LL_2304id_3pinxi.fst"
datasort=fst_scatter(filein)
image.png
获取前5%的位点
# 取前5%的位点
datasort['Fst'].quantile(0.95)
fst_quantile5=datasort[datasort['Fst']>0.397559]
fst_quantile5
image.png
# fst前5%的位点的散点图
fst_quantile5=fst_quantile5.reset_index(drop=True)
colordict={'1':'#F08080','2':'#F4A460','3':'#FFD700','4':'#8FBC8F','5':'#228B22','6':'#3CB371','7':'#40E0D0','8':'#008B8B','9':'#AFEEEE',
'10':'#5F9EA0','11':'#87CEEB','12':'#4682B4','13':'#B0C4DE','14':'#6A5ACD','15':'#9370DB','16':'#E7BBDC','17':'#FFCEDA','18':'#F6A8A5'}
def func(x):
return colordict[x['Chr']]
colorlist = fst_quantile5.apply(func,axis=1)
# 横轴显示染色体编号
chr_list=[1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 2, 3, 4, 5, 6, 7, 8, 9]
fst_quantile5["i"]=fst_quantile5.index
chrom_df = fst_quantile5.groupby("Chr")["i"].median()
plt.figure(figsize=(20,5),dpi=80)
plt.ylim(0.35,1)
plt.scatter(np.arange(len(fst_quantile5)),fst_quantile5['Fst'],1,color=colorlist)
plt.xticks(chrom_df,labels=chr_list)
plt.show()
image.png