指定列数拆表
将表格拆成94份,每份50列,剩下的作为第95份,保存
1 读表,第一列作为行名
2 1-50,51-100,,,依次取列
3 取剩下的列
source /public/home/zzumgg03/huty/softwares/miniconda3/etc/profile.d/conda.sh
conda activate r403
# 拆分物种100份
df = read.table("01_input/data_relative_cutoff_CLR.txt", header=T, sep="\t", row.names=1)
for(i in 1:94)
{
start = 1
end = 50
tmp = df[,c(start:end)]
name = paste("02_batch/per50_batch",i,"txt", sep=".")
write.table(tmp, file=name, sep="\t", quote=F)
start = start + 50
end = end + 50
}
start = 94*50 + 1
end = ncol(df)
tmp = df[,c(start:end)]
name = paste("02_batch/per50_batch.95.txt", sep=".")
write.table(tmp, file=name, sep="\t", quote=F)
一对相关分析
#!/usr/bin/env Rscript
args <- commandArgs(T)
# args[1] 一个个分批表
# args[2] 对应的一个个输出结果
library(psych)
library(stringr)
# input
df = read.table(args[1], sep="\t", header=T, row.names=1)
apd = read.table("01_input/gene_tpm_apd.txt", header=T, sep="\t", row.names=1)
apd = data.frame(t(apd))
# function
correlate = function(df1, df2, output)
{
result=data.frame(print(corr.test(df1, df2, use="pairwise", method="spearman", adjust="none", alpha=.05, ci=TRUE, minlength=100), short=FALSE, digits=20))
# 整理结果
pair=rownames(result) # 行名
result2=data.frame(pair, result[, c(2, 4)]) # 提取信息
# 格式化结果, 将细菌代谢物拆成两列
result3=data.frame(str_split_fixed(result2$pair, "-", 2),
result2[, c(2, 3)])
colnames(result3)=c("feature_1", "feature_2", "r_value", "p_value")
# save
write.table(result3,
file = output,
sep="\t", row.names=F, quote=F)
}
correlate(apd, df, args[2])
多对相关脚本
#!/bin/bash
#SBATCH -p fat
#SBATCH --nodes=1
#SBATCH --ntasks=30
#SBATCH --ntasks-per-node=30
#SBATCH --mem=220G
source /public/home/zzumgg03/huty/softwares/miniconda3/etc/profile.d/conda.sh
conda activate r403
Rscript ./sc_asso.R 02_batch/per50_batch.1.txt 03_out/asso50_batch.1.txt &
Rscript ./sc_asso.R 02_batch/per50_batch.2.txt 03_out/asso50_batch.2.txt &
Rscript ./sc_asso.R 02_batch/per50_batch.3.txt 03_out/asso50_batch.3.txt &
...
网友评论