## RESAMPLING METHODS ## #applied to GWAS data (one genotype) # Data genos = as.matrix(read.table("./genos.txt")) phenos = as.matrix(read.table("./phenos.txt")) #make a binary group label from genotype 1500 table(genos[,1500]) group = ifelse(genos[,1500]==0,0,1) table(group) # Linear model mod = lm(phenos~group) summary(mod) # means for g1 = 0 and g1 = 1 groups mean(phenos[group==0]) mean(phenos[group==1]) m0 = mod$coef[1] m1 = sum(mod$coef) m0 m1 # Student's T test students=t.test(phenos~group,var.equal=TRUE) students m0 m1 #save the t-statistic and p-value for later stat=students$statistic pvalue=students$p.value # Permutation test sample(group) table(sample(group)) #repeat a few times mean(pheno[sample(group)==0] P=1000 tperm=NULL for(p in 1:P){ tperm[p]=t.test(phenos~sample(group),var.equal=TRUE)$statistic } #stat is positive, so first look at upper end of tperm distribution (1-sided test) mean(tperm>stat) #now do the 2-sided test, which is what t.test does by default permp=mean(abs(tperm)>stat) permp pvalue # Bootstrap test table(group[sample(1:length(group),replace=TRUE)]) #repeat a few times B=1000 tboot=NULL for(b in 1:B){ bstrap=sample(1:length(group),replace=TRUE) tboot[b]=t.test(phenos[bstrap]~group[bstrap])$statistic } #2-sided test bootp=mean(abs(tboot)>stat) #Wait - this is a huge p-value #tboot is not a null distribution yet! tbootnull=tboot-mean(tboot) bootp=mean(abs(tbootnull)>stat) bootp