Mercurial > repos > dereeper > mlmm
comparison MLMM.pl @ 1:380b364980f9 draft default tip
planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
| author | dereeper |
|---|---|
| date | Mon, 16 Apr 2018 08:50:05 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:6b7107812931 | 1:380b364980f9 |
|---|---|
| 1 #!/usr/bin/perl | |
| 2 | |
| 3 use strict; | |
| 4 use Getopt::Long; | |
| 5 use Bio::SeqIO; | |
| 6 | |
| 7 | |
| 8 my $usage = qq~Usage:$0 <args> [<opts>] | |
| 9 where <args> are: | |
| 10 -g, --geno <Genotype input> | |
| 11 -i, --info <SNP information. Genome position.> | |
| 12 -p, --pheno <Phenotype input> | |
| 13 -o, --out <output name> | |
| 14 -d, --directory <directory for MLMM R libraries> | |
| 15 -s, --step_number <number of steps. Maximum: 20. Default: 10> | |
| 16 -m, --method <Method: mbonf or extBIC. Default: mbonf> | |
| 17 ~; | |
| 18 $usage .= "\n"; | |
| 19 | |
| 20 my ($geno,$map,$pheno,$out,$dir,$steps,$method); | |
| 21 | |
| 22 | |
| 23 GetOptions( | |
| 24 "geno=s" => \$geno, | |
| 25 "info=s" => \$map, | |
| 26 "pheno=s" => \$pheno, | |
| 27 "out=s" => \$out, | |
| 28 "dir=s" => \$dir, | |
| 29 "steps=s" => \$steps, | |
| 30 "method=s" => \$method | |
| 31 ); | |
| 32 | |
| 33 | |
| 34 die $usage | |
| 35 if ( !$geno || !$map || !$pheno || !$out || !$dir || !$steps || !$method); | |
| 36 | |
| 37 my $max_steps = 10; | |
| 38 my $plot_opt = "mbonf"; | |
| 39 if ($method && $method ne 'mbonf' && $method ne 'extBIC') | |
| 40 { | |
| 41 print "Aborted: Method must be mbonf or extBIC.\n"; | |
| 42 exit; | |
| 43 } | |
| 44 else | |
| 45 { | |
| 46 $plot_opt = $method; | |
| 47 } | |
| 48 if ($steps && $steps !~/\d+/ && $steps > 20 && $steps < 2) | |
| 49 { | |
| 50 print "Aborted: Number of steps must be greater than 2 and lower than 20.\n"; | |
| 51 exit; | |
| 52 } | |
| 53 else | |
| 54 { | |
| 55 $max_steps = $steps; | |
| 56 } | |
| 57 | |
| 58 | |
| 59 my $chunk = 2; | |
| 60 | |
| 61 my $RSCRIPT_EXE = "Rscript"; | |
| 62 my $R_DIR = $dir; | |
| 63 | |
| 64 | |
| 65 my $head_trait = `head -1 $pheno`; | |
| 66 my @headers_traits = split(/\t/,$head_trait); | |
| 67 my $trait_name = $headers_traits[1]; | |
| 68 | |
| 69 | |
| 70 open( my $RCMD, ">rscript" ) or throw Error::Simple($!); | |
| 71 | |
| 72 | |
| 73 | |
| 74 | |
| 75 print $RCMD "Y_file <- \"" . $pheno . "\"\n"; | |
| 76 print $RCMD "X_file <- \"" . $geno . "\"\n"; | |
| 77 if($map) | |
| 78 { | |
| 79 print $RCMD "map_file <- \"$map\"\n"; | |
| 80 print $RCMD "map <- read.table(map_file, sep = \"\\t\", header = T)\n"; | |
| 81 } | |
| 82 print $RCMD "mlmm_data = list()\n"; | |
| 83 print $RCMD "mlmm_data\$chunk <- $chunk\n"; | |
| 84 print $RCMD "mlmm_data\$maxsteps <- $max_steps\n"; | |
| 85 print $RCMD "genot <- read.table(X_file, sep = \"\\t\", header = T)\n"; | |
| 86 print $RCMD "genot_mat <- as.matrix(genot[, 2:ncol(genot)])\n"; | |
| 87 print $RCMD "rownames(genot_mat) <- genot\$Ind_id\n"; | |
| 88 | |
| 89 print $RCMD "phenot <- read.table(Y_file, sep = \"\\t\", header = T)\n"; | |
| 90 | |
| 91 | |
| 92 | |
| 93 # missing data imputation | |
| 94 print $RCMD "genot_imp <- genot_mat\n"; | |
| 95 print $RCMD "average <- colMeans(genot_imp, na.rm = T)\n"; | |
| 96 print $RCMD "for (i in 1:ncol(genot_imp)){genot_imp[is.na(genot_imp[,i]), i] <- average[i]}\n"; | |
| 97 | |
| 98 # kinship matrix computation | |
| 99 print $RCMD "average <- colMeans(genot_imp, na.rm = T)\n"; | |
| 100 print $RCMD "stdev <- apply(genot_imp, 2, sd)\n"; | |
| 101 print $RCMD "genot_stand <- sweep(sweep(genot_imp, 2, average, \"-\"), 2, stdev, \"/\")\n"; | |
| 102 print $RCMD "K_mat <- (genot_stand %*% t(genot_stand)) / ncol(genot_stand)\n"; | |
| 103 print $RCMD "write.table(K_mat, '$out.kinship', sep='\\t', dec='.', quote=F, col.names=T, row.names=T)\n"; | |
| 104 | |
| 105 print $RCMD "source(\"" . $R_DIR. "/mlmm.r\")\n"; | |
| 106 print $RCMD "source(\"" . $R_DIR. "/emma.r\")\n"; | |
| 107 | |
| 108 # mlmm | |
| 109 print $RCMD "mygwas <- mlmm(Y = phenot\$$trait_name, X = genot_imp, K = K_mat, nbchunks=mlmm_data\$chunk, maxsteps=mlmm_data\$maxsteps)\n"; | |
| 110 | |
| 111 # plots | |
| 112 print $RCMD "pdf('$out.pdf')\n"; | |
| 113 print $RCMD "plot_step_table(mygwas, \"h2\")\n"; | |
| 114 print $RCMD "plot_step_table(mygwas, \"extBIC\")\n"; | |
| 115 print $RCMD "plot_step_table(mygwas, \"maxpval\")\n"; | |
| 116 print $RCMD "plot_step_RSS(mygwas)\n"; | |
| 117 # for (my $j = 1; $j <= ($max_steps - 1); $j++) | |
| 118 # { | |
| 119 # print $RCMD "plot_fwd_GWAS(mygwas, step = $j, snp_info = map, pval_filt = 0.1)\n"; | |
| 120 # } | |
| 121 print $RCMD "plot_opt_GWAS(mygwas, opt = \"extBIC\", snp_info = map, pval_filt = 0.1)\n"; | |
| 122 print $RCMD "plot_opt_GWAS(mygwas, opt = \"mbonf\", snp_info = map, pval_filt = 0.1)\n"; | |
| 123 #print $RCMD "qqplot_fwd_GWAS(mygwas, nsteps = mlmm_data\$maxsteps)\n"; | |
| 124 print $RCMD "qqplot_opt_GWAS(mygwas, opt = \"extBIC\")\n"; | |
| 125 print $RCMD "qqplot_opt_GWAS(mygwas, opt = \"mbonf\")\n"; | |
| 126 | |
| 127 # outputs | |
| 128 print $RCMD "write.table(mygwas\$RSSout, '$out.rss', sep='\\t', dec='.', quote=F, col.names=T, row.names=F)\n"; | |
| 129 print $RCMD "write.table(mygwas\$step_table, '$out.steptable', sep='\\t', dec='.', quote=F, col.names=T, row.names=F)\n"; | |
| 130 | |
| 131 $plot_opt = "\$opt_" . $plot_opt; | |
| 132 print $RCMD "pval = mygwas" . $plot_opt . "\$out\n"; | |
| 133 print $RCMD "colnames(pval) = c(\"Marker_name\", \"Pvalue\")\n"; | |
| 134 print $RCMD "info_tmp = map\n"; | |
| 135 print $RCMD "colnames(info_tmp) = c(\"Marker_name\", \"Chr\", \"Pos\")\n"; | |
| 136 print $RCMD "res_asso = pval\n"; | |
| 137 print $RCMD qq~ | |
| 138 if(exists("info_tmp")){ | |
| 139 res_asso = merge(info_tmp, res_asso, by="Marker_name") | |
| 140 if( !is.element("Trait", colnames(info_tmp)) ){ | |
| 141 m = matrix(data="traitname", ncol=1, nrow=nrow(res_asso), dimnames=list(c(), c("Trait"))) | |
| 142 res_asso = cbind(m, res_asso) | |
| 143 } | |
| 144 } | |
| 145 ~; | |
| 146 print $RCMD "res_asso = res_asso[order(res_asso[, \"Trait\"], res_asso[, \"Chr\"], res_asso[, \"Pos\"]), ]\n"; | |
| 147 print $RCMD "write.table(res_asso, '$out.res_asso', sep='\t', dec='.', quote=F, col.names=T, row.names=F)\n"; | |
| 148 close($RCMD); | |
| 149 | |
| 150 system("$RSCRIPT_EXE --vanilla rscript"); | |
| 151 | |
| 152 | |
| 153 | |
| 154 |
