Mercurial > repos > dereeper > mlmm
comparison mlmm/MLMM.pl @ 0:6b7107812931 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 02 Jul 2015 05:42:38 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:6b7107812931 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 use strict; | |
4 use Switch; | |
5 use Getopt::Long; | |
6 use Bio::SeqIO; | |
7 | |
8 | |
9 my $usage = qq~Usage:$0 <args> [<opts>] | |
10 where <args> are: | |
11 -g, --geno <Genotype input> | |
12 -i, --info <SNP information. Genome position.> | |
13 -p, --pheno <Phenotype input> | |
14 -o, --out <output name> | |
15 -d, --directory <directory for MLMM R libraries> | |
16 -s, --step_number <number of steps. Maximum: 20. Default: 10> | |
17 -m, --method <Method: mbonf or extBIC. Default: mbonf> | |
18 ~; | |
19 $usage .= "\n"; | |
20 | |
21 my ($geno,$map,$pheno,$out,$dir,$steps,$method); | |
22 | |
23 | |
24 GetOptions( | |
25 "geno=s" => \$geno, | |
26 "info=s" => \$map, | |
27 "pheno=s" => \$pheno, | |
28 "out=s" => \$out, | |
29 "dir=s" => \$dir, | |
30 "steps=s" => \$steps, | |
31 "method=s" => \$method | |
32 ); | |
33 | |
34 | |
35 die $usage | |
36 if ( !$geno || !$map || !$pheno || !$out || !$dir || !$steps || !$method); | |
37 | |
38 my $max_steps = 10; | |
39 my $plot_opt = "mbonf"; | |
40 if ($method && $method ne 'mbonf' && $method ne 'extBIC') | |
41 { | |
42 print "Aborted: Method must be mbonf or extBIC.\n"; | |
43 exit; | |
44 } | |
45 else | |
46 { | |
47 $plot_opt = $method; | |
48 } | |
49 if ($steps && $steps !~/\d+/ && $steps > 20 && $steps < 2) | |
50 { | |
51 print "Aborted: Number of steps must be greater than 2 and lower than 20.\n"; | |
52 exit; | |
53 } | |
54 else | |
55 { | |
56 $max_steps = $steps; | |
57 } | |
58 | |
59 | |
60 my $chunk = 2; | |
61 | |
62 my $RSCRIPT_EXE = "Rscript"; | |
63 my $R_DIR = $dir; | |
64 | |
65 | |
66 my $head_trait = `head -1 $pheno`; | |
67 my @headers_traits = split(/\t/,$head_trait); | |
68 my $trait_name = $headers_traits[1]; | |
69 | |
70 | |
71 open( my $RCMD, ">rscript" ) or throw Error::Simple($!); | |
72 | |
73 | |
74 | |
75 | |
76 print $RCMD "Y_file <- \"" . $pheno . "\"\n"; | |
77 print $RCMD "X_file <- \"" . $geno . "\"\n"; | |
78 if($map) | |
79 { | |
80 print $RCMD "map_file <- \"$map\"\n"; | |
81 print $RCMD "map <- read.table(map_file, sep = \"\\t\", header = T)\n"; | |
82 } | |
83 print $RCMD "mlmm_data = list()\n"; | |
84 print $RCMD "mlmm_data\$chunk <- $chunk\n"; | |
85 print $RCMD "mlmm_data\$maxsteps <- $max_steps\n"; | |
86 print $RCMD "genot <- read.table(X_file, sep = \"\\t\", header = T)\n"; | |
87 print $RCMD "genot_mat <- as.matrix(genot[, 2:ncol(genot)])\n"; | |
88 print $RCMD "rownames(genot_mat) <- genot\$Ind_id\n"; | |
89 | |
90 print $RCMD "phenot <- read.table(Y_file, sep = \"\\t\", header = T)\n"; | |
91 | |
92 | |
93 | |
94 # missing data imputation | |
95 print $RCMD "genot_imp <- genot_mat\n"; | |
96 print $RCMD "average <- colMeans(genot_imp, na.rm = T)\n"; | |
97 print $RCMD "for (i in 1:ncol(genot_imp)){genot_imp[is.na(genot_imp[,i]), i] <- average[i]}\n"; | |
98 | |
99 # kinship matrix computation | |
100 print $RCMD "average <- colMeans(genot_imp, na.rm = T)\n"; | |
101 print $RCMD "stdev <- apply(genot_imp, 2, sd)\n"; | |
102 print $RCMD "genot_stand <- sweep(sweep(genot_imp, 2, average, \"-\"), 2, stdev, \"/\")\n"; | |
103 print $RCMD "K_mat <- (genot_stand %*% t(genot_stand)) / ncol(genot_stand)\n"; | |
104 print $RCMD "write.table(K_mat, '$out.kinship', sep='\\t', dec='.', quote=F, col.names=T, row.names=T)\n"; | |
105 | |
106 print $RCMD "source(\"" . $R_DIR. "/mlmm.r\")\n"; | |
107 print $RCMD "source(\"" . $R_DIR. "/emma.r\")\n"; | |
108 | |
109 # mlmm | |
110 print $RCMD "mygwas <- mlmm(Y = phenot\$$trait_name, X = genot_imp, K = K_mat, nbchunks=mlmm_data\$chunk, maxsteps=mlmm_data\$maxsteps)\n"; | |
111 | |
112 # plots | |
113 print $RCMD "pdf('$out.pdf')\n"; | |
114 print $RCMD "plot_step_table(mygwas, \"h2\")\n"; | |
115 print $RCMD "plot_step_table(mygwas, \"extBIC\")\n"; | |
116 print $RCMD "plot_step_table(mygwas, \"maxpval\")\n"; | |
117 print $RCMD "plot_step_RSS(mygwas)\n"; | |
118 # for (my $j = 1; $j <= ($max_steps - 1); $j++) | |
119 # { | |
120 # print $RCMD "plot_fwd_GWAS(mygwas, step = $j, snp_info = map, pval_filt = 0.1)\n"; | |
121 # } | |
122 print $RCMD "plot_opt_GWAS(mygwas, opt = \"extBIC\", snp_info = map, pval_filt = 0.1)\n"; | |
123 print $RCMD "plot_opt_GWAS(mygwas, opt = \"mbonf\", snp_info = map, pval_filt = 0.1)\n"; | |
124 #print $RCMD "qqplot_fwd_GWAS(mygwas, nsteps = mlmm_data\$maxsteps)\n"; | |
125 print $RCMD "qqplot_opt_GWAS(mygwas, opt = \"extBIC\")\n"; | |
126 print $RCMD "qqplot_opt_GWAS(mygwas, opt = \"mbonf\")\n"; | |
127 | |
128 # outputs | |
129 print $RCMD "write.table(mygwas\$RSSout, '$out.rss', sep='\\t', dec='.', quote=F, col.names=T, row.names=F)\n"; | |
130 print $RCMD "write.table(mygwas\$step_table, '$out.steptable', sep='\\t', dec='.', quote=F, col.names=T, row.names=F)\n"; | |
131 | |
132 $plot_opt = "\$opt_" . $plot_opt; | |
133 print $RCMD "pval = mygwas" . $plot_opt . "\$out\n"; | |
134 print $RCMD "colnames(pval) = c(\"Marker_name\", \"Pvalue\")\n"; | |
135 print $RCMD "info_tmp = map\n"; | |
136 print $RCMD "colnames(info_tmp) = c(\"Marker_name\", \"Chr\", \"Pos\")\n"; | |
137 print $RCMD "res_asso = pval\n"; | |
138 print $RCMD qq~ | |
139 if(exists("info_tmp")){ | |
140 res_asso = merge(info_tmp, res_asso, by="Marker_name") | |
141 if( !is.element("Trait", colnames(info_tmp)) ){ | |
142 m = matrix(data="traitname", ncol=1, nrow=nrow(res_asso), dimnames=list(c(), c("Trait"))) | |
143 res_asso = cbind(m, res_asso) | |
144 } | |
145 } | |
146 ~; | |
147 print $RCMD "res_asso = res_asso[order(res_asso[, \"Trait\"], res_asso[, \"Chr\"], res_asso[, \"Pos\"]), ]\n"; | |
148 print $RCMD "write.table(res_asso, '$out.res_asso', sep='\t', dec='.', quote=F, col.names=T, row.names=F)\n"; | |
149 close($RCMD); | |
150 | |
151 system("$RSCRIPT_EXE --vanilla rscript"); | |
152 | |
153 | |
154 | |
155 |