annotate mlmm/MLMM.pl @ 0:6b7107812931 draft

Uploaded
author dereeper
date Thu, 02 Jul 2015 05:42:38 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6b7107812931 Uploaded
dereeper
parents:
diff changeset
#!/usr/bin/perl

# MLMM.pl -- build and run an R script that performs an MLMM analysis.
#
# The script validates its command-line options, reads the trait name from
# the second tab-separated field of the phenotype file's header line, writes
# an R program to the file "rscript" in the current directory and runs it
# with "Rscript --vanilla".  The generated R program sources mlmm.r and
# emma.r from the directory given with --dir, calls mlmm() and writes:
#
#   <out>.kinship    kinship matrix computed from the mean-imputed genotypes
#   <out>.pdf        output of plot_step_table, plot_step_RSS, plot_opt_GWAS
#                    and qqplot_opt_GWAS
#   <out>.rss        mygwas$RSSout
#   <out>.steptable  mygwas$step_table
#   <out>.res_asso   p-values of the model selected with --method, merged
#                    with the SNP information table
#
# Any validation failure, I/O error or failing Rscript run terminates the
# script through die (message on STDERR, non-zero exit status).

use strict;
use warnings;

# NOTE(review): Switch and Bio::SeqIO are not referenced anywhere in this
# script; they are kept only so that the set of loaded modules is unchanged.
use Switch;
use Getopt::Long;
use Bio::SeqIO;

my $usage = qq~Usage:$0 <args> [<opts>]
where <args> are:
-g, --geno <Genotype input>
-i, --info <SNP information. Genome position.>
-p, --pheno <Phenotype input>
-o, --out <output name>
-d, --directory <directory for MLMM R libraries>
-s, --step_number <number of steps. Maximum: 20. Default: 10>
-m, --method <Method: mbonf or extBIC. Default: mbonf>
~;
$usage .= "\n";

# Inclusive bounds and documented defaults for the optional arguments.
my $MIN_STEPS      = 2;
my $MAX_STEPS      = 20;
my $DEFAULT_STEPS  = 10;
my $DEFAULT_METHOD = 'mbonf';

my ( $geno, $map, $pheno, $out, $dir, $steps, $method );

# Every spelling shown in the usage text is registered explicitly; the
# historical names (--dir, --steps) remain the primary names.
GetOptions(
    "geno|g=s"              => \$geno,
    "info|i=s"              => \$map,
    "pheno|p=s"             => \$pheno,
    "out|o=s"               => \$out,
    "dir|directory|d=s"     => \$dir,
    "steps|step_number|s=s" => \$steps,
    "method|m=s"            => \$method
) or die $usage;

# Input files, output prefix and R library directory are mandatory.
die $usage
    if ( !$geno || !$map || !$pheno || !$out || !$dir );

# Steps and method are optional and fall back to the documented defaults.
$steps  = $DEFAULT_STEPS  unless ( defined $steps  && length $steps );
$method = $DEFAULT_METHOD unless ( defined $method && length $method );

if ( $method ne 'mbonf' && $method ne 'extBIC' )
{
    die "Aborted: Method must be mbonf or extBIC.\n";
}
my $plot_opt = $method;

# Reject anything that is not a plain integer inside the allowed range.
if ( $steps !~ /\A\d+\z/ || $steps < $MIN_STEPS || $steps > $MAX_STEPS )
{
    die "Aborted: Number of steps must be an integer between $MIN_STEPS and $MAX_STEPS.\n";
}
my $max_steps = $steps + 0;    # numeric form, drops any leading zeros

my $chunk = 2;

my $RSCRIPT_EXE = "Rscript";
my $R_DIR       = $dir;

# Return $text as a double-quoted R string literal, escaping backslashes and
# double quotes so that arbitrary paths cannot break the generated script.
sub r_string
{
    my ($text) = @_;
    $text =~ s/([\\"])/\\$1/g;
    return '"' . $text . '"';
}

# The trait name is the second tab-separated field of the header line.
# The file is read directly (no shell) and the line terminator is removed so
# that a two-column file does not yield a name ending in a newline.
open( my $PHENO, '<', $pheno )
    or die "Aborted: cannot open phenotype file '$pheno': $!\n";
my $head_trait = <$PHENO>;
close($PHENO);
die "Aborted: phenotype file '$pheno' is empty.\n"
    unless defined $head_trait;
$head_trait =~ s/[\r\n]+\z//;
my @headers_traits = split( /\t/, $head_trait );
my $trait_name     = $headers_traits[1];
die "Aborted: no trait name found in the second header column of '$pheno'.\n"
    unless ( defined $trait_name && length $trait_name );

# R string literals for every path that is written into the R program.
my $pheno_r     = r_string($pheno);
my $geno_r      = r_string($geno);
my $map_r       = r_string($map);
my $mlmm_src_r  = r_string("$R_DIR/mlmm.r");
my $emma_src_r  = r_string("$R_DIR/emma.r");
my $kinship_r   = r_string("$out.kinship");
my $pdf_r       = r_string("$out.pdf");
my $rss_r       = r_string("$out.rss");
my $steptable_r = r_string("$out.steptable");
my $res_asso_r  = r_string("$out.res_asso");

# R fragment that merges the p-values with the SNP information table and
# prepends a "Trait" column when the information table has none.
# NOTE(review): the Trait column is filled with the literal "traitname", not
# with the trait read from the phenotype header -- confirm this is intended.
my $merge_block = qq~
if(exists("info_tmp")){
res_asso = merge(info_tmp, res_asso, by="Marker_name")
if( !is.element("Trait", colnames(info_tmp)) ){
m = matrix(data="traitname", ncol=1, nrow=nrow(res_asso), dimnames=list(c(), c("Trait")))
res_asso = cbind(m, res_asso)
}
}
~;

my @r_code = (

    # inputs
    "Y_file <- $pheno_r",
    "X_file <- $geno_r",
    "map_file <- $map_r",
    'map <- read.table(map_file, sep = "\t", header = T)',
    'mlmm_data = list()',
    "mlmm_data\$chunk <- $chunk",
    "mlmm_data\$maxsteps <- $max_steps",
    'genot <- read.table(X_file, sep = "\t", header = T)',
    'genot_mat <- as.matrix(genot[, 2:ncol(genot)])',
    'rownames(genot_mat) <- genot$Ind_id',
    'phenot <- read.table(Y_file, sep = "\t", header = T)',

    # missing data imputation
    'genot_imp <- genot_mat',
    'average <- colMeans(genot_imp, na.rm = T)',
    'for (i in 1:ncol(genot_imp)){genot_imp[is.na(genot_imp[,i]), i] <- average[i]}',

    # kinship matrix computation
    'average <- colMeans(genot_imp, na.rm = T)',
    'stdev <- apply(genot_imp, 2, sd)',
    'genot_stand <- sweep(sweep(genot_imp, 2, average, "-"), 2, stdev, "/")',
    'K_mat <- (genot_stand %*% t(genot_stand)) / ncol(genot_stand)',
    "write.table(K_mat, $kinship_r, sep='\\t', dec='.', quote=F, col.names=T, row.names=T)",

    "source($mlmm_src_r)",
    "source($emma_src_r)",

    # mlmm
    "mygwas <- mlmm(Y = phenot\$$trait_name, X = genot_imp, K = K_mat, nbchunks=mlmm_data\$chunk, maxsteps=mlmm_data\$maxsteps)",

    # plots
    # (Per-step output via plot_fwd_GWAS(mygwas, step = j, snp_info = map,
    # pval_filt = 0.1) and qqplot_fwd_GWAS(mygwas, nsteps =
    # mlmm_data$maxsteps) was disabled in the original script and stays
    # disabled here.)
    "pdf($pdf_r)",
    'plot_step_table(mygwas, "h2")',
    'plot_step_table(mygwas, "extBIC")',
    'plot_step_table(mygwas, "maxpval")',
    'plot_step_RSS(mygwas)',
    'plot_opt_GWAS(mygwas, opt = "extBIC", snp_info = map, pval_filt = 0.1)',
    'plot_opt_GWAS(mygwas, opt = "mbonf", snp_info = map, pval_filt = 0.1)',
    'qqplot_opt_GWAS(mygwas, opt = "extBIC")',
    'qqplot_opt_GWAS(mygwas, opt = "mbonf")',

    # outputs
    "write.table(mygwas\$RSSout, $rss_r, sep='\\t', dec='.', quote=F, col.names=T, row.names=F)",
    "write.table(mygwas\$step_table, $steptable_r, sep='\\t', dec='.', quote=F, col.names=T, row.names=F)",

    # association table for the model selected with --method
    "pval = mygwas\$opt_${plot_opt}\$out",
    'colnames(pval) = c("Marker_name", "Pvalue")',
    'info_tmp = map',
    'colnames(info_tmp) = c("Marker_name", "Chr", "Pos")',
    'res_asso = pval',
    $merge_block,
    'res_asso = res_asso[order(res_asso[, "Trait"], res_asso[, "Chr"], res_asso[, "Pos"]), ]',
    "write.table(res_asso, $res_asso_r, sep='\\t', dec='.', quote=F, col.names=T, row.names=F)",
);

open( my $RCMD, '>', 'rscript' )
    or die "Aborted: cannot create R script 'rscript': $!\n";
print $RCMD join( "\n", @r_code ), "\n";

# Buffered write errors only surface at close, so check it.
close($RCMD)
    or die "Aborted: cannot write R script 'rscript': $!\n";

# List form of system: no shell is involved in starting Rscript.
my $status = system( $RSCRIPT_EXE, '--vanilla', 'rscript' );
if ( $status == -1 )
{
    die "Aborted: cannot run $RSCRIPT_EXE: $!\n";
}
elsif ( $status != 0 )
{
    die "Aborted: $RSCRIPT_EXE failed (wait status $status).\n";
}