annotate velvet_optimiser_wrapper_vlsci.pl @ 2:28d2dd0f048b draft

Uploaded
author simon-gladman
date Tue, 05 Feb 2013 19:19:37 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
1 #!/usr/bin/perl
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
2
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
3 # velvet_optimiser_wrapper_vlsci.pl
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
4 #
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
5 # Copyright 2012 Simon <simon@Hyperion>
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
6 #
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
7 # This program is free software; you can redistribute it and/or modify
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
8 # it under the terms of the GNU General Public License as published by
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
9 # the Free Software Foundation; either version 2 of the License, or
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
10 # (at your option) any later version.
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
11 #
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
12 # This program is distributed in the hope that it will be useful,
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
15 # GNU General Public License for more details.
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
16 #
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
17 # You should have received a copy of the GNU General Public License
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
18 # along with this program; if not, write to the Free Software
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
20 # MA 02110-1301, USA.
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
21 #
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
22 #
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
23
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
24 use strict;
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
25 use warnings;
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
26 use File::Copy;
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
27
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
# Working copy of the command-line arguments. The parsing code further down
# consumes @stuff from the front, one positional field at a time.
my @stuff = @ARGV;
my %counts;      # how many libraries of each read type have been seen
my %shortMPs;    # set of "-shortMatePaired[N]" flags, appended to the -o option string
my @reads;       # one read-library description string per input library
my $threadsToUse = 2; #According to Enis, this is the default number of threads that multithreaded programs use..

#TO DO! get velvet compile parameters!

# Echo every argument to STDOUT, one per line. Arguments that look like a
# stringified Galaxy datatype object are reduced to their lower-cased type
# name; everything else is printed verbatim.
foreach my $param (@stuff) {
    if (   $param =~ m/galaxy\.datatypes/
        && $param =~ m/datatypes\..+\.(.+)\s+object/)
    {
        # $1 is only read after a successful match, so it can never be
        # undefined or left over from an earlier pattern.
        print lc($1) . "\n";
    }
    else {
        print $param . "\n";
    }
}
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
46
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
#try and split some of it up..
#kmer stuff
# The first three arguments are handed to VelvetOptimiser as -s, -e and -x.
my $sk = shift @stuff;
my $ek = shift @stuff;
my $xk = shift @stuff;

#TO DO! do checks etc on this stuff here!

#get the read file stuff
# Every library is described by a run of positional fields:
#   index, read type, separate-files flag ("True" or anything else),
#   datatype object string, forward file, [reverse file], short-mate-pair marker
# The run of libraries ends at the first argument containing "other:".
# The "@stuff &&" guard stops the loop when the arguments run out; without it
# a missing "other:" marker would spin forever on an empty array.
while (@stuff && $stuff[0] !~ m/other:/) {
    shift @stuff;    # leading index field; its value is never used

    my $read_type = shift @stuff;
    my $sep       = shift @stuff;
    my $file_type = shift @stuff;
    my $ffile     = shift @stuff;
    defined $ffile
        or die "Incomplete read library description in arguments\n";

    # The reverse-read file is only present when the library was supplied
    # as two separate files.
    my $separate = $sep eq "True";
    my $rfile;
    $rfile = shift @stuff if $separate;

    my $sMP = shift @stuff;
    defined $sMP
        or die "Incomplete read library description in arguments\n";

    # Pull the type name out of the stringified datatype object and fail
    # loudly if it is not there, rather than reading a stale/undefined $1.
    $file_type =~ m/datatypes\..+\.(.+)\s+object/
        or die "Unrecognised datatype description '$file_type'\n";
    $file_type = lc($1);
    $file_type = "fastq" if $file_type =~ m/fastq/i;    # any fastq flavour -> plain fastq

    # The second and later libraries of the same read type get a numeric
    # suffix (e.g. -shortPaired2); the first one has none.
    my $count  = ++$counts{$read_type};
    my $suffix = $count > 1 ? $count : "";

    $shortMPs{"-shortMatePaired$suffix"} = 1 if $sMP eq "shortMP_lib";

    my $library = "-${read_type}${suffix} -${file_type}";
    $library .= " -separate" if $separate;
    $library .= " $ffile";
    $library .= " $rfile" if $separate;
    push @reads, $library;
}

@stuff
    or die "Argument list ended before the 'other:' marker was found\n";
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
83
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
#get the other stuff
# Drop the "other:" marker itself, then read the two fields that follow it.
shift @stuff if @stuff && $stuff[0] =~ m/other:/;
my $amos    = shift @stuff;    # "amos" requests the AMOS (.afg) output
my $verbose = shift @stuff;    # "verbose" adds -v to the command line

#get the advanced stuff if it exists!
# These are later passed to VelvetOptimiser as -k, -c, -o, -m and -z
# respectively; they stay undefined when no "advanced:" section was given.
my ($oFK, $oFC, $vgO, $minC, $maxC);
if (@stuff && $stuff[0] =~ m/advanced:/) {
    shift @stuff;
    ($oFK, $oFC, $vgO, $minC, $maxC) = splice(@stuff, 0, 5);
}

#get the output file names!
my $contigs_outfile = shift @stuff;
my $stats_outfile   = shift @stuff;

# Declared unconditionally and assigned separately: the behaviour of
# "my $x = ... if COND" is undefined in Perl.
my $afg_outfile;
$afg_outfile = shift @stuff if defined $amos && $amos eq "amos";

my $logfile = shift @stuff;
my $stderr  = shift @stuff;
my $lgraph  = shift @stuff;

# $lgraph is the last positional field, so if it is missing the argument
# list was too short. Stop here, before the assembly is started.
defined $lgraph
    or die "Argument list is too short: output file names are missing\n";
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
107
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
#build the command line...

# Quote a value for /bin/sh: wrap it in single quotes and escape any single
# quote it contains, so paths with spaces or shell metacharacters survive.
my $shell_quote = sub {
    my ($value) = @_;
    $value =~ s/'/'\\''/g;
    return "'$value'";
};

my $cmd = "VelvetOptimiser.pl -d temp_vgo_files"
    . " -s " . $shell_quote->($sk)
    . " -e " . $shell_quote->($ek)
    . " -x " . $shell_quote->($xk);
$cmd .= " -a" if $amos eq "amos";
$cmd .= " -v" if $verbose eq "verbose";

# All read-library descriptions travel as ONE argument to -f; each entry is
# preceded by a single space, exactly as before.
$cmd .= " -f " . $shell_quote->( join( "", map { " $_" } @reads ) );

# The -o option string is the user-supplied value (if any) followed by the
# collected -shortMatePaired flags. Keys are sorted so that the generated
# command is the same from run to run.
my $vgopt = defined $vgO ? $vgO : "";
foreach my $key ( sort keys %shortMPs ) {
    $vgopt .= " $key" if $key;
}
$cmd .= " -o " . $shell_quote->($vgopt) if $vgopt;

$cmd .= " -k " . $shell_quote->($oFK)  if $oFK;
$cmd .= " -c " . $shell_quote->($oFC)  if $oFC;
$cmd .= " -m " . $shell_quote->($minC) if $minC;
$cmd .= " -z " . $shell_quote->($maxC) if $maxC;
$cmd .= " -t $threadsToUse";

# The command's stderr is redirected into the file named by $stderr.
$cmd .= " 2> " . $shell_quote->($stderr);

print "\n$cmd\n";

print "about to run the command!";
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
138
28d2dd0f048b Uploaded
simon-gladman
parents:
diff changeset
# Run the assembled command through the shell. Any non-zero status is
# reported on STDOUT and ends the script with exit code 1.
if ( system($cmd) != 0 ) {
    print "There was a velvet optimiser error\n";
    exit(1);
}

#copy the files to the new places and delete the directory..
print "Copying output\n";

# glob() is called in list context and only the first match is kept, so
# copy() always receives exactly two arguments no matter how many files
# match the pattern.
my ($vgo_logfile) = glob("temp_vgo_files/*Logfile.txt");

# Source => destination pairs, in the order they are copied.
my @outputs = (
    [ "temp_vgo_files/contigs.fa", $contigs_outfile ],
    [ "temp_vgo_files/stats.txt",  $stats_outfile ],
    [ $vgo_logfile,                $logfile ],
);
push @outputs, [ "temp_vgo_files/velvet_asm.afg", $afg_outfile ]
    if $amos eq "amos";
push @outputs, [ "temp_vgo_files/LastGraph", $lgraph ];

# Attempt every copy, report each failure, and keep count so the script can
# exit non-zero afterwards instead of claiming success with outputs missing.
my $failures = 0;
foreach my $pair (@outputs) {
    my ( $from, $to ) = @{$pair};

    if ( !defined $from ) {
        warn "No file matching temp_vgo_files/*Logfile.txt was found\n";
        $failures++;
        next;
    }
    if ( !defined $to ) {
        warn "No destination was given for $from\n";
        $failures++;
        next;
    }
    if ( !copy( $from, $to ) ) {
        warn "Couldn't copy $from to $to: $!\n";
        $failures++;
    }
}

# List-form system() bypasses the shell. $! is only meaningful when the
# command could not be started (-1); otherwise report the exit status.
my $rm_status = system( "rm", "-rf", "temp_vgo_files" );
if ( $rm_status == -1 ) {
    die "Couldn't delete temporary directory. $!\n";
}
elsif ( $rm_status != 0 ) {
    die "Couldn't delete temporary directory. (exit status "
        . ( $? >> 8 ) . ")\n";
}

exit(1) if $failures;

exit(0);