annotate srst2.pl @ 0:6f870ed59b6e draft

Uploaded
author nml
date Mon, 06 Feb 2017 12:31:04 -0500
parents
children 599a4dc309aa
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6f870ed59b6e Uploaded
nml
parents:
diff changeset
1 #!/usr/bin/env perl
6f870ed59b6e Uploaded
nml
parents:
diff changeset
2
6f870ed59b6e Uploaded
nml
parents:
diff changeset
3
6f870ed59b6e Uploaded
nml
parents:
diff changeset
4 use strict;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
5 use warnings;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
6 use Cwd;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
7 use File::Copy;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
8
6f870ed59b6e Uploaded
nml
parents:
diff changeset
9 #The first 4 arguments should be in this format:
6f870ed59b6e Uploaded
nml
parents:
diff changeset
10 # /path/to/srst2.py bam_output scores_output pileup_output ...
6f870ed59b6e Uploaded
nml
parents:
diff changeset
11
6f870ed59b6e Uploaded
nml
parents:
diff changeset
12 my $binary = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
13 shift;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
14
6f870ed59b6e Uploaded
nml
parents:
diff changeset
15 my ($bam_results, $scores, $pileup, $job_type, $txt_results, $genes_results, $fullgenes_results, $name, $databases);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
16 my ($allele_results,$allele_type);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
17
6f870ed59b6e Uploaded
nml
parents:
diff changeset
18
6f870ed59b6e Uploaded
nml
parents:
diff changeset
19 $bam_results = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
20 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
21 $scores = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
22 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
23 $pileup = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
24 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
25
6f870ed59b6e Uploaded
nml
parents:
diff changeset
26 #Now that we shifted the first 4 arguments, get the rest depending on the job type. There are three:
6f870ed59b6e Uploaded
nml
parents:
diff changeset
27 #I pass a letter to tell us which job type was selected.
6f870ed59b6e Uploaded
nml
parents:
diff changeset
28 $job_type = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
29 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
30
6f870ed59b6e Uploaded
nml
parents:
diff changeset
31 #If m, mlst only: we only have one mlst output file
6f870ed59b6e Uploaded
nml
parents:
diff changeset
32 if($job_type eq "m")
6f870ed59b6e Uploaded
nml
parents:
diff changeset
33 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
34 $txt_results = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
35 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
36 $allele_results = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
37 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
38 $allele_type = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
39 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
40 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
41 #If g, genedb only: we have two outputs: genes and fullgenes. We also get the name of the database
6f870ed59b6e Uploaded
nml
parents:
diff changeset
42 elsif($job_type eq "g")
6f870ed59b6e Uploaded
nml
parents:
diff changeset
43 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
44 $genes_results = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
45 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
46 $fullgenes_results = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
47 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
48 $databases = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
49 shift (@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
50 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
51 #If b, both mlst and genedb: We will have three output files and the database name.
6f870ed59b6e Uploaded
nml
parents:
diff changeset
52 else
6f870ed59b6e Uploaded
nml
parents:
diff changeset
53 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
54 $txt_results = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
55 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
56 $genes_results = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
57 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
58 $fullgenes_results = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
59 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
60 $databases = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
61 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
62 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
63
6f870ed59b6e Uploaded
nml
parents:
diff changeset
64 #After we get the output files/database name, now we get the name of the reads
6f870ed59b6e Uploaded
nml
parents:
diff changeset
65 #This allows SRST2 to give meaningful output instead of just printing 'dataset_xxx' as the sample name
6f870ed59b6e Uploaded
nml
parents:
diff changeset
66 my $filename = $ARGV[0];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
67 shift(@ARGV);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
68
6f870ed59b6e Uploaded
nml
parents:
diff changeset
69 #This index offset is used to determine where the 'genedb' option is located.
6f870ed59b6e Uploaded
nml
parents:
diff changeset
70 #If only a single-end input, the genedb will be 3 positions into the arguments:
6f870ed59b6e Uploaded
nml
parents:
diff changeset
71 # -input_se sample.fastq --genedb database.fasta
6f870ed59b6e Uploaded
nml
parents:
diff changeset
72 my $index_offset = 3;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
73
6f870ed59b6e Uploaded
nml
parents:
diff changeset
74 print @ARGV;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
75 #change the file extensions of the input reads so srst can use them
6f870ed59b6e Uploaded
nml
parents:
diff changeset
76
6f870ed59b6e Uploaded
nml
parents:
diff changeset
77 #Usually the file name looks like this: sample_S1_L001_R1_001.fastq
6f870ed59b6e Uploaded
nml
parents:
diff changeset
78 #If we use this file name, it confuses srst2 a lot. So we just extract
6f870ed59b6e Uploaded
nml
parents:
diff changeset
79 #the sample name to use as input file name.
6f870ed59b6e Uploaded
nml
parents:
diff changeset
80 my @file_name = split /_/, $filename;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
81 $name = "temp_file_name";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
82
6f870ed59b6e Uploaded
nml
parents:
diff changeset
83 my ($for_read, $rev_read, $sing_read, $database);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
84 if ($ARGV[0] eq "--input_pe")
6f870ed59b6e Uploaded
nml
parents:
diff changeset
85 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
86 #Increment index offset if we paired-end inputs
6f870ed59b6e Uploaded
nml
parents:
diff changeset
87 $index_offset++;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
88
6f870ed59b6e Uploaded
nml
parents:
diff changeset
89 $for_read = $name."_1.dat";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
90 $rev_read = $name."_2.dat";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
91
6f870ed59b6e Uploaded
nml
parents:
diff changeset
92 symlink($ARGV[1], $for_read);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
93 symlink($ARGV[2], $rev_read);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
94
6f870ed59b6e Uploaded
nml
parents:
diff changeset
95 $ARGV[1] = $for_read;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
96 $ARGV[2] = $rev_read;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
97 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
98 else
6f870ed59b6e Uploaded
nml
parents:
diff changeset
99 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
100 $sing_read = $name.".dat";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
101
6f870ed59b6e Uploaded
nml
parents:
diff changeset
102 symlink($ARGV[1], $sing_read);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
103
6f870ed59b6e Uploaded
nml
parents:
diff changeset
104 $ARGV[1] = $sing_read;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
105
6f870ed59b6e Uploaded
nml
parents:
diff changeset
106 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
107 #If we are running a job to include genedb, use the database name for input file name
6f870ed59b6e Uploaded
nml
parents:
diff changeset
108 if ($job_type eq 'g' | $job_type eq 'b')
6f870ed59b6e Uploaded
nml
parents:
diff changeset
109 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
110 my @db_names = split /,/, $databases;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
111 my $num_db = @db_names;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
112 my %names_hash = ();
6f870ed59b6e Uploaded
nml
parents:
diff changeset
113 # loop through dbs to replace spaces with _ and check for duplicates
6f870ed59b6e Uploaded
nml
parents:
diff changeset
114 for (my $i = 0; $i < $num_db; $i++){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
115 $db_names[$i]=~s/ /_/g;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
116 if( exists($names_hash{$db_names[$i]}) ) {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
117 print STDERR "More than one database with the same name";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
118 exit(1);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
119 }else{
6f870ed59b6e Uploaded
nml
parents:
diff changeset
120 $names_hash{$db_names[$i]}=$db_names[$i];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
121 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
122 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
123
6f870ed59b6e Uploaded
nml
parents:
diff changeset
124
6f870ed59b6e Uploaded
nml
parents:
diff changeset
125 foreach my $db_name (@db_names){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
126 (my $base = $db_name) =~ s/\.[^.]+$//;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
127 $database = $base.".dat";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
128
6f870ed59b6e Uploaded
nml
parents:
diff changeset
129 symlink($ARGV[$index_offset], $database);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
130 $ARGV[$index_offset] = $database;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
131
6f870ed59b6e Uploaded
nml
parents:
diff changeset
132 $index_offset++;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
133 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
134 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
135
6f870ed59b6e Uploaded
nml
parents:
diff changeset
136 for (my $i =0; $i< @ARGV; $i++){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
137 if (index($ARGV[$i], "maxins") != -1){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
138 my ($maxins, $minins);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
139 my @b2args = split(' ', $ARGV[$i]);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
140 for (my $j = 0; $j < @b2args; $j++){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
141 if (index($b2args[$j], "maxins") != -1){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
142 $maxins = $b2args[$j+1];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
143 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
144 if (index($b2args[$j], "minins") != -1){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
145 $minins = $b2args[$j+1];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
146 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
147 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
148 if ($maxins - $minins < 0){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
149 print STDERR "--minins cannot be greater than --maxins";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
150 exit(1);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
151 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
152 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
153 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
154
6f870ed59b6e Uploaded
nml
parents:
diff changeset
155
6f870ed59b6e Uploaded
nml
parents:
diff changeset
156
6f870ed59b6e Uploaded
nml
parents:
diff changeset
157 my $command = "python $binary @ARGV";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
158
6f870ed59b6e Uploaded
nml
parents:
diff changeset
159 my $exit_code = system($command);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
160
6f870ed59b6e Uploaded
nml
parents:
diff changeset
161 my $cur_dir = getcwd();
6f870ed59b6e Uploaded
nml
parents:
diff changeset
162 # make arrays for using multiple custom databases (creates multiple output files - need to be concatenated)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
163 my (@genefiles, @bamfiles, @pileupfiles, @fullgenefiles, @scoresfiles);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
164
6f870ed59b6e Uploaded
nml
parents:
diff changeset
165 # go through files in the output directory to move/concatenate them as required.
6f870ed59b6e Uploaded
nml
parents:
diff changeset
166
6f870ed59b6e Uploaded
nml
parents:
diff changeset
167 foreach my $file (<$cur_dir/*>)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
168 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
169 print $file, "\n";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
170 #Will cause problems if any files have 'mlst' in them
6f870ed59b6e Uploaded
nml
parents:
diff changeset
171 if ($file =~ /mlst/)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
172 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
173 move($file, $txt_results);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
174 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
175 elsif ($file =~ /\.bam$/)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
176 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
177 push @bamfiles, $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
178 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
179 elsif ($file =~ /\.scores$/)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
180 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
181 push @scoresfiles, $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
182 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
183 elsif ($file =~ /\.pileup$/)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
184 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
185 push @pileupfiles, $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
186 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
187 elsif ($file =~ /__fullgenes__/)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
188 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
189 push @fullgenefiles, $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
190 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
191 elsif ($file =~ /__genes__/)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
192 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
193 push @genefiles, $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
194 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
195 elsif ($file =~ /all_consensus_alleles.fasta$/ && $allele_type eq 'all') {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
196 move($file,$allele_results);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
197 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
198 elsif ($file =~ /new_consensus_alleles.fasta$/ && $allele_type eq 'new'){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
199 move($file,$allele_results);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
200 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
201 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
202
6f870ed59b6e Uploaded
nml
parents:
diff changeset
203
6f870ed59b6e Uploaded
nml
parents:
diff changeset
204 my ($cmd, $temp_head, $temp_full, $cat_header, $final_bam, @headers );
6f870ed59b6e Uploaded
nml
parents:
diff changeset
205
6f870ed59b6e Uploaded
nml
parents:
diff changeset
206 # create new concatenated bam file with all bam files.
6f870ed59b6e Uploaded
nml
parents:
diff changeset
207 if (@bamfiles > 1){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
208 my $counter = 0;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
209 $cat_header = "cat_header";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
210 while ($counter < @bamfiles) {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
211 $headers[$counter] = "bam_header".$counter;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
212 # make a header file for each bam results file
6f870ed59b6e Uploaded
nml
parents:
diff changeset
213 my $cmd = "samtools view -H $bamfiles[$counter] > $headers[$counter]";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
214 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
215 if ($counter >= 1){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
216 # only keep the @hd and @pg from first file because the final concatenated file can only have one of each (doesn't matter location)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
217 $temp_head="cut_head".$counter;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
218 # cut off first row and last row of each file (the @HD and @PG)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
219 $cmd = "tail -n +2 $headers[$counter] | head -n -1 > $temp_head";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
220 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
221 unlink $headers[$counter];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
222 # replace the old header with the new cut header in the array
6f870ed59b6e Uploaded
nml
parents:
diff changeset
223 $headers[$counter] = $temp_head;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
224 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
225 $counter++;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
226 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
227 # combine all header files
6f870ed59b6e Uploaded
nml
parents:
diff changeset
228 $cmd = "cat ".join(" ",@headers)." > $cat_header";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
229 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
230
6f870ed59b6e Uploaded
nml
parents:
diff changeset
231 $final_bam = "final_bam";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
232 # concatenate all the bam files *must include the concatenated header file created above
6f870ed59b6e Uploaded
nml
parents:
diff changeset
233 $cmd = "samtools cat -h $cat_header -o $final_bam ".join(" ",@bamfiles)." ";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
234 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
235
6f870ed59b6e Uploaded
nml
parents:
diff changeset
236 # sort the bam file so it can be indexed
6f870ed59b6e Uploaded
nml
parents:
diff changeset
237 $cmd = "samtools sort $final_bam 'last_bam'";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
238 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
239
6f870ed59b6e Uploaded
nml
parents:
diff changeset
240 # move bam file to where Galaxy expects it.
6f870ed59b6e Uploaded
nml
parents:
diff changeset
241 $cmd = "mv 'last_bam.bam' $bam_results";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
242 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
243 } else {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
244 # only one bam file, don't need to concatenate
6f870ed59b6e Uploaded
nml
parents:
diff changeset
245 move($bamfiles[0], $bam_results);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
246 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
247
6f870ed59b6e Uploaded
nml
parents:
diff changeset
248 # concatenate all pileup files
6f870ed59b6e Uploaded
nml
parents:
diff changeset
249 if (@pileupfiles > 1){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
250 $cmd = "cat ".join(" ",@pileupfiles)." > $pileup";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
251 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
252 } else {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
253 move($pileupfiles[0], $pileup);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
254 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
255
6f870ed59b6e Uploaded
nml
parents:
diff changeset
256 # perform find-replace to restore original user-specified file names
6f870ed59b6e Uploaded
nml
parents:
diff changeset
257 foreach my $gene (@genefiles){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
258 my $data = read_file($gene);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
259 $data =~ s/temp_file_name/$file_name[0]/g;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
260 write_file($gene, $data);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
261 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
262
6f870ed59b6e Uploaded
nml
parents:
diff changeset
263 foreach my $gene (@fullgenefiles){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
264 my $data = read_file($gene);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
265 $data =~ s/temp_file_name/$file_name[0]/g;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
266 write_file($gene, $data);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
267 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
268
6f870ed59b6e Uploaded
nml
parents:
diff changeset
269 # concatenate gene files with a space separating each file
6f870ed59b6e Uploaded
nml
parents:
diff changeset
270 if (@genefiles > 1){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
271 my $join = join(" <(echo) ", @genefiles);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
272 my @args = ("bash", "-c", "cat $join > $genes_results");
6f870ed59b6e Uploaded
nml
parents:
diff changeset
273 system(@args);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
274 } else {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
275 # only one gene results file
6f870ed59b6e Uploaded
nml
parents:
diff changeset
276 move($genefiles[0], $genes_results);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
277 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
278
6f870ed59b6e Uploaded
nml
parents:
diff changeset
279 # concatenate full gene results files but only keep header in first file.
6f870ed59b6e Uploaded
nml
parents:
diff changeset
280 if (@fullgenefiles >1){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
281 for (my $i= 1; $i < @fullgenefiles; $i++){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
282 # go through all files but the first one to remove headers
6f870ed59b6e Uploaded
nml
parents:
diff changeset
283 # create a temp file to save the file after header is removed
6f870ed59b6e Uploaded
nml
parents:
diff changeset
284 $temp_full = "temp_full".$i;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
285 $cmd = "tail -n +2 $fullgenefiles[$i] > $temp_full";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
286 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
287 unlink $fullgenefiles[$i];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
288 $fullgenefiles[$i] = $temp_full;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
289 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
290 $cmd = "cat ". join(" ",@fullgenefiles)." > $fullgenes_results";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
291 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
292 } else{
6f870ed59b6e Uploaded
nml
parents:
diff changeset
293 # only one full gene results file
6f870ed59b6e Uploaded
nml
parents:
diff changeset
294 move($fullgenefiles[0], $fullgenes_results);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
295 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
296
6f870ed59b6e Uploaded
nml
parents:
diff changeset
297 # concatenate full gene results files but only keep header in first file.
6f870ed59b6e Uploaded
nml
parents:
diff changeset
298 if (@scoresfiles >1){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
299 for (my $i= 1; $i < @scoresfiles; $i++){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
300 # go through all files but the first one to remove headers
6f870ed59b6e Uploaded
nml
parents:
diff changeset
301 # create a temp file to save the file after header is removed
6f870ed59b6e Uploaded
nml
parents:
diff changeset
302 $temp_full = "temp_full".$i;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
303 $cmd = "tail -n +2 $scoresfiles[$i] > $temp_full";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
304 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
305 unlink $scoresfiles[$i];
6f870ed59b6e Uploaded
nml
parents:
diff changeset
306 $scoresfiles[$i] = $temp_full;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
307 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
308 $cmd = "cat ". join(" ",@scoresfiles)." > $scores";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
309 system($cmd);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
310 } else{
6f870ed59b6e Uploaded
nml
parents:
diff changeset
311 # only one scores file
6f870ed59b6e Uploaded
nml
parents:
diff changeset
312 move($scoresfiles[0], $scores);
6f870ed59b6e Uploaded
nml
parents:
diff changeset
313 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
314
6f870ed59b6e Uploaded
nml
parents:
diff changeset
315 # cleanup srst2 output and temp files
6f870ed59b6e Uploaded
nml
parents:
diff changeset
316 foreach my $file (@fullgenefiles){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
317 unlink $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
318 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
319 foreach my $file (@genefiles){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
320 unlink $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
321 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
322 foreach my $file (@pileupfiles){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
323 unlink $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
324 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
325 foreach my $file (@bamfiles){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
326 unlink $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
327 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
328 foreach my $file (@headers){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
329 unlink $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
330 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
331 foreach my $file (@scoresfiles){
6f870ed59b6e Uploaded
nml
parents:
diff changeset
332 unlink $file;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
333 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
334 unlink $temp_head;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
335 unlink $temp_full;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
336 unlink $cat_header;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
337 unlink $final_bam;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
338
6f870ed59b6e Uploaded
nml
parents:
diff changeset
339 #get rid of symlinks
6f870ed59b6e Uploaded
nml
parents:
diff changeset
340 if ($for_read)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
341 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
342 unlink $for_read;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
343 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
344 if ($rev_read)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
345 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
346 unlink $rev_read;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
347 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
348 if ($sing_read)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
349 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
350 unlink $sing_read;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
351 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
352 if ($database)
6f870ed59b6e Uploaded
nml
parents:
diff changeset
353 {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
354 unlink $database;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
355 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
356 $exit_code = $exit_code >> 8;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
357 exit $exit_code;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
358
6f870ed59b6e Uploaded
nml
parents:
diff changeset
359 sub read_file {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
360 my ($filename) = @_;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
361
6f870ed59b6e Uploaded
nml
parents:
diff changeset
362 open my $in, '<:encoding(UTF-8)', $filename or die "Could not open '$filename' for reading $!";
6f870ed59b6e Uploaded
nml
parents:
diff changeset
363 local $/ = undef;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
364 my $all = <$in>;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
365 close $in;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
366
6f870ed59b6e Uploaded
nml
parents:
diff changeset
367 return $all;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
368 }
6f870ed59b6e Uploaded
nml
parents:
diff changeset
369
6f870ed59b6e Uploaded
nml
parents:
diff changeset
370 sub write_file {
6f870ed59b6e Uploaded
nml
parents:
diff changeset
371 my ($filename, $content) = @_;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
372
6f870ed59b6e Uploaded
nml
parents:
diff changeset
373 open my $out, '>:encoding(UTF-8)', $filename or die "Could not open '$filename' for writing $!";;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
374 print $out $content;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
375 close $out;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
376
6f870ed59b6e Uploaded
nml
parents:
diff changeset
377 return;
6f870ed59b6e Uploaded
nml
parents:
diff changeset
378 }