comparison archer.pl @ 3:3af9b7634b2d draft default tip

Uploaded
author plus
date Thu, 29 May 2014 02:32:55 -0400
parents
children
comparison
equal deleted inserted replaced
2:d5aed166429d 3:3af9b7634b2d
1 #!/usr/bin/perl
2
3 use strict;
4 use warnings;
5
6 my($i, $j, $k);
7 my $parameters = {};
8
# Print the command-line help text to STDOUT and terminate with exit status 1.
# Takes no arguments and never returns.
sub usage {
    my $help_text = join('',
        "\nUsage: $0 \n\n\t ",
        "REQUIRED \n\t ",
        "-config <config_file> \n\n",
    );
    print $help_text;
    exit(1);
}
# ---------------------------------------------------------------------------
# Main driver.
#
# 1. Parse the command line and the config file, open "<config_file>.log".
# 2. Build the target regions file.
# 3. First pass over the samples: decide which samples have usable FASTQ
#    files and collect those files into one bwa_enz alignment command.
# 4. Second pass over the samples: run every downstream step for samples
#    that were aligned, or write a summary for samples that were skipped.
#
# Every step builds a block of shell commands as a string, writes it to the
# log, and hands it to system(). Exit statuses of the commands are not
# inspected (the pipeline is best-effort, as in the original script).
# ---------------------------------------------------------------------------

if (scalar(@ARGV) == 0) {
    usage();
}

# Parse the Command Line
parse_command_line($parameters, @ARGV);

# -config is required: without it the log file name and the config file path
# below would be built from an undefined value.
if (!defined $parameters->{config_file}) {
    usage();
}

# Log File
my $log_file = $parameters->{config_file} . ".log";
# Three-argument open so that characters in the file name are never
# interpreted as part of the open mode.
unless ( open(LOG_FILE, '>', $log_file) ) {
    print "Cannot open file \"$log_file\" to write to!!\n\n";
    exit(1);    # signal the failure to the caller instead of exiting with 0
}
print LOG_FILE "config = $parameters->{config_file}\n";

# Write a block of shell commands to the log and then execute it.
my $log_and_run = sub {
    my ($command) = @_;
    print LOG_FILE $command;
    system($command);
    return;
};

# Time Stamp
my $timestamp = localtime(time);
print LOG_FILE $timestamp, "\n";

# Parse Config File
my @samples = ();
my $number_of_samples = parse_config_file(\@samples, \$parameters);
print LOG_FILE "directory = $parameters->{directory}\n\n";

my $files = {};
# Create Target Regions File
$files->{target_regions_file} = $parameters->{directory} . "/target_regions.dat";
create_target_regions_file(\$parameters, \$files, \*LOG_FILE);

my @tags = ();
my($reads, $tag);
my($fastq_file_1, $fastq_file_2);
my %proceed = ();
my %fastq_files_for = ();    # sample index -> [fastq_file_1, fastq_file_2] as resolved by decide_to_proceed
my $alignment_string = '';
my $number_of_alignment_files = 0;
my $cmd = '';

# NOTE: both loops must stay C-style loops over the file-scoped $i, because
# define_tags() reads that variable directly to pick the current sample.
for($i = 0; $i < $number_of_samples; $i++){

    # Define Tags
    define_tags(\@samples, \@tags, \$reads, \$tag, \$fastq_file_1, \$fastq_file_2, \$parameters, \*LOG_FILE);

    # Check if FASTQ Files Exist and are Non-empty
    $proceed{$i} = decide_to_proceed(\$reads, \$fastq_file_1, \$fastq_file_2);

    # decide_to_proceed() may have switched a name to its ".gz" form; remember
    # the resolved names so the second pass works on the same files.
    $fastq_files_for{$i} = [$fastq_file_1, $fastq_file_2];

    # Create String of FASTQ Files to be Aligned by bwa_enz
    create_alignment_string($proceed{$i}, \$reads, \$number_of_alignment_files, \$alignment_string, \$fastq_file_1, \$fastq_file_2);
}

# Align the reads with bwa_enz
# Would be better to align paired reads together??
$cmd = align_reads(\$parameters, $number_of_alignment_files, $alignment_string);
$log_and_run->($cmd);

for($i = 0; $i < $number_of_samples; $i++){

    # Define Tags (this resets the FASTQ names to their plain ".fastq" form)
    define_tags(\@samples, \@tags, \$reads, \$tag, \$fastq_file_1, \$fastq_file_2, \$parameters, \*LOG_FILE);

    # Define Summary File Names
    define_summary_file_names(\$tag, \$parameters, \$files);

    if ( $proceed{$i} == 1 ){

        # Restore the FASTQ names that were actually verified and aligned in
        # the first pass (possibly ".fastq.gz"); the names just rebuilt by
        # define_tags() would point at files that do not exist for gzipped input.
        ($fastq_file_1, $fastq_file_2) = @{ $fastq_files_for{$i} };

        # Convert SAM -> BAM -> BED
        define_alignments_file_names(\$reads, \@tags, \$tag, \$parameters, \$files);
        $cmd = rename_sam_files(\$reads, \$files);
        $cmd .= alignments(\$reads, \$fastq_file_1, \$fastq_file_2, \$parameters, \$files);
        $log_and_run->($cmd);

        # De-duplicate the SAM File(s)
        define_de_duplication_file_names(\$reads, \@tags, \$tag, \$parameters, \$files);
        $cmd = de_duplication(\$reads, \$fastq_file_1, \$fastq_file_2, \$parameters, \$files);
        $log_and_run->($cmd);
        # What if only reverse read exists? Don't want to de-duplicate?

        # Select On-/Off-Target Reads
        # Split marked files into two files
        define_on_and_off_target_file_names(\$reads, \@tags, \$tag, \$parameters, \$files);
        $cmd = select_on_and_off_target_reads(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);

        # Coverage and Start Sites
        define_coverage_and_start_site_file_names(\$reads, \@tags, \$parameters, \$files);
        $cmd = generate_coverage_and_start_sites(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);

        # Proceed through the rest of the pipeline using the on-target reads

        # Create Master Files - one line per read
        define_master_files_file_names(\$reads, \@tags, \$parameters, \$files);
        $cmd = generate_master_files(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);

        # Select Fusion Reads - do not count mapping to a 'novel', i.e., not in refseq, region as a fusion
        define_fusion_reads_file_names(\$reads, \@tags, \$tag, \$parameters, \$files);
        $cmd = select_fusion_reads(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);

        # Count Fusions
        define_count_fusions_file_names(\$tag, \$parameters, \$files);
        $cmd = count_fusions(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);

        # Flanking Sequences
        define_flanking_sequences_file_names(\$tag, \$parameters, \$files);
        $cmd = flanking_sequences(\$reads, \$fastq_file_1, \$fastq_file_2, \$parameters, \$files);
        $log_and_run->($cmd);

        # BAM Dedup Files
        define_bam_dedup_files_file_names(\$reads, \@tags, \$parameters, \$files);
        $cmd = bam_dedup_files(\$reads, \$files);
        $log_and_run->($cmd);

        # Consensus Sequences - Fusion and Splice (currently disabled)
        #define_consensus_sequences_file_names(\$tag, \$parameters, \$files);
        #$cmd = consensus_sequences(\$reads, \$fastq_file_1, \$fastq_file_2, \$tag, \$parameters, \$files);
        #$log_and_run->($cmd);

        # Sort SAM Files
        define_sort_sam_files_file_names(\$reads, \@tags, \$parameters, \$files);
        $cmd = sort_sam_files(\$reads, \$files);
        $log_and_run->($cmd);

        # On-target Stats
        define_on_target_stats_file_names(\$tag, \$parameters, \$files);
        $cmd = on_target_stats(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);

        # Total Molecule Counts
        define_total_molecule_counts_file_names(\$reads, \@tags, \$tag, \$parameters, \$files);
        $cmd = total_molecule_counts(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);

        # De-duplicated Molecule Counts
        define_de_deduplicated_molecule_counts_file_names(\$reads, \@tags, \$tag, \$parameters, \$files);
        $cmd = de_duplicated_molecule_counts(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);

        # All Molecule Counts
        define_all_molecule_counts_file_names(\$tag, \$parameters, \$files);
        $cmd = all_molecule_counts(\$parameters, \$files);
        $log_and_run->($cmd);

        # QC Check
        define_qc_check_file_names(\$tag, \$parameters, \$files);
        $cmd = qc_check(\$parameters, \$files);
        $log_and_run->($cmd);

        # Coverage Uniformity
        define_coverage_uniformity_file_names(\$tag, \$parameters, \$files);
        $cmd = coverage_uniformity(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);

        # Summary
        $cmd = summary(\$tag, \$parameters, \$files);
        $log_and_run->($cmd);

        # Clean Up
        $cmd = clean_up(\$reads, \$parameters, \$files);
        $log_and_run->($cmd);
    }
    else{
        # Summary for Unprocessed Sample
        $cmd = summary_for_unprocessed_sample(\$reads, \$fastq_file_1, \$fastq_file_2, \$parameters, \$files);
        $log_and_run->($cmd);
    }

    # Time Stamp
    $timestamp = localtime(time);
    print LOG_FILE $timestamp, "\n";
}

# Join Multiple Samples (currently disabled)
#$cmd = join_multiple_samples(\$parameters);
#$log_and_run->($cmd);

close(LOG_FILE);

exit;
217
# Scan the argument list for "-config <file>" and store the file name.
#
#   $parameters - hash reference; receives the key 'config_file'
#   @arguments  - command-line words to scan (a private copy; the global
#                 @ARGV is not modified)
#
# Words other than "-config" are ignored. If "-config" is the last word,
# 'config_file' is set to undef.
sub parse_command_line {
    my ($parameters, @arguments) = @_;

    while (@arguments) {
        my $flag = shift @arguments;
        next unless $flag eq "-config";
        $parameters->{config_file} = shift @arguments;
    }
}
226
227
# Read the "key=value" config file named by $$parameters_ref->{config_file}.
#
#   $samples_ref    - array reference; each "sample=<value>" line stores
#                     <value> at the next index, starting from index 0
#   $parameters_ref - reference to the parameters hash reference; every other
#                     "key=value" line is stored as $$parameters_ref->{key}
#
# Lines of length <= 1 and lines starting with '#' are skipped. Only the
# first '=' separates key from value, so values may themselves contain '='.
# Prints "number of samples = N" to STDOUT and returns N (the size of the
# samples array). Dies if the config file cannot be opened.
sub parse_config_file {
    my ($samples_ref, $parameters_ref) = @_;

    my $config_path = $$parameters_ref->{config_file};
    my $next_sample = 0;

    # Three-argument open with a lexical handle: the path is never parsed as
    # an open mode, and the handle cannot clash with other bareword handles.
    open(my $config_fh, '<', $config_path)
        or die "Can't open $config_path : $!";

    while (my $line = <$config_fh>) {
        chomp $line;
        next unless length($line) > 1;
        next if $line =~ /^\#/;

        # LIMIT of 2 keeps any further '=' characters inside the value.
        my ($key, $value) = split(/=/, $line, 2);

        if ($key eq 'sample') {
            $samples_ref->[$next_sample] = $value;
            $next_sample++;
        }
        else {
            $$parameters_ref->{$key} = $value;
        }
    }
    close($config_fh);

    my $num_samples = scalar @$samples_ref;
    print "number of samples = $num_samples\n";
    return $num_samples;
}
252
253
# Decide which target regions file the pipeline uses, creating it if needed.
#
#   $parameters_ref  - reference to the parameters hash reference
#   $files_ref       - reference to the files hash reference; on entry
#                      {target_regions_file} holds the default output path,
#                      on exit it holds the path the pipeline should use
#   $file_handle_ref - log file handle; every shell command is written to it
#                      before being executed with system()
#
# Cases ('NULL' or a missing setting means "not provided"):
#   * control and target region files both provided: concatenate them
#     (control first) into {target_regions_file}
#   * only one provided: point {target_regions_file} at that file
#   * neither provided: run create_target_regions_file.pl on the target and
#     control primer FASTA files (when they are non-empty), combine the
#     results into {target_regions_file}, then delete the intermediates
sub create_target_regions_file {
    my ($parameters_ref, $files_ref, $file_handle_ref) = @_;

    my $params = $$parameters_ref;
    my $files  = $$files_ref;

    # Log a block of shell commands and execute it; nothing to do when empty.
    my $log_and_run = sub {
        my ($command) = @_;
        return if $command eq '';
        print {$file_handle_ref} $command;
        system($command);
        return;
    };

    # A missing setting is treated like the explicit 'NULL' marker; otherwise
    # an undefined path would be interpolated into "cat" as an empty argument.
    my $control_regions = defined $params->{control_regions_file} ? $params->{control_regions_file} : 'NULL';
    my $target_regions  = defined $params->{target_regions_file}  ? $params->{target_regions_file}  : 'NULL';

    if ( $control_regions ne 'NULL' && $target_regions ne 'NULL' ) {
        $log_and_run->("cat $control_regions $target_regions > $files->{target_regions_file}\n");
        return;
    }
    if ( $target_regions ne 'NULL' ) {
        $files->{target_regions_file} = $target_regions;
        return;
    }
    if ( $control_regions ne 'NULL' ) {
        $files->{target_regions_file} = $control_regions;
        return;
    }

    # Create Target Regions File from the primer FASTA files
    my $directory       = $params->{directory};
    my $annotation_path = $params->{path} . "/archer/annotation/";
    my $target_file     = $directory . "/target_file.dat";
    my $control_file    = $directory . "/control_file.dat";

    my @primer_sets = (
        {   # Target Primers Fasta File
            primers => $params->{target_primers},
            label   => 'fusion',
            temp_1  => $directory . "/target_temp_1.dat",
            temp_2  => $directory . "/target_temp_2.dat",
            output  => $target_file,
        },
        {   # Control Primers Fasta File
            primers => $params->{control_primers},
            label   => 'housekeeping',
            temp_1  => $directory . "/control_temp_1.dat",
            temp_2  => $directory . "/control_temp_2.dat",
            output  => $control_file,
        },
    );

    for my $set (@primer_sets) {
        # -s is true only for a file that exists and is non-empty.
        next unless defined $set->{primers} && -s $set->{primers};
        $log_and_run->(
            "$params->{path}/create_target_regions_file.pl -target $set->{primers} -label $set->{label} -refseq $params->{refseq_file} -gtf_file $params->{gtf_file} -path $annotation_path -t1 $set->{temp_1} -t2 $set->{temp_2} -o $set->{output}\n"
        );
    }

    # Combine: control regions first, then target regions. Testing -s on both
    # branches lets a non-empty target file be used even when an empty
    # control file exists.
    if ( -s $control_file ) {
        my $combine = "cp $control_file $files->{target_regions_file}\n";
        if ( -s $target_file ) {
            $combine .= "cat $target_file >> $files->{target_regions_file}\n";
        }
        $log_and_run->($combine);
    }
    elsif ( -s $target_file ) {
        $log_and_run->("cp $target_file $files->{target_regions_file}\n");
    }

    # Remove the temporary outputs; -f because they may not have been created.
    my $cleanup = '';
    for my $set (@primer_sets) {
        next unless -e $set->{output};
        $cleanup .= "rm -f $set->{temp_1}\n";
        $cleanup .= "rm -f $set->{temp_2}\n";
    }
    $log_and_run->($cleanup);

    # Remove the per-primer-set region files themselves.
    $cleanup = '';
    for my $set (@primer_sets) {
        $cleanup .= "rm $set->{output}\n" if -e $set->{output};
    }
    $log_and_run->($cleanup);

    print {$file_handle_ref} "\n";
    return;
}
342
343
# Split the current sample entry into tags and derive the read layout, the
# sample tag and the FASTQ file name(s).
#
#   $samples_array_ref - array reference of sample entries from the config file
#   $tags_array_ref    - array reference; overwritten with the whitespace-
#                        separated tags of the current sample
#   $reads_ref         - scalar reference; set to 'single' (one tag) or 'paired'
#   $tag_ref           - scalar reference; set to the tag, or "tag1_tag2"
#   $fastq_file_1_ref  - scalar reference; set to "<directory>/<tag1>.fastq"
#   $fastq_file_2_ref  - scalar reference; set to "<directory>/<tag2>.fastq"
#                        for paired samples only (left untouched otherwise)
#   $parameters_ref    - reference to the parameters hash reference
#   $file_handle_ref   - log file handle
#
# NOTE: the index of the current sample is NOT a parameter; it is read from
# the file-scoped loop counter $i, which the caller must have set.
# The tag is printed to STDOUT and to the log file handle.
sub define_tags {
    my ($samples_array_ref, $tags_array_ref, $reads_ref, $tag_ref,
        $fastq_file_1_ref, $fastq_file_2_ref, $parameters_ref, $file_handle_ref) = @_;

    # split ' ' ignores leading whitespace, so " A B" yields (A, B) rather
    # than an empty first tag.
    @$tags_array_ref = split(' ', $samples_array_ref->[$i]);

    my $directory = $$parameters_ref->{directory};

    if ( scalar(@$tags_array_ref) == 1 ) {
        $$reads_ref = 'single';
        # Read the tag through the reference that was passed in, not through
        # the file-scoped @tags array.
        $$tag_ref = $tags_array_ref->[0];
        $$fastq_file_1_ref = $directory . "/" . $tags_array_ref->[0] . ".fastq";
    }
    else {
        $$reads_ref = 'paired';
        $$tag_ref = $tags_array_ref->[0] . "_" . $tags_array_ref->[1];
        $$fastq_file_1_ref = $directory . "/" . $tags_array_ref->[0] . ".fastq";
        $$fastq_file_2_ref = $directory . "/" . $tags_array_ref->[1] . ".fastq";
    }

    print $$tag_ref, "\n";

    print {$file_handle_ref} $$tag_ref, "\n";
}
373
374
# Decide whether a sample has usable FASTQ input.
#
#   $reads_ref        - scalar reference holding 'single' or 'paired'
#   $fastq_file_1_ref - scalar reference to the first FASTQ path
#   $fastq_file_2_ref - scalar reference to the second FASTQ path (paired only)
#
# For each file: if the path exists it must be non-empty; if it does not
# exist, ".gz" is appended to the caller's path variable (whether or not the
# gzipped file exists) and the gzipped file must exist and be non-empty.
# For paired reads the second file is only examined, and only modified, when
# the first file was usable.
#
# Returns 1 when all required files are usable, 0 otherwise.
sub decide_to_proceed {
    my ($reads_ref, $fastq_file_1_ref, $fastq_file_2_ref) = @_;

    # Check one FASTQ path, falling back to its ".gz" form when the plain
    # file does not exist. Returns 1 if a non-empty file was found, else 0.
    my $is_usable = sub {
        my ($path_ref) = @_;
        if ( -e $$path_ref ) {
            return ( -s $$path_ref ) ? 1 : 0;
        }
        $$path_ref .= ".gz";    # See if the file exists in gzipped form
        return ( -e $$path_ref && -s $$path_ref ) ? 1 : 0;
    };

    my $layout = $$reads_ref;

    if ( $layout eq 'single' ) {
        return $is_usable->($fastq_file_1_ref);
    }
    if ( $layout eq 'paired' ) {
        # Short-circuit: the second file is left untouched when the first fails.
        return 0 unless $is_usable->($fastq_file_1_ref);
        return $is_usable->($fastq_file_2_ref);
    }
    return 0;
}
437
438
# Add the FASTQ file(s) of one sample to the space-separated list of files
# that will be aligned in a single bwa_enz run.
#
#   $proceed_value                 - 1 if the sample should be aligned
#   $reads_ref                     - scalar reference: 'single' or 'paired'
#   $number_of_alignment_files_ref - scalar reference: running file count
#   $alignment_string_ref          - scalar reference: running file list
#   $fastq_file_1_ref              - scalar reference: first FASTQ path
#   $fastq_file_2_ref              - scalar reference: second FASTQ path
#
# Nothing is changed unless $proceed_value is 1. While the count is still 0
# the list is replaced by the first file; afterwards files are appended.
sub create_alignment_string {
    my ($proceed_value, $reads_ref, $count_ref, $list_ref, $first_ref, $second_ref) = @_;

    return unless $proceed_value == 1;

    my $separator_and_first = " " . $$first_ref;
    if ( $$count_ref == 0 ) {
        $$list_ref = $$first_ref;
    }
    else {
        $$list_ref .= $separator_and_first;
    }
    $$count_ref++;

    return unless $$reads_ref eq 'paired';

    $$list_ref .= " " . $$second_ref;
    $$count_ref++;
    return;
}
461
462
# Build the shell command block that aligns all collected FASTQ files.
#
#   $parameters_ref                  - reference to the parameters hash
#                                      reference (uses directory, reference_file)
#   $number_of_alignment_files_value - number of FASTQ files to align
#   $alignment_string_value          - space-separated list of FASTQ files
#
# Returns the command text: always the "echo" line, followed by the bwa_enz
# line only when there is at least one file to align.
sub align_reads {
    my ($parameters_ref, $file_count, $file_list) = @_;

    my @commands = ("echo Align Reads");
    if ( $file_count > 0 ) {
        my $params = $$parameters_ref;
        push @commands,
            "bwa_enz mem -Q 0 -m -D $params->{directory} $params->{reference_file} $file_list";
    }
    return join("\n", @commands) . "\n";
}
473
474
# Build the shell commands that move the aligner's SAM output to the
# pipeline's "full" SAM file names.
#
#   $reads_ref - scalar reference: 'single' or 'paired'
#   $files_ref - reference to the files hash reference (uses
#                sam_file_N_orig and sam_file_N_full)
#
# Returns the command text; the second "mv" is emitted for paired reads only.
sub rename_sam_files {
    my ($reads_ref, $files_ref) = @_;

    my $files = $$files_ref;
    my @mates = ( $$reads_ref eq 'paired' ) ? (1, 2) : (1);

    my @commands = ("echo Rename SAM Files");
    for my $mate (@mates) {
        my $from = $files->{"sam_file_${mate}_orig"};
        my $to   = $files->{"sam_file_${mate}_full"};
        push @commands, "mv $from $to";
    }
    return join("\n", @commands) . "\n";
}
485
486
# Build the shell commands that filter the full SAM file(s) and convert them
# SAM -> BAM -> BED, ending with one combined BED file.
#
#   $reads_ref        - scalar reference: 'single' or 'paired'
#   $fastq_file_1_ref - unused here (kept for the caller's argument list)
#   $fastq_file_2_ref - unused here (kept for the caller's argument list)
#   $parameters_ref   - unused here (kept for the caller's argument list)
#   $files_ref        - reference to the files hash reference
#
# samtools view options used: -S input is SAM, -b output is BAM, -h print the
# header in the SAM output; "-q 40" is passed through unchanged.
#
# Returns the command text. Read 1 is always converted. For 'single' its BED
# file is moved to bed_file_combined; for 'paired' read 2 is converted too,
# both BED files are concatenated into bed_file_combined and then removed.
sub alignments {
    my ($reads_ref, $fastq_file_1_ref, $fastq_file_2_ref, $parameters_ref, $files_ref) = @_;

    my $files  = $$files_ref;
    my $layout = $$reads_ref;

    # Commands converting the SAM file of one mate to filtered SAM, BAM and BED.
    my $convert_mate = sub {
        my ($mate) = @_;
        my $sam_full = $files->{"sam_file_${mate}_full"};
        my $sam      = $files->{"sam_file_${mate}"};
        my $bam      = $files->{"bam_file_${mate}"};
        my $bed      = $files->{"bed_file_${mate}_orig"};
        return (
            "samtools view -Shq 40 $sam_full > $sam",
            "samtools view -bS $sam > $bam",
            "bamToBed -i $bam > $bed",
        );
    };

    my @commands = ( "echo Alignments", $convert_mate->(1) );

    if ( $layout eq 'single' ) {
        push @commands, "mv $files->{bed_file_1_orig} $files->{bed_file_combined}";
    }
    elsif ( $layout eq 'paired' ) {
        push @commands,
            $convert_mate->(2),
            "cat $files->{bed_file_1_orig} $files->{bed_file_2_orig} > $files->{bed_file_combined}",
            "rm $files->{bed_file_1_orig}",
            "rm $files->{bed_file_2_orig}";
    }

    return join("\n", @commands) . "\n";
}
517
518
# Build the shell commands that de-duplicate the reads of one sample.
#
#   $reads_ref        - scalar reference: 'single' or 'paired'
#   $fastq_file_1_ref - scalar reference: first FASTQ path
#   $fastq_file_2_ref - scalar reference: second FASTQ path
#   $parameters_ref   - reference to the parameters hash reference (uses path)
#   $files_ref        - reference to the files hash reference
#
# Returns the command text: the combined BED file is sorted on column 4 and
# piped into dedup_pipeline.sh (with "-2 <second FASTQ>" for anything other
# than 'single'), then de_dup_2_hash.pl is run on the filtered and full SAM
# files of read 1, and of read 2 as well for 'paired'.
sub de_duplication {
    my ($reads_ref, $fastq_file_1_ref, $fastq_file_2_ref, $parameters_ref, $files_ref) = @_;

    my $files   = $$files_ref;
    my $scripts = $$parameters_ref->{path};
    my $layout  = $$reads_ref;

    my $fastq_options = "-f $$fastq_file_1_ref";
    $fastq_options .= " -2 $$fastq_file_2_ref" unless $layout eq 'single';

    my @commands = (
        "echo De-duplication",
        "sort -k4,4 $files->{bed_file_combined}|$scripts/dedup_pipeline.sh -p $scripts -b /dev/stdin $fastq_options > $files->{dedup_file}",
    );

    my @mates = ( $layout eq 'paired' ) ? (1, 2) : (1);
    for my $mate (@mates) {
        for my $suffix ('', '_full') {
            my $sam_in  = $files->{"sam_file_${mate}${suffix}"};
            my $sam_out = $files->{"sam_dedup_file_${mate}${suffix}"};
            push @commands,
                "$scripts/de_dup_2_hash.pl -dedup $files->{dedup_file} -sam $sam_in -o $sam_out";
        }
    }

    return join("\n", @commands) . "\n";
}
541
542
# Build the shell commands that split reads into on-target and off-target
# SAM files, first for all reads and then for the de-duplicated reads.
#
#   $reads_ref      - scalar reference: 'single' or 'paired'
#   $parameters_ref - reference to the parameters hash reference
#                     (uses path and directory)
#   $files_ref      - reference to the files hash reference
#
# Returns the command text. For each of the two read sets the steps are:
#   1. per mate: create a master file from the SAM file, join it, convert it
#      to a BED file with one entry per read, reduce each entry to a single
#      point, and intersect those points with the target regions
#      (the single-point step assumes, per the original author's note, that
#      target regions do not overlap)
#   2. paired only: concatenate the two intersect files
#   3. "alone" on-/off-target SAM files per mate, from that mate's own
#      intersect file
#   4. paired only: "either" on-/off-target SAM files per mate, from the
#      combined intersect file (at least one of R1/R2 on-target). This reuses
#      and overwrites the mate's "marked" SAM file from step 3.
sub select_on_and_off_target_reads {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;

    my $params    = $$parameters_ref;
    my $files     = $$files_ref;
    my $scripts   = $params->{path};
    my $layout    = $$reads_ref;
    my $is_paired = ( $layout eq 'paired' );
    my @mates     = $is_paired ? (1, 2) : (1);

    my @commands = (
        "echo Select On- and Off-target Reads",
        # Create BED file of target regions
        "$scripts/convert_target_regions_to_bed.pl -t $files->{target_regions_file} -o $files->{target_regions_bed_file}",
    );

    # '' selects the file keys for all reads, '_dedup' the keys for the
    # de-duplicated reads; the commands are otherwise identical.
    for my $set ('', '_dedup') {

        # Step 1: master file -> BED -> single points -> intersect, per mate.
        for my $mate (@mates) {
            my $sam       = $files->{"sam${set}_file_${mate}"};
            my $prejoin   = $files->{"full_master${set}_prejoin_file_${mate}"};
            my $master    = $files->{"full_master${set}_file_${mate}"};
            my $bed       = $files->{"bed${set}_file_${mate}"};
            my $points    = $files->{"bed_points${set}_file_${mate}"};
            my $intersect = $files->{"intersect${set}_file_${mate}"};
            push @commands,
                "$scripts/generate_master_file_without_annotation.pl -sam $sam -o $prejoin",
                "$scripts/join_master_file.pl -master $prejoin -o $master",
                "$scripts/convert_master_file_to_bed.pl -master $master -read $layout -tag $mate -o $bed",
                "$scripts/convert_bed_to_single_points.pl -b $bed -read $layout -tag $mate -o $points",
                "intersectBed -a $files->{target_regions_bed_file} -b $points -wa -wb > $intersect";
        }

        # Step 2: combined intersect file.
        my $combined_key = "intersect${set}_file_combined";
        if ($is_paired) {
            my $first  = $files->{"intersect${set}_file_1"};
            my $second = $files->{"intersect${set}_file_2"};
            push @commands, "cat $first $second > $files->{$combined_key}";
        }

        # Commands that extract the on-target reads of one mate using the
        # given intersect file, mark them, and grep out the off-target rest.
        # $kind is '_alone' or '' and selects the output file keys.
        my $split_on_off = sub {
            my ($mate, $intersect_key, $kind) = @_;
            my $sam    = $files->{"sam${set}_file_${mate}"};
            my $on     = $files->{"sam${set}_on_target${kind}_file_${mate}"};
            my $marked = $files->{"sam${set}_file_${mate}_marked"};
            my $off    = $files->{"sam${set}_off_target${kind}_file_${mate}"};
            return (
                "$scripts/generate_on_target_sam_files_hash.pl -sam $sam -i $files->{$intersect_key} -on $on",
                "$scripts/mark_on_target_reads.pl -sam $sam -on $on -temp_dir $params->{directory} -o $marked",
                "grep -v 'ON_TARGET' $marked > $off",
            );
        };

        # Step 3: On-/Off-target Alone, per mate.
        for my $mate (@mates) {
            push @commands, $split_on_off->($mate, "intersect${set}_file_${mate}", '_alone');
        }

        # Step 4: On-/Off-target Either, per mate (paired only).
        if ($is_paired) {
            for my $mate (@mates) {
                push @commands, $split_on_off->($mate, $combined_key, '');
            }
        }
    }

    return join("\n", @commands) . "\n";
}
626
627
# Build the shell commands that run generateHistAndStartSiteInfo.sh on the
# de-duplicated SAM file(s).
#
#   $reads_ref      - scalar reference: 'single' or 'paired'
#   $parameters_ref - reference to the parameters hash reference (uses path,
#                     reference_file, reference_file_index)
#   $files_ref      - reference to the files hash reference
#
# Returns the command text: one script invocation for read 1, plus one for
# read 2 when the reads are 'paired'.
sub generate_coverage_and_start_sites {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;

    my $params  = $$parameters_ref;
    my $files   = $$files_ref;
    my $scripts = $params->{path};
    my @mates   = ( $$reads_ref eq 'paired' ) ? (1, 2) : (1);

    my @commands = ("echo Generate Coverage and Start Sites");
    for my $mate (@mates) {
        my $sam         = $files->{"sam_dedup_file_${mate}"};
        my $start_sites = $files->{"start_site_dedup_file_${mate}"};
        my $coverage    = $files->{"coverage_dedup_file_${mate}"};
        push @commands,
            "bash $scripts/generateHistAndStartSiteInfo.sh $sam $params->{reference_file} $params->{reference_file_index} $start_sites $coverage $scripts";
    }

    return join("\n", @commands) . "\n";
}
642
643
# Build the shell commands that create annotated master files (one line per
# read) from the de-duplicated on-target SAM file(s).
#
#   $reads_ref      - scalar reference: 'single' or 'paired'
#   $parameters_ref - reference to the parameters hash reference
#                     (uses path and gtf_file)
#   $files_ref      - reference to the files hash reference
#
# Returns the command text. Per mate: generate the un-annotated master file,
# annotate it with annotate.py, then join it. For 'single' reads mate 1 is
# read from the "alone" on-target SAM file; otherwise from the "either"
# on-target SAM file. Mate 2 is processed for 'paired' reads only.
sub generate_master_files {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;

    my $params  = $$parameters_ref;
    my $files   = $$files_ref;
    my $scripts = $params->{path};
    my $layout  = $$reads_ref;

    # Input SAM file per mate.
    my %sam_for = (
        1 => ( $layout eq 'single' )
            ? $files->{sam_dedup_on_target_alone_file_1}
            : $files->{sam_dedup_on_target_file_1},
    );
    $sam_for{2} = $files->{sam_dedup_on_target_file_2} if $layout eq 'paired';

    my @commands = ("echo Generate Master Files");
    for my $mate (sort keys %sam_for) {
        my $bare    = $files->{"master_dedup_no_annotation_file_${mate}"};
        my $prejoin = $files->{"master_dedup_prejoin_file_${mate}"};
        my $master  = $files->{"master_dedup_file_${mate}"};
        push @commands,
            "$scripts/generate_master_file_without_annotation.pl -sam $sam_for{$mate} -o $bare",
            "python $scripts/archer/annotation/annotate.py --gtf_file $params->{gtf_file} --coordinate_file $bare --outfile $prejoin --chromosome_indices 1,1 --coordinate_indices 5,6",
            "$scripts/join_master_file.pl -master $prejoin -o $master";
    }

    return join("\n", @commands) . "\n";
}
672
# Build the shell commands that run select_fusion_reads.pl on the master
# file(s) and produce the sample-level fusion and splice read files.
#
#   $reads_ref      - scalar reference: 'single' or 'paired'
#   $parameters_ref - reference to the parameters hash reference (uses path)
#   $files_ref      - reference to the files hash reference
#
# Returns the command text. Read 1 is always processed. For 'single' reads
# its fusion and splice files are copied to the sample-level files; for
# 'paired' reads read 2 is processed as well and the per-read files are
# concatenated into the sample-level files.
sub select_fusion_reads {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;

    my $files   = $$files_ref;
    my $scripts = $$parameters_ref->{path};
    my $layout  = $$reads_ref;

    # select_fusion_reads.pl invocation for one mate.
    my $select_for = sub {
        my ($mate) = @_;
        my $master       = $files->{"master_dedup_file_${mate}"};
        my $one_segment  = $files->{"one_segment_reads_file_${mate}"};
        my $splice       = $files->{"splice_reads_file_${mate}"};
        my $fusion       = $files->{"fusion_reads_file_${mate}"};
        my $multi_fusion = $files->{"multi_fusion_reads_file_${mate}"};
        return "$scripts/select_fusion_reads.pl -master $master -tag $mate -o1 $one_segment -os $splice -of $fusion -omf $multi_fusion";
    };

    my @commands = ( "echo Select Fusion Reads", $select_for->(1) );

    if ( $layout eq 'single' ) {
        push @commands,
            "cp $files->{fusion_reads_file_1} $files->{fusion_reads_file}",
            "cp $files->{splice_reads_file_1} $files->{splice_reads_file}";
    }

    if ( $layout eq 'paired' ) {
        push @commands,
            $select_for->(2),
            "cat $files->{fusion_reads_file_1} $files->{fusion_reads_file_2} > $files->{fusion_reads_file}",
            "cat $files->{splice_reads_file_1} $files->{splice_reads_file_2} > $files->{splice_reads_file}";
    }

    return join("\n", @commands) . "\n";
}
694
# Count Fusions and Splice Events
#
# Builds three commands:
#   1. count_fusions.pl on the fusion reads (with -limit 10 -min_occ 5),
#   2. count_fusions.pl on the splice reads (no -limit / -min_occ, so the
#      script's defaults apply and all splices are reported),
#   3. add_splice_to_fusion_counts.pl to add splice evidence to the fusion
#      counts.
#
# Arguments:
#   reference to the read type string (passed through as -read)
#   reference to the parameters hash reference (uses {path}, {gtf_file})
#   reference to the files hash reference
# Returns the commands as one string, one command per line.
sub count_fusions {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;
    my $params = $$parameters_ref;
    my $files  = $$files_ref;

    # Pieces shared by both count_fusions.pl invocations
    my $counter = "$params->{path}/count_fusions.pl -t $files->{target_regions_file}";
    my $shared  = "-min 30 -gtf $params->{gtf_file} -read $$reads_ref";

    my @commands = (
        "echo Count Fusions and Splice Events",

        # Each type of fusion pair (Gene A Exon X with Gene B Exon Y)
        "$counter -fr $files->{fusion_reads_file} $shared -limit 10 -min_occ 5"
            . " -ob $files->{fusion_counts_bare_file} -o $files->{fusion_counts_file}",

        # Each type of splice pair
        "$counter -fr $files->{splice_reads_file} $shared"
            . " -ob $files->{splice_counts_bare_file} -o $files->{splice_counts_file}",

        # Splice evidence added to the fusion counts
        "$params->{path}/add_splice_to_fusion_counts.pl"
            . " -fcb $files->{fusion_counts_bare_file} -scb $files->{splice_counts_bare_file}"
            . " -o $files->{fusion_counts_with_splice_bare_file}"
            . " -om $files->{fusion_counts_with_splice_bare_file_machine}",
    );

    return join("\n", @commands) . "\n";
}
712
713
# Flanking Sequences
#
# Builds two flanking_sequences.pl commands, one for the fusion events and
# one for the splice events.  Read type 'single' passes only -fastq_1; any
# other read type passes both -fastq_1 and -fastq_2.
#
# Arguments:
#   reference to the read type string
#   reference to the FASTQ file name of read 1
#   reference to the FASTQ file name of read 2 (only read when not 'single')
#   reference to the parameters hash reference (uses {path})
#   reference to the files hash reference
# Returns the commands as one string, one command per line.
sub flanking_sequences {
    my ($reads_ref, $fastq_file_1_ref, $fastq_file_2_ref, $parameters_ref, $files_ref) = @_;
    my $files = $$files_ref;

    my $fastq_args = "-fastq_1 $$fastq_file_1_ref";
    $fastq_args .= " -fastq_2 $$fastq_file_2_ref" unless $$reads_ref eq 'single';

    # [ counts key, reads key, output key ] for each kind of event
    my @events = (
        [qw(fusion_counts_bare_file fusion_reads_file flanking_sequences_file)],
        [qw(splice_counts_bare_file splice_reads_file flanking_splice_sequences_file)],
    );

    my $cmd_line = "echo Flanking Sequences\n";
    for my $event (@events) {
        my ($counts_key, $reads_key, $output_key) = @$event;
        $cmd_line .= "$$parameters_ref->{path}/flanking_sequences.pl"
            . " -fcb $files->{$counts_key} -fr $files->{$reads_key}"
            . " -read $$reads_ref $fastq_args -o $files->{$output_key}\n";
    }
    return $cmd_line;
}
732
733
# BAM Dedup Files
#
# Builds the samtools commands that convert each de-duplicated SAM file to
# BAM, sort it and index the sorted BAM.  Read 1 is always processed; read 2
# is processed only for paired reads.
#
# Arguments:
#   reference to the read type string
#   reference to the files hash reference
# Returns the commands as one string, one command per line.
sub bam_dedup_files {
    my ($reads_ref, $files_ref) = @_;
    my $files = $$files_ref;

    my @read_numbers = (1);
    push @read_numbers, 2 if $$reads_ref eq 'paired';

    my @commands = ("echo BAM Dedup Files\n");
    for my $n (@read_numbers) {
        my $sam    = $files->{"sam_dedup_file_$n"};
        my $bam    = $files->{"bam_dedup_file_$n"};
        my $prefix = $files->{"bam_dedup_sorted_file_${n}_name"};
        my $sorted = $files->{"bam_dedup_sorted_file_$n"};

        push @commands,
            "samtools view -bS $sam > $bam\n",
            "samtools sort $bam $prefix\n",
            "samtools index $sorted\n";
    }
    return join('', @commands);
}
749
750
# Consensus Sequences
#
# Builds two consensus_pipeline/batch_pipeline.sh commands (fusion, then
# splice) followed by one pair_fusion_and_splice_sequences.pl command.
# Read type 'single' passes only the read 1 BAM (-1) and FASTQ (-a); any
# other read type also passes the read 2 BAM (-2) and FASTQ (-b).
#
# Arguments:
#   reference to the read type string
#   reference to the FASTQ file name of read 1
#   reference to the FASTQ file name of read 2 (only read when not 'single')
#   reference to the sample tag (passed as -tag)
#   reference to the parameters hash reference (uses {path}, {reference_file})
#   reference to the files hash reference
# Returns the commands as one string, one command per line.
sub consensus_sequences {
    my ($reads_ref, $fastq_file_1_ref, $fastq_file_2_ref, $tag_ref, $parameters_ref, $files_ref) = @_;
    my $params       = $$parameters_ref;
    my $files        = $$files_ref;
    my $pipeline_dir = "$params->{path}/consensus_pipeline";

    # Input arguments depend on whether there is a second read
    my $bam_args   = "-1 $files->{bam_dedup_sorted_file_1}";
    my $fastq_args = "-a $$fastq_file_1_ref";
    if ($$reads_ref ne 'single') {
        $bam_args   .= " -2 $files->{bam_dedup_sorted_file_2}";
        $fastq_args .= " -b $$fastq_file_2_ref";
    }

    # Fusion and Splice Consensus Sequences
    my $cmd_line = "echo Consensus Sequences\n";
    for my $event ([ 'fusion', 'flanking_sequences_file' ], [ 'splice', 'flanking_splice_sequences_file' ]) {
        my ($kind, $flanking_key) = @$event;
        my $std_out = $files->{"consensus_${kind}_std_out_file"};
        my $std_err = $files->{"consensus_${kind}_std_err_file"};

        $cmd_line .= "bash $pipeline_dir/batch_pipeline.sh $bam_args $fastq_args"
            . " -f $params->{reference_file} -r $files->{$flanking_key}"
            . " -d $pipeline_dir/ -s $std_out -e $std_err\n";
    }

    # Pair Fusion Candidates with Splice Sequences
    $cmd_line .= "$params->{path}/pair_fusion_and_splice_sequences.pl"
        . " -fc $files->{fusion_counts_bare_file} -sc $files->{splice_counts_bare_file}"
        . " -tag $$tag_ref -o $files->{fusion_and_splice_consensus_file}\n";

    return $cmd_line;
}
775
776
# Sort SAM Files
#
# Builds two kinds of commands:
#   * "sort -k1,1" of the on/off-target SAM files (plain and de-duplicated)
#     into their *_linux_sorted counterparts, and
#   * a samtools view / sort -n / view round trip that turns each "full" SAM
#     file (plain and de-duplicated) into its *_full_sorted SAM file.
# For single reads only the "alone" files of read 1 and the full files of
# read 1 are handled; paired reads add the non-alone files of read 1 and
# everything for read 2.
#
# Arguments:
#   reference to the read type string
#   reference to the files hash reference
# Returns the commands as one string, one command per line.
sub sort_sam_files {
    my ($reads_ref, $files_ref) = @_;
    my $files      = $$files_ref;
    my @categories = qw(on_target off_target dedup_on_target dedup_off_target);

    # "sort -k1,1" from the file stored under $key to "<key>_linux_sorted"
    my $linux_sort = sub {
        my $key        = shift;
        my $sorted_key = $key . '_linux_sorted';
        return "sort -k1,1 $files->{$key} > $files->{$sorted_key}\n";
    };

    # SAM -> BAM -> name-sorted BAM -> name-sorted SAM for one full file;
    # $stem is e.g. "file_1" or "dedup_file_2"
    my $name_sort = sub {
        my $stem = shift;
        my ($sam, $bam) = ("sam_${stem}_full", "bam_${stem}_full");
        my ($prefix, $bam_sorted, $sam_sorted) = ("${bam}_prefix", "${bam}_sorted", "${sam}_sorted");
        return "samtools view -bS $files->{$sam} > $files->{$bam}\n"
             . "samtools sort -n $files->{$bam} $files->{$prefix}\n"
             . "samtools view -h $files->{$bam_sorted} > $files->{$sam_sorted}\n";
    };

    my @steps = map { $linux_sort->("sam_${_}_alone_file_1") } @categories;
    push @steps, map { $name_sort->($_) } qw(file_1 dedup_file_1);

    # TODO (from the original): make this possible to be reverse only too
    if ($$reads_ref eq 'paired') {
        for my $group (qw(file_1 alone_file_2 file_2)) {
            push @steps, map { $linux_sort->("sam_${_}_$group") } @categories;
        }
        push @steps, map { $name_sort->($_) } qw(file_2 dedup_file_2);
    }

    return join('', "echo Sort SAM Files\n", @steps);
}
818
819
# On-target Stats
#
# When the target regions file exists and is non-empty at the time this
# sub runs, builds (for the plain and then the de-duplicated data):
#   * on_target_counts.pl commands counting on- and off-target reads, and
#   * on_target_stats.pl commands producing the reads-per-exon and
#     housekeeping files (the de-duplicated run also gets -om).
# Read type 'single' uses only the "alone" files of read 1; any other read
# type uses the alone and non-alone files of both reads.  When the target
# regions file is missing or empty only the echo line is returned.
#
# Arguments:
#   reference to the read type string
#   reference to the parameters hash reference (uses {path})
#   reference to the files hash reference
# Returns the commands as one string, one command per line.
sub on_target_stats {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;
    my $files    = $$files_ref;
    my $cmd_line = "echo On-target Stats\n";

    my $targets = $files->{target_regions_file};
    return $cmd_line unless -e $targets && -s $targets;

    # TODO (from the original): make this possible to be reverse only too
    my $is_single    = $$reads_ref eq 'single';
    my @read_numbers = $is_single ? (1)        : (1, 2);
    my @groupings    = $is_single ? ('_alone') : ('_alone', '');
    my $path         = $$parameters_ref->{path};

    # Counts of on- and off-target reads
    for my $dedup ('', 'dedup_') {
        my @inputs;
        for my $grouping (@groupings) {
            for my $side (qw(on off)) {
                for my $n (@read_numbers) {
                    my $key = "sam_${dedup}${side}_target${grouping}_file_${n}_linux_sorted";
                    push @inputs, "-${side}${grouping}_${n} $files->{$key}";
                }
            }
        }
        my $output_key = "on_target_${dedup}file";
        $cmd_line .= "$path/on_target_counts.pl " . join(' ', @inputs) . " -o $files->{$output_key}\n";
    }

    # On-target Stats and Housekeeping Stats
    for my $dedup ('', 'dedup_') {
        my $intersects = join ' ', map {
            my $key = "intersect_${dedup}file_$_";
            "-i$_ $files->{$key}";
        } @read_numbers;
        my $exon_key         = "reads_per_exon_${dedup}file";
        my $housekeeping_key = "housekeeping_${dedup}file";

        my $command = "$path/on_target_stats.pl -t $targets $intersects"
            . " -o $files->{$exon_key} -oh $files->{$housekeeping_key}";
        # Only the de-duplicated run writes a machine-readable file
        $command .= " -om $files->{reads_per_exon_dedup_file_machine}" if $dedup;
        $cmd_line .= "$command\n";
    }

    return $cmd_line;
}
853
# Total Molecule Counts
#
# Builds the count_reads_and_alignments_v2.py command for the name-sorted
# full SAM file(s).  Read type 'single' passes only -r1; any other read
# type passes -r1 and -r2.
#
# Arguments:
#   reference to the read type string
#   reference to the parameters hash reference (uses {path})
#   reference to the files hash reference
# Returns the commands as one string, one command per line.
sub total_molecule_counts {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;
    my $files = $$files_ref;

    my @inputs = ("-r1 $files->{sam_file_1_full_sorted}");
    push @inputs, "-r2 $files->{sam_file_2_full_sorted}" if $$reads_ref ne 'single';

    return "echo Total Molecule Counts\n"
         . "python $$parameters_ref->{path}/count_reads_and_alignments_v2.py "
         . join(' ', @inputs)
         . " -o $files->{total_and_aligned_molecule_count_per_tag_file}\n";
}
867
868
# De-duplicated Molecule Counts
#
# Builds the count_reads_and_alignments_v2.py command for the name-sorted
# de-duplicated full SAM file(s).  Read type 'single' passes only -r1; any
# other read type passes -r1 and -r2.
#
# Arguments:
#   reference to the read type string
#   reference to the parameters hash reference (uses {path})
#   reference to the files hash reference
# Returns the commands as one string, one command per line.
sub de_duplicated_molecule_counts {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;
    my $files = $$files_ref;

    my @inputs = ("-r1 $files->{sam_dedup_file_1_full_sorted}");
    push @inputs, "-r2 $files->{sam_dedup_file_2_full_sorted}" if $$reads_ref ne 'single';

    return "echo De-duplicated Molecule Counts\n"
         . "python $$parameters_ref->{path}/count_reads_and_alignments_v2.py "
         . join(' ', @inputs)
         . " -o $files->{unique_and_aligned_molecule_count_per_tag_file}\n";
}
882
883
# All Molecule Counts
#
# Builds the counts_2.pl command that takes the total and unique molecule
# count files and the on-target count files and writes the counts file and
# its machine-readable counterpart.
#
# Arguments:
#   reference to the parameters hash reference (uses {path})
#   reference to the files hash reference
# Returns the commands as one string, one command per line.
sub all_molecule_counts {
    my ($parameters_ref, $files_ref) = @_;
    my $files = $$files_ref;

    # [ option, files key ] in command-line order
    my @options = (
        [ '-tamc', 'total_and_aligned_molecule_count_per_tag_file' ],
        [ '-uamc', 'unique_and_aligned_molecule_count_per_tag_file' ],
        [ '-otd',  'on_target_dedup_file' ],
        [ '-ot',   'on_target_file' ],
        [ '-o',    'counts_file' ],
        [ '-om',   'counts_file_machine' ],
    );
    my $arguments = join ' ', map {
        my ($option, $key) = @$_;
        "$option $files->{$key}";
    } @options;

    return "echo All Molecule Counts\n"
         . "$$parameters_ref->{path}/counts_2.pl $arguments\n";
}
891
# QC Check
#
# Builds the qc_check.pl command that reads the de-duplicated housekeeping
# file and writes the QC filter file and its machine-readable counterpart.
#
# Arguments:
#   reference to the parameters hash reference (uses {path})
#   reference to the files hash reference
# Returns the commands as one string, one command per line.
sub qc_check {
    my ($parameters_ref, $files_ref) = @_;
    my $files = $$files_ref;

    my $command = sprintf(
        "%s/qc_check.pl -hd %s -o %s -om %s\n",
        $$parameters_ref->{path},
        $files->{housekeeping_dedup_file},
        $files->{qc_filter_file},
        $files->{qc_filter_file_machine},
    );
    return "echo QC Check\n" . $command;
}
899
# Coverage Uniformity
#
# Builds the coverage_uniformity.pl command that reads the de-duplicated
# housekeeping file and writes the coverage uniformity file and its
# machine-readable counterpart.
#
# Arguments:
#   reference to the read type string (passed as -r)
#   reference to the parameters hash reference (uses {path})
#   reference to the files hash reference
# Returns the commands as one string, one command per line.
sub coverage_uniformity {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;
    my $files = $$files_ref;

    my @words = (
        "$$parameters_ref->{path}/coverage_uniformity.pl",
        '-hd', $files->{housekeeping_dedup_file},
        '-r',  $$reads_ref,
        '-o',  $files->{coverage_uniformity_file},
        '-om', $files->{coverage_uniformity_file_machine},
    );
    return "echo Coverage Uniformity\n" . join(' ', @words) . "\n";
}
908
909
# Summary
#
# Builds the summary.pl command and the "cat ... >>" commands that append
# the QC filter, counts, reads-per-exon (de-duplicated) and fusion-counts
# files to the summary file, then does the same with the machine-readable
# files for the machine-readable summary.  A reads-per-exon file is only
# appended when it exists and is non-empty at the time this sub runs.
#
# Left out on purpose, as in the original where these commands are
# commented out: appending the coverage uniformity files and the
# enscript / ps2pdf conversion of the summary.
#
# Arguments:
#   reference to the sample tag (passed as -s)
#   reference to the parameters hash reference (uses {path})
#   reference to the files hash reference
# Returns the commands as one string, one command per line.
sub summary {
    my ($tag_ref, $parameters_ref, $files_ref) = @_;
    my $files = $$files_ref;

    my $cmd_line = "echo Summary\n"
        . "$$parameters_ref->{path}/summary.pl -s $$tag_ref"
        . " -o $files->{summary_file} -om $files->{summary_file_machine}\n";

    # '' = human-readable summary, '_machine' = Machine Readable Summary File
    for my $suffix ('', '_machine') {
        my $summary_key = "summary_file$suffix";
        my $exon_key    = "reads_per_exon_dedup_file$suffix";

        my @sections = ("qc_filter_file$suffix", "counts_file$suffix");
        push @sections, $exon_key
            if -e $files->{$exon_key} && -s $files->{$exon_key};
        push @sections, "fusion_counts_with_splice_bare_file$suffix";

        $cmd_line .= "cat $files->{$_} >> $files->{$summary_key}\n" for @sections;
    }

    return $cmd_line;
}
944
945
# Clean Up
#
# Builds the commands that tidy the two flanking sequences files with
# clean_up_flanking_sequences.pl and then "rm" the intermediate files, one
# rm per file, in a fixed order.  The second list is only removed for
# paired reads.
#
# Not removed, as in the original where these rm lines are commented out:
# the ".tmp~" companions of the flanking sequences files, the per-read
# total/unique molecule count files and the PostScript summary file.
#
# Arguments:
#   reference to the read type string
#   reference to the parameters hash reference (uses {path})
#   reference to the files hash reference
# Returns the commands as one string, one command per line.
sub clean_up {
    my ($reads_ref, $parameters_ref, $files_ref) = @_;
    my $files  = $$files_ref;
    my $tidier = "$$parameters_ref->{path}/clean_up_flanking_sequences.pl";

    # Keys of the files removed for every read type
    my @always_removed = qw(
        sam_file_1_full
        bam_file_1
        bed_file_1
        bed_file_combined
        sam_dedup_file_1
        sam_dedup_file_1_full
        bam_dedup_file_1
        full_master_prejoin_file_1
        full_master_file_1
        full_master_dedup_prejoin_file_1
        full_master_dedup_file_1
        bed_points_file_1
        bed_points_dedup_file_1
        intersect_file_1
        intersect_dedup_file_1
        sam_on_target_alone_file_1
        sam_off_target_alone_file_1
        master_dedup_no_annotation_file_1
        master_dedup_prejoin_file_1
        master_dedup_file_1
        fusion_reads_file
        fusion_counts_file
        fusion_counts_bare_file
        splice_counts_file
        splice_counts_bare_file
        fusion_counts_with_splice_bare_file
        fusion_counts_with_splice_bare_file_machine
        splice_reads_file
        sam_on_target_alone_file_1_linux_sorted
        sam_off_target_alone_file_1_linux_sorted
        sam_dedup_on_target_alone_file_1_linux_sorted
        sam_dedup_off_target_alone_file_1_linux_sorted
        housekeeping_dedup_file
        on_target_dedup_file
        reads_per_exon_dedup_file
        reads_per_exon_dedup_file_machine
        housekeeping_file
        on_target_file
        reads_per_exon_file
        sam_file_1_marked
        sam_dedup_file_1_marked
        bam_file_1_full
        bam_file_1_full_sorted
        sam_file_1_full_sorted
        total_and_aligned_molecule_count_per_tag_file
        unique_and_aligned_molecule_count_per_tag_file
        bam_dedup_file_1_full
        bam_dedup_file_1_full_sorted
        sam_dedup_file_1_full_sorted
        qc_filter_file
        qc_filter_file_machine
        coverage_uniformity_file
        coverage_uniformity_file_machine
        counts_file
        counts_file_machine
    );

    # Keys of the files removed in addition for paired reads
    my @paired_removed = qw(
        sam_file_2_full
        bam_file_2
        bed_file_2
        sam_dedup_file_2
        sam_dedup_file_2_full
        bam_dedup_file_2
        full_master_prejoin_file_2
        full_master_file_2
        full_master_dedup_prejoin_file_2
        full_master_dedup_file_2
        bed_points_file_2
        bed_points_dedup_file_2
        intersect_file_2
        intersect_dedup_file_2
        intersect_file_combined
        intersect_dedup_file_combined
        sam_dedup_on_target_alone_file_1
        sam_dedup_off_target_alone_file_1
        sam_on_target_file_1
        sam_off_target_file_1
        sam_on_target_file_2
        sam_off_target_file_2
        sam_on_target_alone_file_2
        sam_off_target_alone_file_2
        sam_dedup_on_target_alone_file_2
        sam_dedup_off_target_alone_file_2
        master_dedup_no_annotation_file_2
        master_dedup_prejoin_file_2
        master_dedup_file_2
        sam_on_target_file_1_linux_sorted
        sam_off_target_file_1_linux_sorted
        sam_dedup_on_target_file_1_linux_sorted
        sam_dedup_off_target_file_1_linux_sorted
        sam_on_target_alone_file_2_linux_sorted
        sam_off_target_alone_file_2_linux_sorted
        sam_dedup_on_target_alone_file_2_linux_sorted
        sam_dedup_off_target_alone_file_2_linux_sorted
        sam_on_target_file_2_linux_sorted
        sam_off_target_file_2_linux_sorted
        sam_dedup_on_target_file_2_linux_sorted
        sam_dedup_off_target_file_2_linux_sorted
        sam_file_2_marked
        sam_dedup_file_2_marked
        bam_file_2_full
        bam_file_2_full_sorted
        sam_file_2_full_sorted
        bam_dedup_file_2_full
        bam_dedup_file_2_full_sorted
        sam_dedup_file_2_full_sorted
    );

    my @removed = @always_removed;
    push @removed, @paired_removed if $$reads_ref eq 'paired';

    # Clean up Flanking Sequences and Flanking Splice Sequences Files
    my $cmd_line = "echo Clean Up\n"
        . "$tidier -f $files->{flanking_sequences_file}\n"
        . "$tidier -f $files->{flanking_splice_sequences_file}\n";

    for my $key (@removed) {
        $cmd_line .= "rm $files->{$key}\n";
    }

    return $cmd_line;
}
1078
1079
# Summary for Unprocessed Sample
#
# Builds the summary_for_unprocessed_samples.pl command that writes the
# summary file and machine-readable summary file for a sample.  Both FASTQ
# file names are always passed (-f1 and -f2), whatever the read type.
#
# Left out on purpose, as in the original where these commands are
# commented out: the enscript / ps2pdf conversion of the summary.
#
# Arguments:
#   reference to the read type string (passed as -r)
#   reference to the FASTQ file name of read 1
#   reference to the FASTQ file name of read 2
#   reference to the parameters hash reference (uses {path})
#   reference to the files hash reference
# Returns the commands as one string, one command per line.
sub summary_for_unprocessed_sample {
    my ($reads_ref, $fastq_file_1_ref, $fastq_file_2_ref, $parameters_ref, $files_ref) = @_;
    my $files = $$files_ref;

    my @words = (
        "$$parameters_ref->{path}/summary_for_unprocessed_samples.pl",
        '-r',  $$reads_ref,
        '-f1', $$fastq_file_1_ref,
        '-f2', $$fastq_file_2_ref,
        '-o',  $files->{summary_file},
        '-om', $files->{summary_file_machine},
    );
    return "echo Summary for Unprocessed Sample\n" . join(' ', @words) . "\n";
}
1092
1093
# Join Multiple Samples
#
# Builds the join_multisample_output.pl command that combines the output of
# all samples named in the config file into the requested output file.
#
# Fix: the command used to be assigned with "=" instead of appended with
# ".=", which threw away the "echo Join Multiple Samples" banner that every
# other command builder in this file emits first.
#
# Arguments:
#   reference to the parameters hash reference
#   (uses {path}, {config_file} and {outputfile})
# Returns the commands as one string, one command per line.
sub join_multiple_samples {
    my $parameters_ref = shift;
    my $cmd_line = "echo Join Multiple Samples\n";
    $cmd_line .= "$$parameters_ref->{path}/join_multisample_output.pl -config $$parameters_ref->{config_file} -o $$parameters_ref->{outputfile}\n";
    return $cmd_line;
}
1100
1101 #sub define_alignment_file_names {
1102 # my $reads_ref = shift;
1103 # my $tags_array_ref = shift;
1104 # my $parameters_ref = shift;
1105 # my $files_ref = shift;
1106 # $$files_ref->{sam_file_1} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".sam";
1107 # $$files_ref->{sam_file_1_full} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[0] . ".sam.full";
1108 # if ( $$reads_ref eq 'paired' ) {
1109 # $$files_ref->{sam_file_2} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".sam";
1110 # $$files_ref->{sam_file_2_full} = $$parameters_ref->{directory} . "/" . @$tags_array_ref[1] . ".sam.full";
1111 # }
1112 #}
1113
# Define Alignments File Names
#
# Stores in the files hash the names of the alignment files for read 1
# (and for read 2 when the reads are paired), each made of the output
# directory, the tag of that read and a fixed suffix, plus the name of the
# combined BED file made of the directory and the sample tag.
#
# Arguments:
#   reference to the read type string
#   reference to the array of per-read tags (element 0 = read 1,
#     element 1 = read 2)
#   reference to the sample tag
#   reference to the parameters hash reference (uses {directory})
#   reference to the files hash reference (filled in)
sub define_alignments_file_names {
    my ($reads_ref, $tags_array_ref, $tag_ref, $parameters_ref, $files_ref) = @_;

    # [ key template, file name suffix ] of every per-read alignment file
    my @per_read_files = (
        [ 'sam_file_%d_orig', '.fastq.sam' ],
        [ 'sam_file_%d_full', '.sam.full'  ],
        [ 'sam_file_%d',      '.sam'       ],
        [ 'bam_file_%d',      '.bam'       ],
        [ 'bed_file_%d_orig', '.bed.orig'  ],
    );

    my @read_numbers = $$reads_ref eq 'paired' ? (1, 2) : (1);
    for my $n (@read_numbers) {
        my $stem = $$parameters_ref->{directory} . '/' . $tags_array_ref->[ $n - 1 ];
        for my $entry (@per_read_files) {
            my ($key_template, $suffix) = @$entry;
            ${$files_ref}->{ sprintf($key_template, $n) } = $stem . $suffix;
        }
    }

    ${$files_ref}->{bed_file_combined} = $$parameters_ref->{directory} . '/' . $$tag_ref . '.combined.bed';
}
1134
# Define De-duplication File Names
#
# Stores in the files hash the name of the de-duplicated read ids file
# (directory + sample tag) and the names of the de-duplicated SAM files of
# read 1 (and of read 2 when the reads are paired), each made of the output
# directory, the tag of that read and a fixed suffix.
#
# Arguments:
#   reference to the read type string
#   reference to the array of per-read tags (element 0 = read 1,
#     element 1 = read 2)
#   reference to the sample tag
#   reference to the parameters hash reference (uses {directory})
#   reference to the files hash reference (filled in)
sub define_de_duplication_file_names {
    my ($reads_ref, $tags_array_ref, $tag_ref, $parameters_ref, $files_ref) = @_;
    my $directory = $$parameters_ref->{directory};

    # Names of the de-duplicated SAM files of read number 1 or 2
    my $name_read_files = sub {
        my $n    = shift;
        my $stem = $directory . '/' . $tags_array_ref->[ $n - 1 ];
        ${$files_ref}->{"sam_dedup_file_$n"}        = $stem . '.sam.dedup';
        ${$files_ref}->{"sam_dedup_file_${n}_full"} = $stem . '.dedup.sam.full';
    };

    ${$files_ref}->{dedup_file} = $directory . '/' . $$tag_ref . '.dedup_read_ids.dat';
    $name_read_files->(1);
    if ($$reads_ref eq 'paired') {
        $name_read_files->(2);
    }
}
1149
# Register the on-/off-target file names in the shared files hash.
#
# Arguments (all references, in this order):
#   $reads_ref      - ref to the read-layout string; the exact string
#                     'paired' enables the read-2 entries
#   $tags_array_ref - ref to the array of per-read tags ([0] is used for the
#                     "_1" entries, [1] for the "_2" entries)
#   $tag_ref        - ref to the tag string used for the combined files
#   $parameters_ref - ref to the parameters hash ref; only {directory} is read
#   $files_ref      - ref to the files hash ref; entries are added in place
#
# Every per-read path is "<directory>/<per-read tag><suffix>" and every
# combined path is "<directory>/<tag><suffix>".  This sub only builds
# strings; it does not touch the file system and returns nothing.
sub define_on_and_off_target_file_names {
    my ( $reads_ref, $tags_array_ref, $tag_ref, $parameters_ref, $files_ref ) = @_;

    my $directory = ${$parameters_ref}->{directory};

    # Per-read entries: [ hash-key template, file-name suffix ].  The %d in
    # the key template is replaced by the read number (1 or 2).
    my @per_read_entries = (
        # Full (non-deduplicated) files
        [ 'full_master_prejoin_file_%d'  => '.full.prejoin.master.dat' ],
        [ 'full_master_file_%d'          => '.full.master.dat' ],
        [ 'bed_file_%d'                  => '.bed' ],
        [ 'bed_points_file_%d'           => '.points.bed' ],
        [ 'intersect_file_%d'            => '.intersect.dat' ],
        [ 'sam_on_target_alone_file_%d'  => '.on_target.alone.sam' ],
        [ 'sam_file_%d_marked'           => '.sam.marked' ],
        [ 'sam_off_target_alone_file_%d' => '.off_target.alone.sam' ],
        [ 'sam_on_target_file_%d'        => '.on_target.sam' ],
        [ 'sam_off_target_file_%d'       => '.off_target.sam' ],

        # Deduplicated files
        [ 'full_master_dedup_prejoin_file_%d'  => '.full.dedup.prejoin.master.dat' ],
        [ 'full_master_dedup_file_%d'          => '.full.dedup.master.dat' ],
        [ 'bed_dedup_file_%d'                  => '.dedup.bed' ],
        [ 'bed_points_dedup_file_%d'           => '.dedup.points.bed' ],
        [ 'intersect_dedup_file_%d'            => '.dedup.intersect.dat' ],
        [ 'sam_dedup_on_target_alone_file_%d'  => '.dedup.on_target.alone.sam' ],
        [ 'sam_dedup_file_%d_marked'           => '.sam.dedup.marked' ],
        [ 'sam_dedup_off_target_alone_file_%d' => '.dedup.off_target.alone.sam' ],
        [ 'sam_dedup_on_target_file_%d'        => '.dedup.on_target.sam' ],
        [ 'sam_dedup_off_target_file_%d'       => '.dedup.off_target.sam' ],
    );

    # Shared by both reads; does not depend on any tag.
    ${$files_ref}->{target_regions_bed_file} = $directory . "/target_regions.bed";

    # Read 1 is always registered; read 2 only for paired data.
    my $last_read = ( ${$reads_ref} eq 'paired' ) ? 2 : 1;
    for my $read ( 1 .. $last_read ) {
        my $stem = $directory . "/" . $tags_array_ref->[ $read - 1 ];
        for my $entry (@per_read_entries) {
            my ( $key_template, $suffix ) = @{$entry};
            ${$files_ref}->{ sprintf( $key_template, $read ) } = $stem . $suffix;
        }
    }

    # Combined files are named after the tag in $tag_ref, not a per-read tag.
    my $combined_stem = $directory . "/" . ${$tag_ref};
    ${$files_ref}->{intersect_file_combined}       = $combined_stem . ".intersect_combined.dat";
    ${$files_ref}->{intersect_dedup_file_combined} = $combined_stem . ".dedup.intersect_combined.dat";

    return;
}
1208
# Add the deduplicated start-site and coverage bedgraph paths to the files
# hash.
#
#   $reads_ref      - ref to the read-layout string ('paired' adds read 2)
#   $tags_array_ref - ref to the array of per-read tags
#   $parameters_ref - ref to the parameters hash ref ({directory} is read)
#   $files_ref      - ref to the files hash ref, updated in place
sub define_coverage_and_start_site_file_names {
    my ( $reads_ref, $tags_array_ref, $parameters_ref, $files_ref ) = @_;

    my $dir        = ${$parameters_ref}->{directory};
    my $first_stem = "$dir/$tags_array_ref->[0]";

    ${$files_ref}->{start_site_dedup_file_1} = "$first_stem.dedup.start_site.bedgraph";
    ${$files_ref}->{coverage_dedup_file_1}   = "$first_stem.dedup.coverage.bedgraph";

    # Second-read entries exist only for paired data.
    if ( ${$reads_ref} eq 'paired' ) {
        my $second_stem = "$dir/$tags_array_ref->[1]";
        ${$files_ref}->{start_site_dedup_file_2} = "$second_stem.dedup.start_site.bedgraph";
        ${$files_ref}->{coverage_dedup_file_2}   = "$second_stem.dedup.coverage.bedgraph";
    }
}
1221
# Add the deduplicated master-file paths (no-annotation, prejoin and final)
# to the files hash.
#
#   $reads_ref      - ref to the read-layout string ('paired' adds read 2)
#   $tags_array_ref - ref to the array of per-read tags
#   $parameters_ref - ref to the parameters hash ref ({directory} is read)
#   $files_ref      - ref to the files hash ref, updated in place
sub define_master_files_file_names {
    my ( $reads_ref, $tags_array_ref, $parameters_ref, $files_ref ) = @_;

    my $dir        = ${$parameters_ref}->{directory};
    my $first_stem = "$dir/$tags_array_ref->[0]";

    ${$files_ref}->{master_dedup_no_annotation_file_1} = "$first_stem.dedup.no_annotation.master.dat";
    ${$files_ref}->{master_dedup_prejoin_file_1}       = "$first_stem.dedup.prejoin.master.dat";
    ${$files_ref}->{master_dedup_file_1}               = "$first_stem.dedup.master.dat";

    # Second-read entries exist only for paired data.
    if ( ${$reads_ref} eq 'paired' ) {
        my $second_stem = "$dir/$tags_array_ref->[1]";
        ${$files_ref}->{master_dedup_no_annotation_file_2} = "$second_stem.dedup.no_annotation.master.dat";
        ${$files_ref}->{master_dedup_prejoin_file_2}       = "$second_stem.dedup.prejoin.master.dat";
        ${$files_ref}->{master_dedup_file_2}               = "$second_stem.dedup.master.dat";
    }
}
1236
# Add the per-read one-segment / splice / fusion / multi-fusion read file
# paths, plus the two combined per-tag paths, to the files hash.
#
#   $reads_ref      - ref to the read-layout string ('paired' adds read 2)
#   $tags_array_ref - ref to the array of per-read tags
#   $tag_ref        - ref to the tag string used for the combined files
#   $parameters_ref - ref to the parameters hash ref ({directory} is read)
#   $files_ref      - ref to the files hash ref, updated in place
sub define_fusion_reads_file_names {
    my ( $reads_ref, $tags_array_ref, $tag_ref, $parameters_ref, $files_ref ) = @_;

    my $dir = ${$parameters_ref}->{directory};

    # Each kind yields key "<kind>_file_<read>" and suffix ".<kind>.dat".
    my @kinds     = qw(one_segment_reads splice_reads fusion_reads multi_fusion_reads);
    my $last_read = ${$reads_ref} eq 'paired' ? 2 : 1;

    for my $read ( 1 .. $last_read ) {
        my $stem = "$dir/$tags_array_ref->[ $read - 1 ]";
        for my $kind (@kinds) {
            ${$files_ref}->{"${kind}_file_$read"} = "$stem.$kind.dat";
        }
    }

    # Combined files are keyed without a read number and use the overall tag.
    my $combined_stem = "$dir/${$tag_ref}";
    ${$files_ref}->{fusion_reads_file} = "$combined_stem.fusion_reads.combined.dat";
    ${$files_ref}->{splice_reads_file} = "$combined_stem.splice_reads.combined.dat";
}
1256
# Add the fusion/splice count file paths for one tag to the files hash.
#
#   $tag_ref        - ref to the tag string that prefixes every file name
#   $parameters_ref - ref to the parameters hash ref ({directory} is read)
#   $files_ref      - ref to the files hash ref, updated in place
sub define_count_fusions_file_names {
    my ( $tag_ref, $parameters_ref, $files_ref ) = @_;

    # Common "<directory>/<tag>" prefix of every path below.
    my $stem = ${$parameters_ref}->{directory} . "/" . ${$tag_ref};

    ${$files_ref}->{fusion_counts_bare_file}             = "$stem.fusion_counts_bare.dat";
    ${$files_ref}->{fusion_counts_file}                  = "$stem.fusion_counts.dat";
    ${$files_ref}->{splice_counts_bare_file}             = "$stem.splice_counts_bare.dat";
    ${$files_ref}->{splice_counts_file}                  = "$stem.splice_counts.dat";
    ${$files_ref}->{fusion_counts_with_splice_bare_file} = "$stem.fusion_counts_with_splice_bare.dat";
    ${$files_ref}->{fusion_counts_with_splice_bare_file_machine} = "$stem.fusion_counts_with_splice_bare.machine.dat";
}
1268
# Add the flanking-sequence file paths (fusion and splice) for one tag to
# the files hash.
#
#   $tag_ref        - ref to the tag string that prefixes every file name
#   $parameters_ref - ref to the parameters hash ref ({directory} is read)
#   $files_ref      - ref to the files hash ref, updated in place
sub define_flanking_sequences_file_names {
    my ( $tag_ref, $parameters_ref, $files_ref ) = @_;

    my $stem = join "/", ${$parameters_ref}->{directory}, ${$tag_ref};

    ${$files_ref}->{flanking_sequences_file}        = join "", $stem, ".flanking_sequences.dat";
    ${$files_ref}->{flanking_splice_sequences_file} = join "", $stem, ".flanking_splice_sequences.dat";
}
1276
# Add the consensus-sequence file paths for one tag to the files hash: the
# std_out/std_err files for fusion and splice, and the combined FASTA.
#
#   $tag_ref        - ref to the tag string that prefixes every file name
#   $parameters_ref - ref to the parameters hash ref ({directory} is read)
#   $files_ref      - ref to the files hash ref, updated in place
sub define_consensus_sequences_file_names {
    my ( $tag_ref, $parameters_ref, $files_ref ) = @_;

    # Builds "<directory>/<tag><suffix>" for the given suffix.
    my $path_with = sub {
        my ($suffix) = @_;
        return ${$parameters_ref}->{directory} . "/" . ${$tag_ref} . $suffix;
    };

    ${$files_ref}->{consensus_fusion_std_out_file}    = $path_with->(".consensus_fusion_std_out.dat");
    ${$files_ref}->{consensus_fusion_std_err_file}    = $path_with->(".consensus_fusion_std_err.dat");
    ${$files_ref}->{consensus_splice_std_out_file}    = $path_with->(".consensus_splice_std_out.dat");
    ${$files_ref}->{consensus_splice_std_err_file}    = $path_with->(".consensus_splice_std_err.dat");
    ${$files_ref}->{fusion_and_splice_consensus_file} = $path_with->(".fusion_and_splice_consensus_sequences.fasta");
}
1287
# Add the deduplicated BAM paths (unsorted, sorted name stem, sorted file)
# to the files hash.
#
#   $reads_ref      - ref to the read-layout string ('paired' adds read 2)
#   $tags_array_ref - ref to the array of per-read tags
#   $parameters_ref - ref to the parameters hash ref ({directory} is read)
#   $files_ref      - ref to the files hash ref, updated in place
sub define_bam_dedup_files_file_names {
    my ( $reads_ref, $tags_array_ref, $parameters_ref, $files_ref ) = @_;

    my $dir        = ${$parameters_ref}->{directory};
    my $first_stem = "$dir/$tags_array_ref->[0]";

    ${$files_ref}->{bam_dedup_file_1}             = "$first_stem.dedup.bam";
    ${$files_ref}->{bam_dedup_sorted_file_1_name} = "$first_stem.dedup.sorted";
    ${$files_ref}->{bam_dedup_sorted_file_1}      = "$first_stem.dedup.sorted.bam";

    # Second-read entries exist only for paired data.
    if ( ${$reads_ref} eq 'paired' ) {
        my $second_stem = "$dir/$tags_array_ref->[1]";
        ${$files_ref}->{bam_dedup_file_2}             = "$second_stem.dedup.bam";
        ${$files_ref}->{bam_dedup_sorted_file_2_name} = "$second_stem.dedup.sorted";
        ${$files_ref}->{bam_dedup_sorted_file_2}      = "$second_stem.dedup.sorted.bam";
    }
}
1302
# Register the sorted SAM/BAM file names in the shared files hash.
#
# Arguments (all references, in this order):
#   $reads_ref      - ref to the read-layout string; the exact string
#                     'paired' enables the read-2 entries
#   $tags_array_ref - ref to the array of per-read tags ([0] is used for the
#                     "_1" entries, [1] for the "_2" entries)
#   $parameters_ref - ref to the parameters hash ref; only {directory} is read
#   $files_ref      - ref to the files hash ref; entries are added in place
#
# Every path is "<directory>/<per-read tag><suffix>".  This sub only builds
# strings; it does not touch the file system and returns nothing.
sub define_sort_sam_files_file_names {
    my ( $reads_ref, $tags_array_ref, $parameters_ref, $files_ref ) = @_;

    my $directory = ${$parameters_ref}->{directory};

    # Per-read entries: [ hash-key template, file-name suffix ].  The %d in
    # the key template is replaced by the read number (1 or 2).
    my @per_read_entries = (
        # "linux_sorted" SAM files
        [ 'sam_on_target_alone_file_%d_linux_sorted'        => '.on_target.alone.linux_sorted.sam' ],
        [ 'sam_off_target_alone_file_%d_linux_sorted'       => '.off_target.alone.linux_sorted.sam' ],
        [ 'sam_dedup_on_target_alone_file_%d_linux_sorted'  => '.dedup.on_target.alone.linux_sorted.sam' ],
        [ 'sam_dedup_off_target_alone_file_%d_linux_sorted' => '.dedup.off_target.alone.linux_sorted.sam' ],
        [ 'sam_on_target_file_%d_linux_sorted'              => '.on_target.linux_sorted.sam' ],
        [ 'sam_off_target_file_%d_linux_sorted'             => '.off_target.linux_sorted.sam' ],
        [ 'sam_dedup_on_target_file_%d_linux_sorted'        => '.dedup.on_target.linux_sorted.sam' ],
        [ 'sam_dedup_off_target_file_%d_linux_sorted'       => '.dedup.off_target.linux_sorted.sam' ],

        # Full BAM/SAM files.  Each *_full_sorted BAM name is the matching
        # *_full_prefix name with ".bam" appended.
        [ 'bam_file_%d_full'        => '.bam.full' ],
        [ 'bam_file_%d_full_prefix' => '.bam.full.prefix' ],
        [ 'bam_file_%d_full_sorted' => '.bam.full.prefix.bam' ],
        [ 'sam_file_%d_full_sorted' => '.sam.full.sorted' ],

        # Full deduplicated BAM/SAM files (same prefix + ".bam" relationship).
        [ 'bam_dedup_file_%d_full'        => '.dedup.bam.full' ],
        [ 'bam_dedup_file_%d_full_prefix' => '.dedup.bam.full.prefix' ],
        [ 'bam_dedup_file_%d_full_sorted' => '.dedup.bam.full.prefix.bam' ],
        [ 'sam_dedup_file_%d_full_sorted' => '.dedup.sam.full.sorted' ],
    );

    # Read 1 is always registered; read 2 only for paired data.
    my $last_read = ( ${$reads_ref} eq 'paired' ) ? 2 : 1;
    for my $read ( 1 .. $last_read ) {
        my $stem = $directory . "/" . $tags_array_ref->[ $read - 1 ];
        for my $entry (@per_read_entries) {
            my ( $key_template, $suffix ) = @{$entry};
            ${$files_ref}->{ sprintf( $key_template, $read ) } = $stem . $suffix;
        }
    }

    return;
}
1349
# Add the on-target, reads-per-exon and housekeeping statistics file paths
# (plain and dedup variants) for one tag to the files hash.
#
#   $tag_ref        - ref to the tag string that prefixes every file name
#   $parameters_ref - ref to the parameters hash ref ({directory} is read)
#   $files_ref      - ref to the files hash ref, updated in place
sub define_on_target_stats_file_names {
    my ( $tag_ref, $parameters_ref, $files_ref ) = @_;

    # Common "<directory>/<tag>" prefix of every path below.
    my $stem = ${$parameters_ref}->{directory} . "/" . ${$tag_ref};

    ${$files_ref}->{on_target_file}            = "$stem.on_target.dat";
    ${$files_ref}->{on_target_dedup_file}      = "$stem.dedup.on_target.dat";
    ${$files_ref}->{reads_per_exon_file}       = "$stem.reads_per_exon.dat";
    ${$files_ref}->{housekeeping_file}         = "$stem.housekeeping.dat";
    ${$files_ref}->{reads_per_exon_dedup_file} = "$stem.dedup.reads_per_exon.dat";

    # Note the suffix order here: ".housekeeping.dedup.dat", unlike the
    # ".dedup.<name>" pattern of the other dedup entries.
    ${$files_ref}->{housekeeping_dedup_file}           = "$stem.housekeeping.dedup.dat";
    ${$files_ref}->{reads_per_exon_dedup_file_machine} = "$stem.dedup.reads_per_exon.machine.dat";
}
1362
# Add the total-and-aligned molecule count file paths (one per read plus
# one per tag) to the files hash.
#
#   $reads_ref      - ref to the read-layout string ('paired' adds read 2)
#   $tags_array_ref - ref to the array of per-read tags
#   $tag_ref        - ref to the tag string used for the per-tag file
#   $parameters_ref - ref to the parameters hash ref ({directory} is read)
#   $files_ref      - ref to the files hash ref, updated in place
sub define_total_molecule_counts_file_names {
    my ( $reads_ref, $tags_array_ref, $tag_ref, $parameters_ref, $files_ref ) = @_;

    my $dir       = ${$parameters_ref}->{directory};
    my $last_read = ${$reads_ref} eq 'paired' ? 2 : 1;

    for my $read ( 1 .. $last_read ) {
        my $read_tag = $tags_array_ref->[ $read - 1 ];
        ${$files_ref}->{"total_and_aligned_molecule_count_file_$read"} =
            "$dir/$read_tag.total_and_aligned_molecule_count.dat";
    }

    ${$files_ref}->{total_and_aligned_molecule_count_per_tag_file} =
        "$dir/${$tag_ref}.per_tag.total_and_aligned_molecule_count.dat";
}
1375
# Add the unique-and-aligned molecule count file paths (one per read plus
# one per tag) to the files hash.
#
#   $reads_ref      - ref to the read-layout string ('paired' adds read 2)
#   $tags_array_ref - ref to the array of per-read tags
#   $tag_ref        - ref to the tag string used for the per-tag file
#   $parameters_ref - ref to the parameters hash ref ({directory} is read)
#   $files_ref      - ref to the files hash ref, updated in place
sub define_de_deduplicated_molecule_counts_file_names {
    my ( $reads_ref, $tags_array_ref, $tag_ref, $parameters_ref, $files_ref ) = @_;

    my $dir       = ${$parameters_ref}->{directory};
    my $last_read = ${$reads_ref} eq 'paired' ? 2 : 1;

    for my $read ( 1 .. $last_read ) {
        my $read_tag = $tags_array_ref->[ $read - 1 ];
        ${$files_ref}->{"unique_and_aligned_molecule_count_file_$read"} =
            "$dir/$read_tag.unique_and_aligned_molecule_count.dat";
    }

    ${$files_ref}->{unique_and_aligned_molecule_count_per_tag_file} =
        "$dir/${$tag_ref}.per_tag.unique_and_aligned_molecule_count.dat";
}
1388
# Add the molecule counts file paths (plain and "machine" variants) for
# one tag to the files hash.
#
#   $tag_ref        - ref to the tag string that prefixes every file name
#   $parameters_ref - ref to the parameters hash ref ({directory} is read)
#   $files_ref      - ref to the files hash ref, updated in place
sub define_all_molecule_counts_file_names {
    my ( $tag_ref, $parameters_ref, $files_ref ) = @_;

    my $dir = ${$parameters_ref}->{directory};
    my $tag = ${$tag_ref};

    ${$files_ref}->{counts_file}         = sprintf "%s/%s.counts.dat",         $dir, $tag;
    ${$files_ref}->{counts_file_machine} = sprintf "%s/%s.counts.machine.dat", $dir, $tag;
}
1396
1397
# Add the QC filter file paths (plain and "machine" variants) for one tag
# to the files hash.
#
#   $tag_ref        - ref to the tag string that prefixes every file name
#   $parameters_ref - ref to the parameters hash ref ({directory} is read)
#   $files_ref      - ref to the files hash ref, updated in place
sub define_qc_check_file_names {
    my ( $tag_ref, $parameters_ref, $files_ref ) = @_;

    # Common "<directory>/<tag>" prefix of both paths.
    my $stem = ${$parameters_ref}->{directory} . "/" . ${$tag_ref};

    ${$files_ref}->{qc_filter_file}         = "$stem.qc_filter.dat";
    ${$files_ref}->{qc_filter_file_machine} = "$stem.qc_filter.machine.dat";
}
1405
# Add the coverage-uniformity file paths (plain and "machine" variants)
# for one tag to the files hash.
#
#   $tag_ref        - ref to the tag string that prefixes every file name
#   $parameters_ref - ref to the parameters hash ref ({directory} is read)
#   $files_ref      - ref to the files hash ref, updated in place
sub define_coverage_uniformity_file_names {
    my ( $tag_ref, $parameters_ref, $files_ref ) = @_;

    # Builds "<directory>/<tag><suffix>" for the given suffix.
    my $path_with = sub {
        my ($suffix) = @_;
        return ${$parameters_ref}->{directory} . "/" . ${$tag_ref} . $suffix;
    };

    ${$files_ref}->{coverage_uniformity_file}         = $path_with->(".coverage_uniformity.dat");
    ${$files_ref}->{coverage_uniformity_file_machine} = $path_with->(".coverage_uniformity.machine.dat");
}
1413
# Add the summary file paths (.dat, .ps, .pdf and the "machine" .dat) for
# one tag to the files hash.
#
#   $tag_ref        - ref to the tag string that prefixes every file name
#   $parameters_ref - ref to the parameters hash ref ({directory} is read)
#   $files_ref      - ref to the files hash ref, updated in place
sub define_summary_file_names {
    my ( $tag_ref, $parameters_ref, $files_ref ) = @_;

    # Common "<directory>/<tag>.summary" prefix of every path below.
    my $summary_stem = ${$parameters_ref}->{directory} . "/" . ${$tag_ref} . ".summary";

    ${$files_ref}->{summary_file}         = "$summary_stem.dat";
    ${$files_ref}->{summary_file_ps}      = "$summary_stem.ps";
    ${$files_ref}->{summary_file_pdf}     = "$summary_stem.pdf";
    ${$files_ref}->{summary_file_machine} = "$summary_stem.machine.dat";
}