Mercurial > repos > cstrittmatter > test_eurl_vtec_wgs_pt
comparison scripts/spades.pl @ 0:965517909457 draft
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author | cstrittmatter |
---|---|
date | Wed, 22 Jan 2020 08:41:44 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:965517909457 |
---|---|
1 #!/usr/bin/env perl | |
2 ## A wrapper script to call spades.py and collect its output | |
3 use strict; | |
4 use warnings; | |
5 use File::Temp qw/ tempfile tempdir /; | |
6 use File::Copy; | |
7 use Getopt::Long; | |
8 | |
9 # Parse arguments | |
10 my ($out_contigs_file, | |
11 $out_contigs_stats, | |
12 $out_scaffolds_file, | |
13 $out_scaffolds_stats, | |
14 $out_log_file, | |
15 $new_name, | |
16 @sysargs) = @ARGV; | |
17 | |
18 ## GetOptions not compatible with parsing the rest of the arguments in an array. | |
19 ## Keeping the not-so-nice parse-in-one-go method, without named arguments. | |
20 # GetOptions( | |
21 # 'contigs-file=s' => \$out_contigs_file, | |
22 # 'contigs-stats=s' => \$out_contigs_stats, | |
23 # 'scaffolds-file=s' => \$out_scaffolds_file, | |
24 # 'scaffolds-stats=s' => \$out_scaffolds_stats, | |
25 # 'out_log_file=s' => \$out_log_file, | |
26 # ); | |
27 | |
28 # my @sysargs = @ARGV; | |
29 | |
30 # Create temporary folder to store files, delete after use | |
31 #my $output_dir = tempdir( CLEANUP => 0 ); | |
32 my $output_dir = 'output_dir'; | |
33 # Link "dat" files as fastq, otherwise spades complains about file format | |
34 | |
35 # Create log handle | |
36 open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $?\n"; | |
37 | |
38 # Run program | |
39 # To do: record time | |
40 runSpades(@sysargs); | |
41 collectOutput($new_name); | |
42 extractCoverageLength($out_contigs_file, $out_contigs_stats); | |
43 extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats); | |
44 print $log "Done\n"; | |
45 close $log; | |
46 exit 0; | |
47 | |
48 # Run spades | |
49 sub runSpades { | |
50 my $cmd = join(" ", @_) . " -o $output_dir"; | |
51 my $return_code = system($cmd); | |
52 if ($return_code) { | |
53 print $log "Failed with code $return_code\nCommand $cmd\nMessage: $?\n"; | |
54 die "Failed with code $return_code\nCommand $cmd\nMessage: $?\n"; | |
55 } | |
56 return 0; | |
57 } | |
58 | |
59 # Collect output | |
60 sub collectOutput{ | |
61 my ($new_name) = @_; | |
62 | |
63 # Collects output | |
64 #if a new name is given for the contigs and scaffolds, change them before moving them | |
65 if ( $new_name ne 'NODE') { | |
66 renameContigs($new_name); | |
67 } | |
68 else { | |
69 if ( -e "$output_dir/contigs.fasta") { | |
70 move "$output_dir/contigs.fasta", $out_contigs_file; | |
71 } | |
72 if ( -e "$output_dir/scaffolds.fasta") { | |
73 move "$output_dir/scaffolds.fasta", $out_scaffolds_file; | |
74 } | |
75 } | |
76 | |
77 open LOG, '<', "$output_dir/spades.log" | |
78 or die "Cannot open log file $output_dir/spades.log: $?"; | |
79 print $log $_ while (<LOG>); | |
80 return 0; | |
81 } | |
82 | |
83 #Change name in contig and scaffolds file | |
84 sub renameContigs{ | |
85 my ($name) = @_; | |
86 | |
87 open my $in, '<',"$output_dir/contigs.fasta" or die $!; | |
88 open my $out,'>', $out_contigs_file; | |
89 | |
90 while ( my $line = <$in>) { | |
91 #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number. | |
92 #also move the remainder of the length | |
93 if ( $line =~ />NODE_(\d+)_(.+)/) { | |
94 $line = ">$name" . "_$1 $2\n"; | |
95 } | |
96 print $out $line; | |
97 } | |
98 close $in; | |
99 close $out; | |
100 | |
101 | |
102 open $in, '<',"$output_dir/scaffolds.fasta" or die $!; | |
103 open $out,'>', $out_scaffolds_file; | |
104 | |
105 while ( my $line = <$in>) { | |
106 #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number. | |
107 #also move the remainder of the length | |
108 if ( $line =~ />NODE_(\d+)_(.+)/) { | |
109 $line = ">$name" . "_$1 $2\n"; | |
110 } | |
111 print $out $line; | |
112 } | |
113 close $in; | |
114 close $out; | |
115 | |
116 } | |
117 | |
118 | |
119 # Extract | |
120 sub extractCoverageLength{ | |
121 my ($in, $out) = @_; | |
122 open FASTA, '<', $in or die $!; | |
123 open TAB, '>', $out or die $!; | |
124 print TAB "#name\tlength\tcoverage\n"; | |
125 while (<FASTA>){ | |
126 next unless /^>/; | |
127 chomp; | |
128 die "Not all elements found in $_\n" if (! m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)/); | |
129 my ($name,$n, $l, $cov) = ($1,$2, $3, $4); | |
130 print TAB "$name" . "_$n\t$l\t$cov\n"; | |
131 } | |
132 close TAB; | |
133 } |