Mercurial > repos > lionelguy > spades
comparison tools/spades_3_5_0/spades.pl @ 12:85c6121d92a5 draft
Uploaded
author | takadonet |
---|---|
date | Tue, 16 Dec 2014 12:57:49 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
11:1027f26f52d5 | 12:85c6121d92a5 |
---|---|
1 #!/usr/bin/env perl | |
2 ## A wrapper script to call spades.py and collect its output | |
3 use strict; | |
4 use warnings; | |
5 use File::Temp qw/ tempfile tempdir /; | |
6 use File::Copy; | |
7 use Getopt::Long; | |
8 | |
9 # Parse arguments | |
10 my ($out_contigs_file, | |
11 $out_contigs_stats, | |
12 $out_scaffolds_file, | |
13 $out_scaffolds_stats, | |
14 $out_log_file, | |
15 $new_name, | |
16 @sysargs) = @ARGV; | |
17 | |
18 ## GetOptions not compatible with parsing the rest of the arguments in an array. | |
19 ## Keeping the not-so-nice parse-in-one-go method, without named arguments. | |
20 # GetOptions( | |
21 # 'contigs-file=s' => \$out_contigs_file, | |
22 # 'contigs-stats=s' => \$out_contigs_stats, | |
23 # 'scaffolds-file=s' => \$out_scaffolds_file, | |
24 # 'scaffolds-stats=s' => \$out_scaffolds_stats, | |
25 # 'out_log_file=s' => \$out_log_file, | |
26 # ); | |
27 | |
28 # my @sysargs = @ARGV; | |
29 | |
30 # Create temporary folder to store files, delete after use | |
31 #my $output_dir = tempdir( CLEANUP => 0 ); | |
32 my $output_dir = 'output_dir'; | |
33 # Link "dat" files as fastq, otherwise spades complains about file format | |
34 | |
35 # Create log handle | |
36 open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $?\n"; | |
37 | |
38 # Run program | |
39 # To do: record time | |
40 runSpades(@sysargs); | |
41 collectOutput($new_name); | |
42 extractCoverageLength($out_contigs_file, $out_contigs_stats); | |
43 extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats); | |
44 print $log "Done\n"; | |
45 close $log; | |
46 exit 0; | |
47 | |
48 # Run spades | |
49 sub runSpades { | |
50 my $cmd = join(" ", @_) . " -o $output_dir"; | |
51 my $return_code = system($cmd); | |
52 if ($return_code) { | |
53 print $log "Failed with code $return_code\nCommand $cmd\nMessage: $?\n"; | |
54 die "Failed with code $return_code\nCommand $cmd\nMessage: $?\n"; | |
55 } | |
56 return 0; | |
57 } | |
58 | |
59 # Collect output | |
60 sub collectOutput{ | |
61 my ($new_name) = @_; | |
62 | |
63 # To do: check that the files are there | |
64 # Collects output | |
65 if ( not -e "$output_dir/contigs.fasta") { | |
66 die "Could not find contigs.fasta file\n"; | |
67 } | |
68 if ( not -e "$output_dir/scaffolds.fasta") { | |
69 die "Could not find scaffolds.fasta file\n"; | |
70 } | |
71 | |
72 #if a new name is given for the contigs and scaffolds, change them before moving them | |
73 if ( $new_name ne 'NODE') { | |
74 renameContigs($new_name); | |
75 } | |
76 else { | |
77 move "$output_dir/contigs.fasta", $out_contigs_file; | |
78 move "$output_dir/scaffolds.fasta", $out_scaffolds_file; | |
79 } | |
80 | |
81 | |
82 | |
83 open LOG, '<', "$output_dir/spades.log" | |
84 or die "Cannot open log file $output_dir/spades.log: $?"; | |
85 print $log $_ while (<LOG>); | |
86 return 0; | |
87 } | |
88 | |
89 #Change name in contig and scaffolds file | |
90 sub renameContigs{ | |
91 my ($name) = @_; | |
92 | |
93 open my $in, '<',"$output_dir/contigs.fasta" or die $!; | |
94 open my $out,'>', $out_contigs_file; | |
95 | |
96 while ( my $line = <$in>) { | |
97 #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number. | |
98 #also move the remainder of the length | |
99 if ( $line =~ />NODE_(\d+)_(.+)/) { | |
100 $line = ">$name" . "_$1 $2\n"; | |
101 } | |
102 print $out $line; | |
103 } | |
104 close $in; | |
105 close $out; | |
106 | |
107 | |
108 open $in, '<',"$output_dir/scaffolds.fasta" or die $!; | |
109 open $out,'>', $out_scaffolds_file; | |
110 | |
111 while ( my $line = <$in>) { | |
112 #remove the NODE_ so we can rebuilt the display_id with our contig name with the contig number. | |
113 #also move the remainder of the length | |
114 if ( $line =~ />NODE_(\d+)_(.+)/) { | |
115 $line = ">$name" . "_$1 $2\n"; | |
116 } | |
117 print $out $line; | |
118 } | |
119 close $in; | |
120 close $out; | |
121 | |
122 } | |
123 | |
124 | |
125 # Extract | |
126 sub extractCoverageLength{ | |
127 my ($in, $out) = @_; | |
128 open FASTA, '<', $in or die $!; | |
129 open TAB, '>', $out or die $!; | |
130 print TAB "#name\tlength\tcoverage\n"; | |
131 while (<FASTA>){ | |
132 next unless /^>/; | |
133 chomp; | |
134 die "Not all elements found in $_\n" if (! m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)/); | |
135 my ($name,$n, $l, $cov) = ($1,$2, $3, $4); | |
136 print TAB "$name" . "_$n\t$l\t$cov\n"; | |
137 } | |
138 close TAB; | |
139 } |