Mercurial > repos > nml > plasmidspades
annotate spades.pl @ 0:27b90e43e2d8 draft
planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
author | nml |
---|---|
date | Mon, 06 Jun 2016 15:13:06 -0400 |
parents | |
children |
rev | line source |
---|---|
0
27b90e43e2d8
planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff
changeset
|
1 #!/usr/bin/env perl |
27b90e43e2d8
planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff
changeset
|
2 ## A wrapper script to call spades.py and collect its output |
27b90e43e2d8
planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff
changeset
|
3 use strict; |
27b90e43e2d8
planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff
changeset
|
4 use warnings; |
27b90e43e2d8
planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff
changeset
|
5 use File::Temp qw/ tempfile tempdir /; |
27b90e43e2d8
planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff
changeset
|
6 use File::Copy; |
27b90e43e2d8
planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff
changeset
|
7 use Getopt::Long; |
27b90e43e2d8
planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff
changeset
|
8 |
27b90e43e2d8
planemo upload commit 6cd8dfa9e518c63a0b0e3fd5167424cffd3829fc
nml
parents:
diff
changeset
|
# Parse arguments
# Positional layout: five output paths, the contig name prefix, then the
# command line to run (everything that remains).
my ($out_contigs_file,
    $out_contigs_stats,
    $out_scaffolds_file,
    $out_scaffolds_stats,
    $out_log_file,
    $new_name,
    @sysargs) = @ARGV;

# Fail early with a usage message instead of tripping over undefined values
# further down.
die "Usage: $0 <contigs.fasta> <contigs.stats> <scaffolds.fasta> "
    . "<scaffolds.stats> <log> <contig name> <command> [args...]\n"
    unless defined $new_name and @sysargs;

# Directory the command is told to write to (passed to it with -o); the subs
# below read this variable.
my $output_dir = 'output_dir';

# Create log handle
# $! (not $?) carries the reason a failed open() reports.
open my $log, '>', $out_log_file
    or die "Cannot write to $out_log_file: $!\n";

# Run program
runSpades(@sysargs);
collectOutput($new_name);
extractCoverageLength($out_contigs_file, $out_contigs_stats);
extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
print {$log} "Done\n";

# Buffered write errors only surface when the handle is closed.
close $log or die "Cannot close $out_log_file: $!\n";
exit 0;

# Run spades
#
# Executes the given command line with " -o $output_dir" appended and aborts
# the wrapper when it does not succeed.
#
# Arguments: the command and its arguments. They are joined with single
#            spaces and handed to system() as one string.
# Returns:   0 on success.
# Dies:      when no command is given, or when the command cannot be
#            started, is killed by a signal, or exits non-zero. For a failed
#            command the same text is written to $log before dying.
sub runSpades {
    my @command = @_;
    die "No command given to runSpades\n" unless @command;

    # NOTE(review): the single-string form of system() is run through the
    # shell whenever the string contains shell metacharacters, so arguments
    # holding spaces or metacharacters get re-parsed. Kept unchanged because
    # callers may rely on it; switching to the list form would avoid it.
    my $cmd = join(" ", @command) . " -o $output_dir";
    my $return_code = system($cmd);
    if ($return_code) {
        # Capture $! right away; later calls may overwrite it.
        my $os_error = $!;

        # system() returns the raw wait status: -1 means the command never
        # started, the low 7 bits hold a terminating signal, and the exit
        # status lives in the high byte.
        my $reason
            = $return_code == -1 ? "could not execute command: $os_error"
            : $return_code & 127 ? sprintf("killed by signal %d", $return_code & 127)
            :                      sprintf("exited with status %d", $return_code >> 8);

        my $failure = "Failed with code $return_code\nCommand $cmd\nMessage: $reason\n";
        print {$log} $failure;
        die $failure;
    }
    return 0;
}

# Collect output
#
# Delivers contigs.fasta and scaffolds.fasta from $output_dir to
# $out_contigs_file and $out_scaffolds_file, then appends
# $output_dir/spades.log to the wrapper log ($log).
#
# Arguments: $new_name - prefix for the sequence names. The literal 'NODE'
#                        means the files are moved untouched; any other value
#                        is passed to renameContigs(), which writes the
#                        destination files itself.
# Returns:   0.
# Dies:      when an expected FASTA file is missing, when a move fails, or
#            when spades.log cannot be opened.
sub collectOutput{
    my ($new_name) = @_;

    # Both assemblies must exist before anything is delivered.
    for my $fasta ('contigs.fasta', 'scaffolds.fasta') {
        die "Could not find $fasta file\n" if not -e "$output_dir/$fasta";
    }

    #if a new name is given for the contigs and scaffolds, change them before moving them
    if ( $new_name ne 'NODE') {
        renameContigs($new_name);
    }
    else {
        # move() returns false on failure; without the check a failed move
        # would only show up later as a confusing "cannot open" error.
        move("$output_dir/contigs.fasta", $out_contigs_file)
            or die "Cannot move $output_dir/contigs.fasta to $out_contigs_file: $!\n";
        move("$output_dir/scaffolds.fasta", $out_scaffolds_file)
            or die "Cannot move $output_dir/scaffolds.fasta to $out_scaffolds_file: $!\n";
    }

    # Append the program's own log to the wrapper log. A lexical handle is
    # used so it cannot collide with other handles and is reliably closed.
    my $spades_log = "$output_dir/spades.log";
    open my $spades_fh, '<', $spades_log
        or die "Cannot open log file $spades_log: $!";
    while (my $line = <$spades_fh>) {
        print {$log} $line;
    }
    close $spades_fh;
    return 0;
}

#Change name in contig and scaffolds file
#
# Writes copies of $output_dir/contigs.fasta and $output_dir/scaffolds.fasta
# to $out_contigs_file and $out_scaffolds_file, replacing the NODE prefix of
# every matching header line with $name.
#
# Arguments: $name - the new prefix for the sequence names.
# Returns:   1 (a true value) on success.
# Dies:      when a file cannot be opened, or an output file cannot be closed.
sub renameContigs{
    my ($name) = @_;

    _renameFastaHeaders("$output_dir/contigs.fasta",   $out_contigs_file,   $name);
    _renameFastaHeaders("$output_dir/scaffolds.fasta", $out_scaffolds_file, $name);
    return 1;
}

# Copy the FASTA file $src to $dst, rewriting header lines of the form
# ">NODE_<number>_<rest>" to "><name>_<number> <rest>". All other lines are
# copied unchanged.
sub _renameFastaHeaders {
    my ($src, $dst, $name) = @_;

    open my $in,  '<', $src or die "Cannot read $src: $!\n";
    open my $out, '>', $dst or die "Cannot write to $dst: $!\n";

    while ( my $line = <$in> ) {
        #remove the NODE_ so we can rebuild the display_id with our contig name with the contig number.
        #also move the remainder of the length
        if ( $line =~ />NODE_(\d+)_(.+)/ ) {
            # (.+) stops at the newline, so the line ending is added back.
            $line = ">$name" . "_$1 $2\n";
        }
        print {$out} $line;
    }

    close $in;
    # Buffered write errors only surface when the handle is closed.
    close $out or die "Cannot close $dst: $!\n";
    return;
}

# Extract
#
# Reads the header lines of a FASTA file and writes a tab-separated table
# with one row per sequence: name, length and coverage.
#
# Arguments: $in  - path of the FASTA file to read.
#            $out - path of the table to write (overwritten).
# Returns:   1 (a true value) on success.
# Dies:      when a file cannot be opened, when the table cannot be closed,
#            or when a header line does not have the form
#            ><name>_<number>[_ or whitespace]length_<n>_cov_<x>_component_<n>.
sub extractCoverageLength{
    my ($in, $out) = @_;

    # Lexical handles: they cannot clash with handles used elsewhere and the
    # input handle no longer stays open after the sub returns.
    open my $fasta, '<', $in  or die "Cannot read $in: $!\n";
    open my $tab,   '>', $out or die "Cannot write to $out: $!\n";

    print {$tab} "#name\tlength\tcoverage\n";
    while (my $header = <$fasta>) {
        # Only header lines carry the statistics; sequence lines are skipped.
        next unless $header =~ /^>/;
        chomp $header;

        # A list assignment in scalar context yields the number of captured
        # fields, i.e. 0 (false) when the header does not match.
        my ($name, $number, $length, $cov, $component)
            = $header =~ m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*)_(component_\d+)/
            or die "Not all elements found in $header\n";

        print {$tab} "${name}_${number}_$component\t$length\t$cov\n";
    }

    close $fasta;
    # Buffered write errors only surface when the handle is closed.
    close $tab or die "Cannot close $out: $!\n";
    return 1;
}