annotate spades.pl @ 0:addd8265834b draft default tip

planemo upload commit 18fef9393a17a3442ab7927d76b301bb43ec3de4
author nml
date Tue, 09 Aug 2016 10:52:40 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
addd8265834b planemo upload commit 18fef9393a17a3442ab7927d76b301bb43ec3de4
nml
parents:
diff changeset
1 #!/usr/bin/env perl
addd8265834b planemo upload commit 18fef9393a17a3442ab7927d76b301bb43ec3de4
nml
parents:
diff changeset
2 ## A wrapper script to call spades.py and collect its output
addd8265834b planemo upload commit 18fef9393a17a3442ab7927d76b301bb43ec3de4
nml
parents:
diff changeset
3 use strict;
addd8265834b planemo upload commit 18fef9393a17a3442ab7927d76b301bb43ec3de4
nml
parents:
diff changeset
4 use warnings;
addd8265834b planemo upload commit 18fef9393a17a3442ab7927d76b301bb43ec3de4
nml
parents:
diff changeset
5 use File::Temp qw/ tempfile tempdir /;
addd8265834b planemo upload commit 18fef9393a17a3442ab7927d76b301bb43ec3de4
nml
parents:
diff changeset
6 use File::Copy;
addd8265834b planemo upload commit 18fef9393a17a3442ab7927d76b301bb43ec3de4
nml
parents:
diff changeset
7 use Getopt::Long;
addd8265834b planemo upload commit 18fef9393a17a3442ab7927d76b301bb43ec3de4
nml
parents:
diff changeset
8
addd8265834b planemo upload commit 18fef9393a17a3442ab7927d76b301bb43ec3de4
nml
parents:
diff changeset
# Parse arguments: the first four are consumed by this wrapper, everything
# after them is the command line that runSpades() executes.
my ($out_contigs_file,
    $out_paths_file,
    $out_log_file,
    $new_name,
    @sysargs) = @ARGV;

# Fail early with a usage message rather than tripping over undefined values
# further down (an undefined log file name, an empty command, ...).
if (!defined $new_name || !@sysargs) {
    die "Usage: $0 <out_contigs> <out_paths> <out_log> <contig_name> <command> [args...]\n";
}

# Directory passed to the command through -o; results are collected from it.
my $output_dir = 'output_dir';

# Create log handle. $! carries the reason an open() failed; $? is the status
# of the last child process and says nothing about this call.
open my $log, '>', $out_log_file
    or die "Cannot write to $out_log_file: $!\n";

# Run program
runSpades(@sysargs);
collectOutput($new_name);
print $log "Done\n";

# Buffered write errors only surface when the handle is closed.
close $log or die "Cannot close $out_log_file: $!\n";
exit 0;
addd8265834b planemo upload commit 18fef9393a17a3442ab7927d76b301bb43ec3de4
nml
parents:
diff changeset
28
addd8265834b planemo upload commit 18fef9393a17a3442ab7927d76b301bb43ec3de4
nml
parents:
diff changeset
# Run spades
#
# Executes the command given in @_ with "-o $output_dir" appended.
#
# Returns 0 when the command exits successfully. On any failure the reason
# and the command line are written to the log handle and the script dies
# with the same message.
sub runSpades {
    my @cmd = (@_, '-o', $output_dir);

    # Human-readable form of the command, used in messages only.
    my $cmd = join(" ", @cmd);

    # List-form system() bypasses the shell, so arguments that contain spaces
    # or shell metacharacters reach the program unmodified.
    my $status = system(@cmd);
    return 0 if $status == 0;

    # Decode the wait status: -1 means the program could not be started (the
    # reason is in $!), the low seven bits hold a terminating signal, and the
    # high byte holds the exit code.
    my $reason;
    if ($status == -1) {
        $reason = "could not be executed: $!";
    }
    elsif ($status & 127) {
        $reason = 'died with signal ' . ($status & 127);
    }
    else {
        $reason = 'exited with code ' . ($status >> 8);
    }

    my $message = "Failed: command $reason\nCommand $cmd\n";
    print $log $message;
    die $message;
}
addd8265834b planemo upload commit 18fef9393a17a3442ab7927d76b301bb43ec3de4
nml
parents:
diff changeset
39
addd8265834b planemo upload commit 18fef9393a17a3442ab7927d76b301bb43ec3de4
nml
parents:
diff changeset
# Collect output
#
# Delivers transcripts.fasta and transcripts.paths from $output_dir to
# $out_contigs_file and $out_paths_file, then appends $output_dir/spades.log
# to the wrapper log.
#
# $new_name - contig name prefix; any value other than 'NODE' sends both
#             files through renameContigs() instead of moving them as-is.
#
# Returns 0. Dies when an expected file is missing or cannot be moved or read.
sub collectOutput{
    my ($new_name) = @_;

    # Both result files must exist before anything is delivered.
    for my $file ('transcripts.fasta', 'transcripts.paths') {
        if ( not -e "$output_dir/$file") {
            die "Could not find $file file\n";
        }
    }

    #if a new name is given for the contigs, change them before moving them
    if ( $new_name ne 'NODE') {
        renameContigs($new_name);
    }
    else {
        # move() returns false and sets $! on failure; without the check a
        # failed move would leave the output silently missing.
        move("$output_dir/transcripts.fasta", $out_contigs_file)
            or die "Cannot move $output_dir/transcripts.fasta to $out_contigs_file: $!\n";
        move("$output_dir/transcripts.paths", $out_paths_file)
            or die "Cannot move $output_dir/transcripts.paths to $out_paths_file: $!\n";
    }

    # Append the tool's own log to the wrapper log. A lexical handle and a
    # named loop variable keep this from touching the global LOG and $_.
    my $spades_log = "$output_dir/spades.log";
    open my $spades_fh, '<', $spades_log
        or die "Cannot open log file $spades_log: $!";
    while ( my $line = <$spades_fh>) {
        print $log $line;
    }
    close $spades_fh;
    return 0;
}
addd8265834b planemo upload commit 18fef9393a17a3442ab7927d76b301bb43ec3de4
nml
parents:
diff changeset
69
addd8265834b planemo upload commit 18fef9393a17a3442ab7927d76b301bb43ec3de4
nml
parents:
diff changeset
#Change name in the contig (transcripts.fasta) and paths (transcripts.paths) files
#
# Copies both files from $output_dir to $out_contigs_file and $out_paths_file,
# replacing the NODE prefix of matching lines with $name on the way.
#
# $name - replacement for the 'NODE' prefix.
#
# Returns 0. Dies when a file cannot be opened or the output cannot be
# flushed to disk.
sub renameContigs{
    my ($name) = @_;

    _renameNodeHeaders("$output_dir/transcripts.fasta", $out_contigs_file, $name);
    _renameNodeHeaders("$output_dir/transcripts.paths", $out_paths_file,   $name);
    return 0;
}

# Copies $src to $dst line by line, rewriting lines that contain
# ">NODE_<number>_<rest>" to ">$name_<number> <rest>".
sub _renameNodeHeaders {
    my ($src, $dst, $name) = @_;

    open my $in,  '<', $src or die "Cannot read $src: $!\n";
    open my $out, '>', $dst or die "Cannot write to $dst: $!\n";

    while ( my $line = <$in>) {
        #remove the NODE_ so we can rebuild the display_id from our contig name
        #and the contig number; the rest of the header follows after a space.
        #The whole line is replaced, so the newline is added back by hand.
        # NOTE(review): only lines containing '>NODE_' are rewritten - confirm
        # that this matches the layout of the paths file.
        if ( $line =~ />NODE_(\d+)_(.+)/) {
            $line = ">$name" . "_$1 $2\n";
        }
        print $out $line;
    }
    close $in;

    # Write errors on a buffered handle are reported by close().
    close $out or die "Cannot close $dst: $!\n";
    return;
}