annotate tools/spades_3_1_1/spades.pl @ 10:7d72faef9af0 draft

Uploaded
author takadonet
date Fri, 10 Oct 2014 14:40:43 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
7d72faef9af0 Uploaded
takadonet
parents:
diff changeset
1 #!/usr/bin/env perl
7d72faef9af0 Uploaded
takadonet
parents:
diff changeset
2 ## A wrapper script to call spades.py and collect its output
7d72faef9af0 Uploaded
takadonet
parents:
diff changeset
3 use strict;
7d72faef9af0 Uploaded
takadonet
parents:
diff changeset
4 use warnings;
7d72faef9af0 Uploaded
takadonet
parents:
diff changeset
5 use File::Temp qw/ tempfile tempdir /;
7d72faef9af0 Uploaded
takadonet
parents:
diff changeset
6 use File::Copy;
7d72faef9af0 Uploaded
takadonet
parents:
diff changeset
7 use Getopt::Long;
7d72faef9af0 Uploaded
takadonet
parents:
diff changeset
8
7d72faef9af0 Uploaded
takadonet
parents:
diff changeset
# Parse arguments.
# The first six positional arguments are the output paths and the contig name
# prefix; every remaining argument is the spades command line to execute.
if (@ARGV < 7) {
    die "Usage: $0 <contigs.fasta> <contigs.stats> <scaffolds.fasta> "
      . "<scaffolds.stats> <log> <contig name> <spades command> [args...]\n";
}
my ($out_contigs_file,
    $out_contigs_stats,
    $out_scaffolds_file,
    $out_scaffolds_stats,
    $out_log_file,
    $new_name,
    @sysargs) = @ARGV;

## GetOptions not compatible with parsing the rest of the arguments in an array.
## Keeping the not-so-nice parse-in-one-go method, without named arguments.
# GetOptions(
#   'contigs-file=s' => \$out_contigs_file,
#   'contigs-stats=s' => \$out_contigs_stats,
#   'scaffolds-file=s' => \$out_scaffolds_file,
#   'scaffolds-stats=s' => \$out_scaffolds_stats,
#   'out_log_file=s' => \$out_log_file,
#   );

# my @sysargs = @ARGV;

# Directory handed to spades via "-o". It is a fixed name relative to the
# current working directory; the tempdir() variant below is disabled, so
# nothing is cleaned up by this script.
#my $output_dir = tempdir( CLEANUP => 0 );
my $output_dir = 'output_dir';

# Create log handle ($! carries the OS error for a failed open, not $?)
open my $log, '>', $out_log_file
    or die "Cannot write to $out_log_file: $!\n";

# Run program
# To do: record time
runSpades(@sysargs);
collectOutput($new_name);
extractCoverageLength($out_contigs_file, $out_contigs_stats);
extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
print {$log} "Done\n";
# Buffered write errors only surface at close, so check it.
close $log or die "Cannot close $out_log_file: $!\n";
exit 0;
7d72faef9af0 Uploaded
takadonet
parents:
diff changeset
47
7d72faef9af0 Uploaded
takadonet
parents:
diff changeset
# Run spades
# Joins the given arguments into one command string, appends
# "-o $output_dir" and runs it through system().
#   @_ : the command and its arguments, in order
# Returns 0 on success. On failure the reason is written to the $log handle
# and the script dies with the same message.
sub runSpades {
    my @args = @_;

    # NOTE(review): the command is a single string, so it is re-parsed by the
    # shell; arguments containing spaces or shell metacharacters are not
    # protected. Left as-is because callers may rely on the shell parsing.
    my $cmd = join(" ", @args) . " -o $output_dir";

    my $return_code = system($cmd);
    my $os_error    = $!;    # capture before anything else can overwrite it
    return 0 if $return_code == 0;

    # system() returns the raw wait status: -1 means the command could not be
    # started, the low 7 bits hold a signal number, the high byte the exit code.
    my $reason;
    if ($return_code == -1) {
        $reason = "could not execute command: $os_error";
    }
    elsif ($return_code & 127) {
        $reason = sprintf "command died with signal %d", $return_code & 127;
    }
    else {
        $reason = sprintf "command exited with status %d", $return_code >> 8;
    }

    my $message = "Failed with code $return_code\nCommand $cmd\nMessage: $reason\n";
    print {$log} $message;
    die $message;
}
7d72faef9af0 Uploaded
takadonet
parents:
diff changeset
58
7d72faef9af0 Uploaded
takadonet
parents:
diff changeset
# Collect output
# Checks that contigs.fasta and scaffolds.fasta exist in $output_dir, then
# delivers them to $out_contigs_file / $out_scaffolds_file: moved unchanged
# when $new_name is 'NODE', otherwise copied with renamed headers through
# renameContigs(). Finally the content of $output_dir/spades.log is appended
# to the $log handle.
#   $new_name : name prefix to use for the sequences
# Dies when an expected file is missing, cannot be moved, or the spades log
# cannot be opened. Returns 0 on success.
sub collectOutput{
    my ($new_name) = @_;

    for my $fasta (qw(contigs.fasta scaffolds.fasta)) {
        die "Could not find $fasta file\n" if not -e "$output_dir/$fasta";
    }

    #if a new name is given for the contigs and scaffolds, change them before moving them
    if ( $new_name ne 'NODE') {
        renameContigs($new_name);
    }
    else {
        # move() returns false on failure; do not carry on with missing outputs.
        move("$output_dir/contigs.fasta", $out_contigs_file)
            or die "Cannot move $output_dir/contigs.fasta to $out_contigs_file: $!\n";
        move("$output_dir/scaffolds.fasta", $out_scaffolds_file)
            or die "Cannot move $output_dir/scaffolds.fasta to $out_scaffolds_file: $!\n";
    }

    # Lexical handle instead of the bareword LOG; $! (not $?) holds the
    # reason a failed open() reports.
    open my $spades_log, '<', "$output_dir/spades.log"
        or die "Cannot open log file $output_dir/spades.log: $!";
    while (my $line = <$spades_log>) {
        print {$log} $line;
    }
    close $spades_log;
    return 0;
}
7d72faef9af0 Uploaded
takadonet
parents:
diff changeset
88
7d72faef9af0 Uploaded
takadonet
parents:
diff changeset
#Change name in contig and scaffolds file
# Copies $output_dir/contigs.fasta to $out_contigs_file and
# $output_dir/scaffolds.fasta to $out_scaffolds_file, replacing the "NODE"
# prefix of every matching header with $name.
#   $name : new prefix for the sequence identifiers
# Dies if any of the files cannot be opened or written. Returns 0.
sub renameContigs{
    my ($name) = @_;

    _renameFastaHeaders("$output_dir/contigs.fasta",   $out_contigs_file,   $name);
    _renameFastaHeaders("$output_dir/scaffolds.fasta", $out_scaffolds_file, $name);
    return 0;
}

# Copy one FASTA file line by line, rewriting headers of the form
# ">NODE_<number>_<rest>" to "><name>_<number> <rest>"; all other lines are
# copied unchanged.
#   $source      : path of the FASTA file to read
#   $destination : path of the file to write
#   $name        : new prefix for the sequence identifiers
sub _renameFastaHeaders {
    my ($source, $destination, $name) = @_;

    open my $in, '<', $source
        or die "Cannot read $source: $!\n";
    open my $out, '>', $destination
        or die "Cannot write to $destination: $!\n";

    while ( my $line = <$in> ) {
        #remove the NODE_ so we can rebuild the display_id with our contig name with the contig number.
        #also move the remainder of the header (length, coverage, ...) into the description
        if ( $line =~ /^>NODE_(\d+)_(.+)/ ) {
            # "." does not match the newline, so it is added back explicitly.
            $line = ">$name" . "_$1 $2\n";
        }
        print {$out} $line;
    }

    close $in;
    # Buffered write errors only surface at close, so check it.
    close $out or die "Cannot close $destination: $!\n";
    return;
}
7d72faef9af0 Uploaded
takadonet
parents:
diff changeset
123
7d72faef9af0 Uploaded
takadonet
parents:
diff changeset
124
7d72faef9af0 Uploaded
takadonet
parents:
diff changeset
# Extract
# Reads the FASTA file $in and writes a tab-separated table to $out with one
# row per header line: "<name>_<number>", length and coverage, preceded by a
# "#name<TAB>length<TAB>coverage" title row. Headers must look like
# ">NAME_<number>_length_<n>_cov_<x>" (the separator before "length" may also
# be whitespace).
#   $in  : path of the FASTA file to read
#   $out : path of the table to write
# Dies if a file cannot be opened or written, or if a header does not contain
# all expected elements. Returns 0.
sub extractCoverageLength{
    my ($in, $out) = @_;

    # Lexical handles: the bareword FASTA/TAB handles were global and FASTA
    # was never closed.
    open my $fasta, '<', $in
        or die "Cannot read $in: $!\n";
    open my $tab, '>', $out
        or die "Cannot write to $out: $!\n";

    print {$tab} "#name\tlength\tcoverage\n";
    while ( my $header = <$fasta> ) {
        next unless $header =~ /^>/;
        chomp $header;

        # A failed match in list context yields an empty list.
        my @fields = $header =~ m/^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+(?:\.\d+)?)/;
        die "Not all elements found in $header\n" unless @fields;

        my ($name, $number, $length, $coverage) = @fields;
        print {$tab} "$name" . "_$number\t$length\t$coverage\n";
    }

    close $fasta;
    # Buffered write errors only surface at close, so check it.
    close $tab or die "Cannot close $out: $!\n";
    return 0;
}