8
|
1 #!/usr/bin/env perl
|
|
2 ## A wrapper script to call spades.py and collect its output
|
|
3 use strict;
|
|
4 use warnings;
|
|
5 use File::Temp qw/ tempfile tempdir /;
|
|
6 use File::Copy;
|
|
7 use Getopt::Long;
|
|
8
|
|
9 # Parse arguments
|
|
10 my ($out_contigs_file,
|
|
11 $out_contigs_stats,
|
|
12 $out_scaffolds_file,
|
|
13 $out_scaffolds_stats,
|
|
14 $out_log_file,
|
|
15 @sysargs) = @ARGV;
|
|
16
|
|
17 ## GetOptions not compatible with parsing the rest of the arguments in an array.
|
|
18 ## Keeping the not-so-nice parse-in-one-go method, without named arguments.
|
|
19 # GetOptions(
|
|
20 # 'contigs-file=s' => \$out_contigs_file,
|
|
21 # 'contigs-stats=s' => \$out_contigs_stats,
|
|
22 # 'scaffolds-file=s' => \$out_scaffolds_file,
|
|
23 # 'scaffolds-stats=s' => \$out_scaffolds_stats,
|
|
24 # 'out_log_file=s' => \$out_log_file,
|
|
25 # );
|
|
26
|
|
27 # my @sysargs = @ARGV;
|
|
28
|
|
29 # Create temporary folder to store files, delete after use
|
|
30 #my $output_dir = tempdir( CLEANUP => 0 );
|
|
31 my $output_dir = 'output_dir';
|
|
32 # Link "dat" files as fastq, otherwise spades complains about file format
|
|
33
|
|
34 # Create log handle
|
|
35 open my $log, '>', $out_log_file or die "Cannot write to $out_log_file: $?\n";
|
|
36
|
|
37 # Run program
|
|
38 # To do: record time
|
|
39 &runSpades(@sysargs);
|
|
40 &collectOutput();
|
|
41 &extractCoverageLength($out_contigs_file, $out_contigs_stats);
|
|
42 &extractCoverageLength($out_scaffolds_file, $out_scaffolds_stats);
|
|
43 print $log "Done\n";
|
|
44 close $log;
|
|
45 exit 0;
|
|
46
|
|
47 # Run spades
|
|
48 sub runSpades {
|
|
49 my $cmd = join(" ", @_) . " -o $output_dir";
|
|
50 my $return_code = system($cmd);
|
|
51 if ($return_code) {
|
|
52 print $log "Failed with code $return_code\nCommand $cmd\nMessage: $?\n";
|
|
53 die "Failed with code $return_code\nCommand $cmd\nMessage: $?\n";
|
|
54 }
|
|
55 return 0;
|
|
56 }
|
|
57
|
|
58 # Collect output
|
|
59 sub collectOutput{
|
|
60 # To do: check that the files are there
|
|
61 # Collects output
|
|
62 move "$output_dir/contigs.fasta", $out_contigs_file;
|
|
63 move "$output_dir/scaffolds.fasta", $out_scaffolds_file;
|
|
64 open LOG, '<', "$output_dir/spades.log"
|
|
65 or die "Cannot open log file $output_dir/spades.log: $?";
|
|
66 print $log $_ while (<LOG>);
|
|
67 return 0;
|
|
68 }
|
|
69
|
|
70 # Extract
|
|
71 sub extractCoverageLength{
|
|
72 my ($in, $out) = @_;
|
|
73 open FASTA, '<', $in or die $!;
|
|
74 open TAB, '>', $out or die $!;
|
|
75 print TAB "#name\tlength\tcoverage\n";
|
|
76 while (<FASTA>){
|
|
77 next unless /^>/;
|
|
78 chomp;
|
|
79 die "Not all elements found in $_\n" if (! m/^>NODE_(\d+)_length_(\d+)_cov_(\d+\.*\d*)/);
|
|
80 my ($n, $l, $cov) = ($1, $2, $3);
|
|
81 print TAB "NODE_$n\t$l\t$cov\n";
|
|
82 }
|
|
83 close TAB;
|
|
84 }
|