Mercurial > repos > nml > combine_assemblystats
annotate combine_stats.pl @ 0:1855203c2e6c draft default tip
planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
author | nml |
---|---|
date | Wed, 08 Nov 2017 16:38:50 -0500 |
parents | |
children |
rev | line source |
---|---|
0
1855203c2e6c
planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
nml
parents:
diff
changeset
|
1 #!/usr/bin/env perl |
1855203c2e6c
planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
nml
parents:
diff
changeset
|
2 use strict; |
1855203c2e6c
planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
nml
parents:
diff
changeset
|
3 use warnings; |
1855203c2e6c
planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
nml
parents:
diff
changeset
|
4 use autodie; |
1855203c2e6c
planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
nml
parents:
diff
changeset
|
5 use Getopt::Long; |
1855203c2e6c
planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
nml
parents:
diff
changeset
|
6 |
1855203c2e6c
planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
nml
parents:
diff
changeset
|
7 #quick and dirty script to combine a list of assembly stats tab files into a simple tab-delimited file where each row is one strain |
1855203c2e6c
planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
nml
parents:
diff
changeset
|
8 |
1855203c2e6c
planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
nml
parents:
diff
changeset
|
9 |
1855203c2e6c
planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
nml
parents:
diff
changeset
|
# Parse the command line: $files is a hash ref of strain name => stats file,
# $output is the path of the combined file to write.
my ($files, $output) = prepare_inputs();

# Strains are written in sorted (string) order.
my @strains = sort keys %{$files};

#get first file so we can determine the header
my $first = shift @strains;

open my $out, '>', $output;

# The first strain is processed with the header flag set, so the header row
# is printed once, ahead of its data row.
process($first, $files->{$first}, $out, 1);

# Every remaining strain contributes a data row only.
foreach my $name (@strains) {
    process($name, $files->{$name}, $out);
}

close $out;

exit;
1855203c2e6c
planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
nml
parents:
diff
changeset
|
37 |
1855203c2e6c
planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
nml
parents:
diff
changeset
|
# Append one strain's row to the combined table.
#
# Reads the "key:value" lines of a stats file, stopping at the first line
# matching "Simple Din...repeats", and prints the collected values as a single
# tab-separated row that starts with the strain name.
#
# Arguments:
#   $name   - strain name, written in the first column
#   $file   - stats file to read (passed straight to three-argument open)
#   $out    - filehandle already open for writing
#   $header - optional; when true, a header row ("Strain" followed by the keys
#             found in this file) is printed before the data row
#
# Returns nothing.
sub process {
    my ($name, $file, $out, $header) = @_;

    my @header = ("Strain");
    my @values = ($name);

    open my $in, '<', $file;
    while ( my $line = <$in> ) {
        chomp $line;

        next if length $line == 0;

        #if we hit this section, we are done reading this file since the rest we do not care about
        last if $line =~ /Simple Din.*repeats/;

        # Limit the split to two fields so a value that itself contains ':'
        # is kept whole instead of being cut at the second colon.
        my ($key, $value) = split /:/, $line, 2;

        # A line without any ':' carries no value to record.
        next if !defined $value;

        #trim out the tabs
        $key   =~ s/\t//g;
        $value =~ s/\t//g;

        # Test the length, not the truth, of the value: a stat that is "0"
        # must still get its column, otherwise every later column of this
        # row shifts left relative to the header.
        next if length $value == 0;

        push @header, $key;
        push @values, $value;
    }
    close $in;

    #check to see if we are printing out the header
    if ($header) {
        print {$out} join("\t", @header) . "\n";
    }
    print {$out} join("\t", @values) . "\n";

    return;
}
1855203c2e6c
planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
nml
parents:
diff
changeset
|
82 |
1855203c2e6c
planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
nml
parents:
diff
changeset
|
83 |
1855203c2e6c
planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
nml
parents:
diff
changeset
|
# Parse the command-line options.
#
# Options:
#   --stats NAME=FILE   may be repeated; collected into a hash of NAME => FILE
#   --output FILE       path of the combined file to write
#
# Dies when option parsing fails, when no --stats entry was given, or when
# --output is missing or empty.
#
# Returns a two-element list: (\%files, $output).
sub prepare_inputs {

    my ($output, %files);

    if (!GetOptions('stats=s'  => \%files,
                    'output=s' => \$output
                   )) {

        die "Invalid options given\n";
    }

    if ( scalar keys %files == 0 ) {
        die "No files given\n";
    }

    # Fail here with a clear message rather than later, when the caller
    # tries to open an undefined or empty filename.
    if ( !defined $output || length $output == 0 ) {
        die "No output file given\n";
    }

    return (\%files, $output);
}