comparison combine_stats.pl @ 0:1855203c2e6c draft default tip

planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
author nml
date Wed, 08 Nov 2017 16:38:50 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1855203c2e6c
1 #!/usr/bin/env perl
2 use strict;
3 use warnings;
4 use autodie;
5 use Getopt::Long;
6
7 # Quick and dirty script to combine a list of assembly stats tab files into a single tab-separated file where each row is one strain.
8
9
# Main flow: read the command-line options, then write one tab-separated
# row per strain to the output file. The header row is produced from the
# first strain's stats file only.
my ($files, $output) = prepare_inputs();

# Process strains in string-sorted order so the row order is reproducible.
my @strains = sort keys %{$files};

open my $out, '>', $output;

# Only the first (alphabetically smallest) strain emits the header row.
my $print_header = 1;
for my $name (@strains) {
    process($name, $files->{$name}, $out, $print_header);
    $print_header = 0;
}

# autodie turns a failed close (e.g. a flush error on a full disk) into an exception.
close $out;

exit;
37
# Append one strain's assembly statistics to the combined report.
#
# Reads the "key: value" lines of $file, stopping at the first line that
# matches "Simple Din...repeats" (everything from there on is ignored), and
# writes the collected values to $out as one tab-separated row whose first
# column is the strain name.
#
# Parameters:
#   $name   - strain name, written in the first ("Strain") column
#   $file   - path of the assembly stats file to read
#   $out    - open, writable filehandle receiving the rows
#   $header - optional; when true, a header row built from this file's keys
#             is printed before the value row
#
# Returns nothing. open/close failures are reported by autodie, which the
# script enables at the top of the file.
sub process {
    my ($name, $file, $out, $header) = @_;

    my @header = ('Strain');
    my @values = ($name);

    open my $in, '<', $file;

    LINE:
    while (my $line = <$in>) {
        chomp $line;

        next LINE if length $line == 0;

        # Once this section starts, nothing further in the file is of interest.
        last LINE if $line =~ /Simple Din.*repeats/;

        # Limit of 2: split on the first colon only, so a value that itself
        # contains a colon is kept whole instead of being truncated.
        my ($key, $value) = split /:/, $line, 2;

        # A line without any colon carries no value; skip it rather than
        # running a substitution on undef.
        next LINE if !defined $value;

        # Trim out the tabs used for indentation/alignment.
        $key   =~ s/\t//g;
        $value =~ s/\t//g;

        # Skip entries with an empty value (e.g. section titles). Test the
        # length, not the truthiness: a statistic of "0" is a real value and
        # dropping it would shift every following column for this strain.
        next LINE if length $value == 0;

        push @header, $key;
        push @values, $value;
    }

    close $in;

    # The header row is only wanted once, for the first file processed.
    if ($header) {
        print {$out} join("\t", @header) . "\n";
    }
    print {$out} join("\t", @values) . "\n";

    return;
}
82
83
# Parse and validate the command-line options.
#
# Options read from @ARGV:
#   --stats  NAME=FILE   repeatable; maps a strain name to its stats file
#   --output FILE        path of the combined report to write
#
# Returns a two-element list: a reference to the name => file hash, and the
# output path.
#
# Dies when the options cannot be parsed, when no --stats entry was given,
# or when --output is missing or empty.
sub prepare_inputs {
    my %files;
    my $output;

    GetOptions(
        'stats=s'  => \%files,
        'output=s' => \$output,
    ) or die "Invalid options given\n";

    if (scalar keys %files == 0) {
        die "No files given\n";
    }

    # Without this check a missing --output reaches open() as undef, which
    # Perl treats as a request for an anonymous temporary file: the script
    # would succeed while silently throwing the report away.
    if (!defined $output || length $output == 0) {
        die "No output file given\n";
    }

    return (\%files, $output);
}