diff combine_stats.pl @ 0:1855203c2e6c draft default tip

planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
author nml
date Wed, 08 Nov 2017 16:38:50 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/combine_stats.pl	Wed Nov 08 16:38:50 2017 -0500
@@ -0,0 +1,103 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use autodie;
+use Getopt::Long;
+
+#quick and dirty script to combine a list of assembly stats tab files into a simple tab-delimited file where each row is one strain
+
+
+#Main flow: build one report with a single header row followed by one row per strain.
+#  $files  - hash ref mapping each strain name to the path of its stats file
+#  $output - path of the combined report to create
+my ($files,$output) = prepare_inputs();
+
+
+#plain string sort, so the row order is reproducible from run to run
+my @strains = sort keys %{$files};
+
+
+#autodie (loaded above) makes a failed open or close fatal, so neither is checked by hand
+open my $out,'>',$output;
+
+#only the first strain's file supplies the column names; process() writes every
+#later row positionally, so all files are assumed to list the same statistics
+#in the same order
+my $print_header = 1;
+foreach my $name (@strains) {
+    process($name,$files->{$name},$out,$print_header);
+    $print_header = 0;
+}
+
+
+close $out;
+
+
+exit;
+
+#Append one strain's statistics to the combined report as a tab-separated row.
+#  $name   - strain label, written in the first ("Strain") column
+#  $file   - path to the strain's stats file, read as "key:value" lines
+#  $out    - filehandle the report rows are printed to
+#  $header - optional; when true, the column-name row is printed before the values
+#Reading stops at the "Simple Din...repeats" line; open/close failures are fatal (autodie).
+sub process {
+    my ($name,$file,$out,$header) = @_;
+    my @header = ("Strain");
+    my @values = ($name);
+
+    open my $in,'<',$file;
+    while ( my $line = <$in> ) {
+        chomp $line;
+
+        #if we hit this section, we are done reading this file since the rest we do not care about
+        last if $line =~ /Simple Din.*repeats/;
+
+        #a limit of 2 keeps any later colon inside the value instead of discarding it
+        my ($key,$value) = split /:/, $line, 2;
+
+        #blank lines and lines without a colon carry no statistic (and would warn below)
+        next if !defined $value;
+
+        #trim out the tabs
+        $key =~ s/\t//g;
+        $value =~ s/\t//g;
+
+        #section titles have nothing after the colon; compare against '' rather than
+        #testing truth so that a statistic whose value is "0" keeps its column
+        next if $value eq '';
+
+        push @header,$key;
+        push @values,$value;
+    }
+    close $in;
+
+    #check to see if we are printing out the header
+    if ( $header) {
+        print {$out} join("\t",@header) . "\n";
+    }
+    print {$out} join("\t",@values) . "\n";
+    return;
+}
+
+
+#Read the command line: repeated "--stats NAME=FILE" pairs plus "--output PATH".
+#Returns (\%files, $output), where %files maps each NAME to its FILE.
+#Dies when the options cannot be parsed, no --stats pair is given, or --output
+#is missing.
+sub prepare_inputs {
+    my ($output,%files);
+
+    #a hash destination tells Getopt::Long to treat each --stats value as key=value
+    GetOptions('stats=s'  => \%files,
+               'output=s' => \$output)
+        or die "Invalid options given\n";
+
+    die "No files given\n" if !%files;
+
+    #fail here with a clear message instead of letting the later open() of an
+    #undefined path die with an obscure error
+    die "No output file given\n" if !defined $output || $output eq '';
+
+    return (\%files,$output);
+}