view combine_stats.pl @ 0:1855203c2e6c draft default tip

planemo upload for repository https://github.com/phac-nml/galaxy_tools commit 132092ff7fe1c4810d1221054419389180b81657
author nml
date Wed, 08 Nov 2017 16:38:50 -0500
parents
children
line wrap: on
line source

#!/usr/bin/env perl
use strict;
use warnings;
use autodie;
use Getopt::Long;

#quick and dirty script to combine a list of assembly stats files into a single tab-delimited file where each row is one strain


#Main driver: read the strain => stats-file mapping and the output path from
#the command line, then write one row per strain to the output file.
my ($files,$output) = prepare_inputs();

#strains are written in alphabetical order of their name
my @strains = sort keys %{$files};

open my $out,'>',$output;

#only the first strain (in sorted order) emits the header row, so the column
#names are the keys found in that strain's stats file
my $print_header = 1;
foreach my $name (@strains) {
    process($name,$files->{$name},$out,$print_header);
    $print_header = 0;
}

close $out;

exit;

#Read the "key:value" statistics from one stats file and write them to $out
#as a single tab-separated row.
#
#Arguments:
#  $name   - strain name, written as the first ("Strain") column
#  $file   - stats file to read (anything accepted by 3-arg open for reading)
#  $out    - filehandle, already open for writing, that receives the row(s)
#  $header - optional; when true, a header row built from the keys of this
#            file is written before the value row
#
#Reading stops at the first line matching "Simple Din...repeats". Blank lines,
#lines without a ':' and keys whose value is empty are skipped.
sub process {
    my ($name,$file,$out,$header) = @_;

    my @header = ("Strain");
    my @values = ($name);

    open my $in,'<',$file;
    LINE:
    while ( my $line = <$in> ) {
        chomp $line;

        next LINE if length $line == 0;

        #if we hit this section, we are done reading this file since the rest we do not care about
        last LINE if $line =~ /Simple Din.*repeats/;

        #limit of 2 so a value that itself contains ':' is kept whole
        my ($key,$value) = split /:/, $line, 2;

        #no ':' on the line at all, so there is nothing to record
        next LINE if !defined $value;

        #trim out the tabs
        $key =~ s/\t//g;
        $value =~ s/\t//g;

        #test the length rather than truth so that a value of "0" is kept;
        #dropping it would shift every later column of this row
        if ( length $value ) {
            push @header,$key;
            push @values,$value;
        }
    }

    close $in;

    #check to see if we are printing out the header
    if ( $header ) {
        print {$out} join("\t",@header) . "\n";
    }
    print {$out} join("\t",@values) . "\n";

    return;
}


#Parse and validate the command line options.
#
#Options:
#  --stats NAME=FILE   may be repeated; each occurrence adds one entry to the
#                      strain-name => stats-file hash
#  --output FILE       path of the combined file to write
#
#Returns a list of (hash reference of name => file, output path).
#Dies if the options cannot be parsed, if no --stats entry was given, or if
#--output is missing or empty.
sub prepare_inputs {

    my %files;
    my $output;

    GetOptions(
        'stats=s'  => \%files,
        'output=s' => \$output,
    ) or die "Invalid options given\n";

    if ( !%files ) {
        die "No files given\n";
    }

    #fail early with a clear message instead of letting the later open() of
    #the output file fail on an undefined or empty path
    if ( !defined $output || $output eq '' ) {
        die "No output file given\n";
    }

    return (\%files,$output);
}