Mercurial > repos > jgarbe > rgttable
diff rgttable.pl @ 0:74c942b5f0da draft default tip
Uploaded
author | jgarbe |
---|---|
date | Wed, 27 Nov 2013 14:44:13 -0500 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/perl -w

###########################################################
# rgttable.pl
# John Garbe
# October 2013
#
# Read in a cuffdiff output file (any read_group_tracking file) and print out a file with
# tab-delimited columns: featureID, group1-sample1 fpkm, group1-sample2 fpkm, group2-sample1 fpkm, ...
#
###########################################################

=head1 NAME

rgttable.pl - Read in a cuffdiff output file (any read_group_tracking file) and print out a file with
tab-delimited columns: featureID, group1-rep1 fpkm, group1-rep2 fpkm, group2-rep1 fpkm, group2-rep2 fpkm, etc.

=head1 SYNOPSIS

rgttable.pl [-c column] genes.read_group_tracking [output.txt]

=head1 DESCRIPTION

This script converts read group tracking files produced by Cuffdiff into a format useful
for importing into other programs. This works with the isoforms, genes, cds, and tss_groups
read group tracking files generated by cuffdiff.

The input is read as tab-delimited text whose first line is a header. For every
following line, the first three fields (tracking id, condition, replicate) select
a cell of the output table and the field chosen with B<-c> (1-based, default 7)
supplies its value. The output has one row per tracking id and one column per
condition-replicate pair; both are sorted as strings.

If an output file is given, the table is written there and the progress message
goes to standard output; otherwise the table goes to standard output and the
progress message goes to standard error. A file name of C<-> means standard
input (for the input file) or standard output (for the output file).

=cut

use strict;
use warnings;

use Getopt::Std;

my $usage = "USAGE: rgttable.pl [-c column_number] genes.read_group_tracking [output.txt]\n";

die $usage if ($#ARGV < 0);

# parameters
my %opts;
getopts('c:', \%opts) or die "$usage\n";

# getopts strips the switches from @ARGV, so check again that an input file
# is actually left (e.g. "rgttable.pl -c 7" has none).
die $usage if (@ARGV < 1);

# Column to extract, 1-based on the command line. A missing (or "0") value
# falls back to 7, exactly as before.
my $column = $opts{'c'} || 7;
die "column number must be a positive integer, got '$column'\n$usage"
    unless $column =~ /^[0-9]+$/ && $column >= 1;
$column--;    # convert from 1-based index to 0-based index

my ($input_file, $output_file) = @ARGV;

# An output file is used only when exactly two arguments remain.
my $write_to_file = ($#ARGV == 1);

# open up input file ("-" keeps meaning standard input)
my $ifh;
if ($input_file eq '-') {
    $ifh = \*STDIN;
}
else {
    open $ifh, '<', $input_file
        or die "cannot open input file $input_file: $!\n";
}

# open up output file, or fall back to standard output
my $ofh;
if ($write_to_file && $output_file ne '-') {
    open $ofh, '>', $output_file
        or die "cannot open outputfile $output_file: $!\n";
}
else {
    $ofh = \*STDOUT;
}

# run through the input file, which has these columns:
# tracking_id condition replicate raw_frags internal_scaled_frags external_scaled_frags FPKM effective_length status
my $header_line = <$ifh>;
die "input file $input_file is empty\n" unless defined $header_line;
$header_line =~ s/[\r\n]+\z//;

# A limit of -1 keeps trailing empty fields so column positions stay stable.
my @header_fields = split /\t/, $header_line, -1;
die "column " . ($column + 1) . " does not exist: input file has only "
    . scalar(@header_fields) . " column(s)\n"
    if $column > $#header_fields;

# if an output file is specified, write to stdout for Galaxy compatibility
my $log_fh = $write_to_file ? \*STDOUT : \*STDERR;
print {$log_fh} "Extracting column " . ($column + 1) . ": $header_fields[$column]\n";

my %data;          # $data{tracking_id}{condition}{replicate} = extracted value
my %replicates;    # $replicates{condition}{replicate} = 1 for every pair seen

while (my $line = <$ifh>) {
    $line =~ s/[\r\n]+\z//;
    next if $line eq '';    # ignore blank lines

    my @fields = split /\t/, $line, -1;
    if (@fields < 3) {
        warn "skipping line $.: expected at least 3 tab-separated fields\n";
        next;
    }

    my ($feature, $condition, $replicate) = @fields[0 .. 2];
    my $value = defined $fields[$column] ? $fields[$column] : '';

    $data{$feature}{$condition}{$replicate} = $value;
    $replicates{$condition}{$replicate}     = 1;
}

# Build the column list once from every condition/replicate pair seen in the
# file, so all rows line up with the header even if a feature lacks a pair.
# Sorting is by string, matching the column order the script always produced.
my @columns;
for my $condition (sort keys %replicates) {
    for my $replicate (sort keys %{ $replicates{$condition} }) {
        push @columns, [ $condition, $replicate ];
    }
}

# print out the data: one header row, then one row for EVERY feature
if (%data) {
    print {$ofh} join("\t", 'tracking_id', map { "$_->[0]-$_->[1]" } @columns), "\n";

    for my $feature (sort keys %data) {
        my @cells = map {
            my $cell = $data{$feature}{ $_->[0] }{ $_->[1] };
            defined $cell ? $cell : '';    # missing pair -> empty cell
        } @columns;
        print {$ofh} join("\t", $feature, @cells), "\n";
    }
}

close $ifh;

# Buffered write errors (e.g. disk full) only surface at close.
close $ofh or die "cannot close output: $!\n";