diff rgttable.pl @ 0:74c942b5f0da draft default tip

Uploaded
author jgarbe
date Wed, 27 Nov 2013 14:44:13 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgttable.pl	Wed Nov 27 14:44:13 2013 -0500
@@ -0,0 +1,88 @@
+#!/usr/bin/perl -w
+
+###########################################################
+# rgttable.pl
+# John Garbe
+# October 2013
+#
+# Read in a cuffdiff output file (any read_group_tracking file) and print out a file with 
+# tab-delimited columns: featureID, group1-sample1 fpkm, group1-sample2 fpkm, group2-sample1 fpkm, ...
+#
+###########################################################
+
+=head1 NAME
+
+rgttable.pl - Read in a cuffdiff output file (any read_group_tracking file) and print out a file with
+ tab-delimited columns: featureID, group1-rep1 fpkm, group1-rep2 fpkm, group2-rep1 fpkm, group2-rep2 fpkm, etc.
+
+=head1 SYNOPSIS
+
+rgttable.pl [-c column] genes.read_group_tracking [output.txt]
+
+=head1 DESCRIPTION
+
+This script converts read group tracking files produced by Cuffdiff into a format useful
+for importing into other programs. It works with the isoforms, genes, cds, and tss_groups
+read group tracking files generated by Cuffdiff. By default column 7 (FPKM) is extracted; use -c to select another column.
+
+=cut
+
+use Getopt::Std;
+
+$usage = "USAGE: rgttable.pl [-c column_number] genes.read_group_tracking [output.txt]\n";
+
+# Parse options before counting arguments, so that "-c N" alone is not taken for an input file.
+getopts('c:', \%opts) or die "$usage\n";
+die $usage if ($#ARGV < 0);
+
+# 1-based column to extract (default 7, the FPKM column), converted to a 0-based index.
+$column = $opts{'c'} || 7;
+die "invalid column number '$column'\n$usage" unless ($column =~ /^[0-9]+$/ && $column > 0);
+$column--;
+
+# Open the input file, then the output file if one was named (stdout otherwise).
+# Three-argument open keeps characters in a file name from changing the open mode.
+open IFILE, '<', $ARGV[0] or die "cannot open input file $ARGV[0]: $!\n";
+if ($#ARGV == 1) {
+    open OFILE, '>', $ARGV[1] or die "cannot open outputfile $ARGV[1]: $!\n";
+} else {
+    open OFILE, ">-" or die "cannot open stdout: $!\n";
+}
+
+# The input file has a header line followed by rows with these columns:
+# tracking_id condition replicate raw_frags internal_scaled_frags external_scaled_frags FPKM effective_length status
+$header = <IFILE>;
+die "input file $ARGV[0] is empty\n" unless (defined $header);
+chomp $header; # keep the newline out of the name of the last column
+@line = split /\t/, $header;
+die "input file has no column " . ($column+1) . "\n" if ($column > $#line);
+# The status message goes to stdout when the table goes to a file (for Galaxy compatibility), else to stderr.
+$log = ($#ARGV == 1) ? \*STDOUT : \*STDERR;
+print {$log} "Extracting column " . ($column+1) . ": $line[$column]\n";
+while ($line = <IFILE>) {
+    chomp $line;
+    next unless ($line =~ /\S/); # a blank line is not a record
+    @line = split /\t/, $line;
+    $data{$line[0]}{$line[1]}{$line[2]} = $line[$column];
+}
+
+$header = 1; # true until the header row has been printed
+# Print the table. The first feature supplies the column titles for the header
+# row and is then printed as a data row itself, like every other feature.
+for $feature (sort keys %data) {
+    @titles = ("tracking_id");
+    @values = ($feature);
+    for $condition (sort keys %{ $data{$feature} }) {
+        for $replicate (sort keys %{ $data{$feature}{$condition} }) {
+            push @titles, "$condition-$replicate" if ($header);
+            push @values, $data{$feature}{$condition}{$replicate};
+        }
+    }
+    print OFILE join("\t", @titles), "\n" if ($header);
+    print OFILE join("\t", @values), "\n";
+    $header = 0;
+}
+
+close IFILE;
+# Buffered write errors (such as a full disk) only surface when the handle is closed.
+close OFILE or die "cannot write output: $!\n";