view rgttable.pl @ 0:74c942b5f0da draft default tip

Uploaded
author jgarbe
date Wed, 27 Nov 2013 14:44:13 -0500
parents
children
line wrap: on
line source

#!/usr/bin/perl -w

###########################################################
# rgttable.pl
# John Garbe
# October 2013
#
# Read in a cuffdiff output file (any read_group_tracking file) and print out a file with 
# tab-delimited columns: featureID, group1-sample1 fpkm, group1-sample2 fpkm, group2-sample1 fpkm, ...
#
###########################################################

=head1 NAME

rgttable.pl - Read in a cuffdiff output file (any read_group_tracking file) and print out a file with
 tab-delimited columns: featureID, group1-rep1 fpkm, group1-rep2 fpkm, group2-rep1 fpkm, group2-rep2 fpkm, etc.

=head1 SYNOPSIS

rgttable.pl [-c column] genes.read_group_tracking [output.txt]

=head1 DESCRIPTION

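This script converts read group tracking files produced by Cuffdiff into a format useful 
for importing into other programs. This works with the isoforms, genes, cds, and tss_groups 
read group tracking files generated by Cuffdiff. Use B<-c> to choose which input column 
(counting from 1) is extracted; the default is column 7, the FPKM column.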

=cut

use Getopt::Std;

# Main script body.
#
# Reads a Cuffdiff read_group_tracking file (long format: one line per
# feature/condition/replicate) and writes a wide, tab-delimited table with
# one row per feature and one column per condition-replicate pair.
#
# Arguments (see $usage):
#   -c N        1-based input column to extract (default 7, the FPKM column)
#   input file  read_group_tracking file to read
#   output file optional; when omitted the table is written to STDOUT
#
# Dies with a message when the arguments are unusable or a file cannot be
# opened or written.
use strict;
use warnings;

my $usage = "USAGE: rgttable.pl [-c column_number] genes.read_group_tracking [output.txt]\n";

# parameters
my %opts;
getopts('c:', \%opts) or die "$usage\n";

# getopts() removes the options it parsed from @ARGV, so the check for the
# mandatory input file has to happen afterwards.
die $usage if ($#ARGV < 0);

my $column = $opts{'c'} || 7; # column to extract
die "column number must be a positive integer\n$usage"
    unless $column =~ /\A[0-9]+\z/ && $column >= 1;
$column--; # convert from 1-based index to 0-based index

my $input_path    = $ARGV[0];
my $write_to_file = ($#ARGV == 1);

# Three-argument open: the file names are taken literally, so characters
# such as '>' or '|' in a name can never be interpreted as a mode or command.
open my $in, '<', $input_path or die "cannot open input file $input_path\n";
my $out;
if ($write_to_file) {
    open $out, '>', $ARGV[1] or die "cannot open outputfile $ARGV[1]\n";
} else {
    $out = \*STDOUT;
}

# The input file has these columns:
# tracking_id condition replicate raw_frags internal_scaled_frags external_scaled_frags FPKM effective_length status
my $header = <$in>;
$header = '' unless defined $header; # empty input file
chomp $header;
my @header_fields = split /\t/, $header, -1;
my $column_name = defined $header_fields[$column] ? $header_fields[$column] : '';

# Progress message: when the table goes to a file the message is printed on
# STDOUT (for Galaxy compatibility); when the table itself goes to STDOUT the
# message is sent to STDERR so it does not end up inside the table.
my $log = $write_to_file ? \*STDOUT : \*STDERR;
print {$log} "Extracting column " . ($column + 1) . ": $column_name\n";

# $data{feature}{condition}{replicate} = value of the selected column
my %data;
while (my $line = <$in>) {
    chomp $line;
    next if $line eq ''; # a blank line would otherwise become a bogus feature
    # Limit -1 keeps trailing empty fields so column positions stay stable.
    my @fields = split /\t/, $line, -1;
    my $value = $fields[$column];
    if (!defined $value) {
        warn "line $. of $input_path has no column " . ($column + 1) . "\n";
        $value = '';
    }
    $data{$fields[0]}{$fields[1]}{$fields[2]} = $value;
}
close $in;

# Collect every condition/replicate pair seen in any feature, so that the
# header and all rows share one column layout even if a feature lacks a
# replicate.
my %replicates_of;
for my $feature (keys %data) {
    for my $condition (keys %{ $data{$feature} }) {
        for my $replicate (keys %{ $data{$feature}{$condition} }) {
            $replicates_of{$condition}{$replicate} = 1;
        }
    }
}
my @columns; # list of [condition, replicate], in output order
for my $condition (sort keys %replicates_of) {
    for my $replicate (sort keys %{ $replicates_of{$condition} }) {
        push @columns, [ $condition, $replicate ];
    }
}

# print out the data
my @features = sort keys %data;

# The header row is printed on its own, before any data row. (Printing it in
# place of the first feature's row would silently drop that feature.)
if (@features) {
    my @labels = ('tracking_id');
    for my $pair (@columns) {
        my ($condition, $replicate) = @{$pair};
        push @labels, "$condition-$replicate";
    }
    print {$out} join("\t", @labels), "\n";
}

for my $feature (@features) {
    my @cells = ($feature);
    for my $pair (@columns) {
        my ($condition, $replicate) = @{$pair};
        my $cell = $data{$feature}{$condition}{$replicate};
        push @cells, (defined $cell ? $cell : ''); # empty cell when missing
    }
    print {$out} join("\t", @cells), "\n";
}

# Buffered write errors (e.g. a full disk) only surface when the handle is closed.
close $out or die "error writing output\n";