#!/usr/bin/perl -w

###########################################################
# rgttable.pl
# John Garbe
# October 2013
#
# Read in a cuffdiff output file (any read_group_tracking file) and print out a file with
# tab-delimited columns: featureID, group1-sample1 fpkm, group1-sample2 fpkm, group2-sample1 fpkm, ...
#
###########################################################

=head1 NAME

rgttable.pl - Read in a cuffdiff output file (any read_group_tracking file) and print out a file with
tab-delimited columns: featureID, group1-rep1 fpkm, group1-rep2 fpkm, group2-rep1 fpkm, group2-rep2 fpkm, etc.

=head1 SYNOPSIS

rgttable.pl [-c column] genes.read_group_tracking [output.txt]

=head1 DESCRIPTION

This script converts read group tracking files produced by Cuffdiff into a format useful
for importing into other programs. This works with the isoforms, genes, cds, and tss_groups
read group tracking files generated by cuffdiff

=head1 OPTIONS

=over 4

=item B<-c> column

1-based number of the input column to extract. Defaults to 7, the FPKM column.

=back

If no output file is given (or it is C<->), the table is written to standard output.
An input file of C<-> is read from standard input.

=cut

use strict;
use warnings;
use Getopt::Std;

my $usage = "USAGE: rgttable.pl [-c column_number] genes.read_group_tracking [output.txt]\n";

# parameters
my %opts;
getopts('c:', \%opts) or die "$usage\n";

# The input file must still be present AFTER the options have been consumed;
# checking before getopts would let "rgttable.pl -c 7" slip through.
die $usage if @ARGV < 1;

my ($input_name, $output_name) = @ARGV;

# An output file counts as "specified" only when exactly two arguments remain.
my $have_output_file = (@ARGV == 2);

# column to extract (1-based on the command line); an absent, empty or "0"
# value falls back to the default of 7
my $column = $opts{'c'} || 7;
die "column number must be a positive integer, got '$column'\n$usage"
    unless $column =~ /\A[0-9]+\z/ && $column > 0;
$column--;    # convert from 1-based index to 0-based index

# open up input file ("-" means standard input)
my $in;
if ($input_name eq '-') {
    $in = \*STDIN;
}
else {
    # three-argument open: the file name is never interpreted as a mode or pipe
    open $in, '<', $input_name
        or die "cannot open input file $input_name: $!\n";
}

# open up output file, or fall back to standard output
my $out;
if ($have_output_file && $output_name ne '-') {
    open $out, '>', $output_name
        or die "cannot open outputfile $output_name: $!\n";
}
else {
    $out = \*STDOUT;
}

# run through the input file, which has these columns:
# tracking_id condition replicate raw_frags internal_scaled_frags external_scaled_frags FPKM effective_length status
my $header = <$in>;
die "input file $input_name is empty\n" unless defined $header;
chomp $header;
my @column_names = split /\t/, $header;
die "column " . ($column + 1) . " does not exist: header of $input_name has "
    . scalar(@column_names) . " column(s)\n"
    unless defined $column_names[$column];

# if an output file is specified, write to stdout for Galaxy compatibility
my $message = "Extracting column " . ($column + 1) . ": $column_names[$column]\n";
if ($have_output_file) {
    print STDOUT $message;
}
else {
    print STDERR $message;
}

my %data;             # feature => condition => replicate => extracted value
my %replicates_of;    # condition => { replicate => 1 }, over ALL features
while (my $line = <$in>) {
    chomp $line;
    next if $line eq '';    # ignore blank lines
    my @fields = split /\t/, $line;
    if (@fields < 3) {
        warn "skipping malformed line $. of $input_name (fewer than 3 columns)\n";
        next;
    }
    my ($feature, $condition, $replicate) = @fields[0 .. 2];
    my $value = $fields[$column];
    $data{$feature}{$condition}{$replicate} = defined $value ? $value : '';
    $replicates_of{$condition}{$replicate} = 1;
}
if ($input_name ne '-') {
    close $in or die "error reading $input_name: $!\n";
}

# Fix the column layout once, from the union of every condition/replicate
# pair seen, so that all rows line up with the header even if some feature
# lacks a pair.
my @columns;
for my $condition (sort keys %replicates_of) {
    for my $replicate (sort keys %{ $replicates_of{$condition} }) {
        push @columns, [ $condition, $replicate ];
    }
}

# print out the data (nothing at all is written when there are no data rows)
if (%data) {
    # The header is printed on its own, before the feature loop, so that the
    # first feature still gets its data row.
    my @labels = map { join('-', @{$_}) } @columns;
    print {$out} join("\t", 'tracking_id', @labels), "\n"
        or die "error writing output: $!\n";

    for my $feature (sort keys %data) {
        my @row = ($feature);
        for my $pair (@columns) {
            my ($condition, $replicate) = @{$pair};
            # exists-guard avoids autovivifying conditions a feature lacks
            my $value = exists $data{$feature}{$condition}
                ? $data{$feature}{$condition}{$replicate}
                : undef;
            push @row, defined $value ? $value : '';
        }
        print {$out} join("\t", @row), "\n"
            or die "error writing output: $!\n";
    }
}

# buffered write errors (e.g. disk full) only surface at close
close $out or die "error writing output: $!\n";