view rsem/rsem-wrapper-1.1.17.pl @ 0:4edac0183857

Initial commit from tarball version 1.17
author victor
date Mon, 05 Mar 2012 11:12:34 -0500
parents
children
line wrap: on
line source

#!/usr/bin/perl 


use Data::Dumper;
use Getopt::Long;
use Pod::Usage;



#pod2usage(-verbose => 1) if ($help == 1);
#if (@ARGV == 0) {
#	pod2usage(-msg => "Invalid number of arguments!", -exitval => 2, -verbose => 2);
#}

my $rsem_version = "/opt/rsem-1.1.17";
my $minL = 1;
my $maxL = 1000;
my $NMB = 1024;

# Extra file output #beta
# --isoformfile $isoforms 
# --thetafile $theta 
# --cntfile $cnt 
# --modelfile $model 
# --bamfile $bam_res

GetOptions(
    "log=s"                  => \$log,
    "bam_genome=s"           => \$bam_genome,
    "bamtype=s"              => \$bamtype,
    "isoformfile=s"          => \$isoforms,
    "reference=s"            => \$dbref,
    "sampling-for-bam=s"     => \$samplingbam,
    "thetafile=s"            => \$theta,
    "cntfile=s"              => \$cnt,
    "modelfile=s"            => \$model,
    "bamfile=s"              => \$bamfile,
    "output=s"               => \$output,
    "single_fasta=s"         => \$single_fasta,
    "fasta1=s"               => \$fasta1,
    "fasta2=s"               => \$fasta2,
    "single_fastq=s"         => \$single_fastq,
    "fastq1=s"               => \$fastq1,
    "fastq2=s"               => \$fastq2,
    "no-qualities"           => \$no_qual,
    "paired-end"             => \$paired_end,
    "sam"                    => \$is_sam,
    "bam"                    => \$is_bam,
    "sam-header-info=s"      => \$fn_list,
    "tag=s"                  => \$tagName,
    "seed-length=i"          => \$L,
    "bowtie-path=s"          => \$bowtie_path,
    "bowtie-n=i"             => \$C,
    "bowtie-e=i"             => \$E,
    "bowtie-m=i"             => \$maxHits,
    "phred33-quals"          => \$phred33,
    "phred64-quals"          => \$phred64,
    "solexa-quals"           => \$solexa,
    "forward-prob=f"         => \$probF,
    "fragment-length-min=i"  => \$minL,
    "fragment-length-max=i"  => \$maxL,
    "fragment-length-mean=f" => \$mean,
    "fragment-length-sd=f"   => \$sd,
    "estimate-rspd=s"        => \$estRSPD,
    "num-rspd-bins=i"        => \$B,
    "p|num-threads=i"        => \$nThreads,
    "output-genome-bam"      => \$genBamF,
    "calc-ci=s"              => \$calcCI,
    "ci-memory=i"            => \$NMB,
    "time"                   => \$mTime,
    "q|quiet"                => \$quiet,
) or pod2usage( -exitval => 2, -verbose => 2 );

#check parameters and options

if ($is_sam || $is_bam) {
    pod2usage(-msg => "from rsem-wrapper->Invalid number of arguments!", -exitval => 2, -verbose => 2) if (scalar(@ARGV) != 4);
    pod2usage(-msg => "--sam and --bam cannot be active at the same time!", -exitval => 2, -verbose => 2) if ($is_sam == 1&& $is_bam == 1);
    pod2usage(-msg => "--bowtie-path, --bowtie-n, --bowtie-e, --bowtie-m, --phred33-quals, --phred64-quals or --solexa-quals cannot be set if input is SAM/BAM format!", -exitval => 2, -verbose => 2) if ($bowtie_path ne "" || $C != 2 || $E != 99999999 || $maxHits != 200 || $phred33 || $phred64 || $solexa);
}
#else {
#    pod2usage(-msg => "from rsem-wraper->Invalid number of arguments!", -exitval => 2, -verbose => 2) 
#	if (!$paired_end && scalar(@ARGV) != 1 || $paired_end && scalar(@ARGV) != 1);    
#    pod2usage(-msg => "Only one of --phred33-quals --phred64-quals/--solexa1.3-quals --solexa-suqls can be active!", -exitval => 2, -verbose => 2) if ($phred33 + $phred64 + $solexa > 1);    
#    podwusage(-msg => "--sam , --bam or --sam-header-info cannot be set if use bowtie aligner to produce alignments!", -exitval => 2, -verbose => 2) if ($is_sam || $is_bam || $fn_list ne "");
#}

pod2usage(-msg => "Forward probability should be in [0, 1]!", -exitval => 2, -verbose => 2) if ($probF < 0 || $probF > 1);
pod2usage(-msg => "Min fragment length should be at least 1!", -exitval => 2, -verbose => 2) if ($minL < 1);
pod2usage(-msg => "Min fragment length should be smaller or equal to max fragment length!", -exitval => 2, -verbose => 2) if ($minL > $maxL);
pod2usage(-msg => "The memory allocated for calculating credibility intervals should be at least 1 MB!\n", -exitval => 2, -verbose => 2) if ($NMB < 1);
pod2usage(-msg => "Number of threads should be at least 1!\n", -exitval => 2, -verbose => 2) if ($nThreads < 1);

# IO Redirection to log file
use IO::Handle;
open OUTPUT, '>', $log or die "cant open file $log $!\n";;
open ERROR,  '>>', $log  or die "cant open file $log $!\n";
STDOUT->fdopen( \*OUTPUT, 'w' ) or die "cant open file $!\n";
STDERR->fdopen( \*ERROR,  'w' ) or die "cant open file $!\n";
# 

my @options;

# generates new output called sample_name.genome.bam 
# with alignments
# mapped to genomic coordinates and annotated with their posterior
# probabilities. In addition, RSEM will call samtools (included in
# RSEM package) to sort and index the bam file.
# 'sample_name.genome.sorted.bam' and
# 'sample_name.genome.sorted.bam.bai' will be generated. (Default: off)

if ($bamtype eq "yes") {
    my $bam_genome_par = "--output-genome-bam";
    push @options, $bam_genome_par;
}
if ($samplingbam eq "yes") {
    my $samplingbam = "--sampling-for-bam";
    push @options, $samplingbam;
}
if ($estRSPD eq "yes") {
    my $rspd = "--estimate-rspd";
    push @options, $rspd;
}
$probF = "--forward-prob $probF";
push @options, $probF;

if ($calcCI eq "yes") {
    my $calcCI = "--calc-ci";
    push @options, $calcCI;
    my $cimem = "--ci-memory $NMB";
    push @options, $cimem;
}
if ($tagName) {
    my $tagName = "--tag $tagName";
    push @options, $tagName;
}
if ($L) {
	my $L = "--seed-length $L";
	push @options, $L;
}
if ($C) {
    my $C = "--bowtie-n $C";
    push @options, $C;
}
if ($E) {
    my $E = "--bowtie-e $E";
    push @options, $E;
}
if ($maxHits) {
    my $maxHits = "--bowtie-m $maxHits";
    push @options, $maxHits;
}
if ($minL != 1) {
    my $minL = "--fragment-length-min $minL";
    push @options, $minL;
}
if ($maxL != 1000) {
    my $maxL = "--fragment-length-max $maxL";
    push @options, $maxL;
}
if ($mean) {
    my $mean = "--fragment-length-mean $mean";
    push @options, $mean;
}
if ($sd) {
    my $sd = "--fragment-length-sd $sd";
    push @options, $sd;
}
my $options=  join(" ", @options);

#BUILD COMMAND BASED ON PARSED OPTIONS
if ($no_qual) { 
        #reads are in fasta file format
	if ($paired_end) { # reads are in paired end
	    my $cmd = "$rsem_version/rsem-calculate-expression --quiet --no-qualities --paired-end -p $nThreads $options $fasta1 $fasta2 $dbref $output";
 	    print "RSEM Parameters used by Galaxy:\n$cmd\n";
	    system($cmd);
	}
	#run single end with one fasta file
	else {
	my $cmd = "$rsem_version/rsem-calculate-expression --quiet --no-qualities -p $nThreads $options $single_fasta $dbref $output";
 	print "RSEM Parameters used by Galaxy:\n$cmd\n";
	system($cmd);
	}
}
else {
    # reads are in fastq file format
    # type of fastq file?
	my $fastqtype;
	if ($phred33) {
		$fastqtype = "--phred33-quals";
	}
	elsif ($phred64) {
		$fastqtype = "--phred64-quals";
	}
	elsif ($solexa) {
		$fastqtype = "--solexa-quals";
	}
	if ($paired_end) { 
	#reads in paired end 
        #run paired end with two fasq files
	    my $cmd = "$rsem_version/rsem-calculate-expression --quiet --paired-end -p $nThreads $options $fastqtype $fastq1 $fastq2 $dbref $output";
 	    print "RSEM Parameters used by Galaxy:\n$cmd\n";
	    system($cmd);
	}
	else { 
	my $cmd = "$rsem_version/rsem-calculate-expression --quiet -p $nThreads $options $fastqtype $single_fastq $dbref $output";
 	print "RSEM Parameters used by Galaxy:\n$cmd\n";
	system($cmd);
	}
}

 #Rename files for galaxy
my $mv_genes = "mv $output.genes.results $output";
my $mv_isoforms = "mv $output.isoforms.results $isoforms";

#print "bamtype-parameter=$bamtype\n";
my $mv_bam_transcript;
my $mv_bam_genome;
if ($bamtype eq "yes") {
  $mv_bam_genome = "mv $output.genome.sorted.bam $bam_genome";
  system($mv_bam_genome);
}

$mv_bam_transcript = "mv $output.transcript.sorted.bam $bamfile";

my @rsem_dir = split(/\//, $output);
my $short_output = $rsem_dir[-1];
my $mv_theta = "mv $output.stat/$short_output.theta $theta";
my $mv_cnt = "mv $output.stat/$short_output.cnt $cnt";
my $mv_model = "mv $output.stat/$short_output.model $model";
system($mv_genes);
system($mv_isoforms);
system($mv_bam_transcript);
#system($mv_theta);
#system($mv_cnt);
#system($mv_model);
#print "LOG $mv\n";