diff rsem/rsem-wrapper-1.1.17.pl @ 0:4edac0183857

Initial commit from tarball version 1.17
author victor
date Mon, 05 Mar 2012 11:12:34 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rsem/rsem-wrapper-1.1.17.pl	Mon Mar 05 11:12:34 2012 -0500
@@ -0,0 +1,238 @@
+#!/usr/bin/perl 
+
+
+use Data::Dumper;
+use Getopt::Long;
+use Pod::Usage;
+
+
+
+#pod2usage(-verbose => 1) if ($help == 1);
+#if (@ARGV == 0) {
+#	pod2usage(-msg => "Invalid number of arguments!", -exitval => 2, -verbose => 2);
+#}
+
+my $rsem_version = "/opt/rsem-1.1.17";
+my $minL = 1;
+my $maxL = 1000;
+my $NMB = 1024;
+
+# Extra file output #beta
+# --isoformfile $isoforms 
+# --thetafile $theta 
+# --cntfile $cnt 
+# --modelfile $model 
+# --bamfile $bam_res
+
+GetOptions(
+    "log=s"                  => \$log,
+    "bam_genome=s"           => \$bam_genome,
+    "bamtype=s"              => \$bamtype,
+    "isoformfile=s"          => \$isoforms,
+    "reference=s"            => \$dbref,
+    "sampling-for-bam=s"     => \$samplingbam,
+    "thetafile=s"            => \$theta,
+    "cntfile=s"              => \$cnt,
+    "modelfile=s"            => \$model,
+    "bamfile=s"              => \$bamfile,
+    "output=s"               => \$output,
+    "single_fasta=s"         => \$single_fasta,
+    "fasta1=s"               => \$fasta1,
+    "fasta2=s"               => \$fasta2,
+    "single_fastq=s"         => \$single_fastq,
+    "fastq1=s"               => \$fastq1,
+    "fastq2=s"               => \$fastq2,
+    "no-qualities"           => \$no_qual,
+    "paired-end"             => \$paired_end,
+    "sam"                    => \$is_sam,
+    "bam"                    => \$is_bam,
+    "sam-header-info=s"      => \$fn_list,
+    "tag=s"                  => \$tagName,
+    "seed-length=i"          => \$L,
+    "bowtie-path=s"          => \$bowtie_path,
+    "bowtie-n=i"             => \$C,
+    "bowtie-e=i"             => \$E,
+    "bowtie-m=i"             => \$maxHits,
+    "phred33-quals"          => \$phred33,
+    "phred64-quals"          => \$phred64,
+    "solexa-quals"           => \$solexa,
+    "forward-prob=f"         => \$probF,
+    "fragment-length-min=i"  => \$minL,
+    "fragment-length-max=i"  => \$maxL,
+    "fragment-length-mean=f" => \$mean,
+    "fragment-length-sd=f"   => \$sd,
+    "estimate-rspd=s"        => \$estRSPD,
+    "num-rspd-bins=i"        => \$B,
+    "p|num-threads=i"        => \$nThreads,
+    "output-genome-bam"      => \$genBamF,
+    "calc-ci=s"              => \$calcCI,
+    "ci-memory=i"            => \$NMB,
+    "time"                   => \$mTime,
+    "q|quiet"                => \$quiet,
+) or pod2usage( -exitval => 2, -verbose => 2 );
+
+#check parameters and options
+
+if ($is_sam || $is_bam) {
+    pod2usage(-msg => "from rsem-wrapper->Invalid number of arguments!", -exitval => 2, -verbose => 2) if (scalar(@ARGV) != 4);
+    pod2usage(-msg => "--sam and --bam cannot be active at the same time!", -exitval => 2, -verbose => 2) if ($is_sam == 1&& $is_bam == 1);
+    pod2usage(-msg => "--bowtie-path, --bowtie-n, --bowtie-e, --bowtie-m, --phred33-quals, --phred64-quals or --solexa-quals cannot be set if input is SAM/BAM format!", -exitval => 2, -verbose => 2) if ($bowtie_path ne "" || $C != 2 || $E != 99999999 || $maxHits != 200 || $phred33 || $phred64 || $solexa);
+}
+#else {
+#    pod2usage(-msg => "from rsem-wraper->Invalid number of arguments!", -exitval => 2, -verbose => 2) 
+#	if (!$paired_end && scalar(@ARGV) != 1 || $paired_end && scalar(@ARGV) != 1);    
+#    pod2usage(-msg => "Only one of --phred33-quals --phred64-quals/--solexa1.3-quals --solexa-suqls can be active!", -exitval => 2, -verbose => 2) if ($phred33 + $phred64 + $solexa > 1);    
+#    podwusage(-msg => "--sam , --bam or --sam-header-info cannot be set if use bowtie aligner to produce alignments!", -exitval => 2, -verbose => 2) if ($is_sam || $is_bam || $fn_list ne "");
+#}
+
+pod2usage(-msg => "Forward probability should be in [0, 1]!", -exitval => 2, -verbose => 2) if ($probF < 0 || $probF > 1);
+pod2usage(-msg => "Min fragment length should be at least 1!", -exitval => 2, -verbose => 2) if ($minL < 1);
+pod2usage(-msg => "Min fragment length should be smaller or equal to max fragment length!", -exitval => 2, -verbose => 2) if ($minL > $maxL);
+pod2usage(-msg => "The memory allocated for calculating credibility intervals should be at least 1 MB!\n", -exitval => 2, -verbose => 2) if ($NMB < 1);
+pod2usage(-msg => "Number of threads should be at least 1!\n", -exitval => 2, -verbose => 2) if ($nThreads < 1);
+
+# IO Redirection to log file
+use IO::Handle;
+open OUTPUT, '>', $log or die "cant open file $log $!\n";;
+open ERROR,  '>>', $log  or die "cant open file $log $!\n";
+STDOUT->fdopen( \*OUTPUT, 'w' ) or die "cant open file $!\n";
+STDERR->fdopen( \*ERROR,  'w' ) or die "cant open file $!\n";
+# 
+
+my @options;
+
+# generates new output called sample_name.genome.bam 
+# with alignments
+# mapped to genomic coordinates and annotated with their posterior
+# probabilities. In addition, RSEM will call samtools (included in
+# RSEM package) to sort and index the bam file.
+# 'sample_name.genome.sorted.bam' and
+# 'sample_name.genome.sorted.bam.bai' will be generated. (Default: off)
+
+if ($bamtype eq "yes") {
+    my $bam_genome_par = "--output-genome-bam";
+    push @options, $bam_genome_par;
+}
+if ($samplingbam eq "yes") {
+    my $samplingbam = "--sampling-for-bam";
+    push @options, $samplingbam;
+}
+if ($estRSPD eq "yes") {
+    my $rspd = "--estimate-rspd";
+    push @options, $rspd;
+}
+$probF = "--forward-prob $probF";
+push @options, $probF;
+
+if ($calcCI eq "yes") {
+    my $calcCI = "--calc-ci";
+    push @options, $calcCI;
+    my $cimem = "--ci-memory $NMB";
+    push @options, $cimem;
+}
+if ($tagName) {
+    my $tagName = "--tag $tagName";
+    push @options, $tagName;
+}
+if ($L) {
+	my $L = "--seed-length $L";
+	push @options, $L;
+}
+if ($C) {
+    my $C = "--bowtie-n $C";
+    push @options, $C;
+}
+if ($E) {
+    my $E = "--bowtie-e $E";
+    push @options, $E;
+}
+if ($maxHits) {
+    my $maxHits = "--bowtie-m $maxHits";
+    push @options, $maxHits;
+}
+if ($minL != 1) {
+    my $minL = "--fragment-length-min $minL";
+    push @options, $minL;
+}
+if ($maxL != 1000) {
+    my $maxL = "--fragment-length-max $maxL";
+    push @options, $maxL;
+}
+if ($mean) {
+    my $mean = "--fragment-length-mean $mean";
+    push @options, $mean;
+}
+if ($sd) {
+    my $sd = "--fragment-length-sd $sd";
+    push @options, $sd;
+}
+my $options=  join(" ", @options);
+
+#BUILD COMMAND BASED ON PARSED OPTIONS
+if ($no_qual) { 
+        #reads are in fasta file format
+	if ($paired_end) { # reads are in paired end
+	    my $cmd = "$rsem_version/rsem-calculate-expression --quiet --no-qualities --paired-end -p $nThreads $options $fasta1 $fasta2 $dbref $output";
+ 	    print "RSEM Parameters used by Galaxy:\n$cmd\n";
+	    system($cmd);
+	}
+	#run single end with one fasta file
+	else {
+	my $cmd = "$rsem_version/rsem-calculate-expression --quiet --no-qualities -p $nThreads $options $single_fasta $dbref $output";
+ 	print "RSEM Parameters used by Galaxy:\n$cmd\n";
+	system($cmd);
+	}
+}
+else {
+    # reads are in fastq file format
+    # type of fastq file?
+	my $fastqtype;
+	if ($phred33) {
+		$fastqtype = "--phred33-quals";
+	}
+	elsif ($phred64) {
+		$fastqtype = "--phred64-quals";
+	}
+	elsif ($solexa) {
+		$fastqtype = "--solexa-quals";
+	}
+	if ($paired_end) { 
+	#reads in paired end 
+        #run paired end with two fasq files
+	    my $cmd = "$rsem_version/rsem-calculate-expression --quiet --paired-end -p $nThreads $options $fastqtype $fastq1 $fastq2 $dbref $output";
+ 	    print "RSEM Parameters used by Galaxy:\n$cmd\n";
+	    system($cmd);
+	}
+	else { 
+	my $cmd = "$rsem_version/rsem-calculate-expression --quiet -p $nThreads $options $fastqtype $single_fastq $dbref $output";
+ 	print "RSEM Parameters used by Galaxy:\n$cmd\n";
+	system($cmd);
+	}
+}
+
+ #Rename files for galaxy
+my $mv_genes = "mv $output.genes.results $output";
+my $mv_isoforms = "mv $output.isoforms.results $isoforms";
+
+#print "bamtype-parameter=$bamtype\n";
+my $mv_bam_transcript;
+my $mv_bam_genome;
+if ($bamtype eq "yes") {
+  $mv_bam_genome = "mv $output.genome.sorted.bam $bam_genome";
+  system($mv_bam_genome);
+}
+
+$mv_bam_transcript = "mv $output.transcript.sorted.bam $bamfile";
+
+my @rsem_dir = split(/\//, $output);
+my $short_output = $rsem_dir[-1];
+my $mv_theta = "mv $output.stat/$short_output.theta $theta";
+my $mv_cnt = "mv $output.stat/$short_output.cnt $cnt";
+my $mv_model = "mv $output.stat/$short_output.model $model";
+system($mv_genes);
+system($mv_isoforms);
+system($mv_bam_transcript);
+#system($mv_theta);
+#system($mv_cnt);
+#system($mv_model);
+#print "LOG $mv\n";