Mercurial > repos > artbio > rsem

diff rsem-bwt2.xml @ 0:e5e836936d60 draft
planemo upload for repository https://github.com/artbio/tools-artbio/tree/master/tools/rsem commit d84a0359354698a4b29df12ab581c2618bffcf80
author: artbio
date: Sat, 31 Mar 2018 21:30:07 -0400
children: 49795544dac7
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rsem-bwt2.xml	Sat Mar 31 21:30:07 2018 -0400
@@ -0,0 +1,1427 @@
+<tool id="rsembowtie2" name="RSEM-Bowtie2" version="0.4.0">
+  <!-- No one-line description is displayed next to the tool name. -->
+  <description></description>
+  <!-- Option blocks referenced below through "expand" elements are imported from macros.xml. -->
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <!-- Packages invoked by the command template: RSEM and the Bowtie 2 aligner. -->
+  <requirements>
+    <requirement type="package" version="1.3.0">rsem</requirement>
+    <requirement type="package" version="2.3.4">bowtie2</requirement>
+  </requirements>
+  <!-- This rule reports any non-zero exit code at "warning" level.
+       NOTE(review): the command tag below also sets detect_errors="exit_code";
+       confirm which of the two settings Galaxy actually applies. -->
+  <stdio>
+    <exit_code level="warning" range="1:" description="Tool exception" />
+  </stdio>
+  <command detect_errors="exit_code"><![CDATA[
+    ## ------------------------------------------------------------------
+    ## Step 1 (optional): build an RSEM reference with Bowtie 2 indices.
+    ## The reference name is written into the rsem_ref dataset itself and
+    ## the index files are written into that dataset's extra files folder.
+    ## ------------------------------------------------------------------
+    #if $job.select_job == "index":
+      echo ${job.reference_name} " " | tee '$reference_file' &&
+      mkdir -p '$reference_file.files_path' &&
+      rsem-prepare-reference
+      ## NOTE(review): confirm that the pinned RSEM version still accepts
+      ## the no-polyA flag and still adds poly(A) tails without an explicit
+      ## polyA flag; the option set is reproduced here unchanged.
+      #if $job.polya.polya_use == 'add':
+        #if $job.polya.polya_length:
+          --polyA-length $job.polya.polya_length
+        #end if
+      #elif $job.polya.polya_use == 'subset':
+        ## the subset file is optional in the form: pass it only when set
+        #if $job.polya.no_polya_subset:
+          --no-polyA-subset '$job.polya.no_polya_subset'
+        #end if
+        #if $job.polya.polya_length:
+          --polyA-length $job.polya.polya_length
+        #end if
+      #elif $job.polya.polya_use == 'none':
+        --no-polyA
+      #end if
+      $job.ntog
+      #if $job.transcript_to_gene_map:
+        --transcript-to-gene-map '$job.transcript_to_gene_map'
+      #end if
+      --bowtie2
+      #if $job.self_reference.ref_type == 'transcripts':
+        '$job.self_reference.reference_fasta_file'
+      #else:
+        --gtf '$job.self_reference.gtf'
+        '$job.self_reference.reference_fasta_file'
+      #end if
+      '${reference_file.files_path}/${job.reference_name}'
+      > '${reference_file.files_path}/${job.reference_name}.log'
+    #end if
+
+    ## chain both steps when they are requested in the same run
+    #if $job.select_job == "index" and $run_rsem.select == "Yes":
+      &&
+    #end if
+
+    ## ------------------------------------------------------------------
+    ## Step 2 (optional): estimate expression with rsem-calculate-expression.
+    ## ------------------------------------------------------------------
+    #if $run_rsem.select == "Yes":
+      ## Stage FASTQ reads under fixed names: gzipped inputs are
+      ## uncompressed, plain inputs are symlinked. The fastq conditional
+      ## only exists for the "fastq" input format, so it must not be
+      ## touched for FASTA or SAM/BAM input.
+      #if $run_rsem.input.format == "fastq":
+        #if $run_rsem.input.fastq.matepair == "single":
+          #if $run_rsem.input.fastq.singlefastq.is_of_type('fastq.gz') or $run_rsem.input.fastq.singlefastq.is_of_type('fastqsanger.gz'):
+            gunzip < '$run_rsem.input.fastq.singlefastq' > uncomp_single.fastq &&
+          #else:
+            ln -f -s '$run_rsem.input.fastq.singlefastq' 'uncomp_single.fastq' &&
+          #end if
+        #elif $run_rsem.input.fastq.matepair == "paired":
+          ## each mate is checked on its own so mixed compression works
+          #if $run_rsem.input.fastq.fastq1.is_of_type('fastq.gz') or $run_rsem.input.fastq.fastq1.is_of_type('fastqsanger.gz'):
+            gunzip < '$run_rsem.input.fastq.fastq1' > uncomp_pair1.fastq &&
+          #else:
+            ln -f -s '$run_rsem.input.fastq.fastq1' 'uncomp_pair1.fastq' &&
+          #end if
+          #if $run_rsem.input.fastq.fastq2.is_of_type('fastq.gz') or $run_rsem.input.fastq.fastq2.is_of_type('fastqsanger.gz'):
+            gunzip < '$run_rsem.input.fastq.fastq2' > uncomp_pair2.fastq &&
+          #else:
+            ln -f -s '$run_rsem.input.fastq.fastq2' 'uncomp_pair2.fastq' &&
+          #end if
+        #end if
+      #end if
+      rsem-calculate-expression
+      ## --tag string
+      #if $run_rsem.seedlength:
+        --seed-length $run_rsem.seedlength
+      #end if
+      --forward-prob $run_rsem.forward_prob
+      #if $run_rsem.rsem_options.fullparams == 'fullset':
+        ## Fragment info
+        #if $run_rsem.rsem_options.fragment_length_mean:
+          --fragment-length-mean $run_rsem.rsem_options.fragment_length_mean
+        #end if
+        #if $run_rsem.rsem_options.fragment_length_min:
+          --fragment-length-min $run_rsem.rsem_options.fragment_length_min
+        #end if
+        #if $run_rsem.rsem_options.fragment_length_sd:
+          --fragment-length-sd $run_rsem.rsem_options.fragment_length_sd
+        #end if
+        #if $run_rsem.rsem_options.fragment_length_max:
+          --fragment-length-max $run_rsem.rsem_options.fragment_length_max
+        #end if
+        ## RSPD
+        #if $run_rsem.rsem_options.rspd.estimate == 'yes':
+          --estimate-rspd
+          #if $run_rsem.rsem_options.rspd.num_rspd_bins:
+            --num-rspd-bins $run_rsem.rsem_options.rspd.num_rspd_bins
+          #end if
+        #end if
+        ## Calculate 95% credibility intervals and posterior mean estimates.
+        #if $run_rsem.rsem_options.useci.ci == 'yes':
+          --calc-ci
+          #if $run_rsem.rsem_options.useci.cimem:
+            --ci-memory $run_rsem.rsem_options.useci.cimem
+          #end if
+        #end if
+      #end if
+      --num-threads \${GALAXY_SLOTS:-4}
+      ## Aligner flags only make sense when reads (FASTQ or FASTA) are
+      ## aligned by RSEM; pre-aligned SAM/BAM input must not receive them.
+      ## The Bowtie 2 option block is defined for both read formats.
+      #if $run_rsem.input.format != 'sam':
+        --bowtie2
+        #if $run_rsem.input.bowtie2_options.fullparams == 'fullset':
+          ## Bowtie params
+          #if $run_rsem.input.bowtie2_options.bowtie2_mismatch_rate:
+            --bowtie2-mismatch-rate $run_rsem.input.bowtie2_options.bowtie2_mismatch_rate
+          #end if
+          #if $run_rsem.input.bowtie2_options.bowtie2_k:
+            --bowtie2-k $run_rsem.input.bowtie2_options.bowtie2_k
+          #end if
+          #if $run_rsem.input.bowtie2_options.bowtie2_sensitivity_level:
+            --bowtie2-sensitivity-level $run_rsem.input.bowtie2_options.bowtie2_sensitivity_level
+          #end if
+        #end if
+      #end if
+      ## Outputs
+      #if $run_rsem.rsem_outputs.result_bams == 'none':
+        --no-bam-output
+      #elif $run_rsem.rsem_outputs.result_bams == 'default':
+        --sort-bam-by-coordinate
+        ## the sampling switch is offered in the form for this choice too
+        $run_rsem.rsem_outputs.sampling_for_bam
+      #else:
+        --sort-bam-by-coordinate
+        --output-genome-bam
+        $run_rsem.rsem_outputs.sampling_for_bam
+      #end if
+      ## Input data
+      #if $run_rsem.input.format == "fastq":
+        $run_rsem.input.fastq_select
+        #if $run_rsem.input.fastq.matepair == "single":
+          uncomp_single.fastq
+        #elif $run_rsem.input.fastq.matepair == "paired":
+          --paired-end
+          uncomp_pair1.fastq
+          uncomp_pair2.fastq
+        #end if
+      #elif $run_rsem.input.format == "fasta":
+        --no-qualities
+        #if $run_rsem.input.fasta.matepair == "single":
+          '$run_rsem.input.fasta.singlefasta'
+        #elif $run_rsem.input.fasta.matepair == "paired":
+          --paired-end
+          '$run_rsem.input.fasta.fasta1'
+          '$run_rsem.input.fasta.fasta2'
+        #end if
+      #elif $run_rsem.input.format == "sam":
+        ## the alignments flag covers SAM, BAM and CRAM; per the help text
+        ## below, the exact file format is detected automatically
+        --alignments
+        #if $run_rsem.input.matepair == "paired":
+          --paired-end
+        #end if
+        '$run_rsem.input.rsem_sam'
+      #end if
+      ## RSEM reference
+      #if $run_rsem.reference.refSrc == 'history':
+        '${run_rsem.reference.rsem_ref.extra_files_path}/${run_rsem.reference.rsem_ref.metadata.reference_name}'
+      #elif $run_rsem.reference.refSrc == 'self':
+        '${reference_file.files_path}/${job.reference_name}'
+      #end if
+      ## sample_name: use a hard coded name so we can pull out galaxy outputs
+      rsem_output
+      ## direct output into logfile
+      > '$log'
+    #end if
+  ]]></command>
+
+  <inputs>
+    <!-- First decision: build a new RSEM reference in this run, or rely on one from the history. -->
+    <conditional name="job">
+      <param name="select_job" type="select" label="rsem reference">
+        <option value="index">Build rsem reference</option>
+        <option value="no-index" selected="true">rsem reference available from history</option>
+      </param>
+      <when value="index">
+        <!-- Reference sequences: either ready-made transcripts, or a genome plus a GTF annotation. -->
+        <conditional name="self_reference">
+          <param name="ref_type" type="select" label="Reference transcript source">
+            <option value="transcripts">transcript fasta</option>
+            <option value="genomic">reference genome and gtf</option>
+          </param>
+          <when value="transcripts">
+            <param name="reference_fasta_file" type="data" format="fasta" label="reference fasta file"
+                   help="The files should contain the sequences of transcripts."/>
+          </when>
+          <when value="genomic">
+            <param name="reference_fasta_file" type="data" format="fasta" label="reference fasta file"
+                   help="The file should contain the sequence of an entire genome."/>
+            <param name="gtf" type="data" format="gtf" label="gtf"
+                   help="extract transcript reference sequences using the gene annotations specified in this GTF" />
+          </when>
+        </conditional>
+        <param name="transcript_to_gene_map" type="data" format="tabular" optional="true" label="Map of gene ids to transcript (isoform) ids" >
+          <help>
+            Each line should be of the form: gene_id transcript_id (with the two fields separated by a tab character)
+            The map can be obtained from the UCSC table browser
+              group: Genes and Gene Prediction Tracks
+              table: knownIsoforms
+            Without a map:
+              If a reference genome and gtf is used, then RSEM uses the "gene_id" and "transcript_id" attributes in the GTF file.
+              Otherwise, RSEM assumes that each sequence in the reference sequence files is a separate gene.
+          </help>
+        </param>
+        <!-- The name is used as a file name prefix in the command line, hence the strict validator. -->
+        <param name="reference_name" type="text" value="rsem_ref_name" label="reference name">
+          <help>A one word name for this RSEM reference containing only letters, digits, and underscore characters</help>
+          <validator type="regex" message="Use only letters, digits, and underscore characters">^\w+$</validator>
+        </param>
+        <!-- Poly(A) tail handling for the reference transcripts. -->
+        <conditional name="polya">
+          <param name="polya_use" type="select" label="PolyA ">
+            <option value="add" selected="true">Add poly(A) tails to all transcripts</option>
+            <option value="subset">Exclude poly(A) tails from selected transcripts</option>
+            <option value="none">Do not add poly(A) tails to any transcripts</option>
+          </param>
+          <when value="add">
+            <param name="polya_length" type="integer" value="125" optional="true" label="The length of the poly(A) tails to be added. (Default: 125)">
+              <validator type="in_range" message="must be positive " min="1"/>
+            </param>
+          </when>
+          <when value="subset">
+            <param name="no_polya_subset" type="data" format="tabular" optional="true" label="List of transcript IDs (one per line) that should not have polyA tails added."/>
+            <param name="polya_length" type="integer" value="125" optional="true" label="The length of the poly(A) tails to be added. (Default: 125)">
+              <validator type="in_range" message="must be positive " min="1"/>
+            </param>
+          </when>
+          <when value="none"/>
+        </conditional>
+        <!-- Checked: the truevalue flag is inserted into the rsem-prepare-reference call; unchecked: nothing is added. -->
+        <param name="ntog" type="boolean" truevalue="--no-ntog" falsevalue="" checked="false" label="Disable the conversion of 'N' characters to 'G' characters in the reference sequences" help="Bowtie uses the automatic N to G conversion to align against all positions in the reference."/>
+      </when>
+      <when value="no-index">
+      </when>
+    </conditional>
+
+    <!-- Second decision: also run rsem-calculate-expression, or stop after building the reference. -->
+    <conditional name="run_rsem">
+      <param name="select" type="select" label="calculate expression with rsem">
+        <option value="No">Just build rsem reference for later rsem profiling</option>
+        <option value="Yes" selected="true">profile expression with rsem</option>
+      </param>
+      <when value="Yes">
+        <!-- Used only to label the output datasets; the command itself uses a fixed sample name. -->
+        <param name="sample" type="text" value="rsem_sample" label="Sample name" />
+        <!-- Where the reference comes from: an existing rsem_ref dataset, or the one built by this run. -->
+        <conditional name="reference">
+          <param name="refSrc" type="select" label="RSEM Reference Source">
+            <option value="history">From your history</option>
+            <option value="self">Prepare RSEM Reference with this tool</option>
+          </param>
+          <when value="history">
+            <param name="rsem_ref" type="data" format="rsem_ref" label="RSEM reference" />
+          </when>
+          <when value="self">
+          </when>
+        </conditional>
+        <!-- Reads (FASTQ or FASTA, single or paired) or pre-computed alignments. -->
+        <conditional name="input">
+          <param name="format" type="select" label="RSEM Input file type">
+            <option value="fastq">FASTQ</option>
+            <option value="fasta">FASTA</option>
+            <option value="sam">SAM/BAM</option>
+          </param>
+          <when value="fastq">
+            <!-- The option value is the quality-encoding flag passed verbatim on the command line. -->
+            <param name="fastq_select" size="15" type="select" label="FASTQ type" >
+              <option value="--phred33-quals" selected="true">phred33 qualities (default for sanger)</option>
+              <option value="--solexa-quals">solexa qualities</option>
+              <option value="--phred64-quals">phred64 qualities</option>
+            </param>
+            <conditional name="fastq">
+              <param name="matepair" type="select" label="Library type">
+                <option value="single">Single End Reads</option>
+                <option value="paired">Paired End Reads</option>
+              </param>
+              <when value="single">
+                <param name="singlefastq" type="data" format="fastq,fastq.gz" label="FASTQ file" />
+              </when>
+              <when value="paired">
+                <param name="fastq1" type="data" format="fastq,fastq.gz" label="Read 1 fastq file" />
+                <param name="fastq2" type="data" format="fastq,fastq.gz" label="Read 2 fastq file" />
+              </when>
+            </conditional>
+            <expand macro="bowtie2_options"/>
+          </when>
+          <when value="fasta">
+            <conditional name="fasta">
+              <param name="matepair" type="select" label="Library Type">
+                <option value="single">Single End Reads</option>
+                <option value="paired">Paired End Reads</option>
+              </param>
+              <when value="single">
+                <param name="singlefasta" type="data" format="fasta" label="fasta file" />
+              </when>
+              <when value="paired">
+                <param name="fasta1" type="data" format="fasta" label="Read 1 fasta file" />
+                <param name="fasta2" type="data" format="fasta" label="Read 2 fasta file" />
+              </when>
+            </conditional>
+            <expand macro="bowtie2_options"/>
+          </when>
+          <when value="sam">
+            <!-- convert-sam-for-rsem /ref/mouse_125 input.sam -o input_for_rsem.sam -->
+            <param name="matepair" type="select" label="Library Type">
+              <option value="single">Single End Reads</option>
+              <option value="paired">Paired End Reads</option>
+            </param>
+            <param name="rsem_sam" type="data" format="rsem_sam" label="RSEM formatted SAM file" />
+          </when>
+        </conditional>
+        <expand macro="rsem_options"/>
+        <!-- Which BAM files to keep; the choice also drives the output filters further down. -->
+        <conditional name="rsem_outputs">
+          <param name="result_bams" type="select" label="Create bam results files"
+                 help="In addition to the transcript-coordinate-based BAM file output, also output a BAM file with the read alignments in genomic coordinates" >
+            <option value="none">No BAM results files</option>
+            <option value="default" selected="true">Transcript BAM results file</option>
+            <option value="both">Transcript and genome BAM results files</option>
+          </param>
+          <when value="none"/>
+          <when value="default">
+            <expand macro="sampling_for_bam"/>
+          </when>
+          <when value="both">
+            <expand macro="sampling_for_bam"/>
+          </when>
+        </conditional>
+      </when>
+      <when value="No">
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <!-- The RSEM reference; kept only when a reference is built in this run. -->
+    <data name="reference_file" format="rsem_ref" label="RSEM ${job.reference_name} reference">
+      <filter>job['select_job'] == "index"</filter>
+    </data>
+    <!-- Expression tables, collected from the working directory under the fixed "rsem_output" prefix. -->
+    <data name="gene_abundances" format="tabular" from_work_dir="rsem_output.genes.results" label="${run_rsem.sample}.gene_abundances">
+      <filter>run_rsem['select'] == "Yes"</filter>
+    </data>
+    <data name="isoform_abundances" format="tabular" from_work_dir="rsem_output.isoforms.results" label="${run_rsem.sample}.isoform_abundances">
+      <filter>run_rsem['select'] == "Yes"</filter>
+    </data>
+    <!-- BAM outputs; kept according to the "Create bam results files" choice. -->
+    <data name="transcript_sorted_bam" format="bam" from_work_dir="rsem_output.transcript.sorted.bam" label="${run_rsem.sample}.transcript.bam">
+      <filter>run_rsem['select'] == "Yes" and run_rsem['rsem_outputs']['result_bams'] != "none"</filter>
+    </data>
+    <data name="genome_sorted_bam" format="bam" from_work_dir="rsem_output.genome.sorted.bam" label="${run_rsem.sample}.genome.bam">
+      <filter>run_rsem['select'] == "Yes" and run_rsem['rsem_outputs']['result_bams'] == "both"</filter>
+    </data>
+    <!-- Standard output of rsem-calculate-expression, redirected by the command template. -->
+    <data name="log" format="txt" label="${run_rsem.sample}.rsem_log">
+      <filter>run_rsem['select'] == "Yes"</filter>
+    </data>
+  </outputs>
+
+  <tests>
+    <!-- Build a reference from genome + GTF, then quantify single-end plain FASTQ reads. -->
+    <test>
+      <param name="select_job" value="index"/>
+      <param name="ref_type" value="genomic"/>
+      <param name="reference_fasta_file" value="ref.fasta" ftype="fasta"/>
+      <param name="gtf" value="ref.gtf" ftype="gtf"/>
+      <param name="reference_name" value="ref"/>
+      <param name="select" value="Yes"/>
+      <param name="sample" value="rsem_sample"/>
+      <param name="refSrc" value="self"/>
+      <param name="format" value="fastq"/>
+      <param name="matepair" value="single"/>
+      <param name="singlefastq" value="test.fq" ftype="fastqsanger"/>
+      <param name="result_bams" value="none"/>
+      <output name="reference_file">
+        <assert_contents>
+          <has_text text="ref" />
+        </assert_contents>
+      </output>
+      <output name="gene_abundances" value="gene_abundances.tab2"/>
+      <output name="isoform_abundances" value="isoform_abundances.tab2" />
+      <output name="log">
+        <assert_contents>
+          <has_text text="Expression Results are written" />
+        </assert_contents>
+      </output>
+    </test>
+    <!-- Same run with gzipped FASTQ input, exercising the gunzip staging step. -->
+    <test>
+      <param name="select_job" value="index"/>
+      <param name="ref_type" value="genomic"/>
+      <param name="reference_fasta_file" value="ref.fasta" ftype="fasta"/>
+      <param name="gtf" value="ref.gtf" ftype="gtf"/>
+      <param name="reference_name" value="ref"/>
+      <param name="select" value="Yes"/>
+      <param name="sample" value="rsem_sample"/>
+      <param name="refSrc" value="self"/>
+      <param name="format" value="fastq"/>
+      <param name="matepair" value="single"/>
+      <param name="singlefastq" value="test.fastq.gz" ftype="fastqsanger.gz"/>
+      <param name="result_bams" value="none"/>
+      <output name="reference_file">
+        <assert_contents>
+          <has_text text="ref" />
+        </assert_contents>
+      </output>
+      <output name="gene_abundances" value="gene_abundances.tab2"/>
+      <output name="isoform_abundances" value="isoform_abundances.tab2" />
+      <output name="log">
+        <assert_contents>
+          <has_text text="Expression Results are written" />
+        </assert_contents>
+      </output>
+    </test>
+    <!-- Reference building only, without expression profiling.
+         (A second, byte-identical copy of this test was removed.) -->
+    <test>
+      <param name="select_job" value="index"/>
+      <param name="ref_type" value="genomic"/>
+      <param name="reference_fasta_file" value="ref.fasta" ftype="fasta"/>
+      <param name="gtf" value="ref.gtf" ftype="gtf"/>
+      <param name="reference_name" value="ref"/>
+      <param name="select" value="No"/>
+      <output name="reference_file">
+        <assert_contents>
+          <has_text text="ref" />
+        </assert_contents>
+      </output>
+    </test>
+  </tests>
+
+  <help>
+.. class:: infomark
+
+RSEM HOME PAGE - http://deweylab.biostat.wisc.edu/rsem/
+
+NAME
+    rsem-prepare-reference
+
+SYNOPSIS
+    rsem-prepare-reference [options] reference_fasta_file(s) reference_name
+
+DESCRIPTION
+    The rsem-prepare-reference program extracts/preprocesses the reference sequences and builds Bowtie 2 indices using default parameters.
+    This program is used in conjunction with the 'rsem-calculate-expression' program.
+
+INPUTS
+    A fasta file of transcripts
+    or
+    A genome sequence fasta file and a GTF gene annotation file.  (When using UCSC data, include the related knownIsoforms.txt)
+
+---
+
+NAME
+    rsem-calculate-expression - Estimate gene and isoform expression from
+    RNA-Seq data.
+
+SYNOPSIS
+     rsem-calculate-expression [options] upstream_read_file(s) reference_name sample_name
+     rsem-calculate-expression [options] --paired-end upstream_read_file(s) downstream_read_file(s) reference_name sample_name
+     rsem-calculate-expression [options] --alignments [--paired-end] input reference_name sample_name
+
+ARGUMENTS
+    upstream_read_files(s)
+        Comma-separated list of files containing single-end reads or
+        upstream reads for paired-end data. By default, these files are
+        assumed to be in FASTQ format. If the --no-qualities option is
+        specified, then FASTA format is expected.
+
+    downstream_read_file(s)
+        Comma-separated list of files containing downstream reads which are
+        paired with the upstream reads. By default, these files are assumed
+        to be in FASTQ format. If the --no-qualities option is specified,
+        then FASTA format is expected.
+
+    input
+        SAM/BAM/CRAM formatted input file. If "-" is specified for the
+        filename, the input is instead assumed to come from standard input.
+        RSEM requires all alignments of the same read group together. For
+        paired-end reads, RSEM also requires the two mates of any alignment
+        be adjacent. In addition, RSEM does not allow the SEQ and QUAL
+        fields to be empty. See Description section for how to make input
+        file obey RSEM's requirements.
+
+    reference_name
+        The name of the reference used. The user must have run
+        'rsem-prepare-reference' with this reference_name before running
+        this program.
+
+    sample_name
+        The name of the sample analyzed. All output files are prefixed by
+        this name (e.g., sample_name.genes.results)
+
+BASIC OPTIONS
+    --paired-end
+        Input reads are paired-end reads. (Default: off)
+
+    --no-qualities
+        Input reads do not contain quality scores. (Default: off)
+
+    --strandedness &lt;none|forward|reverse&gt;
+        This option defines the strandedness of the RNA-Seq reads. It
+        recognizes three values: 'none', 'forward', and 'reverse'. 'none'
+        refers to non-strand-specific protocols. 'forward' means all
+        (upstream) reads are derived from the forward strand. 'reverse'
+        means all (upstream) reads are derived from the reverse strand. If
+        'forward'/'reverse' is set, the '--norc'/'--nofw' Bowtie/Bowtie 2
+        option will also be enabled to avoid aligning reads to the opposite
+        strand. For Illumina TruSeq Stranded protocols, please use
+        'reverse'. (Default: 'none')
+
+    -p/--num-threads &lt;int&gt;
+        Number of threads to use. Both Bowtie/Bowtie2, expression estimation
+        and 'samtools sort' will use this many threads. (Default: 1)
+
+    --alignments
+        Input file contains alignments in SAM/BAM/CRAM format. The exact
+        file format will be determined automatically. (Default: off)
+
+    --fai &lt;file&gt;
+        If the header section of input alignment file does not contain
+        reference sequence information, this option should be turned on.
+        &lt;file&gt; is a FAI format file containing each reference sequence's
+        name and length. Please refer to the SAM official website for the
+        details of FAI format. (Default: off)
+
+    --bowtie2
+        Use Bowtie 2 instead of Bowtie to align reads. Since currently RSEM
+        does not handle indel, local and discordant alignments, the Bowtie2
+        parameters are set in a way to avoid those alignments. In
+        particular, we use options '--sensitive --dpad 0 --gbar 99999999
+        --mp 1,1 --np 1 --score-min L,0,-0.1' by default. The last parameter
+        of '--score-min', '-0.1', is the negative of maximum mismatch rate.
+        This rate can be set by option '--bowtie2-mismatch-rate'. If reads
+        are paired-end, we additionally use options '--no-mixed' and
+        '--no-discordant'. (Default: off)
+
+    --star
+        Use STAR to align reads. Alignment parameters are from ENCODE3's
+        STAR-RSEM pipeline. To save computational time and memory resources,
+        STAR's Output BAM file is unsorted. It is stored in RSEM's temporary
+        directory with name as 'sample_name.bam'. Each STAR job will have
+        its own private copy of the genome in memory. (Default: off)
+
+    --append-names
+        If gene_name/transcript_name is available, append it to the end of
+        gene_id/transcript_id (separated by '_') in files
+        'sample_name.isoforms.results' and 'sample_name.genes.results'.
+        (Default: off)
+
+    --seed &lt;uint32&gt;
+        Set the seed for the random number generators used in calculating
+        posterior mean estimates and credibility intervals. The seed must be
+        a non-negative 32 bit integer. (Default: off)
+
+    --single-cell-prior
+        By default, RSEM uses Dirichlet(1) as the prior to calculate
+        posterior mean estimates and credibility intervals. However, far
+        fewer genes are expressed in single cell RNA-Seq data. Thus, if you
+        want to compute posterior mean estimates and/or credibility
+        intervals and you have single-cell RNA-Seq data, you are recommended
+        to turn on this option. Then RSEM will use Dirichlet(0.1) as the
+        prior, which encourages sparsity of the expression levels.
+        (Default: off)
+
+    --calc-pme
+        Run RSEM's collapsed Gibbs sampler to calculate posterior mean
+        estimates. (Default: off)
+
+    --calc-ci
+        Calculate 95% credibility intervals and posterior mean estimates.
+        The credibility level can be changed by setting
+        '--ci-credibility-level'. (Default: off)
+
+    -q/--quiet
+        Suppress the output of logging information. (Default: off)
+
+    -h/--help
+        Show help information.
+
+    --version
+        Show version information.
+
+OUTPUT OPTIONS
+    --sort-bam-by-read-name
+        Sort BAM file aligned under transcript coordinates by read name.
+        Setting this option on will produce deterministic maximum likelihood
+        estimations from independent runs. Note that sorting will take long
+        time and lots of memory. (Default: off)
+
+    --no-bam-output
+        Do not output any BAM file. (Default: off)
+
+    --sampling-for-bam
+        When RSEM generates a BAM file, instead of outputting all alignments
+        a read has with their posterior probabilities, one alignment is
+        sampled according to the posterior probabilities. The sampling
+        procedure includes the alignment to the "noise" transcript, which
+        does not appear in the BAM file. Only the sampled alignment has a
+        weight of 1. All other alignments have weight 0. If the "noise"
+        transcript is sampled, all alignments appeared in the BAM file
+        should have weight 0. (Default: off)
+
+    --output-genome-bam
+        Generate a BAM file, 'sample_name.genome.bam', with alignments
+        mapped to genomic coordinates and annotated with their posterior
+        probabilities. In addition, RSEM will call samtools (included in
+        RSEM package) to sort and index the bam file.
+        'sample_name.genome.sorted.bam' and
+        'sample_name.genome.sorted.bam.bai' will be generated. (Default:
+        off)
+
+    --sort-bam-by-coordinate
+        Sort RSEM generated transcript and genome BAM files by coordinates
+        and build associated indices. (Default: off)
+
+    --sort-bam-memory-per-thread &lt;string&gt;
+        Set the maximum memory per thread that can be used by 'samtools
+        sort'. &lt;string&gt; represents the memory and accepts suffixes 'K/M/G'.
+        RSEM will pass &lt;string&gt; to the '-m' option of 'samtools sort'. Note
+        that the default used here is different from the default used by
+        samtools. (Default: 1G)
+
+ALIGNER OPTIONS
+    --seed-length &lt;int&gt;
+        Seed length used by the read aligner. Providing the correct value is
+        important for RSEM. If RSEM runs Bowtie, it uses this value for
+        Bowtie's seed length parameter. Any read with its or at least one of
+        its mates' (for paired-end reads) length less than this value will
+        be ignored. If poly(A) tails were not added to the references, the
+        minimum allowed value is 5; otherwise, the minimum allowed value is
+        25. Note that this script will only check if the value &gt;= 5 and give
+        a warning message if the value &lt; 25 but &gt;= 5. (Default: 25)
+
+    --phred33-quals
+        Input quality scores are encoded as Phred+33. (Default: on)
+
+    --phred64-quals
+        Input quality scores are encoded as Phred+64 (default for GA
+        Pipeline ver. &gt;= 1.3). (Default: off)
+
+    --solexa-quals
+        Input quality scores are solexa encoded (from GA Pipeline ver. &lt;
+        1.3). (Default: off)
+
+    --bowtie-path &lt;path&gt;
+        The path to the Bowtie executables. (Default: the path to the Bowtie
+        executables is assumed to be in the user's PATH environment
+        variable)
+
+    --bowtie-n &lt;int&gt;
+        (Bowtie parameter) max # of mismatches in the seed. (Range: 0-3,
+        Default: 2)
+
+    --bowtie-e &lt;int&gt;
+        (Bowtie parameter) max sum of mismatch quality scores across the
+        alignment. (Default: 99999999)
+
+    --bowtie-m &lt;int&gt;
+        (Bowtie parameter) suppress all alignments for a read if &gt; &lt;int&gt;
+        valid alignments exist. (Default: 200)
+
+    --bowtie-chunkmbs &lt;int&gt;
+        (Bowtie parameter) memory allocated for best first alignment
+        calculation (Default: 0 - use Bowtie's default)
+
+    --bowtie2-path &lt;path&gt;
+        (Bowtie 2 parameter) The path to the Bowtie 2 executables. (Default:
+        the path to the Bowtie 2 executables is assumed to be in the user's
+        PATH environment variable)
+
+    --bowtie2-mismatch-rate &lt;double&gt;
+        (Bowtie 2 parameter) The maximum mismatch rate allowed. (Default:
+        0.1)
+
+    --bowtie2-k &lt;int&gt;
+        (Bowtie 2 parameter) Find up to &lt;int&gt; alignments per read. (Default:
+        200)
+
+    --bowtie2-sensitivity-level &lt;string&gt;
+        (Bowtie 2 parameter) Set Bowtie 2's preset options in --end-to-end
+        mode. This option controls how hard Bowtie 2 tries to find
+        alignments. &lt;string&gt; must be one of "very_fast", "fast", "sensitive"
+        and "very_sensitive". The four candidates correspond to Bowtie 2's
+        "--very-fast", "--fast", "--sensitive" and "--very-sensitive"
+        options. (Default: "sensitive" - use Bowtie 2's default)
+
+    --star-path &lt;path&gt;
+        The path to STAR's executable. (Default: the path to STAR executable
+        is assumed to be in user's PATH environment variable)
+
+    --star-gzipped-read-file
+        (STAR parameter) Input read file(s) is compressed by gzip. (Default:
+        off)
+
+    --star-bzipped-read-file
+        (STAR parameter) Input read file(s) is compressed by bzip2.
+        (Default: off)
+
+    --star-output-genome-bam
+        (STAR parameter) Save the BAM file from STAR alignment under genomic
+        coordinate to 'sample_name.STAR.genome.bam'. This file is NOT sorted
+        by genomic coordinate. In this file, according to STAR's manual,
+        'paired ends of an alignment are always adjacent, and multiple
+        alignments of a read are adjacent as well'. (Default: off)
+
+ADVANCED OPTIONS
+    --tag &lt;string&gt;
+        The name of the optional field used in the SAM input for identifying
+        a read with too many valid alignments. The field should have the
+        format &lt;tagName&gt;:i:&lt;value&gt;, where a &lt;value&gt; bigger than 0 indicates
+        a read with too many alignments. (Default: "")
+
+    --fragment-length-min &lt;int&gt;
+        Minimum read/insert length allowed. This is also the value for the
+        Bowtie/Bowtie 2 -I option. (Default: 1)
+
+    --fragment-length-max &lt;int&gt;
+        Maximum read/insert length allowed. This is also the value for the
+        Bowtie/Bowtie 2 -X option. (Default: 1000)
+
+    --fragment-length-mean &lt;double&gt;
+        (single-end data only) The mean of the fragment length distribution,
+        which is assumed to be a Gaussian. (Default: -1, which disables use
+        of the fragment length distribution)
+
+    --fragment-length-sd &lt;double&gt;
+        (single-end data only) The standard deviation of the fragment length
+        distribution, which is assumed to be a Gaussian. (Default: 0, which
+        assumes that all fragments are of the same length, given by the
+        rounded value of --fragment-length-mean)
+
+    --estimate-rspd
+        Set this option if you want to estimate the read start position
+        distribution (RSPD) from data. Otherwise, RSEM will use a uniform
+        RSPD. (Default: off)
+
+    --num-rspd-bins &lt;int&gt;
+        Number of bins in the RSPD. Only relevant when '--estimate-rspd' is
+        specified. Use of the default setting is recommended. (Default: 20)
+
+    --gibbs-burnin &lt;int&gt;
+        The number of burn-in rounds for RSEM's Gibbs sampler. Each round
+        passes over the entire data set once. If RSEM can use multiple
+        threads, multiple Gibbs samplers will start at the same time and all
+        samplers share the same burn-in number. (Default: 200)
+
+    --gibbs-number-of-samples &lt;int&gt;
+        The total number of count vectors RSEM will collect from its Gibbs
+        samplers. (Default: 1000)
+
+    --gibbs-sampling-gap &lt;int&gt;
+        The number of rounds between two successive count vectors RSEM
+        collects. If the count vector after round N is collected, the count
+        vector after round N + &lt;int&gt; will also be collected. (Default: 1)
+
+    --ci-credibility-level &lt;double&gt;
+        The credibility level for credibility intervals. (Default: 0.95)
+
+    --ci-memory &lt;int&gt;
+        Maximum size (in memory, MB) of the auxiliary buffer used for
+        computing credibility intervals (CI). (Default: 1024)
+
+    --ci-number-of-samples-per-count-vector &lt;int&gt;
+        The number of read generating probability vectors sampled per
+        sampled count vector. The credibility intervals are calculated by
+        first sampling P(C | D) and then sampling P(Theta | C) for each
+        sampled count vector. This option controls how many Theta vectors
+        are sampled per sampled count vector. (Default: 50)
+
+    --keep-intermediate-files
+        Keep temporary files generated by RSEM. RSEM creates a temporary
+        directory, 'sample_name.temp', into which it puts all intermediate
+        output files. If this directory already exists, RSEM overwrites all
+        files generated by previous RSEM runs inside of it. By default,
+        after RSEM finishes, the temporary directory is deleted. Set this
+        option to prevent the deletion of this directory and the
+        intermediate files inside of it. (Default: off)
+
+    --temporary-folder &lt;string&gt;
+        Set where to put the temporary files generated by RSEM. If the
+        folder specified does not exist, RSEM will try to create it.
+        (Default: sample_name.temp)
+
+    --time
+        Output time consumed by each step of RSEM to 'sample_name.time'.
+        (Default: off)
+
+PRIOR-ENHANCED RSEM OPTIONS
+    --run-pRSEM
+        Running prior-enhanced RSEM (pRSEM). Prior parameters, i.e.
+        isoform's initial pseudo-count for RSEM's Gibbs sampling, will be
+        learned from input RNA-seq data and an external data set. When pRSEM
+        needs and only needs ChIP-seq peak information to partition isoforms
+        (e.g. in pRSEM's default partition model), either ChIP-seq peak file
+        (with the '--chipseq-peak-file' option) or ChIP-seq FASTQ files for
+        target and input and the path for Bowtie executables are required
+        (with the '--chipseq-target-read-files &lt;string&gt;',
+        '--chipseq-control-read-files &lt;string&gt;', and '--bowtie-path &lt;path&gt;'
+        options), otherwise, ChIP-seq FASTQ files for target and control and
+        the path to Bowtie executables are required. (Default: off)
+
+    --chipseq-peak-file &lt;string&gt;
+        Full path to a ChIP-seq peak file in ENCODE's narrowPeak, i.e.
+        BED6+4, format. This file is used when running prior-enhanced RSEM
+        in the default two-partition model. It partitions isoforms by
+        whether they have ChIP-seq overlapping with their transcription
+        start site region or not. Each partition will have its own prior
+        parameter learned from a training set. This file can be either
+        gzipped or ungzipped. (Default: "")
+
+    --chipseq-target-read-files &lt;string&gt;
+        Comma-separated full path of FASTQ read file(s) for ChIP-seq target.
+        This option is used when running prior-enhanced RSEM. It provides
+        information to calculate ChIP-seq peaks and signals. The file(s) can
+        be either ungzipped or gzipped with a suffix '.gz' or '.gzip'. The
+        options '--bowtie-path &lt;path&gt;' and '--chipseq-control-read-files
+        &lt;string&gt;' must be defined when this option is specified. (Default:
+        "")
+
+    --chipseq-control-read-files &lt;string&gt;
+        Comma-separated full path of FASTQ read file(s) for ChIP-seq control.
+        This option is used when running prior-enhanced RSEM. It provides
+        information to call ChIP-seq peaks. The file(s) can be either
+        ungzipped or gzipped with a suffix '.gz' or '.gzip'. The options
+        '--bowtie-path &lt;path&gt;' and '--chipseq-target-read-files &lt;string&gt;'
+        must be defined when this option is specified. (Default: "")
+
+    --chipseq-read-files-multi-targets &lt;string&gt;
+        Comma-separated full path of FASTQ read files for multiple ChIP-seq
+        targets. This option is used when running prior-enhanced RSEM, where
+        prior is learned from multiple complementary data sets. It provides
+        information to calculate ChIP-seq signals. All files can be either
+        ungzipped or gzipped with a suffix '.gz' or '.gzip'. When this
+        option is specified, the option '--bowtie-path &lt;path&gt;' must be
+        defined and the option '--partition-model &lt;string&gt;' will be set to
+        'cmb_lgt' automatically. (Default: "")
+
+    --chipseq-bed-files-multi-targets &lt;string&gt;
+        Comma-separated full path of BED files for multiple ChIP-seq
+        targets. This option is used when running prior-enhanced RSEM, where
+        prior is learned from multiple complementary data sets. It provides
+        information of ChIP-seq signals and must have at least the first six
+        BED columns. All files can be either ungzipped or gzipped with a
+        suffix '.gz' or '.gzip'. When this option is specified, the option
+        '--partition-model &lt;string&gt;' will be set to 'cmb_lgt' automatically.
+        (Default: "")
+
+    --cap-stacked-chipseq-reads
+        Keep a maximum number of ChIP-seq reads that aligned to the same
+        genomic interval. This option is used when running prior-enhanced
+        RSEM, where prior is learned from multiple complementary data sets.
+        This option is only in use when either
+        '--chipseq-read-files-multi-targets &lt;string&gt;' or
+        '--chipseq-bed-files-multi-targets &lt;string&gt;' is specified. (Default:
+        off)
+
+    --n-max-stacked-chipseq-reads &lt;int&gt;
+        The maximum number of stacked ChIP-seq reads to keep. This option is
+        used when running prior-enhanced RSEM, where prior is learned from
+        multiple complementary data sets. This option is only in use when
+        the option '--cap-stacked-chipseq-reads' is set. (Default: 5)
+
+    --partition-model &lt;string&gt;
+        A keyword to specify the partition model used by prior-enhanced
+        RSEM. It must be one of the following keywords:
+
+        - pk
+            Partitioned by whether an isoform has a ChIP-seq peak overlapping
+            with its transcription start site (TSS) region. The TSS region is
+            defined as [TSS-500bp, TSS+500bp]. For simplicity, we refer to this
+            type of peak as 'TSS peak' when explaining other keywords.
+
+        - pk_lgtnopk
+            First partitioned by TSS peak. Then, for isoforms in the 'no TSS
+            peak' set, a logistic model is employed to further classify them
+            into two partitions.
+
+        - lm3, lm4, lm5, or lm6
+            Based on their ChIP-seq signals, isoforms are classified into 3,
+            4, 5, or 6 partitions by a linear regression model.
+
+        - nopk_lm2pk, nopk_lm3pk, nopk_lm4pk, or nopk_lm5pk
+            First partitioned by TSS peak. Then, for isoforms in the 'with TSS
+            peak' set, a linear regression model is employed to further
+            classify them into 2, 3, 4, or 5 partitions.
+
+        - pk_lm2nopk, pk_lm3nopk, pk_lm4nopk, or pk_lm5nopk
+            First partitioned by TSS peak. Then, for isoforms in the 'no TSS
+            peak' set, a linear regression model is employed to further
+            classify them into 2, 3, 4, or 5 partitions.
+
+        - cmb_lgt
+            Using a logistic regression to combine TSS signals from multiple
+            complementary data sets and partition training set isoform into
+            'expressed' and 'not expressed'. This partition model is only in
+            use when either '--chipseq-read-files-multi-targets &lt;string&gt;' or
+            '--chipseq-bed-files-multi-targets &lt;string&gt;' is specified.
+
+            Parameters for all the above models are learned from a training set.
+            For detailed explanations, please see prior-enhanced RSEM's paper.
+            (Default: 'pk')
+
+DEPRECATED OPTIONS
+    The options in this section are deprecated. They are here only for
+    compatibility reasons and may be removed in future releases.
+
+    --sam
+        Inputs are alignments in SAM format. (Default: off)
+
+    --bam
+        Inputs are alignments in BAM format. (Default: off)
+
+    --strand-specific
+        Equivalent to '--strandedness forward'. (Default: off)
+
+    --forward-prob &lt;double&gt;
+        Probability of generating a read from the forward strand of a
+        transcript. Set to 1 for a strand-specific protocol where all
+        (upstream) reads are derived from the forward strand, 0 for a
+        strand-specific protocol where all (upstream) reads are derived from
+        the reverse strand, or 0.5 for a non-strand-specific protocol.
+        (Default: off)
+
+DESCRIPTION
+    In its default mode, this program aligns input reads against a reference
+    transcriptome with Bowtie and calculates expression values using the
+    alignments. RSEM assumes the data are single-end reads with quality
+    scores, unless the '--paired-end' or '--no-qualities' options are
+    specified. Alternatively, users can use STAR to align reads using the
+    '--star' option. RSEM has provided options in 'rsem-prepare-reference'
+    to prepare STAR's genome indices. Users may use an alternative aligner
+    by specifying '--alignments', and providing an alignment file in
+    SAM/BAM/CRAM format. However, users should make sure that they align
+    against the indices generated by 'rsem-prepare-reference' and the
+    alignment file satisfies the requirements mentioned in ARGUMENTS
+    section.
+
+    One simple way to make the alignment file satisfy RSEM's requirements
+    is to use the 'convert-sam-for-rsem' script. This script accepts
+    SAM/BAM/CRAM files as input and outputs a BAM file. For example, type
+    the following command to convert a SAM file, 'input.sam', to a
+    ready-for-use BAM file, 'input_for_rsem.bam':
+
+      convert-sam-for-rsem input.sam input_for_rsem
+
+    For details, please refer to 'convert-sam-for-rsem's documentation page.
+
+NOTES
+    1. Users must run 'rsem-prepare-reference' with the appropriate
+    reference before using this program.
+
+    2. For single-end data, it is strongly recommended that the user provide
+    the fragment length distribution parameters (--fragment-length-mean and
+    --fragment-length-sd). For paired-end data, RSEM will automatically
+    learn a fragment length distribution from the data.
+
+    3. Some aligner parameters have default values different from their
+    original settings.
+
+    4. With the '--calc-pme' option, posterior mean estimates will be
+    calculated in addition to maximum likelihood estimates.
+
+    5. With the '--calc-ci' option, 95% credibility intervals and posterior
+    mean estimates will be calculated in addition to maximum likelihood
+    estimates.
+
+    6. The temporary directory and all intermediate files will be removed
+    when RSEM finishes unless '--keep-intermediate-files' is specified.
+
+    With the '--run-pRSEM' option and associated options (see section
+    'PRIOR-ENHANCED RSEM OPTIONS' above for details), prior-enhanced RSEM
+    will be running. Prior parameters will be learned from supplied external
+    data set(s) and assigned as initial pseudo-counts for isoforms in the
+    corresponding partition for Gibbs sampling.
+
+OUTPUT
+    sample_name.isoforms.results
+        File containing isoform level expression estimates. The first line
+        contains column names separated by the tab character. The format of
+        each line in the rest of this file is:
+
+        transcript_id gene_id length effective_length expected_count TPM
+        FPKM IsoPct [posterior_mean_count
+        posterior_standard_deviation_of_count pme_TPM pme_FPKM
+        IsoPct_from_pme_TPM TPM_ci_lower_bound TPM_ci_upper_bound
+        TPM_coefficient_of_quartile_variation FPKM_ci_lower_bound
+        FPKM_ci_upper_bound FPKM_coefficient_of_quartile_variation]
+
+        Fields are separated by the tab character. Fields within "[]" are
+        optional. They will not be presented if neither '--calc-pme' nor
+        '--calc-ci' is set.
+
+        'transcript_id' is the transcript name of this transcript. 'gene_id'
+        is the gene name of the gene which this transcript belongs to
+        (denote this gene as its parent gene). If no gene information is
+        provided, 'gene_id' and 'transcript_id' are the same.
+
+        'length' is this transcript's sequence length (poly(A) tail is not
+        counted). 'effective_length' counts only the positions that can
+        generate a valid fragment. If no poly(A) tail is added,
+        'effective_length' is equal to transcript length - mean fragment
+        length + 1. If a transcript's effective length is less than 1,
+        both its effective length and its abundance estimates are
+        set to 0.
+
+        'expected_count' is the sum, over all reads, of the posterior
+        probability that each read comes from this transcript. Because 1)
+        each read aligning to this transcript has a probability of being
+        generated from background noise; 2) RSEM may filter some alignable
+        low quality reads, the sum of expected counts for all transcripts is
+        generally less than the total number of reads aligned.
+
+        'TPM' stands for Transcripts Per Million. It is a relative measure
+        of transcript abundance. The sum of all transcripts' TPM is 1
+        million. 'FPKM' stands for Fragments Per Kilobase of transcript per
+        Million mapped reads. It is another relative measure of transcript
+        abundance. If we define l_bar to be the mean transcript length in a
+        sample, which can be calculated as
+
+        l_bar = \sum_i TPM_i / 10^6 * effective_length_i (i goes through
+        every transcript),
+
+        the following equation holds:
+
+        FPKM_i = 10^3 / l_bar * TPM_i.
+
+        We can see that the sum of FPKM is not a constant across samples.
+
+        'IsoPct' stands for isoform percentage. It is the percentage of this
+        transcript's abundance over its parent gene's abundance. If its
+        parent gene has only one isoform or the gene information is not
+        provided, this field will be set to 100.
+
+        'posterior_mean_count', 'pme_TPM', 'pme_FPKM' are posterior mean
+        estimates calculated by RSEM's Gibbs sampler.
+        'posterior_standard_deviation_of_count' is the posterior standard
+        deviation of counts. 'IsoPct_from_pme_TPM' is the isoform percentage
+        calculated from 'pme_TPM' values.
+
+        'TPM_ci_lower_bound', 'TPM_ci_upper_bound', 'FPKM_ci_lower_bound'
+        and 'FPKM_ci_upper_bound' are lower(l) and upper(u) bounds of 95%
+        credibility intervals for TPM and FPKM values. The bounds are
+        inclusive (i.e. [l, u]).
+
+        'TPM_coefficient_of_quartile_variation' and
+        'FPKM_coefficient_of_quartile_variation' are coefficients of
+        quartile variation (CQV) for TPM and FPKM values. CQV is a robust
+        way of measuring the ratio between the standard deviation and the
+        mean. It is defined as
+
+        CQV := (Q3 - Q1) / (Q3 + Q1),
+
+        where Q1 and Q3 are the first and third quartiles.
+
+    sample_name.genes.results
+        File containing gene level expression estimates. The first line
+        contains column names separated by the tab character. The format of
+        each line in the rest of this file is:
+
+        gene_id transcript_id(s) length effective_length expected_count TPM
+        FPKM [posterior_mean_count posterior_standard_deviation_of_count
+        pme_TPM pme_FPKM TPM_ci_lower_bound TPM_ci_upper_bound
+        TPM_coefficient_of_quartile_variation FPKM_ci_lower_bound
+        FPKM_ci_upper_bound FPKM_coefficient_of_quartile_variation]
+
+        Fields are separated by the tab character. Fields within "[]" are
+        optional. They will not be presented if neither '--calc-pme' nor
+        '--calc-ci' is set.
+
+        'transcript_id(s)' is a comma-separated list of transcript_ids
+        belonging to this gene. If no gene information is provided,
+        'gene_id' and 'transcript_id(s)' are identical (the
+        'transcript_id').
+
+        A gene's 'length' and 'effective_length' are defined as the weighted
+        average of its transcripts' lengths and effective lengths (weighted
+        by 'IsoPct'). A gene's abundance estimates are just the sum of its
+        transcripts' abundance estimates.
+
+    sample_name.alleles.results
+        Only generated when the RSEM references are built with
+        allele-specific transcripts.
+
+        This file contains allele level expression estimates for
+        allele-specific expression calculation. The first line contains
+        column names separated by the tab character. The format of each line
+        in the rest of this file is:
+
+        allele_id transcript_id gene_id length effective_length
+        expected_count TPM FPKM AlleleIsoPct AlleleGenePct
+        [posterior_mean_count posterior_standard_deviation_of_count pme_TPM
+        pme_FPKM AlleleIsoPct_from_pme_TPM AlleleGenePct_from_pme_TPM
+        TPM_ci_lower_bound TPM_ci_upper_bound
+        TPM_coefficient_of_quartile_variation FPKM_ci_lower_bound
+        FPKM_ci_upper_bound FPKM_coefficient_of_quartile_variation]
+
+        Fields are separated by the tab character. Fields within "[]" are
+        optional. They will not be presented if neither '--calc-pme' nor
+        '--calc-ci' is set.
+
+        'allele_id' is the allele-specific name of this allele-specific
+        transcript.
+
+        'AlleleIsoPct' stands for allele-specific percentage on isoform
+        level. It is the percentage of this allele-specific transcript's
+        abundance over its parent transcript's abundance. If its parent
+        transcript has only one allele variant form, this field will be set
+        to 100.
+
+        'AlleleGenePct' stands for allele-specific percentage on gene level.
+        It is the percentage of this allele-specific transcript's abundance
+        over its parent gene's abundance.
+
+        'AlleleIsoPct_from_pme_TPM' and 'AlleleGenePct_from_pme_TPM' have
+        similar meanings. They are calculated based on posterior mean
+        estimates.
+
+        Please note that if this file is present, the fields 'length' and
+        'effective_length' in 'sample_name.isoforms.results' should be
+        interpreted similarly as the corresponding definitions in
+        'sample_name.genes.results'.
+
+    sample_name.transcript.bam
+        Only generated when --no-bam-output is not specified.
+
+        'sample_name.transcript.bam' is a BAM-formatted file of read
+        alignments in transcript coordinates. The MAPQ field of each
+        alignment is set to min(100, floor(-10 * log10(1.0 - w) + 0.5)),
+        where w is the posterior probability of that alignment being the
+        true mapping of a read. In addition, RSEM pads a new tag ZW:f:value,
+        where value is a single precision floating number representing the
+        posterior probability. Because this file contains all alignment
+        lines produced by bowtie or user-specified aligners, it can also be
+        used as a replacement of the aligner generated BAM/SAM file.
+
+    sample_name.transcript.sorted.bam and sample_name.transcript.sorted.bam.bai
+        Only generated when --no-bam-output is not specified and --sort-bam-by-coordinate is specified.
+
+        'sample_name.transcript.sorted.bam' and
+        'sample_name.transcript.sorted.bam.bai' are the sorted BAM file and
+        indices generated by samtools (included in RSEM package).
+
+    sample_name.genome.bam
+        Only generated when --no-bam-output is not specified and
+        --output-genome-bam is specified.
+
+        'sample_name.genome.bam' is a BAM-formatted file of read alignments
+        in genomic coordinates. Alignments of reads that have identical
+        genomic coordinates (i.e., alignments to different isoforms that
+        share the same genomic region) are collapsed into one alignment. The
+        MAPQ field of each alignment is set to min(100, floor(-10 *
+        log10(1.0 - w) + 0.5)), where w is the posterior probability of that
+        alignment being the true mapping of a read. In addition, RSEM pads a
+        new tag ZW:f:value, where value is a single precision floating
+        number representing the posterior probability. If an alignment is
+        spliced, a XS:A:value tag is also added, where value is either '+'
+        or '-' indicating the strand of the transcript it aligns to.
+
+    sample_name.genome.sorted.bam and sample_name.genome.sorted.bam.bai
+        Only generated when --no-bam-output is not specified, and
+        --sort-bam-by-coordinate and --output-genome-bam are specified.
+
+        'sample_name.genome.sorted.bam' and
+        'sample_name.genome.sorted.bam.bai' are the sorted BAM file and
+        indices generated by samtools (included in RSEM package).
+
+    sample_name.time
+        Only generated when --time is specified.
+
+        It contains time (in seconds) consumed by aligning reads, estimating
+        expression levels and calculating credibility intervals.
+
+    sample_name.stat
+        This is a folder instead of a file. All model related statistics are
+        stored in this folder. 'rsem-plot-model' can generate plots
+        using this folder.
+
+        'sample_name.stat/sample_name.cnt' contains alignment statistics.
+        The format and meanings of each field are described in
+        'cnt_file_description.txt' under RSEM directory.
+
+        'sample_name.stat/sample_name.model' stores RNA-Seq model parameters
+        learned from the data. The format and meanings of each field of this
+        file are described in 'model_file_description.txt' under RSEM
+        directory.
+
+        The following four output files will be generated only by
+        prior-enhanced RSEM
+
+        - 'sample_name.stat/sample_name_prsem.all_tr_features'
+          It stores isoform features for deriving and assigning pRSEM prior.
+          The first line is a header and the rest is one isoform per line.
+          The description for each column is:
+
+          * trid: transcript ID from input annotation
+
+          * geneid: gene ID from input annotation
+
+          * chrom: isoform's chromosome name
+
+          * strand: isoform's strand name
+
+          * start: isoform's end with the lowest genomic loci
+
+          * end: isoform's end with the highest genomic loci
+
+          * tss_mpp: average mappability of [TSS-500bp, TSS+500bp], where
+            TSS is isoform's transcription start site, i.e. 5'-end
+
+          * body_mpp: average mappability of (TSS+500bp, TES-500bp), where
+            TES is isoform's transcription end site, i.e. 3'-end
+
+          * tes_mpp: average mappability of [TES-500bp, TES+500bp]
+
+          * pme_count: isoform's fragment or read count from RSEM's
+            posterior mean estimates
+
+          * tss: isoform's TSS loci
+
+          * tss_pk: equal to 1 if isoform's [TSS-500bp, TSS+500bp] region
+            overlaps with a RNA Pol II peak; 0 otherwise
+
+          * is_training: equal to 1 if isoform is in the training set where
+            Pol II prior is learned; 0 otherwise
+
+        - 'sample_name.stat/sample_name_prsem.all_tr_prior'
+          It stores prior parameters for every isoform. This file does not
+          have a header. Each line contains a prior parameter and an
+          isoform's transcript ID delimited by " # ".
+
+        - 'sample_name.stat/sample_name_uniform_prior_1.isoforms.results'
+          RSEM's posterior mean estimates on the isoform level with an
+          initial pseudo-count of one for every isoform. It is in the same
+          format as the 'sample_name.isoforms.results'.
+
+        - 'sample_name.stat/sample_name_uniform_prior_1.genes.results'
+          RSEM's posterior mean estimates on the gene level with an initial
+          pseudo-count of one for every isoform. It is in the same format as
+          the 'sample_name.genes.results'.
+
+        When learning prior from multiple external data sets in
+        prior-enhanced RSEM, two additional output files will be generated.
+
+        - 'sample_name.stat/sample_name.pval_LL'
+          It stores a p-value and a log-likelihood. The p-value indicates
+          whether the combination of multiple complementary data sets is
+          informative for RNA-seq quantification. The log-likelihood shows
+          how well pRSEM's Dirichlet-multinomial model fits the read counts
+          of partitioned training set isoforms.
+
+        - 'sample_name.stat/sample_name.lgt_mdl.RData'
+          It stores an R object named 'glmmdl', which is a logistic
+          regression model on the training set isoforms and multiple
+          external data sets.
+
+        In addition, extra columns will be added to
+        'sample_name.stat/sample_name_prsem.all_tr_features'
+
+        * is_expr: equal to 1 if isoform has an abundance &gt;= 1 TPM and a
+          non-zero read count from RSEM's posterior mean estimates; 0
+          otherwise
+
+        * "$external_data_set_basename": log10 of external data's signal at
+          [TSS-500, TSS+500]. Signal is the number of reads aligned within
+          that interval and normalized to RPKM by read depth and interval
+          length. It will be set to -4 if no read aligned to that interval.
+
+          There are multiple columns like this one, where each represents an
+          external data set.
+
+        * prd_expr_prob: predicted probability from logistic regression
+          model on whether this isoform is expressed or not. A probability
+          higher than 0.5 is considered as expressed
+
+        * partition: group index, to which this isoform is partitioned
+
+        * prior: prior parameter for this isoform
+
+EXAMPLES
+    Assume the path to the bowtie executables is in the user's PATH
+    environment variable. Reference files are under '/ref' with name
+    'mouse_125'.
+
+    1) '/data/mmliver.fq', single-end reads with quality scores. Quality
+    scores are encoded as for 'GA pipeline version &gt;= 1.3'. We want to use 8
+    threads and generate a genome BAM file. In addition, we want to append
+    gene/transcript names to the result files:
+
+     rsem-calculate-expression --phred64-quals \
+                               -p 8 \
+                               --append-names \
+                               --output-genome-bam \
+                               /data/mmliver.fq \
+                               /ref/mouse_125 \
+                               mmliver_single_quals
+
+    2) '/data/mmliver_1.fq' and '/data/mmliver_2.fq', stranded paired-end
+    reads with quality scores. Suppose the library is prepared using TruSeq
+    Stranded Kit, which means the first mate should map to the reverse
+    strand. Quality scores are in SANGER format. We want to use 8 threads
+    and do not generate a genome BAM file:
+
+     rsem-calculate-expression -p 8 \
+                               --paired-end \
+                               --strandedness reverse \
+                               /data/mmliver_1.fq \
+                               /data/mmliver_2.fq \
+                               /ref/mouse_125 \
+                               mmliver_paired_end_quals
+
+    3) '/data/mmliver.fa', single-end reads without quality scores. We want
+    to use 8 threads:
+
+     rsem-calculate-expression -p 8 \
+                               --no-qualities \
+                               /data/mmliver.fa \
+                               /ref/mouse_125 \
+                               mmliver_single_without_quals
+
+    4) Data are the same as 1). This time we assume the bowtie executables
+    are under '/sw/bowtie'. We want to take a fragment length distribution
+    into consideration. We set the fragment length mean to 150 and the
+    standard deviation to 35. In addition to a BAM file, we also want to
+    generate credibility intervals. We allow RSEM to use 1GB of memory for
+    CI calculation:
+
+     rsem-calculate-expression --bowtie-path /sw/bowtie \
+                               --phred64-quals \
+                               --fragment-length-mean 150.0 \
+                               --fragment-length-sd 35.0 \
+                               -p 8 \
+                               --output-genome-bam \
+                               --calc-ci \
+                               --ci-memory 1024 \
+                               /data/mmliver.fq \
+                               /ref/mouse_125 \
+                               mmliver_single_quals
+
+    5) '/data/mmliver_paired_end_quals.bam', BAM-formatted alignments for
+    paired-end reads with quality scores. We want to use 8 threads:
+
+     rsem-calculate-expression --paired-end \
+                               --alignments \
+                               -p 8 \
+                               /data/mmliver_paired_end_quals.bam \
+                               /ref/mouse_125 \
+                               mmliver_paired_end_quals
+
+    6) '/data/mmliver_1.fq.gz' and '/data/mmliver_2.fq.gz', paired-end reads
+    with quality scores, where the read files are compressed by gzip. We want
+    to use STAR to align reads and assume the STAR executable is '/sw/STAR'.
+    Suppose we want to use 8 threads and do not generate a genome BAM file:
+
+     rsem-calculate-expression --paired-end \
+                               --star \
+                               --star-path /sw/STAR \
+                               --gzipped-read-file \
+                               -p 8 \
+                               /data/mmliver_1.fq.gz \
+                               /data/mmliver_2.fq.gz \
+                               /ref/mouse_125 \
+                               mmliver_paired_end_quals
+
+    7) In the above example, suppose we want to run prior-enhanced RSEM
+    instead. Assuming we want to learn priors from a ChIP-seq peak file
+    '/data/mmliver.narrowPeak.gz':
+
+     rsem-calculate-expression --star \
+                               --star-path /sw/STAR \
+                               --gzipped-read-file \
+                               --paired-end \
+                               --calc-pme \
+                               --run-pRSEM \
+                               --chipseq-peak-file /data/mmliver.narrowPeak.gz \
+                               -p 8 \
+                               /data/mmliver_1.fq.gz \
+                               /data/mmliver_2.fq.gz \
+                               /ref/mouse_125 \
+                               mmliver_paired_end_quals
+
+    8) Similar to the example in 7), suppose we want to use the partition
+    model 'pk_lm2nopk' (partitioning isoforms by Pol II TSS peak first and
+    then partitioning 'no TSS peak' isoforms into two bins by a linear
+    regression model), and we want to partition isoforms by RNA Pol II's
+    ChIP-seq read files '/data/mmliver_PolIIRep1.fq.gz' and
+    '/data/mmliver_PolIIRep2.fq.gz', and the control ChIP-seq read file
+    '/data/mmliver_ChIPseqCtrl.fq.gz'. Also, assuming Bowtie's executables
+    are under '/sw/bowtie/':
+
+     rsem-calculate-expression --star \
+                               --star-path /sw/STAR \
+                               --gzipped-read-file \
+                               --paired-end \
+                               --calc-pme \
+                               --run-pRSEM \
+                               --chipseq-target-read-files /data/mmliver_PolIIRep1.fq.gz,/data/mmliver_PolIIRep2.fq.gz \
+                               --chipseq-control-read-files /data/mmliver_ChIPseqCtrl.fq.gz \
+                               --partition-model pk_lm2nopk \
+                               --bowtie-path /sw/bowtie \
+                               -p 8 \
+                               /data/mmliver_1.fq.gz \
+                               /data/mmliver_2.fq.gz \
+                               /ref/mouse_125 \
+                               mmliver_paired_end_quals
+
+    9) Similar to the example in 8), suppose we want to derive priors from
+    four histone modification ChIP-seq read data sets:
+    '/data/H3K27Ac.fastq.gz', '/data/H3K4me1.fastq.gz',
+    '/data/H3K4me2.fastq.gz', and '/data/H3K4me3.fastq.gz'. Also, assuming
+    Bowtie's executables are under '/sw/bowtie/':
+
+     rsem-calculate-expression --star \
+                               --star-path /sw/STAR \
+                               --gzipped-read-file \
+                               --paired-end \
+                               --calc-pme \
+                               --run-pRSEM \
+                               --partition-model cmb_lgt \
+                               --chipseq-read-files-multi-targets /data/H3K27Ac.fastq.gz,/data/H3K4me1.fastq.gz,/data/H3K4me2.fastq.gz,/data/H3K4me3.fastq.gz \
+                               --bowtie-path /sw/bowtie \
+                               -p 8 \
+                               /data/mmliver_1.fq.gz \
+                               /data/mmliver_2.fq.gz \
+                               /ref/mouse_125 \
+                               mmliver_paired_end_quals
+
+  </help> 
+  <citations>
+    <citation type="doi">10.1186/1471-2105-12-323</citation>
+  </citations>
+
+</tool>
author	artbio
date	Sat, 31 Mar 2018 21:30:07 -0400
parents
children	49795544dac7