annotate rsem_calculate_expression.xml @ 3:59459de65740

mv RSEM datatypes to separate repository, remove samtools dependency
author Jim Johnson <jj@umn.edu>
date Mon, 10 Mar 2014 07:19:56 -0500
parents 5949673f9e3e
children 30a8343fb0e7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
1 <tool id="rsem_calculate_expression" name="RSEM calculate expression" version="1.1.17">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
2 <description>RNA-Seq by Expectation-Maximization</description>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
3 <requirements>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
4 <requirement type="package" version="1.1.17">rsem</requirement>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
5 <requirement type="package" version="1.0.0">bowtie</requirement>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
6 </requirements>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
7 <command>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
8 rsem-calculate-expression
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
9 ## --tag string
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
10 #if $seedlength:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
11 --seed-length $seedlength
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
12 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
13 --forward-prob $forward_prob
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
14 #if $rsem_options.fullparams == 'fullset':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
15 ## Fragment info
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
16 #if $rsem_options.fragment_length_mean:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
17 --fragment-length-mean $rsem_options.fragment_length_mean
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
18 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
19 #if $rsem_options.fragment_length_min:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
20 --fragment-length-min $rsem_options.fragment_length_min
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
21 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
22 #if $rsem_options.fragment_length_sd:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
23 --fragment-length-sd $rsem_options.fragment_length_sd
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
24 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
25 #if $rsem_options.fragment_length_max:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
26 --fragment-length-max $rsem_options.fragment_length_max
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
27 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
28 ## RSPD
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
29 #if $rsem_options.rspd.estimate == 'yes':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
30 --estimate-rspd
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
31 #if $rsem_options.rspd.num_rspd_bins:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
32 --num-rspd-bins $rsem_options.rspd.num_rspd_bins
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
33 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
34 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
35 ## Calculate 95% credibility intervals and posterior mean estimates.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
36 #if $rsem_options.useci.ci == 'yes':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
37 --calc-ci
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
38 #if $rsem_options.useci.cimem:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
39 --ci-memory $rsem_options.useci.cimem
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
40 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
41 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
42 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
43 ## --num-threads $GALAXY_SLOTS
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
44 #if $input.format != 'sam' and $input.bowtie_options.fullparams == 'fullset':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
45 ## Bowtie params: only defined for FASTQ/FASTA input, where bowtie_options is nested under the "input" conditional
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
46 #if $input.bowtie_options.bowtie_e:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
47 --bowtie-e $input.bowtie_options.bowtie_e
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
48 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
49 #if $input.bowtie_options.bowtie_m:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
50 --bowtie-m $input.bowtie_options.bowtie_m
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
51 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
52 #if $input.bowtie_options.bowtie_n:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
53 --bowtie-n $input.bowtie_options.bowtie_n
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
54 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
55 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
56 ## Outputs
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
57 #if $rsem_outputs.result_bams == 'none':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
58 --no-bam-output
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
59 #else
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
60 #if $rsem_outputs.result_bams == 'both':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
61 --output-genome-bam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
62 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
63 $rsem_outputs.sampling_for_bam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
64 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
65 ## Input data
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
66 #if $input.format=="fastq"
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
67 $input.fastq_select
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
68 #if $input.fastq.matepair=="single":
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
69 $input.fastq.singlefastq
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
70 #elif $input.fastq.matepair=="paired":
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
71 --paired-end
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
72 $input.fastq.fastq1
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
73 $input.fastq.fastq2
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
74 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
75 #elif $input.format=="fasta"
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
76 --no-qualities
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
77 #if $input.fasta.matepair=="single":
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
78 $input.fasta.singlefasta
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
79 #elif $input.fasta.matepair=="paired":
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
80 --paired-end
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
81 $input.fasta.fasta1
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
82 $input.fasta.fasta2
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
83 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
84 #elif $input.format=="sam"
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
85 #if $input.matepair=="paired":
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
86 --paired-end
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
87 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
88 #if $input.rsem_sam._extension == 'sam':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
89 --sam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
90 #elif $input.rsem_sam._extension == 'bam':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
91 --bam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
92 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
93 $input.rsem_sam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
94 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
95 ## RSEM reference
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
96 #if $reference.refSrc == 'history':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
97 ${reference.rsem_ref.extra_files_path}/${reference.rsem_ref.metadata.reference_name}
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
98 #elif $reference.refSrc == 'cached':
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
99 ${reference.index.fields.path}
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
100 #end if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
101 ## sample_name: use a hard coded name so we can pull out galaxy outputs
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
102 rsem_output
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
103 ## direct output into logfile
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
104 > $log
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
105 </command>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
106 <macros>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
107 <macro name="rsem_options">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
108 <param name="seedlength" type="integer" value="25" optional="true" label="Seed length used by the read aligner" help="Providing the correct value for this parameter is important for RSEM's accuracy if the data are single-end reads. RSEM uses this value for Bowtie's seed length parameter. The minimum value is 25. (Default:25)">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
109 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
110 <param name="forward_prob" type="select" label="Is the library strand specific?">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
111 <option value="0.5" selected="true">No</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
112 <option value="1">Yes, the reads (or first reads from paired-end libraries) are only in the forward orientation</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
113 <option value="0">Yes, the reads (or first reads from paired-end libraries) are only in the reverse orientation</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
114 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
115 <conditional name="rsem_options">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
116 <param name="fullparams" type="select" label="Additional RSEM options">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
117 <option value="default">Use RSEM Defaults</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
118 <option value="fullset">Set Additional RSEM Options</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
119 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
120 <when value="default"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
121 <when value="fullset">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
122 <param name="fragment_length_min" type="integer" value="1" optional="true" label="Minimum read/insert length." help=" This is also the value for the bowtie -I option">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
123 <validator type="in_range" message="0 or greater" min="0" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
124 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
125 <param name="fragment_length_max" type="integer" value="1000" optional="true" label="Maximum read/insert length." help=" This is also the value for the bowtie -X option">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
126 <validator type="in_range" message="0 or greater" min="0" max="1000000"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
127 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
128 <param name="fragment_length_mean" type="float" value="" optional="true" label="Fragment length mean (single-end data only)" help="The mean of the fragment length distribution, which is assumed to be a Gaussian. (Default: -1, which disables use of the fragment length distribution)">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
129 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
130 <param name="fragment_length_sd" type="float" value="" optional="true" label="The standard deviation of the fragment length distribution (single-end data only)" help="Default 0, which assumes that all fragments are of the same length, given by the rounded value of fragment length mean. ">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
131 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
132 <conditional name="rspd">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
133 <param name="estimate" type="select" label="Read Start Position Distribution (RSPD)"
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
134 help="Set this option if you want to estimate the read start position distribution (RSPD) from data. Otherwise, RSEM will use a uniform RSPD.">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
135 <option value="no" selected="true">Use a uniform RSPD</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
136 <option value="yes">Estimate and correct for a non-uniform RSPD</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
137 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
138 <when value="no"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
139 <when value="yes">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
140 <param name="num_rspd_bins" type="integer" value="20" optional="true" label="Number of bins in the RSPD." help="Use of the default setting of 20 is recommended.">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
141 <validator type="in_range" message="" min="0" max="100"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
142 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
143 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
144 </conditional>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
145 <conditional name="useci">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
146 <param name="ci" type="select" label="Calculate 95% Credibility Intervals">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
147 <option value="no" selected="true">no</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
148 <option value="yes">yes</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
149 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
150 <when value="no"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
151 <when value="yes">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
152 <param name="cimem" size="4" type="text" value="1024" label="Amount of memory in (MB) for computing CI" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
153 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
154 </conditional>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
155 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
156 </conditional>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
157 </macro>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
158 <macro name="bowtie_options">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
159 <conditional name="bowtie_options">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
160 <param name="fullparams" type="select" label="bowtie settings">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
161 <option value="default">use bowtie defaults</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
162 <option value="fullset">set bowtie options</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
163 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
164 <when value="default"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
165 <when value="fullset">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
166 <param name="bowtie_n" type="integer" value="2" optional="true" label="Bowtie mismatches" help="Bowtie parameter max # of mismatches in the seed. (Range: 0-3, Default: 2) ">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
167 <validator type="in_range" message="max # of mismatches in the seed between 0 and 3" min="0" max="3"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
168 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
169 <param name="bowtie_e" type="integer" value="99999999" label="Maximum sum of quality scores at mismatched positions in read alignments. This is also the value for the Bowtie -e option">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
170 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
171 <param name="bowtie_m" type="integer" value="200" label="Discard alignments for reads with number of alignments greater than">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
172 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
173 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
174 </conditional>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
175 </macro>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
176 <macro name="sampling_for_bam">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
177 <param name="sampling_for_bam" type="boolean" truevalue="--sampling-for-bam" falsevalue="" checked="false" label="Use sampling for BAM">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
178 <help> When RSEM generates a BAM file, instead of outputting all alignments a read has with their posterior probabilities, one alignment is sampled according to the posterior probabilities. The sampling procedure includes the alignment to the "noise" transcript, which does not appear in the BAM file. Only the sampled alignment has a weight of 1. All other alignments have weight 0. If the "noise" transcript is sampled, all alignments that appear in the BAM file have weight 0. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
179 </help>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
180 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
181 </macro>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
182 </macros>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
183
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
184 <inputs>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
185 <param name="sample" type="text" value="rsem_sample" label="Sample name" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
186 <conditional name="reference">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
187 <param name="refSrc" type="select" label="RSEM Reference Source">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
188 <option value="cached">Locally cached</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
189 <option value="history">From your history</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
190 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
191 <when value="cached">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
192 <param name="index" type="select" label="Select RSEM reference" help="Select from a list of pre-indexed references. If you don't see anything consult the wrapper's documentation on how to create or download a reference">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
193 <options from_data_table="rsem_indexes">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
194 <filter type="sort_by" column="2" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
195 <validator type="no_options" message="No indexes are available" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
196 </options>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
197 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
198 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
199 <when value="history">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
200 <param name="rsem_ref" type="data" format="rsem_ref" label="RSEM reference" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
201 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
202 </conditional>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
203 <conditional name="input">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
204 <param name="format" type="select" label="RSEM Input file type">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
205 <option value="fastq">FASTQ</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
206 <option value="fasta">FASTA</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
207 <option value="sam">SAM/BAM</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
208 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
209 <when value="fastq">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
210 <param name="fastq_select" size="15" type="select" label="FASTQ type" >
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
211 <option value="--phred33-quals" selected="true">phred33 qualities (default for sanger)</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
212 <option value="--solexa-quals">solexa qualities</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
213 <option value="--phred64-quals">phred64 qualities</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
214 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
215 <conditional name="fastq">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
216 <param name="matepair" type="select" label="Library type">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
217 <option value="single">Single End Reads</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
218 <option value="paired">Paired End Reads</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
219 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
220 <when value="single">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
221 <param name="singlefastq" type="data" format="fastq" label="FASTQ file" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
222 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
223 <when value="paired">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
224 <param name="fastq1" type="data" format="fastq" label="Read 1 fastq file" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
225 <param name="fastq2" type="data" format="fastq" label="Read 2 fastq file" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
226 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
227 </conditional>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
228 <expand macro="bowtie_options"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
229 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
230 <when value="fasta">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
231 <conditional name="fasta">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
232 <param name="matepair" type="select" label="Library Type">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
233 <option value="single">Single End Reads</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
234 <option value="paired">Paired End Reads</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
235 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
236 <when value="single">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
237 <param name="singlefasta" type="data" format="fasta" label="fasta file" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
238 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
239 <when value="paired">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
240 <param name="fasta1" type="data" format="fasta" label="Read 1 fasta file" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
241 <param name="fasta2" type="data" format="fasta" label="Read 2 fasta file" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
242 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
243 </conditional>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
244 <expand macro="bowtie_options"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
245 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
246 <when value="sam">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
247 <!-- convert-sam-for-rsem /ref/mouse_125 input.sam -o input_for_rsem.sam -->
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
248 <param name="matepair" type="select" label="Library Type">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
249 <option value="single">Single End Reads</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
250 <option value="paired">Paired End Reads</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
251 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
252 <param name="rsem_sam" type="data" format="rsem_sam" label="RSEM formatted SAM file" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
253 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
254 </conditional>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
255 <expand macro="rsem_options"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
256 <conditional name="rsem_outputs">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
257 <param name="result_bams" type="select" label="Create bam results files"
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
258 help="In addition to the transcript-coordinate-based BAM file output, also output a BAM file with the read alignments in genomic coordinates" >
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
259 <option value="none">No BAM results files</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
260 <option value="default" selected="true">Transcript BAM results file</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
261 <option value="both">Transcript and genome BAM results files</option>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
262 </param>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
263 <when value="none"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
264 <when value="default">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
265 <expand macro="sampling_for_bam"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
266 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
267 <when value="both">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
268 <expand macro="sampling_for_bam"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
269 </when>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
270 </conditional>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
271 </inputs>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
272 <stdio>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
273 <exit_code range="1:" level="fatal" description="Error Running RSEM" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
274 </stdio>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
275 <outputs>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
276 <data format="tabular" name="gene_abundances" label="${sample}.gene_abundances" from_work_dir="rsem_output.genes.results"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
277 <data format="tabular" name="isoform_abundances" label="${sample}.isoform_abundances" from_work_dir="rsem_output.isoforms.results"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
278 <data format="bam" name="transcript_bam" label="${sample}.transcript.bam" from_work_dir="rsem_output.transcript.bam" >
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
279 <filter>rsem_outputs['result_bams'] != "none"</filter>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
280 </data>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
281 <data format="bam" name="transcript_sorted_bam" label="${sample}.transcript.sorted.bam" from_work_dir="rsem_output.transcript.sorted.bam" >
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
282 <filter>rsem_outputs['result_bams'] != "none"</filter>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
283 </data>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
284 <data format="bam" name="genome_bam" label="${sample}.genome.bam" from_work_dir="rsem_output.genome.bam">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
285 <filter>rsem_outputs['result_bams'] == "both"</filter>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
286 </data>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
287 <data format="bam" name="genome_sorted_bam" label="${sample}.genome.sorted.bam" from_work_dir="rsem_output.genome.sorted.bam">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
288 <filter>rsem_outputs['result_bams'] == "both"</filter>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
289 </data>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
290 <data format="txt" name="log" label="${sample}.rsem_log"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
291 </outputs>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
292 <tests>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
293 <test>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
294 <param name="sample" value="rsem_sample"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
295 <param name="refSrc" value="history"/>
2
5949673f9e3e rename test data RSEM_ref_reference.rsem_ref
Jim Johnson <jj@umn.edu>
parents: 0
diff changeset
296 <param name="rsem_ref" value="RSEM_ref_reference.rsem_ref" ftype="rsem_ref"/>
0
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
297 <param name="format" value="fastq"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
298 <param name="matepair" value="single"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
299 <param name="singlefastq" value="test.fastq" ftype="fastqsanger"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
300 <param name="result_bams" value="none"/>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
301 <output name="gene_abundances">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
302 <assert_contents>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
303 <has_text text="ENST00000423562,ENST00000438504,ENST00000488147,ENST00000538476,ENST00000541675" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
304 </assert_contents>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
305 </output>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
306 <output name="isoform_abundances">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
307 <assert_contents>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
308 <has_text text="ENST00000332831" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
309 </assert_contents>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
310 </output>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
311 <output name="log">
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
312 <assert_contents>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
313 <has_text text="Expression Results are written" />
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
314 </assert_contents>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
315 </output>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
316 </test>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
317 </tests>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
318 <help>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
319
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
320
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
321 RSEM HOME PAGE - http://deweylab.biostat.wisc.edu/rsem/
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
322
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
323 NAME
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
324 rsem-calculate-expression
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
325
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
326 SYNOPSIS
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
327 rsem-calculate-expression [options] upstream_read_file(s) reference_name sample_name
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
328 rsem-calculate-expression [options] --paired-end upstream_read_file/s downstream_read_file/s reference_name sample_name
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
329 rsem-calculate-expression [options] --sam/--bam [--paired-end] input reference_name sample_name
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
330
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
331 ARGUMENTS
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
332 upstream_read_file/s
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
333 Comma-separated list of files containing single-end reads or
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
334 upstream reads for paired-end data. By default, these files are
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
335 assumed to be in FASTQ format. If the --no-qualities option is
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
336 specified, then FASTA format is expected.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
337
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
338 downstream_read_file/s
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
339 Comma-separated list of files containing downstream reads which are
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
340 paired with the upstream reads. By default, these files are assumed
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
341 to be in FASTQ format. If the --no-qualities option is specified,
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
342 then FASTA format is expected.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
343
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
344 input
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
345 SAM/BAM formatted input file. If "-" is specified for the filename,
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
346 SAM/BAM input is instead assumed to come from standard input. RSEM
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
347 requires all alignments of the same read group together. For
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
348 paired-end reads, RSEM also requires the two mates of any alignment
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
349 be adjacent. See Description section for how to make input file obey
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
350 RSEM's requirements.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
351
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
352 reference_name
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
353 The name of the reference used. The user must have run
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
354 'rsem-prepare-reference' with this reference_name before running
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
355 this program.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
356
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
357 sample_name
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
358 The name of the sample analyzed. All output files are prefixed by
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
359 this name (e.g., sample_name.genes.results)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
360
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
361 OPTIONS
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
362
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
363 --paired-end
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
364 Input reads are paired-end reads. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
365
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
366 --no-qualities
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
367 Input reads do not contain quality scores. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
368
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
369 --strand-specific
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
370 The RNA-Seq protocol used to generate the reads is strand specific,
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
371 i.e., all (upstream) reads are derived from the forward strand. This
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
372 option is equivalent to --forward-prob=1.0. With this option set, if
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
373 RSEM runs the Bowtie aligner, the '--norc' Bowtie option will be
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
374 used, which disables alignment to the reverse strand of transcripts.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
375 (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
376
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
377 --sam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
378 Input file is in SAM format. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
379
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
380 --bam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
381 Input file is in BAM format. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
382
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
383 --sam-header-info [file]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
384 RSEM reads header information from input by default. If this option
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
385 is on, header information is read from the specified file. For the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
386 format of the file, please see SAM official website. (Default: "")
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
387
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
388 -p/--num-threads [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
389 Number of threads to use. Both Bowtie and expression estimation will
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
390 use this many threads. (Default: 1)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
391
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
392 --no-bam-output
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
393 Do not output any BAM file. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
394
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
395 --output-genome-bam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
396 Generate a BAM file, 'sample_name.genome.bam', with alignments
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
397 mapped to genomic coordinates and annotated with their posterior
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
398 probabilities. In addition, RSEM will call samtools (included in
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
399 RSEM package) to sort and index the bam file.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
400 'sample_name.genome.sorted.bam' and
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
401 'sample_name.genome.sorted.bam.bai' will be generated. (Default:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
402 off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
403
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
404 --sampling-for-bam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
405 When RSEM generates a BAM file, instead of outputting all alignments
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
406 a read has with their posterior probabilities, one alignment is
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
407 sampled and output according to the posterior probabilities. If
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
408 the sampling result is that the read comes from the "noise"
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
409 transcript, nothing is output. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
410
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
411 --calc-ci
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
412 Calculate 95% credibility intervals and posterior mean estimates.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
413 (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
414
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
415 --seed-length [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
416 Seed length used by the read aligner. Providing the correct value is
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
417 important for RSEM. If RSEM runs Bowtie, it uses this value for
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
418 Bowtie's seed length parameter. Any read with its or at least one of
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
419 its mates' (for paired-end reads) length less than this value will
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
420 be ignored. If poly(A) tails were not added to the references, the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
421 minimum allowed value is 5, otherwise, the minimum allowed value is
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
422 25. Note that this script will only check if the value is greater than or equal to
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
423 5 and give a warning message if the value is less than 25 but greater than or equal to
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
424 5. (Default: 25)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
425
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
426 --tag [string]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
427 The name of the optional field used in the SAM input for identifying
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
428 a read with too many valid alignments. The field should have the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
429 format [tagName]:i:[value], where a [value] bigger than 0 indicates
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
430 a read with too many alignments. (Default: "")
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
431
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
432 --bowtie-path [path]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
433 The path to the bowtie executables. (Default: the path to the bowtie
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
434 executables is assumed to be in the user's PATH environment
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
435 variable)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
436
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
437 --bowtie-n [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
438 (Bowtie parameter) max # of mismatches in the seed. (Range: 0-3,
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
439 Default: 2)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
440
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
441 --bowtie-e [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
442 (Bowtie parameter) max sum of mismatch quality scores across the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
443 alignment. (Default: 99999999)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
444
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
445 --bowtie-m [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
446 (Bowtie parameter) suppress all alignments for a read if more than [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
447 valid alignments exist. (Default: 200)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
448
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
449 --bowtie-chunkmbs [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
450 (Bowtie parameter) memory allocated for best first alignment
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
451 calculation (Default: 0 - use bowtie's default)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
452
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
453 --phred33-quals
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
454 Input quality scores are encoded as Phred+33. (Default: on)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
455
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
456 --phred64-quals
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
457 Input quality scores are encoded as Phred+64 (default for GA
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
458 Pipeline ver. less than 1.3). (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
459
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
460 --solexa-quals
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
461 Input quality scores are solexa encoded (from GA Pipeline ver. less
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
462 than 1.3). (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
463
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
464 --forward-prob [double]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
465 Probability of generating a read from the forward strand of a
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
466 transcript. Set to 1 for a strand-specific protocol where all
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
467 (upstream) reads are derived from the forward strand, 0 for a
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
468 strand-specific protocol where all (upstream) reads are derived from
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
469 the reverse strand, or 0.5 for a non-strand-specific protocol.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
470 (Default: 0.5)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
471
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
472 --fragment-length-min [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
473 Minimum read/insert length allowed. This is also the value for the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
474 bowtie -I option. (Default: 1)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
475
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
476 --fragment-length-max [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
477 Maximum read/insert length allowed. This is also the value for the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
478 bowtie -X option. (Default: 1000)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
479
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
480 --fragment-length-mean [double]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
481 (single-end data only) The mean of the fragment length distribution,
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
482 which is assumed to be a Gaussian. (Default: -1, which disables use
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
483 of the fragment length distribution)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
484
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
485 --fragment-length-sd [double]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
486 (single-end data only) The standard deviation of the fragment length
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
487 distribution, which is assumed to be a Gaussian. (Default: 0, which
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
488 assumes that all fragments are of the same length, given by the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
489 rounded value of --fragment-length-mean)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
490
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
491 --estimate-rspd
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
492 Set this option if you want to estimate the read start position
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
493 distribution (RSPD) from data. Otherwise, RSEM will use a uniform
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
494 RSPD. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
495
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
496 --num-rspd-bins [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
497 Number of bins in the RSPD. Only relevant when '--estimate-rspd' is
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
498 specified. Use of the default setting is recommended. (Default: 20)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
499
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
500 --ci-memory [int]
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
501 Maximum size (in memory, MB) of the auxiliary buffer used for
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
502 computing credibility intervals (CI). Set it larger for a faster CI
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
503 calculation. However, leaving 2 GB memory free for other usage is
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
504 recommended. (Default: 1024)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
505
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
506 --keep-intermediate-files
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
507 Keep temporary files generated by RSEM. RSEM creates a temporary
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
508 directory, 'sample_name.temp', into which it puts all intermediate
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
509 output files. If this directory already exists, RSEM overwrites all
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
510 files generated by previous RSEM runs inside of it. By default,
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
511 after RSEM finishes, the temporary directory is deleted. Set this
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
512 option to prevent the deletion of this directory and the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
513 intermediate files inside of it. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
514
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
515 --time
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
516 Output time consumed by each step of RSEM to 'sample_name.time'.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
517 (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
518
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
519 -q/--quiet
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
520 Suppress the output of logging information. (Default: off)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
521
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
522 -h/--help
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
523 Show help information.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
524
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
525 DESCRIPTION
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
526 In its default mode, this program aligns input reads against a reference
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
527 transcriptome with Bowtie and calculates expression values using the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
528 alignments. RSEM assumes the data are single-end reads with quality
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
529 scores, unless the '--paired-end' or '--no-qualities' options are
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
530 specified. Users may use an alternative aligner by specifying one of the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
531 --sam and --bam options, and providing an alignment file in the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
532 specified format. However, users should make sure that they align
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
533 against the indices generated by 'rsem-prepare-reference' and the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
534 alignment file satisfies the requirements mentioned in ARGUMENTS
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
535 section.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
536
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
537 One simple way to make the alignment file satisfy RSEM's requirements
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
538 (assuming the aligner used puts mates in a paired-end read adjacent) is
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
539 to use the 'convert-sam-for-rsem' script. This script only accepts SAM format
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
540 files as input. If a BAM format file is obtained, please use samtools to
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
541 convert it to a SAM file first. For example, if '/ref/mouse_125' is the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
542 'reference_name' and the SAM file is named 'input.sam', you can run the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
543 following command:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
544
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
545 convert-sam-for-rsem /ref/mouse_125 input.sam -o input_for_rsem.sam
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
546
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
547 For details, please refer to 'convert-sam-for-rsem's documentation page.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
548
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
549 The SAM/BAM format RSEM uses is v1.4. However, it is compatible with old
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
550 SAM/BAM format. However, RSEM cannot recognize 0x100 in the FLAG field.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
551 In addition, RSEM requires that SEQ and QUAL not be '*'.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
552
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
553 The user must run 'rsem-prepare-reference' with the appropriate
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
554 reference before using this program.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
555
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
556 For single-end data, it is strongly recommended that the user provide
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
557 the fragment length distribution parameters (--fragment-length-mean and
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
558 --fragment-length-sd). For paired-end data, RSEM will automatically
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
559 learn a fragment length distribution from the data.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
560
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
561 Please note that some of the default values for the Bowtie parameters
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
562 are not the same as those defined for Bowtie itself.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
563
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
564 The temporary directory and all intermediate files will be removed when
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
565 RSEM finishes unless '--keep-intermediate-files' is specified.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
566
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
567 With the '--calc-ci' option, 95% credibility intervals and posterior
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
568 mean estimates will be calculated in addition to maximum likelihood
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
569 estimates.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
570
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
571 OUTPUT
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
572 sample_name.genes.results
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
573 File containing gene level expression estimates. The format of each
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
574 line in this file is:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
575
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
576 gene_id expected_counts tau_value [pmc_value tau_pme_value
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
577 tau_ci_lower_bound tau_ci_upper_bound] transcript_id_list
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
578
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
579 Fields are separated by the tab character. Fields within "[]" are
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
580 only presented if '--calc-ci' is set. pme stands for posterior mean
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
581 estimation. pmc stands for posterior mean counts. ci_lower_bound(l)
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
582 means the lower bound of the credibility intervals,
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
583 ci_upper_bound(u) means the upper bound of the credibility
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
584 intervals. So the credibility interval is [l, u].
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
585 'transcript_id_list' is a space-separated list of transcript_ids
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
586 belonging to the gene. If no gene information is provided, this file
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
587 has the same content as 'sample_name.isoforms.results'.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
588
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
589 sample_name.isoforms.results
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
590 File containing isoform level expression values. The format of each
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
591 line in this file is:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
592
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
593 transcript_id expected_counts tau_value [pmc_value tau_pme_value
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
594 tau_ci_lower_bound tau_ci_upper_bound] gene_id
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
595
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
596 Fields are separated by the tab character. 'gene_id' is the gene_id
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
597 of the gene which this transcript belongs to. If no gene information
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
598 is provided, 'gene_id' and 'transcript_id' are the same.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
599
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
600 sample_name.transcript.bam, sample_name.transcript.sorted.bam and
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
601 sample_name.transcript.sorted.bam.bai
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
602 Only generated when --no-bam-output is not specified.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
603
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
604 'sample_name.transcript.bam' is a BAM-formatted file of read
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
605 alignments in transcript coordinates. The MAPQ field of each
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
606 alignment is set to min(100, floor(-10 * log10(1.0 - w) + 0.5)),
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
607 where w is the posterior probability of that alignment being the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
608 true mapping of a read. In addition, RSEM pads a new tag ZW:f:value,
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
609 where value is a single-precision floating-point number representing the
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
610 posterior probability.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
611
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
612 'sample_name.transcript.sorted.bam' and
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
613 'sample_name.transcript.sorted.bam.bai' are the sorted BAM file and
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
614 indices generated by samtools (included in RSEM package).
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
615
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
616 sample_name.genome.bam, sample_name.genome.sorted.bam and
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
617 sample_name.genome.sorted.bam.bai
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
618 Only generated when --no-bam-output is not specified and
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
619 --output-genome-bam is specified.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
620
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
621 'sample_name.genome.bam' is a BAM-formatted file of read alignments
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
622 in genomic coordinates. Alignments of reads that have identical
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
623 genomic coordinates (i.e., alignments to different isoforms that
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
624 share the same genomic region) are collapsed into one alignment. The
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
625 MAPQ field of each alignment is set to min(100, floor(-10 *
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
626 log10(1.0 - w) + 0.5)), where w is the posterior probability of that
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
627 alignment being the true mapping of a read. In addition, RSEM pads a
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
628 new tag ZW:f:value, where value is a single-precision floating-point
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
629 number representing the posterior probability. If an alignment is
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
630 spliced, an XS:A:value tag is also added, where value is either '+'
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
631 or '-' indicating the strand of the transcript it aligns to.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
632
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
633 'sample_name.genome.sorted.bam' and
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
634 'sample_name.genome.sorted.bam.bai' are the sorted BAM file and
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
635 indices generated by samtools (included in RSEM package).
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
636
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
637 sample_name.sam.gz
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
638 Only generated when the input files are raw reads instead of SAM/BAM
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
639 format files
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
640
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
641 It is the gzipped SAM output produced by bowtie aligner.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
642
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
643 sample_name.time
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
644 Only generated when --time is specified.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
645
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
646 It contains time (in seconds) consumed by aligning reads, estimating
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
647 expression levels and calculating credibility intervals.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
648
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
649 sample_name.stat
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
650 This is a folder instead of a file. All model related statistics are
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
651 stored in this folder. 'rsem-plot-model' can generate plots
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
652 using this folder.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
653
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
654 EXAMPLES
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
655 Assume the path to the bowtie executables is in the user's PATH
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
656 environment variable. Reference files are under '/ref' with name
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
657 'mouse_125'.
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
658
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
659 1) '/data/mmliver.fq', single-end reads with quality scores. Quality
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
660 scores are encoded as for 'GA pipeline version >= 1.3'. We want to use 8
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
661 threads and generate a genome BAM file:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
662
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
663 rsem-calculate-expression --phred64-quals \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
664 -p 8 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
665 --output-genome-bam \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
666 /data/mmliver.fq \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
667 /ref/mouse_125 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
668 mmliver_single_quals
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
669
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
670 2) '/data/mmliver_1.fq' and '/data/mmliver_2.fq', paired-end reads with
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
671 quality scores. Quality scores are in SANGER format. We want to use 8
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
672 threads without generating a genome BAM file:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
673
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
674 rsem-calculate-expression -p 8 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
675 --paired-end \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
676 /data/mmliver_1.fq \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
677 /data/mmliver_2.fq \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
678 /ref/mouse_125 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
679 mmliver_paired_end_quals
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
680
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
681 3) '/data/mmliver.fa', single-end reads without quality scores. We want
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
682 to use 8 threads:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
683
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
684 rsem-calculate-expression -p 8 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
685 --no-qualities \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
686 /data/mmliver.fa \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
687 /ref/mouse_125 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
688 mmliver_single_without_quals
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
689
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
690 4) Data are the same as 1). We want to take a fragment length
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
691 distribution into consideration. We set the fragment length mean to 150
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
692 and the standard deviation to 35. In addition to a BAM file, we also
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
693 want to generate credibility intervals. We allow RSEM to use 1GB of
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
694 memory for CI calculation:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
695
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
696 rsem-calculate-expression --bowtie-path /sw/bowtie \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
697 --phred64-quals \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
698 --fragment-length-mean 150.0 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
699 --fragment-length-sd 35.0 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
700 -p 8 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
701 --output-genome-bam \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
702 --calc-ci \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
703 --ci-memory 1024 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
704 /data/mmliver.fq \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
705 /ref/mouse_125 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
706 mmliver_single_quals
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
707
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
708 5) '/data/mmliver_paired_end_quals.bam', paired-end reads with quality
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
709 scores. We want to use 8 threads:
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
710
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
711 rsem-calculate-expression --paired-end \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
712 --bam \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
713 -p 8 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
714 /data/mmliver_paired_end_quals.bam \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
715 /ref/mouse_125 \
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
716 mmliver_paired_end_quals
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
717 </help>
ca988deacfd1 Uploaded
jjohnson
parents:
diff changeset
718 </tool>