picard: picard_CollectRnaSeqMetrics.xml comparison

comparison picard_CollectRnaSeqMetrics.xml @ 0:5166ed57b1c4 draft

Uploaded version 1.135

author	avowinkel
date	Mon, 06 Jul 2015 14:46:32 -0400
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:5166ed57b1c4
+<tool name="CollectRnaSeqMetrics" id="picard_CollectRnaSeqMetrics" version="1.135">
+<description> collect metrics about the alignment of RNA to various functional classes of loci in the genome</description>
+<!-- Pull in the macro definitions from picard_macros.xml. NOTE(review): the "requirements" and "VS" macros and the @...@ tokens used in this file are presumably defined there; confirm against that file. -->
+<macros>
+<import>picard_macros.xml</import>
+</macros>
+<!-- Expand the "requirements" macro and pass it one extra package requirement: R 3.1.2. NOTE(review): R is presumably needed to render the CHART_OUTPUT PDF; confirm. -->
+<expand macro="requirements">
+<requirement type="package" version="3.1.2">R</requirement>
+</expand>
+<command>
+## Cheetah template that builds the shell command line for Picard CollectRnaSeqMetrics.
+## Set up input files
+## Reference sequences
+## A reference taken from the history is symlinked to a fixed local name;
+## a cached reference is used directly through its data table path.
+#set $reference_fasta_filename = "localref.fa"
+#if str( $reference_source.reference_source_selector ) == "history":
+ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &amp;&amp;
+#else:
+#set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
+#end if
+## refFlat data
+## The awk line below converts a file obtained from UCSC as specified in the tool help to refFlat format
+## (column 11 is moved to the front, comment lines are dropped).
+## The dataset path is quoted like every other path in this template.
+grep -v '^#' "${refFlat}" | awk '{print $11"\t"$1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$10}' > refFlat.tab &amp;&amp;
+## Start picard command
+@java_options@
+java -jar \$JAVA_JAR_PATH/picard.jar
+CollectRnaSeqMetrics
+REF_FLAT=refFlat.tab
+## RIBOSOMAL_INTERVALS is optional; it is only passed when a dataset was chosen
+#if str( $ribosomal_intervals ) != "None":
+	 RIBOSOMAL_INTERVALS="${ribosomal_intervals}"
+#end if
+STRAND_SPECIFICITY="${strand_specificity}"
+MINIMUM_LENGTH="${minimum_length}"
+CHART_OUTPUT="${pdfFile}"
+## One IGNORE_SEQUENCE argument per entry of the repeat block
+#for $sequence_to_ignore in $ignore_list:
+	 IGNORE_SEQUENCE="${sequence_to_ignore.sequence}"
+#end for
+RRNA_FRAGMENT_PERCENTAGE="${rrna_fragment_percentage}"
+## metric_accumulation_level is a multiple select, so its string value is a comma separated list.
+## Picard takes this option once per level, so emit one argument per selected value
+## and omit the option entirely when nothing is selected.
+#if str( $metric_accumulation_level ) != "None":
+#for $accumulation_level in str( $metric_accumulation_level ).split( "," ):
+	 METRIC_ACCUMULATION_LEVEL="${accumulation_level}"
+#end for
+#end if
+INPUT="${inputFile}"
+OUTPUT="${outFile}"
+REFERENCE_SEQUENCE="${reference_fasta_filename}"
+ASSUME_SORTED="${assume_sorted}"
+QUIET=true
+VERBOSITY=ERROR
+VALIDATION_STRINGENCY=${validation_stringency}
+</command>
+<!-- User-facing parameters. Each help string starts with the Picard option the parameter is passed to in the command template. -->
+<inputs>
+<param format="sam,bam" type="data" name="inputFile" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset" />
+<!-- The reference genome comes either from the all_fasta data table ("cached") or from a FASTA dataset in the history -->
+<conditional name="reference_source">
+	 <param name="reference_source_selector" type="select" label="Load reference genome from">
+	    <option value="cached">Local cache</option>
+	    <option value="history">History</option>
+	 </param>
+	 <when value="cached">
+	    <param name="ref_file" type="select" label="Using reference genome" help="REFERENCE_SEQUENCE">
+	       <options from_data_table="all_fasta"></options>
+	       <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+	    </param>
+	 </when>
+	 <when value="history">
+	    <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="REFERENCE_SEQUENCE; You can upload a FASTA sequence to the history and use it as reference" />
+	 </when>
+</conditional>
+<param format="tabular" name="refFlat" type="data" label="Gene annotations in refFlat form" help="See &quot;Obtaining gene annotations in refFlat format&quot; below for help" />
+<param name="ribosomal_intervals" format="picard_interval_list" type="data" optional="True" label="Location of rRNA sequences in genome, in interval_list format" help="RIBOSOMAL_INTERVALS; If not specified no bases will be identified as being ribosomal. The list of intervals can be generated from BED or Interval datasets using Galaxy BedToIntervalList tool"/>
+<!-- The default option is marked with the "selected" attribute, the same attribute used by metric_accumulation_level below -->
+<param name="strand_specificity" type="select" label="What is the RNA-seq library strand specificity" help="STRAND_SPECIFICITY; For unpaired reads, use FIRST_READ_TRANSCRIPTION_STRAND if the reads are expected to be on the transcription strand.">
+	 <option value="NONE" selected="True">None</option>
+	 <option value="FIRST_READ_TRANSCRIPTION_STRAND">First read transcription strand</option>
+	 <option value="SECOND_READ_TRANSCRIPTION_STRAND">Second read transcription strand</option>
+</param>
+<param name="minimum_length" type="integer" value="500" label="When calculating coverage based values use only transcripts of this length or greater" help="MINIMUM_LENGTH; default=500"/>
+<!-- Each repeat entry becomes one IGNORE_SEQUENCE argument in the command template -->
+<repeat name="ignore_list" title="Sequences to ignore" min="0" help="You can provide multiple sequences by clicking the button below">
+<param name="sequence" type="text" size="80" label="Ignore reads matching this sequence"/>
+</repeat>
+<param name="rrna_fragment_percentage" type="float" value="0.8" label="This percentage of the length of a fragment must overlap one of the ribosomal intervals for a read or read pair to be considered rRNA." help="RRNA_FRAGMENT_PERCENTAGE; default=0.8"/>
+<param name="metric_accumulation_level" type="select" label="The level(s) at which to accumulate metrics" multiple="true" help="METRIC_ACCUMULATION_LEVEL">
+	 <option value="ALL_READS" selected="True">All reads</option>
+	 <option value="SAMPLE">Sample</option>
+	 <option value="LIBRARY">Library</option>
+	 <option value="READ_GROUP">Read group</option>
+</param>
+<param name="assume_sorted" type="boolean" label="Assume the input file is already sorted" checked="true" truevalue="true" falsevalue="false" help="ASSUME_SORTED"/>
+<!-- NOTE(review): the "VS" macro presumably defines the validation_stringency parameter read by the command template; confirm in picard_macros.xml -->
+<expand macro="VS" />
+</inputs>
+<!-- Two output datasets: pdfFile is the file passed as CHART_OUTPUT and outFile the file passed as OUTPUT in the command template -->
+<outputs>
+<data format="pdf" name="pdfFile" label="${tool.name} on ${on_string}: Chart PDF"/>
+<data format="tabular" name="outFile" label="${tool.name} on ${on_string}: Summary stats"/>
+</outputs>
+<!-- Any exit code of 1 or higher is reported as a fatal error -->
+<stdio>
+<exit_code range="1:"  level="fatal"/>
+</stdio>
+<!-- One functional test: reference taken from the history, default-like parameter values, and the tabular summary compared with the expected file allowing up to 4 differing lines. The PDF output is not checked. -->
+<tests>
+<test>
+<param name="reference_source_selector" value="history"/>
+<param name="ref_file" value="picard_CollectRnaSeqMetrics_ref.fa" ftype="fasta"/>
+<param name="inputFile" value="picard_CollectRnaSeqMetrics.bam" ftype="bam"/>
+<param name="assume_sorted" value="true" />
+<param name="refFlat" value="picard_CollectRnaSeqMetrics.refFlat" />
+<param name="metric_accumulation_level" value="ALL_READS" />
+<param name="minimum_length" value="500" />
+<param name="strand_specificity" value="NONE" />
+<param name="rrna_fragment_percentage" value="0.8" />
+<output name="outFile" file="picard_CollectRnaSeqMetrics_test1.tab" ftype="tabular" lines_diff="4"/>
+</test>
+</tests>
+<help>
+.. class:: infomark
+**Purpose**
+Collects metrics about the alignment of RNA to various functional classes of loci in the genome: coding, intronic, UTR, intergenic, ribosomal.
+@dataset_collections@
+-----
+.. class:: warningmark
+**Obtaining gene annotations in refFlat format**
+This tool requires gene annotations in refFlat_ format. These data can be obtained from UCSC table browser directly through Galaxy by following these steps:
+1. Click on **Get Data** in the upper part of left pane of Galaxy interface
+2. Click on **UCSC Main** link
+3. Set your genome and dataset of interest. It **must** be the same genome build against which you have mapped the reads contained in the BAM file you are analyzing
+4. In the **output format** field choose **selected fields from primary and related tables**
+5. Click **get output** button
+6. In the first table presented at the top of the page select (using checkboxes) the first 11 fields:
+name
+chrom
+strand
+txStart
+txEnd
+cdsStart
+cdsEnd
+exonCount
+exonStarts
+exonEnds
+proteinId
+7. Click **done with selection**
+8. Click **Send query to Galaxy**
+9. A new dataset will appear in the current Galaxy history
+10. Use this dataset as the input for **Gene annotations in refFlat form** dropdown of this tool
+.. _refFlat: http://genome.ucsc.edu/goldenPath/gbdDescriptionsOld.html#RefFlat
+@description@
+REF_FLAT=File                 Gene annotations in refFlat form.  Format described here:
+http://genome.ucsc.edu/goldenPath/gbdDescriptionsOld.html#RefFlat  Required.
+RIBOSOMAL_INTERVALS=File      Location of rRNA sequences in genome, in interval_list format.  If not specified no bases
+will be identified as being ribosomal. Format described here:
+http://picard.sourceforge.net/javadoc/net/sf/picard/util/IntervalList.html  and can be
+				 generated from BED datasets using Galaxy's wrapper for picard_BedToIntervalList tool
+STRAND_SPECIFICITY=StrandSpecificity
+STRAND=StrandSpecificity      For strand-specific library prep. For unpaired reads, use FIRST_READ_TRANSCRIPTION_STRAND
+if the reads are expected to be on the transcription strand.  Required. Possible values:
+{NONE, FIRST_READ_TRANSCRIPTION_STRAND, SECOND_READ_TRANSCRIPTION_STRAND}
+MINIMUM_LENGTH=Integer        When calculating coverage based values (e.g. CV of coverage) only use transcripts of this
+length or greater.  Default value: 500.
+IGNORE_SEQUENCE=String        If a read maps to a sequence specified with this option, all the bases in the read are
+counted as ignored bases.
+RRNA_FRAGMENT_PERCENTAGE=Double
+This percentage of the length of a fragment must overlap one of the ribosomal intervals
+for a read or read pair to be considered rRNA.  Default value: 0.8.
+METRIC_ACCUMULATION_LEVEL=MetricAccumulationLevel
+LEVEL=MetricAccumulationLevel The level(s) at which to accumulate metrics.    Possible values: {ALL_READS, SAMPLE,
+LIBRARY, READ_GROUP} This option may be specified 0 or more times.
+ASSUME_SORTED=Boolean
+AS=Boolean                    If true (default), then the sort order in the header file will be ignored.  Default
+value: true. Possible values: {true, false}
+@more_info@
+</help>
+</tool>

Mercurial > repos > avowinkel > picard

comparison picard_CollectRnaSeqMetrics.xml @ 0:5166ed57b1c4 draft