Mercurial > repos > avowinkel > picard

diff picard_SamToFastq.xml @ 0:5166ed57b1c4 draft
Uploaded version 1.135
author: avowinkel
date: Mon, 06 Jul 2015 14:46:32 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/picard_SamToFastq.xml	Mon Jul 06 14:46:32 2015 -0400
@@ -0,0 +1,196 @@
+<tool name="SamToFastq" id="picard_SamToFastq" version="1.135">
+  <description>extract reads and qualities from SAM/BAM dataset and convert to fastq</description>
+  <macros>
+    <import>picard_macros.xml</import>
+  </macros>
+  <expand macro="requirements" />
+  <command>
+    ## Write a placeholder into the hidden report dataset, then run Picard SamToFastq; the FASTQ files are collected afterwards by dataset discovery.
+    echo "BAM" > "${report}" &amp;&amp;    ## This is necessary for output dataset detection (see output tags below)
+    
+    @java_options@
+    
+    java -jar \$JAVA_JAR_PATH/picard.jar
+    SamToFastq
+    
+    INPUT="${inputFile}"
+    ## Output layout: per-read-group files in the working directory, separate READ1/READ2/UNPAIRED_READS files, or a single interleaved file.
+    #if str( $output_per_rg ) == "true":
+      OUTPUT_PER_RG=true
+      OUTPUT_DIR=.
+    #elif str( $output_per_rg ) == "false" and str( $interleave ) == "false":
+      FASTQ=READ1.fastq
+      SECOND_END_FASTQ=READ2.fastq
+      UNPAIRED_FASTQ=UNPAIRED_READS.fastq
+    #elif str( $output_per_rg ) == "false" and str( $interleave ) == "true":
+      FASTQ=INTERLEAVED.fastq
+    #end if
+  
+    RE_REVERSE="${re_reverse}"
+    INTERLEAVE="${interleave}"
+    INCLUDE_NON_PF_READS="${include_non_pf_reads}"
+    CLIPPING_ATTRIBUTE="${clipping_attribute}"
+    CLIPPING_ACTION="${clipping_action}"
+    READ1_TRIM="${read1_trim}"
+    ## The form default of -1 stands in for the null default, so the option is passed only for values above -1.
+    #if int($read1_max_bases_to_write) > -1:
+      READ1_MAX_BASES_TO_WRITE="${read1_max_bases_to_write}"
+    #end if
+    
+    READ2_TRIM="${read2_trim}"
+    ## Same -1 convention as for read 1: the option is omitted unless the value is above -1.
+    #if int($read2_max_bases_to_write) > -1:
+      READ2_MAX_BASES_TO_WRITE="${read2_max_bases_to_write}"
+    #end if
+    
+    INCLUDE_NON_PRIMARY_ALIGNMENTS="${include_non_primary_alignments}"
+    
+    
+    VALIDATION_STRINGENCY="${validation_stringency}"
+    QUIET=true
+    VERBOSITY=ERROR
+  
+  </command>
+  <inputs>
+    <!-- Form parameters for SamToFastq; each help string starts with the command-line option the parameter is passed to. For the two max_bases_to_write parameters, -1 means the option is left off the command line. -->
+    <param format="sam,bam" name="inputFile" type="data" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset"/>
+    <param name="output_per_rg" type="boolean" checked="False" label="Do you want to output a fastq file per read group (two fastq files per read group if the group is paired)" help="OUTPUT_PER_RG; default=False"/>
+    <param name="re_reverse" type="boolean" checked="True" label="Re-reverse bases and qualities of reads with negative strand flag set before writing them to fastq" help="RE_REVERSE; default=True"/>
+    <param name="interleave" type="boolean" label="Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe which end it came from" help="INTERLEAVE; default=False"/>
+    <param name="include_non_pf_reads" type="boolean" label="Include non-PF reads from the SAM/BAM dataset into the output FASTQ" help="INCLUDE_NON_PF_READS; PF means 'passes filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads; default=False"/>
+    <param name="clipping_attribute" type="text" size="4" value="null" label="The attribute that stores the position at which the SAM/BAM record should be clipped" help="CLIPPING_ATTRIBUTE; default=null"/>
+    <param name="clipping_action" type="text" size="10" value="null" label="The action that should be taken with clipped reads: 'X' means the reads and qualities should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in the clipped region; and any integer means that the base qualities should be set to that value in the clipped region" help="CLIPPING_ACTION; default=null"/>
+    <param name="read1_trim" type="integer" value="0" min="0" label="The number of bases to trim from the beginning of read 1" help="READ1_TRIM; default=0"/>
+    <param name="read1_max_bases_to_write" type="integer" value="-1" label="The maximum number of bases to write from read 1 after trimming" help="READ1_MAX_BASES_TO_WRITE; If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written; default=null (-1)"/>
+    <param name="read2_trim" type="integer" value="0" min="0" label="The number of bases to trim from the beginning of read 2" help="READ2_TRIM; default=0"/>
+    <param name="read2_max_bases_to_write" type="integer" value="-1" label="The maximum number of bases to write from read 2 after trimming" help="READ2_MAX_BASES_TO_WRITE; If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written; default=null (-1)"/>
+    <param name="include_non_primary_alignments" type="boolean" label="If true, include non-primary alignments in the output" help="INCLUDE_NON_PRIMARY_ALIGNMENTS; Support of non-primary alignments in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and there are paired reads with non-primary alignments; default=False"/>
+    <!-- NOTE(review): the VS macro comes from the imported picard_macros.xml; it presumably defines the validation_stringency parameter that the command references, since no param here does. Confirm against the macros file. -->
+    <expand macro="VS" />
+    
+  </inputs> 
+  
+  <outputs>
+    <!-- Only the hidden "report" placeholder is declared; the FASTQ files are attached through dataset discovery using the pattern below. The command names its own outputs with a .fastq suffix, and with OUTPUT_PER_RG=true the tool itself adds the .fastq extension to the files it emits. -->
+    <data format="txt" name="report" label="SamToFastq run" hidden="true">
+      <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fastq" ext="fastqsanger" visible="true"/>
+    </data>
+  </outputs>
+  
+  <tests>
+    <test> <!-- Interleaved run without per-read-group output: checks the placeholder line in "report" and one discovered dataset designated INTERLEAVED. -->
+      <param name="inputFile" value="picard_SamToFastq.bam" ftype="bam"/>
+      <param name="output_per_rg" value="false"/>
+      <param name="re_reverse" value="true"/>
+      <param name="interleave" value="true"/>
+      <param name="include_non_pf_reads" value="false"/>
+      <param name="clipping_attribute" value="null" />
+      <param name="clipping_action" value="null" />
+      <param name="read1_trim" value="0" />
+      <param name="read1_max_bases_to_write" value="-1"/>
+      <param name="read2_trim" value="0" />
+      <param name="read2_max_bases_to_write" value="-1"/>
+      <param name="include_non_primary_alignments" value="false"/>   
+      <output name="report">
+        <assert_contents>
+          <has_line line="BAM" />
+        </assert_contents>
+        <discovered_dataset designation="INTERLEAVED" file="picard_SamToFastq_test1.fq" ftype="fastqsanger"/>
+      </output>
+    </test>
+  </tests>
+  
+  <stdio> <!-- Exit codes in the range starting at 1 are reported at the "fatal" level. -->
+    <exit_code range="1:"  level="fatal"/>
+  </stdio>
+  
+  <help>
+
+**Purpose**
+
+Extracts read sequences and qualities from the input SAM/BAM dataset and outputs them in Sanger fastq format. In the RE_REVERSE=True mode (default behavior), if the read is aligned and the alignment is to the reverse strand on the genome, the read's sequence from the input SAM/BAM dataset will be reverse-complemented prior to writing it to fastq in order to correctly restore the original read sequence as it was generated by the sequencer.
+
+-----
+
+.. class:: warningmark
+
+**DANGER: Multiple Outputs**
+
+Generating per readgroup fastq (setting **OUTPUT_PER_RG** to True) may produce very large numbers of outputs. Know what you are doing!
+
+@dataset_collections@
+
+@description@
+
+  FASTQ=File
+  F=File                        Output fastq file (single-end fastq or, if paired, first end of the pair fastq).  
+                                Required.  Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
+  
+  SECOND_END_FASTQ=File
+  F2=File                       Output fastq file (if paired, second end of the pair fastq).  Default value: null.  
+                                Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
+  
+  UNPAIRED_FASTQ=File
+  FU=File                       Output fastq file for unpaired reads; may only be provided in paired-fastq mode  Default 
+                                value: null.  Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
+  
+  OUTPUT_PER_RG=Boolean
+  OPRG=Boolean                  Output a fastq file per read group (two fastq files per read group if the group is 
+                                paired).  Default value: false. Possible values: {true, false}  Cannot be used in
+                                conjunction with option(s) SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) FASTQ (F)
+  
+  OUTPUT_DIR=File
+  ODIR=File                     Directory in which to output the fastq file(s).  Used only when OUTPUT_PER_RG is true.  
+                                Default value: null. 
+  
+  RE_REVERSE=Boolean
+  RC=Boolean                    Re-reverse bases and qualities of reads with negative strand flag set before writing them 
+                                to fastq  Default value: true. Possible values: {true, false} 
+  
+  INTERLEAVE=Boolean
+  INTER=Boolean                 Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe 
+                                which end it came from  Default value: false. Possible values: {true, false} 
+  
+  INCLUDE_NON_PF_READS=Boolean
+  NON_PF=Boolean                Include non-PF reads from the SAM file into the output FASTQ files. PF means 'passes 
+                                filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads.  
+                                Default value: false. Possible values: {true, false} 
+  
+  CLIPPING_ATTRIBUTE=String
+  CLIP_ATTR=String              The attribute that stores the position at which the SAM record should be clipped  Default 
+                                value: null. 
+  
+  CLIPPING_ACTION=String
+  CLIP_ACT=String               The action that should be taken with clipped reads: 'X' means the reads and qualities 
+                                should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in 
+                                the clipped region; and any integer means that the base qualities should be set to that 
+                                value in the clipped region.  Default value: null. 
+  
+  READ1_TRIM=Integer
+  R1_TRIM=Integer               The number of bases to trim from the beginning of read 1.  Default value: 0. 
+  
+  READ1_MAX_BASES_TO_WRITE=Integer
+  R1_MAX_BASES=Integer          The maximum number of bases to write from read 1 after trimming. If there are fewer than 
+                                this many bases left after trimming, all will be written.  If this value is null then all 
+                                bases left after trimming will be written.  Default value: null. 
+  
+  READ2_TRIM=Integer
+  R2_TRIM=Integer               The number of bases to trim from the beginning of read 2.  Default value: 0. 
+  
+  READ2_MAX_BASES_TO_WRITE=Integer
+  R2_MAX_BASES=Integer          The maximum number of bases to write from read 2 after trimming. If there are fewer than 
+                                this many bases left after trimming, all will be written.  If this value is null then all 
+                                bases left after trimming will be written.  Default value: null. 
+  
+  INCLUDE_NON_PRIMARY_ALIGNMENTS=Boolean
+                                If true, include non-primary alignments in the output.  Support of non-primary alignments 
+                                in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and 
+                                there are paired reads with non-primary alignments.  Default value: false.
+                                Possible values: {true, false} 
+  
+@more_info@
+
+  </help>
+</tool>
+
+
author	avowinkel
date	Mon, 06 Jul 2015 14:46:32 -0400
parents
children