Mercurial > repos > avowinkel > picard
diff picard_SamToFastq.xml @ 0:5166ed57b1c4 draft
Uploaded version 1.135
author | avowinkel |
---|---|
date | Mon, 06 Jul 2015 14:46:32 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/picard_SamToFastq.xml Mon Jul 06 14:46:32 2015 -0400
@@ -0,0 +1,207 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Galaxy tool wrapper for Picard SamToFastq.
+  Runs picard.jar SamToFastq on a SAM/BAM dataset and publishes every file
+  matching *.fastq as a discovered fastqsanger dataset of the hidden "report" output.
+  The "requirements" and "VS" macros and the @java_options@, @dataset_collections@, @description@
+  and @more_info@ tokens are expected to come from picard_macros.xml (the only import) - confirm there.
+-->
+<tool name="SamToFastq" id="picard_SamToFastq" version="1.135">
+  <description>extract reads and qualities from SAM/BAM dataset and convert to fastq</description>
+  <macros>
+    <import>picard_macros.xml</import>
+  </macros>
+  <expand macro="requirements" />
+  <!-- Cheetah command template. The shell "and" operator is entity-escaped so the file stays well-formed XML. -->
+  <command>
+
+    echo "BAM" > $report &amp;&amp; ## This is necessary for output dataset detection (see output tags below)
+
+    @java_options@
+
+    java -jar \$JAVA_JAR_PATH/picard.jar
+    SamToFastq
+
+    INPUT="${inputFile}"
+
+    ## Output naming: per read group files, or READ1/READ2/UNPAIRED_READS, or one INTERLEAVED file.
+    #if str( $output_per_rg ) == "true":
+      OUTPUT_PER_RG=true
+      OUTPUT_DIR=.
+    #elif str( $output_per_rg ) == "false" and str( $interleave ) == "false":
+      FASTQ=READ1.fastq
+      SECOND_END_FASTQ=READ2.fastq
+      UNPAIRED_FASTQ=UNPAIRED_READS.fastq
+    #elif str( $output_per_rg ) == "false" and str( $interleave ) == "true":
+      FASTQ=INTERLEAVED.fastq
+    #end if
+
+    RE_REVERSE="${re_reverse}"
+    INTERLEAVE="${interleave}"
+    INCLUDE_NON_PF_READS="${include_non_pf_reads}"
+    CLIPPING_ATTRIBUTE="${clipping_attribute}"
+    CLIPPING_ACTION="${clipping_action}"
+    READ1_TRIM="${read1_trim}"
+
+    ## The form default of -1 stands for null (no limit); the option is passed only for values above -1.
+    #if int($read1_max_bases_to_write) > -1:
+      READ1_MAX_BASES_TO_WRITE="${read1_max_bases_to_write}"
+    #end if
+
+    READ2_TRIM="${read2_trim}"
+
+    #if int($read2_max_bases_to_write) > -1:
+      READ2_MAX_BASES_TO_WRITE="${read2_max_bases_to_write}"
+    #end if
+
+    INCLUDE_NON_PRIMARY_ALIGNMENTS="${include_non_primary_alignments}"
+
+    VALIDATION_STRINGENCY="${validation_stringency}"
+    QUIET=true
+    VERBOSITY=ERROR
+
+  </command>
+  <inputs>
+
+    <param format="sam,bam" name="inputFile" type="data" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset"/>
+    <param name="output_per_rg" type="boolean" checked="False" label="Do you want to output a fastq file per read group (two fastq files per read group if the group is paired)" help="OUTPUT_PER_RG; default=False"/>
+    <param name="re_reverse" type="boolean" checked="True" label="Re-reverse bases and qualities of reads with negative strand flag set before writing them to fastq" help="RE_REVERSE; default=True"/>
+    <param name="interleave" type="boolean" label="Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe which end it came from" help="INTERLEAVE; default=False"/>
+    <param name="include_non_pf_reads" type="boolean" label="Include non-PF reads from the SAM/BAM dataset into the output FASTQ" help="INCLUDE_NON_PF_READS; PF means 'passes filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads; default=False"/>
+    <param name="clipping_attribute" type="text" size="4" value="null" label="The attribute that stores the position at which the SAM/BAM record should be clipped" help="CLIPPING_ATTRIBUTE; default=null"/>
+    <param name="clipping_action" type="text" size="10" value="null" label="The action that should be taken with clipped reads: 'X' means the reads and qualities should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in the clipped region; and any integer means that the base qualities should be set to that value in the clipped region" help="CLIPPING_ACTION; default=null"/>
+    <param name="read1_trim" type="integer" value="0" min="0" label="The number of bases to trim from the beginning of read 1" help="READ1_TRIM; default=0"/>
+    <param name="read1_max_bases_to_write" type="integer" value="-1" label="The maximum number of bases to write from read 1 after trimming" help="READ1_MAX_BASES_TO_WRITE; If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written; default=null (-1)"/>
+    <param name="read2_trim" type="integer" value="0" min="0" label="The number of bases to trim from the beginning of read 2" help="READ2_TRIM; default=0"/>
+    <param name="read2_max_bases_to_write" type="integer" value="-1" label="The maximum number of bases to write from read 2 after trimming" help="READ2_MAX_BASES_TO_WRITE; If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written; default=null (-1)"/>
+    <param name="include_non_primary_alignments" type="boolean" label="If true, include non-primary alignments in the output" help="INCLUDE_NON_PRIMARY_ALIGNMENTS; Support of non-primary alignments in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and there are paired reads with non-primary alignments; default=False"/>
+
+    <!-- Presumably supplies the validation_stringency parameter referenced by the command; verify in picard_macros.xml. -->
+    <expand macro="VS" />
+
+  </inputs>
+
+  <outputs>
+    <!-- The hidden "report" dataset only anchors dataset discovery: each *.fastq file found becomes a visible fastqsanger output.
+         With OUTPUT_PER_RG=true the tool itself adds the .fastq extension to emitted files; otherwise the command names them explicitly. -->
+    <data format="txt" name="report" label="SamToFastq run" hidden="true">
+      <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fastq" ext="fastqsanger" visible="true"/>
+    </data>
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="inputFile" value="picard_SamToFastq.bam" ftype="bam"/>
+      <param name="output_per_rg" value="false"/>
+      <param name="re_reverse" value="true"/>
+      <param name="interleave" value="true"/>
+      <param name="include_non_pf_reads" value="false"/>
+      <param name="clipping_attribute" value="null" />
+      <param name="clipping_action" value="null" />
+      <param name="read1_trim" value="0" />
+      <param name="read1_max_bases_to_write" value="-1"/>
+      <param name="read2_trim" value="0" />
+      <param name="read2_max_bases_to_write" value="-1"/>
+      <param name="include_non_primary_alignments" value="false"/>
+      <output name="report">
+        <assert_contents>
+          <has_line line="BAM" />
+        </assert_contents>
+        <discovered_dataset designation="INTERLEAVED" file="picard_SamToFastq_test1.fq" ftype="fastqsanger"/>
+      </output>
+    </test>
+  </tests>
+
+  <!-- Any exit code of 1 or above is treated as a fatal error. -->
+  <stdio>
+    <exit_code range="1:" level="fatal"/>
+  </stdio>
+
+  <help>
+
+**Purpose**
+
+Extracts read sequences and qualities from the input SAM/BAM dataset and outputs them in Sanger fastq format. In the RE_REVERSE=True mode (default behavior), if the read is aligned and the alignment is to the reverse strand on the genome, the read's sequence from input SAM/BAM dataset will be reverse-complemented prior to writing it to fastq in order to correctly restore the original read sequence as it was generated by the sequencer.
+
+-----
+
+.. class:: warningmark
+
+**DANGER: Multiple Outputs**
+
+Generating per readgroup fastq (setting **OUTPUT_PER_RG** to True) may produce very large numbers of outputs. Know what you are doing!
+
+@dataset_collections@
+
+@description@
+
+  FASTQ=File
+  F=File                        Output fastq file (single-end fastq or, if paired, first end of the pair fastq).
+                                Required. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
+
+  SECOND_END_FASTQ=File
+  F2=File                       Output fastq file (if paired, second end of the pair fastq). Default value: null.
+                                Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
+
+  UNPAIRED_FASTQ=File
+  FU=File                       Output fastq file for unpaired reads; may only be provided in paired-fastq mode Default
+                                value: null. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
+
+  OUTPUT_PER_RG=Boolean
+  OPRG=Boolean                  Output a fastq file per read group (two fastq files per read group if the group is
+                                paired). Default value: false. Possible values: {true, false} Cannot be used in
+                                conjunction with option(s) SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) FASTQ (F)
+
+  OUTPUT_DIR=File
+  ODIR=File                     Directory in which to output the fastq file(s). Used only when OUTPUT_PER_RG is true.
+                                Default value: null.
+
+  RE_REVERSE=Boolean
+  RC=Boolean                    Re-reverse bases and qualities of reads with negative strand flag set before writing them
+                                to fastq Default value: true. Possible values: {true, false}
+
+  INTERLEAVE=Boolean
+  INTER=Boolean                 Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe
+                                which end it came from Default value: false. Possible values: {true, false}
+
+  INCLUDE_NON_PF_READS=Boolean
+  NON_PF=Boolean                Include non-PF reads from the SAM file into the output FASTQ files. PF means 'passes
+                                filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads.
+                                Default value: false. Possible values: {true, false}
+
+  CLIPPING_ATTRIBUTE=String
+  CLIP_ATTR=String              The attribute that stores the position at which the SAM record should be clipped Default
+                                value: null.
+
+  CLIPPING_ACTION=String
+  CLIP_ACT=String               The action that should be taken with clipped reads: 'X' means the reads and qualities
+                                should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in
+                                the clipped region; and any integer means that the base qualities should be set to that
+                                value in the clipped region. Default value: null.
+
+  READ1_TRIM=Integer
+  R1_TRIM=Integer               The number of bases to trim from the beginning of read 1. Default value: 0.
+
+  READ1_MAX_BASES_TO_WRITE=Integer
+  R1_MAX_BASES=Integer          The maximum number of bases to write from read 1 after trimming. If there are fewer than
+                                this many bases left after trimming, all will be written. If this value is null then all
+                                bases left after trimming will be written. Default value: null.
+
+  READ2_TRIM=Integer
+  R2_TRIM=Integer               The number of bases to trim from the beginning of read 2. Default value: 0.
+
+  READ2_MAX_BASES_TO_WRITE=Integer
+  R2_MAX_BASES=Integer          The maximum number of bases to write from read 2 after trimming. If there are fewer than
+                                this many bases left after trimming, all will be written. If this value is null then all
+                                bases left after trimming will be written. Default value: null.
+
+  INCLUDE_NON_PRIMARY_ALIGNMENTS=Boolean
+                                If true, include non-primary alignments in the output. Support of non-primary alignments
+                                in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and
+                                there are paired reads with non-primary alignments. Default value: false.
+                                Possible values: {true, false}
+
+@more_info@
+
+  </help>
+</tool>