Mercurial > repos > devteam > picard
diff picard_FastqToSam.xml @ 5:3d4f1fa26f0e draft
Uploaded
author | devteam |
---|---|
date | Tue, 16 Dec 2014 19:03:21 -0500 |
parents | bf1c3f9f8282 |
children | 3a3234d7a2e8 |
line wrap: on
line diff
--- a/picard_FastqToSam.xml Fri Feb 21 12:07:49 2014 -0500 +++ b/picard_FastqToSam.xml Tue Dec 16 19:03:21 2014 -0500 @@ -1,145 +1,230 @@ -<tool id="picard_FastqToSam" name="FASTQ to BAM" version="1.56.0"> - <description>creates an unaligned BAM file</description> - <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements> - <!-- Dan Blankenberg --> - <command>java -XX:DefaultMaxRAMFraction=1 -XX:+UseParallelGC - -jar "\$JAVA_JAR_PATH/FastqToSam.jar" - FASTQ="${input_fastq1}" - #if str( $input_fastq2) != "None": - FASTQ2="${input_fastq2}" +<tool name="FastqToSam" id="picard_FastqToSam" version="1.126.0"> + <description>convert Fastq data into unaligned BAM</description> + <requirements> + <requirement type="package" version="1.126.0">picard</requirement> + </requirements> + + <macros> + <import>picard_macros.xml</import> + </macros> + + <command> + @java_options@ + + java -jar \$JAVA_JAR_PATH/picard.jar + FastqToSam + + #if str( $input_type.input_type_selector ) == "se": + FASTQ="${input_type.fastq}" + #elif str( $input_type.input_type_selector ) == "pe": + FASTQ="${input_type.fastq}" + FASTQ2="${input_type.fastq2}" + #else + FASTQ="${input_type.fastq.forward}" + FASTQ2="${input_type.fastq.reverse}" #end if - QUALITY_FORMAT="${ dict( fastqsanger='Standard', fastqcssanger='Standard', fastqillumina='Illumina', fastqsolexa='Solexa' )[ $input_fastq1.ext ] }" ##Solexa, Illumina, Standard - OUTPUT="${output_bam}" + + QUALITY_FORMAT="${quality_format}" + OUTPUT="${outFile}" READ_GROUP_NAME="${read_group_name}" - SAMPLE_NAME="${sample_name}" - #if $param_type.param_type_selector == "advanced": - #if str( $param_type.library_name ) != "": - LIBRARY_NAME="${param_type.library_name}" - #end if - #if str( $param_type.platform_unit ) != "": - PLATFORM_UNIT="${param_type.platform_unit}" - #end if - #if str( $param_type.platform ) != "": - PLATFORM="${param_type.platform}" - #end if - #if str( $param_type.sequencing_center ) != "": - 
SEQUENCING_CENTER="${param_type.sequencing_center}" - #end if - #if str( $param_type.predicted_insert_size ) != "": - PREDICTED_INSERT_SIZE="${param_type.predicted_insert_size}" - #end if - #if str( $param_type.description.value ) != "": - DESCRIPTION="${param_type.description}" - #end if - #if str( $param_type.run_date ) != "": - RUN_DATE="${param_type.run_date}" - #end if - #if str( $param_type.min_q ) != "": - MIN_Q="${param_type.min_q}" - #end if - #if str( $param_type.max_q ) != "": - MAX_Q="${param_type.max_q}" - #end if - SORT_ORDER="${param_type.sort_order}" - #else: - SORT_ORDER=coordinate ##unsorted, queryname, coordinate; always use coordinate + SAMPLE_NAME="${sample_name}" + + #if str( $library_name ): + LIBRARY_NAME="${library_name}" + #end if + + #if str( $platform_unit ): + PLATFORM_UNIT="${platform_unit}" + #end if + + #if str( $platform ): + PLATFORM="${platform}" + #end if + + #if str( $sequencing_center ): + SEQUENCING_CENTER="${sequencing_center}" + #end if + + #if str( $predicted_insert_size ): + PREDICTED_INSERT_SIZE="${predicted_insert_size}" #end if - 2>&1 - || echo "Error running Picard FastqToSAM" >&2 + + #if str( $comment ): + COMMENT="${comment}" + #end if + + #if str( $description ): + DESCRIPTION="${description}" + #end if + + #if str( $run_date ): + RUN_DATE="${run_date}" + #end if + + MIN_Q="${min_q}" + MAX_Q="${max_q}" + STRIP_UNPAIRED_MATE_NUMBER="${strip_unpairied_mate_number}" + ALLOW_AND_IGNORE_EMPTY_LINES="${allow_and_ignore_empty_lines}" + + SORT_ORDER=coordinate + VALIDATION_STRINGENCY="${validation_stringency}" + QUIET=true + VERBOSITY=ERROR + </command> <inputs> - <param name="input_fastq1" type="data" format="fastqsanger,fastqillumina,fastqsolexa,fastqcssanger" label="FASTQ file" /> <!-- confirm that fastqcssanger also works --> - <param name="input_fastq2" type="data" format="fastqsanger,fastqillumina,fastqsolexa,fastqcssanger" optional="True" label="Second FASTQ of paired end data" help="Only needed when using paired end 
data." > - <options options_filter_attribute="ext" from_parameter="tool.app.datatypes_registry.datatypes_by_extension" transform_lines="obj.keys()"> - <column name="name" index="0"/> - <column name="value" index="0"/> - <filter type="param_value" ref="input_fastq1" ref_attribute="ext" column="0"/> - </options> - </param> - <param name="read_group_name" type="text" value="A" label="Read Group Name" /> - <param name="sample_name" type="text" value="unknown sample" label="Sample Name" /> - <conditional name="param_type"> - <param name="param_type_selector" type="select" label="Basic or Advanced options"> - <option value="basic" selected="True">Basic</option> - <option value="advanced">Advanced</option> + <conditional name="input_type"> + <param name="input_type_selector" type="select" label="What is your input data" help="Select between single end, paired end, and collections. See help below for full explanation of dataset types"> + <option value="se">Single end (single dataset)</option> + <option value="pe">Paired end (two datasets)</option> + <option value="pc">Paired collection</option> </param> - <when value="basic"> - <!-- Do nothing here --> + <when value="se"> + <param name="fastq" type="data" format="fastq" label="Input fastq file for single end data" help="FASTQ"/> </when> - <when value="advanced"> - <param name="library_name" type="text" value="" label="Library Name" /> - <param name="platform_unit" type="text" value="" label="Platform Unit" /> - <param name="platform" type="text" value="" label="Platform" /> - <param name="sequencing_center" type="text" value="" label="Sequencing Center" /> - <param name="predicted_insert_size" type="integer" value="" optional="True" label="Predicted Insert Size" /> - <param name="description" type="text" value="" label="Description" /> - <param name="run_date" type="text" value="" label="Run Date" /> - <param name="min_q" type="integer" optional="True" value="0" label="Min Q" /> - <param name="max_q" type="integer" 
optional="True" value="93" label="Max Q" /> - <param name="sort_order" type="select" label="Sort order"> - <option value="coordinate" selected="True">coordinate</option> - <option value="queryname">queryname</option> - <option value="unsorted">unsorted</option> - </param> + <when value="pe"> + <param name="fastq" type="data" format="fastq" label="Input fastq file for the first read in paired end data" help="FASTQ"/> + <param name="fastq2" type="data" format="fastq" label="Input fastq file for the second read of paired end data" help="FASTQ2"/> + </when> + <when value="pc"> + <param name="fastq" type="data_collection" collection_type="paired" label="FASTQ paired dataset collection" help="FASTQ and FASTQ2; A collection of two datasets with forward and reverse reads. See help below on explanation of dataset collections"/> </when> </conditional> - </inputs> + + <param name="quality_format" type="select" label="Select quality encoding scheme" help="QUALITY_FORMAT"> + <option value="Standard" selected="True">Sanger (+33)</option> + <option value="Illumina">Illumina (+64)</option> + <option value="Solexa">Solexa (+66)</option> + </param> + + <param name="read_group_name" type="text" size="20" value="A" label="Read group name" help="READ_GROUP_NAME"/> + <param name="sample_name" type="text" size="20" value="sample-a" label="Sample name" help="SAMPLE_NAME"/> + <param name="library_name" type="text" size="20" optional="True" label="The library name" help="LIBRARY_NAME; Optional"/> + <param name="platform_unit" type="text" size="20" optional="True" label="The platform unit (often run_barcode.lane)" help="PLATFORM_UNIT; Optional"/> + <param name="platform" type="text" size="20" optional="True" label="The platform type (e.g. 
illumina, 454)" help="PLATFORM; Optional"/> + <param name="sequencing_center" type="text" size="20" optional="True" label="The sequencing center from which the data originated" help="SEQUENCING_CENTER; Optional"/> + + <param name="predicted_insert_size" type="integer" min="0" max="100000" optional="True" label="Predicted median insert size, to insert into the read group header" help="PREDICTED_INSERT_SIZE; Optional"/> + <param name="comment" type="text" size="20" optional="True" label="Comment to include in the output dataset's header" help="COMMENT; Optional"/> + <param name="description" type="text" size="20" optional="True" label="Optional description information" help="DESCRIPTION; Optional"/> + <param name="run_date" optional="True" type="text" label="Run date" help="RGDT; Optional; Format=YYYY-MM-DD (eg 1997-07-16)"/> + <param name="min_q" type="integer" value="0" min="0" max="100" label="Minimum quality allowed in the input fastq" help="MIN_Q; An exception will be thrown if a quality is less than this value; default=0"/> + <param name="max_q" type="integer" value="93" min="0" max="100" label="Maximum quality allowed in the input fastq" help="MAX_Q; An exception will be thrown if a quality is greater than this value; default=93"/> + <param name="strip_unpairied_mate_number" type="boolean" truevalue="true" falsevalue="false" label="If true and this is an unpaired fastq any occurrence of '/1' will be removed from the end of a read name" help="STRIP_UNPAIRED_MATE_NUMBER; default=false"/> + <param name="allow_and_ignore_empty_lines" type="boolean" truevalue="true" falsevalue="false" label="Allow (and ignore) empty lines" help="ALLOW_AND_IGNORE_EMPTY_LINES; default=false"/> + + <expand macro="VS" /> + + </inputs> + <outputs> - <data format="bam" name="output_bam" /> + <data format="bam" name="outFile" label="${tool.name} on ${on_string}: reads as unaligned BAM"/> </outputs> + <tests> - <test> - <param name="input_fastq1" value="bwa_wrapper_in2.fastqsanger" 
ftype="fastqsanger" /> - <param name="input_fastq2" /> - <param name="read_group_name" value="A" /> - <param name="sample_name" value="unknown sample" /> - <param name="param_type_selector" value="basic" /> - <output name="output_bam" file="picard_fastq_to_sam_out1.bam" ftype="bam"/> - </test> - <test> - <param name="input_fastq1" value="bwa_wrapper_in2.fastqsanger" ftype="fastqsanger" /> - <param name="input_fastq2" value="bwa_wrapper_in3.fastqsanger" ftype="fastqsanger" /> - <param name="read_group_name" value="A" /> - <param name="sample_name" value="unknown sample" /> - <param name="param_type_selector" value="basic" /> - <output name="output_bam" file="picard_fastq_to_sam_out2.bam" ftype="bam"/> - </test> + <test> + <param name="input_type_selector" value="pe" /> + <param name="quality_format" value="Standard" /> + <param name="read_group_name" value="A" /> + <param name="sample_name" value="sample-a" /> + <param name="library_name" value="A"/> + <param name="platform_unit" value="A"/> + <param name="platform" value="Illumina"/> + <param name="sequencing_center" value="A"/> + <param name="predicted_insert_size" value="300"/> + <param name="comment" value="A"/> + <param name="description" value="A"/> + <param name="run_date" value="2014-10-10"/> + <param name="min_q" value="0" /> + <param name="max_q" value="93" /> + <param name="strip_unpairied_mate_number" value="False" /> + <param name="allow_and_ignore_empty_lines" value="False" /> + <param name="validation_stringency" value="LENIENT"/> + <param name="fastq" value="picard_FastqToSam_read1.fq" ftype="fastq" /> + <param name="fastq2" value="picard_FastqToSam_read2.fq" ftype="fastq" /> + <output name="outFile" file="picard_FastqToSam_test1.bam" ftype="bam" lines_diff="4"/> + </test> </tests> + + <stdio> + <exit_code range="1:" level="fatal"/> + </stdio> + <help> -**What it does** + +.. 
class:: infomark + +**Purpose** + +Converts FASTQ data (single end or paired end) into an unaligned BAM file. + +@dataset_collections@ + +@RG@ + +@description@ -Picard: FastqToSam converts FASTQ files to unaligned BAM files. + FASTQ=File + F1=File Input fastq file for single end data, or first read in paired end + data. Required. + + FASTQ2=File + F2=File Input fastq file for the second read of paired end data (if used). + + QUALITY_FORMAT=FastqQualityFormat + V=FastqQualityFormat A value describing how the quality values are encoded in the fastq. Either Solexa for + pre-pipeline 1.3 style scores (solexa scaling + 66), Illumina for pipeline 1.3 and above + (phred scaling + 64) or Standard for phred scaled scores with a character shift of 33. + If this value is not specified, the quality format will be detected automatically. + Default value: null. Possible values: {Solexa, Illumina, Standard} ------- + READ_GROUP_NAME=String + RG=String Read group name. Default value: A. + + SAMPLE_NAME=String + SM=String Sample name to insert into the read group header. Required. + + LIBRARY_NAME=String + LB=String The library name to place into the LB attribute in the read group header. + + PLATFORM_UNIT=String + PU=String The platform unit (often run_barcode.lane) to insert into the read group header. + + PLATFORM=String + PL=String The platform type (e.g. illumina, solid) to insert into the read group header. + + SEQUENCING_CENTER=String + CN=String The sequencing center from which the data originated. + + PREDICTED_INSERT_SIZE=Integer + PI=Integer Predicted median insert size, to insert into the read group header. + + COMMENT=String + CO=String Comment to include in the merged output file's header. + + DESCRIPTION=String + DS=String Inserted into the read group header. + + RUN_DATE=Iso8601Date + DT=Iso8601Date Date the run was produced, to insert into the read group header. 
+ + MIN_Q=Integer Minimum quality allowed in the input fastq. An exception will be thrown if a quality is + less than this value. Default value: 0. + + MAX_Q=Integer Maximum quality allowed in the input fastq. An exception will be thrown if a quality is + greater than this value. Default value: 93. + + STRIP_UNPAIRED_MATE_NUMBER=Boolean + If true and this is an unpaired fastq any occurrence of '/1' will be removed from the end + of a read name. Default value: false. Possible values: {true, false} + + ALLOW_AND_IGNORE_EMPTY_LINES=Boolean + Allow (and ignore) empty lines. Default value: false. Possible values: {true, false} + -Please cite the website "http://picard.sourceforge.net". +@more_info@ ------- + </help> +</tool> -**Input formats** - -FastqToSam accepts FASTQ input files. If using paired-end data, you should select two FASTQ files. - ------- - -**Outputs** - -The output is in BAM format, see http://samtools.sourceforge.net for more details. - -------- - -**FastqToSam settings** - -This is list of FastqToSam options:: - - READ_GROUP_NAME=String Read group name Default value: A. This option can be set to 'null' to clear the default value. - SAMPLE_NAME=String Sample name to insert into the read group header Required. - LIBRARY_NAME=String The library name to place into the LB attribute in the read group header Default value: null. - PLATFORM_UNIT=String The platform unit (often run_barcode.lane) to insert into the read group header Default value: null. - PLATFORM=String The platform type (e.g. illumina, solid) to insert into the read group header Default value: null. - SEQUENCING_CENTER=String The sequencing center from which the data originated Default value: null. - PREDICTED_INSERT_SIZE=Integer Predicted median insert size, to insert into the read group header Default value: null. - DESCRIPTION=String Inserted into the read group header Default value: null. - </help> -</tool>