<!-- Mercurial > repos > devteam > picard -->

<tool id="picard_artifact_metrics" name="Picard Collect Sequencing Artifact Metrics" version="@TOOL_VERSION@.@WRAPPER_VERSION@" profile="@PROFILE@">
    <description>Collect metrics to quantify single-base sequencing artifacts</description>
    <!-- Shared definitions are imported first; the wrapper-revision token is then declared locally. -->
    <macros>
        <import>picard_macros.xml</import>
        <!-- Wrapper revision: it is the suffix of the version attribute on the tool element. -->
        <token name="@WRAPPER_VERSION@">0</token>
    </macros>
    <!-- The "requirements" macro is not defined in this file; presumably it comes from picard_macros.xml. -->
    <expand macro="requirements"/>
    <!--
        Cheetah command template.
        It first emits the macro-supplied preamble tokens (Java options, input
        symlink, reference handling; their expansions are not visible in this
        file), then invokes Picard's CollectSequencingArtifactMetrics.
        All result files are written with the fixed prefix "OutPut", which the
        from_work_dir attributes of the outputs section rely on.
    -->
    <command detect_errors="exit_code"><![CDATA[
    @java_options@
    @symlink_element_identifier@

    #set $reference_fasta_filename = "localref.fa"
    @handle_reference_source@

    picard CollectSequencingArtifactMetrics
    --I '$escaped_element_identifier'
    --O 'OutPut'
    --R '${reference_fasta_filename}'
    --AS ${assume_sorted}
    --CONTEXT_SIZE ${context_size}
    --INCLUDE_DUPLICATES '${duplicates}'
    ## One --CONTEXTS_TO_PRINT flag per comma-separated entry. Entries that are
    ## empty after stripping (trailing or doubled commas) are skipped so that an
    ## empty context string is never handed to Picard.
    #if $contexts_to_print
        #for $context in str($contexts_to_print).split(',')
            #if $context.strip()
                --CONTEXTS_TO_PRINT '${context.strip()}'
            #end if
        #end for
    #end if
    --MINIMUM_QUALITY_SCORE '${min_quality_score}'
    --INCLUDE_UNPAIRED '${unpaired}'
    --MAXIMUM_INSERT_SIZE '${max_size}'
    --MINIMUM_INSERT_SIZE '${min_size}'
    --MINIMUM_MAPPING_QUALITY '${minim_map_quality}'
    --VALIDATION_STRINGENCY '${validation_stringency}'
  ]]></command>
    <inputs>
        <!-- Alignment file to analyse. -->
        <param name="inputFile" type="data" format="sam,bam" label="SAM/BAM Input file"/>

        <!-- Reference genome: an entry of the picard_indexes data table, or a FASTA dataset from the history. -->
        <conditional name="reference_source">
            <param name="reference_source_selector" type="select" label="Load reference genome from">
                <option value="cached">Local cache</option>
                <option value="history">History</option>
            </param>
            <when value="cached">
                <param name="ref_file" type="select"
                       label="Use dictionary from the list"
                       help="Select genome from the list">
                    <options from_data_table="picard_indexes">
                        <filter type="sort_by" column="2"/>
                        <validator type="no_options" message="No indexes are available"/>
                    </options>
                    <validator type="no_options"
                               message="A built-in dictionary is not available for the build associated with the selected input file"/>
                </param>
            </when>
            <when value="history">
                <param name="ref_file" type="data" format="fasta"
                       label="Use the following dataset to create dictionary"
                       help="You can upload a FASTA sequence to the history from which Picard will automatically generate dictionary using CreateSequenceDictionary command"/>
            </when>
        </conditional>

        <!-- Context definition and the optional restriction of the detail tables. -->
        <param name="context_size" type="integer" value="1"
               label="How many nucleotides (context) on each side to consider"
               help="e.g NXN corresponds to 1 &quot;N&quot; on each side of &quot;X&quot;"/>
        <param name="contexts_to_print" type="text"
               label="Contexts to print in the detailed tables, separated by comma (avoid spaces)"
               help="e.g. (for context size=1) AAA,AGG. Default prints all contexts">
            <!-- Only letters and the comma separator are declared valid. -->
            <sanitizer>
                <valid initial="string.letters">
                    <add value=","/>
                </valid>
            </sanitizer>
        </param>

        <!--
            Read and base filters.
            NOTE(review): the help text of this tool lists defaults of 20 (base quality),
            60 (minimum insert size) and 30 (mapping quality), whereas the fields below
            pre-fill 30, 30 and 20 respectively. Confirm which set is intended.
        -->
        <param name="min_quality_score" type="integer" value="30" label="Minimum base quality score"/>
        <param name="max_size" type="integer" value="600" label="Maximum insert size"/>
        <param name="min_size" type="integer" value="30" label="Minimum insert size"/>
        <param name="minim_map_quality" type="integer" value="20" label="Minimum mapping quality"/>

        <!--
            Boolean switches.
            NOTE(review): assume_sorted carries no checked attribute, while the help text
            describes ASSUME_SORTED as defaulting to true. Confirm the intended initial state.
        -->
        <param name="assume_sorted" type="boolean" label="Assume SAM/BAM is sorted"/>
        <param name="duplicates" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Include duplicates"/>
        <param name="unpaired" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Include unpaired reads"/>

        <!-- The "VS" macro is not defined in this file; presumably it supplies the validation_stringency parameter used by the command. -->
        <expand macro="VS"/>
    </inputs>
    <!-- Each dataset is collected from the working directory; the file names share the "OutPut" prefix given to the command's output option. -->
    <outputs>
        <data name="pre_details" format="tabular"
              label="Detailed table for artifacts introduced prior to the addition of adapters"
              from_work_dir="OutPut.pre_adapter_detail_metrics"/>
        <data name="pre_summary" format="tabular"
              label="Summary table for artifacts introduced prior to the addition of adapters"
              from_work_dir="OutPut.pre_adapter_summary_metrics"/>
        <data name="pos_details" format="tabular"
              label="Detailed table for artifacts introduced posterior to the addition of adapters"
              from_work_dir="OutPut.bait_bias_detail_metrics"/>
        <data name="pos_summary" format="tabular"
              label="Summary table for artifacts introduced posterior to the addition of adapters"
              from_work_dir="OutPut.bait_bias_summary_metrics"/>
        <data name="err_summary" format="tabular"
              label="General Summary of artifactual errors"
              from_work_dir="OutPut.error_summary_metrics"/>
    </outputs>
    <!--
        Functional tests. Both run the same BAM with context size 1 and the same
        five contexts; they differ only in where the reference comes from.
        Each output previously carried both "file" and its alias "value" with
        differing values; only "file" is kept so the expected test-data name is
        unambiguous.
    -->
    <tests>
        <!-- Reference supplied as a FASTA dataset from the history. -->
        <test>
            <param name="inputFile" value="picard_ARRG_test1.bam" ftype="bam"/>
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" value="picard_BedToIntervalList_ref.fa"/>
            <param name="context_size" value="1"/>
            <param name="contexts_to_print" value="AAA,CAA,AAC,AAT,AAG"/>
            <output name="pre_details" file="pre_details" ftype="tabular" lines_diff="4"/>
            <output name="pre_summary" file="pre_summary" ftype="tabular" lines_diff="4"/>
            <output name="pos_details" file="pos_details" ftype="tabular" lines_diff="4"/>
            <output name="pos_summary" file="pos_summary" ftype="tabular" lines_diff="4"/>
            <output name="err_summary" file="err_summary" ftype="tabular" lines_diff="4"/>
        </test>
        <!-- Cached reference; no ref_file is given, so it presumably resolves through a test data table for dbkey hg38 (TODO confirm). -->
        <test>
            <param name="inputFile" value="picard_ARRG_test1.bam" ftype="bam" dbkey="hg38"/>
            <param name="reference_source_selector" value="cached"/>
            <param name="context_size" value="1"/>
            <param name="contexts_to_print" value="AAA,CAA,AAC,AAT,AAG"/>
            <output name="pre_details" file="pre_details" ftype="tabular" lines_diff="4"/>
            <output name="pre_summary" file="pre_summary" ftype="tabular" lines_diff="4"/>
            <output name="pos_details" file="pos_details" ftype="tabular" lines_diff="4"/>
            <output name="pos_summary" file="pos_summary" ftype="tabular" lines_diff="4"/>
            <output name="err_summary" file="err_summary" ftype="tabular" lines_diff="4"/>
        </test>
    </tests>
    <help>

.. class:: infomark

**Purpose**

Program to chart the distribution of potential sequencing "single nucleotide mutation" artifacts in a SAM or BAM file.

@dataset_collections@

@description@

  ASSUME_SORTED=Boolean           If true (default), then the sort order in the header file will be ignored.
                                  Default: True

  CONTEXT_SIZE=integer            The number of context bases to include on each side of the assayed base.

  CONTEXTS_TO_PRINT=String        If specified, only print results for these contexts in the detail metrics output.
                                  However, the summary metrics output will still take all contexts into consideration.

  DB_SNP=text file                VCF format dbSNP file, used to exclude regions around known polymorphisms from analysis.

  INCLUDE_DUPLICATES=Boolean      Include duplicate reads. If set to true then all reads flagged as duplicates will be included as well.

  INCLUDE_UNPAIRED=Boolean        Include unpaired reads. If set to true then all paired reads will be included as well -
                                  MINIMUM_INSERT_SIZE and MAXIMUM_INSERT_SIZE will be ignored.

  MAXIMUM_INSERT_SIZE=Integer     The maximum insert size for a read to be included in analysis. Set to 0 to have no maximum.
                                  Default = 600

  MINIMUM_INSERT_SIZE=Integer     The minimum insert size for a read to be included in analysis.
                                  Picard default = 60 (this form pre-fills 30).

  MINIMUM_MAPPING_QUALITY=Integer The minimum mapping quality score for a base to be included in analysis.
                                  Picard default = 30 (this form pre-fills 20).

  MINIMUM_QUALITY_SCORE=Integer   The minimum base quality score for a base to be included in analysis.
                                  Picard default = 20 (this form pre-fills 30).

@more_info@

  </help>
    <expand macro="citations"/>
</tool>
<!--
author	iuc
date	Sun, 03 Mar 2024 16:06:11 +0000
parents	b502c227b5e6
children
-->