view picard_CollectSequencingArtifactsMetrics.xml @ 31:585027e65f3b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/picard commit 70d2a66c405be58d4413753792bcadf212a4da84
author iuc
date Sat, 25 Feb 2023 20:33:49 +0000
parents b502c227b5e6
children 3f254c5ced1d
line wrap: on
line source

<tool id='picard_artifact_metrics' name='Picard Collect Sequencing Artifact Metrics' version="@TOOL_VERSION@.@WRAPPER_VERSION@">
  <!-- Short human-readable summary of what the tool does. -->
  <description>Collect metrics to quantify single-base sequencing artifacts</description>
  <!-- Imports the shared definitions from picard_macros.xml and defines the WRAPPER_VERSION token
       that the tool element's version attribute refers to. -->
  <macros>
    <import>picard_macros.xml</import>
    <token name="@WRAPPER_VERSION@">2</token>
  </macros>
  <!-- Expands the shared "requirements" macro with an additional r-base 3.4.1 package requirement
       nested inside it (presumably inserted at the macro's yield point; the macro itself is
       defined in the imported file, confirm there). -->
  <expand macro="requirements">
    <requirement type="package" version="3.4.1">r-base</requirement>
  </expand>
  <command detect_errors="exit_code"><![CDATA[
    ## Shared preamble: the two tokens below come from the imported macros file.
    @java_options@
    @symlink_element_identifier@

    ## Fixed local name for the reference FASTA; it is passed to Picard as R= further down.
    #set $reference_fasta_filename = "localref.fa"
    @handle_reference_source@

    picard
    CollectSequencingArtifactMetrics
    I='$escaped_element_identifier'
    ## 'OutPut' is the file-name prefix that the outputs section picks up via from_work_dir.
    O='OutPut'
    R='${reference_fasta_filename}'
    AS=${assume_sorted}
    CONTEXT_SIZE=${context_size}
    INCLUDE_DUPLICATES='${duplicates}'
    ## Optional comma-separated list: one CONTEXTS_TO_PRINT argument is emitted per entry.
    #if $contexts_to_print
      #for $context in str($contexts_to_print).split(',')
        ## Skip empty entries left by a leading, trailing or doubled comma, so that
        ## no empty CONTEXTS_TO_PRINT argument reaches Picard.
        #if $context
          CONTEXTS_TO_PRINT='${context}'
        #end if
      #end for
    #end if
    MINIMUM_QUALITY_SCORE='${min_quality_score}'
    INCLUDE_UNPAIRED='${unpaired}'
    MAXIMUM_INSERT_SIZE='${max_size}'
    MINIMUM_INSERT_SIZE='${min_size}'
    MINIMUM_MAPPING_QUALITY='${minim_map_quality}'
    VALIDATION_STRINGENCY='${validation_stringency}';
  ]]></command>  
  <inputs>
    <!-- Alignment dataset to analyse; SAM and BAM formats are accepted. -->
    <param name="inputFile"
           type="data"
           format="sam,bam"
           label="SAM/BAM Input file"/>

    <!-- Reference genome: either an entry of the picard_indexes data table ("cached")
         or a FASTA dataset taken from the history ("history"). -->
    <conditional name="reference_source">
      <param name="reference_source_selector"
             type="select"
             label="Load reference genome from">
        <option value="cached">Local cache</option>
        <option value="history">History</option>
      </param>
      <when value="cached">
        <param name="ref_file"
               type="select"
               label="Use dictionary from the list"
               help="Select genome from the list">
          <options from_data_table="picard_indexes">
            <filter type="sort_by" column="2"/>
            <validator type="no_options" message="No indexes are available"/>
          </options>
          <validator type="no_options"
                     message="A built-in dictionary is not available for the build associated with the selected input file"/>
        </param>
      </when>
      <when value="history">
        <param name="ref_file"
               type="data"
               format="fasta"
               label="Use the following dataset to create dictionary"
               help="You can upload a FASTA sequence to the history from which Picard will automatically generate dictionary using CreateSequenceDictionary command"/>
      </when>
    </conditional>

    <!-- Sequence-context settings. -->
    <param name="context_size"
           type="integer"
           value="1"
           label="How many nucleotides (context) on each side to consider"
           help='e.g NXN corresponds to 1 "N" on each side of "X"'/>
    <!-- Free-text list of contexts; the sanitizer whitelists letters plus the comma separator. -->
    <param name="contexts_to_print"
           type="text"
           label="Contexts to print in the detailed tables, separated by comma (avoid spaces)"
           help="e.g. (for context size=1) AAA,AGG. Default prints all contexts">
      <sanitizer>
        <valid initial="string.letters">
          <add value=","/>
        </valid>
      </sanitizer>
    </param>

    <!-- Read and base filters.
         NOTE(review): the pre-filled values for min_quality_score (30), min_size (30) and
         minim_map_quality (20) differ from the Picard defaults quoted in this file's help
         section (20, 60 and 30 respectively); confirm whether that is intentional. -->
    <param name="min_quality_score"
           type="integer"
           value="30"
           label="Minimum base quality score"/>
    <param name="max_size"
           type="integer"
           value="600"
           label="Maximum insert size"/>
    <param name="min_size"
           type="integer"
           value="30"
           label="Minimum insert size"/>
    <param name="minim_map_quality"
           type="integer"
           value="20"
           label="Minimum mapping quality"/>

    <!-- Boolean switches; their values are inserted into the command line as AS,
         INCLUDE_DUPLICATES and INCLUDE_UNPAIRED. -->
    <param name="assume_sorted"
           type="boolean"
           label="Assume SAM/BAM is sorted"/>
    <param name="duplicates"
           type="boolean"
           checked="false"
           truevalue="true"
           falsevalue="false"
           label="Include duplicates"/>
    <param name="unpaired"
           type="boolean"
           checked="false"
           truevalue="true"
           falsevalue="false"
           label="Include unpaired reads"/>

    <!-- Shared macro from the imported macros file; presumably supplies the
         validation_stringency parameter read by the command (confirm in picard_macros.xml). -->
    <expand macro="VS"/>

  </inputs>

  <outputs>
    <!-- Every dataset below is collected from the job working directory, where the command
         writes its result files under the "OutPut" prefix. -->

    <!-- Pre-adapter artifact metrics. -->
    <data name="pre_details"
          format="tabular"
          from_work_dir="OutPut.pre_adapter_detail_metrics"
          label="Detailed table for artifacts introduced prior to the addition of adapters"/>
    <data name="pre_summary"
          format="tabular"
          from_work_dir="OutPut.pre_adapter_summary_metrics"
          label="Summary table for artifacts introduced prior to the addition of adapters"/>

    <!-- Bait-bias artifact metrics. -->
    <data name="pos_details"
          format="tabular"
          from_work_dir="OutPut.bait_bias_detail_metrics"
          label="Detailed table for artifacts introduced posterior to the addition of adapters"/>
    <data name="pos_summary"
          format="tabular"
          from_work_dir="OutPut.bait_bias_summary_metrics"
          label="Summary table for artifacts introduced posterior to the addition of adapters"/>

    <!-- Overall error summary. -->
    <data name="err_summary"
          format="tabular"
          from_work_dir="OutPut.error_summary_metrics"
          label="General Summary of artifactual errors"/>
  </outputs>
  
  <tests>
    <!-- Each expected output is named once, through the "file" attribute. The former
         additional "value" attributes named different files (e.g. pre_detail vs pre_details);
         "value" is only an alias of "file", so the duplicate was redundant and misleading. -->

    <!-- Test 1: reference FASTA supplied from the history. -->
    <test>
      <param name="inputFile" value="picard_ARRG_test1.bam" ftype="bam"/>
      <param name="reference_source_selector" value="history"/>
      <param name="ref_file" value="picard_BedToIntervalList_ref.fa"/>
      <param name="context_size" value="1"/>
      <param name="contexts_to_print" value="AAA,CAA,AAC,AAT,AAG"/>
      <output name="pre_details" file="pre_details" ftype="tabular" lines_diff="4"/>
      <output name="pre_summary" file="pre_summary" ftype="tabular" lines_diff="4"/>
      <output name="pos_details" file="pos_details" ftype="tabular" lines_diff="4"/>
      <output name="pos_summary" file="pos_summary" ftype="tabular" lines_diff="4"/>
      <output name="err_summary" file="err_summary" ftype="tabular" lines_diff="4"/>
    </test>

    <!-- Test 2: cached reference. No ref_file is given here, so the reference presumably comes
         from the test picard_indexes data table for the input's dbkey (hg38); confirm against
         the test data table configuration. -->
    <test>
      <param name="inputFile" value="picard_ARRG_test1.bam" ftype="bam" dbkey="hg38"/>
      <param name="reference_source_selector" value="cached"/>
      <param name="context_size" value="1"/>
      <param name="contexts_to_print" value="AAA,CAA,AAC,AAT,AAG"/>
      <output name="pre_details" file="pre_details" ftype="tabular" lines_diff="4"/>
      <output name="pre_summary" file="pre_summary" ftype="tabular" lines_diff="4"/>
      <output name="pos_details" file="pos_details" ftype="tabular" lines_diff="4"/>
      <output name="pos_summary" file="pos_summary" ftype="tabular" lines_diff="4"/>
      <output name="err_summary" file="err_summary" ftype="tabular" lines_diff="4"/>
    </test>
  </tests>
  <help>

.. class:: infomark

**Purpose**

Program to chart the distribution of potential sequencing "single nucleotide mutation" artifacts in a SAM or BAM file.

@dataset_collections@

@description@

  ASSUME_SORTED=Boolean           If true (default), then the sort order in the header file will be ignored.  
                                  Default: True

  CONTEXT_SIZE=integer            The number of context bases to include on each side of the assayed base.
  
  CONTEXTS_TO_PRINT=String        If specified, only print results for these contexts in the detail metrics output.
                                  However, the summary metrics output will still take all contexts into consideration.
                                
  DB_SNP=text file                VCF format dbSNP file, used to exclude regions around known polymorphisms from analysis.

  INCLUDE_DUPLICATES=Boolean      Include duplicate reads. If set to true then all reads flagged as duplicates will be included as well.

  INCLUDE_UNPAIRED=Boolean        Include unpaired reads. If set to true then all paired reads will be included as well - 
                                  MINIMUM_INSERT_SIZE and MAXIMUM_INSERT_SIZE will be ignored.

  MAXIMUM_INSERT_SIZE=Integer     The maximum insert size for a read to be included in analysis. Set to 0 to have no maximum.
                                  Default = 600

  MINIMUM_INSERT_SIZE=Integer     The minimum insert size for a read to be included in analysis. Picard default = 60; this form pre-fills 30.

  MINIMUM_MAPPING_QUALITY=Integer The minimum mapping quality score for a base to be included in analysis. Picard default = 30; this form pre-fills 20.

  MINIMUM_QUALITY_SCORE=Integer   The minimum base quality score for a base to be included in analysis. Picard default = 20; this form pre-fills 30.

@more_info@

  </help>
  <expand macro="citations" />
</tool>