view hapcut2.xml @ 2:800f8086da7d draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc commit 908fb9148a3a116dbf7ccd16e0992e7882e748c2
author iuc
date Tue, 30 Apr 2024 08:38:15 +0000
parents 271eb7f4b8bc
children
line wrap: on
line source

<tool id="hapcut2" name="Hapcut2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.2">
  <!-- Galaxy wrapper that runs extractHAIRS followed by HAPCUT2 (see the command section) -->
  <description>haplotype assembly for diploid organisms</description>
  <macros>
    <!-- @TOOL_VERSION@ feeds both the tool version attribute and the hapcut2 package requirement -->
    <token name="@TOOL_VERSION@">1.3.4</token>
    <!-- @VERSION_SUFFIX@ is the number that follows "+galaxy" in the tool version attribute -->
    <token name="@VERSION_SUFFIX@">1</token>
    <!-- The reference_genome_input and creator macros expanded in this file are not defined here;
         presumably they come from macros.xml (to be confirmed against that file) -->
    <import>macros.xml</import>   
  </macros>
  <xrefs>
    <xref type="bio.tools">hapcut2</xref>
  </xrefs>
  <requirements>
    <!-- Package version is tied to @TOOL_VERSION@ so the wrapper version and dependency move together -->
    <requirement type="package" version="@TOOL_VERSION@">hapcut2</requirement>
  </requirements>
  <command detect_errors="exit_code"><![CDATA[

## Overview
## =====================================================================
## 1. Symlink the input datasets to fixed names in the working directory.
## 2. extractHAIRS collects variant fragments from the BAM into frags.dat.
## 3. HAPCUT2 assembles haplotypes from frags.dat into haplotype.out; the
##    phased VCF output is collected from haplotype.out.phased.VCF.

## Prep inputs
## =====================================================================
ln -s '$input_bam' input.bam
&& ln -s '$input_vcf' input.vcf

## Set reference genome if required
## Only the pacbio and ont branches below pass --ref, so the path is
## resolved for those two choices only.
#if $optimization.choice in ['pacbio', 'ont']:
  #if $optimization.reference_genome.source == 'index':
    #set $ref_genome_path = $optimization.reference_genome.index.fields.path
  #else
    #set $ref_genome_path = $optimization.reference_genome.fasta
  #end if
#end if

## Run program
## =====================================================================
## Extract variant fragments from alignment
&& extractHAIRS --bam 'input.bam' --VCF 'input.vcf' --out frags.dat
#if $optimization.choice == 'pacbio':
  --pacbio 1
  --ref '$ref_genome_path'
#elif $optimization.choice == 'ont':
  --ont 1
  --ref '$ref_genome_path'
#elif $optimization.choice == 'hic':
  --HiC 1
#end if

## Insert-size limits are optional integers with min="0". Test the rendered
## value against the empty string instead of relying on truthiness: an
## explicit 0 is a legal value but evaluates as false, which would silently
## drop the flag and leave extractHAIRS on its built-in default.
#if str($advanced.minIS) != ''
  --minIS $advanced.minIS
#end if

#if str($advanced.maxIS) != ''
  --maxIS $advanced.maxIS
#end if

## Create haplotype.out and haplotype.out.phased.VCF
&& HAPCUT2 --fragments frags.dat --VCF input.vcf --output haplotype.out
#if $optimization.choice == 'hic':
  --HiC 1
#end if
  ]]></command>

  <inputs>
    <!-- Primary datasets: the alignments and the variant calls to be phased -->
    <param name="input_bam"
           argument="--bam"
           type="data"
           format="bam"
           label="Input BAM file"
           help="Coordinate-sorted BAM file"/>
    <param name="input_vcf"
           argument="--VCF"
           type="data"
           format="vcf"
           label="Input VCF file"
           help="Variant file with genotypes for a single individual"/>

    <!-- Sequencing-technology preset; the pacbio and ont cases additionally ask for a reference genome -->
    <conditional name="optimization">
      <!-- TODO: include 10X (requires extra processing step) -->
      <param name="choice"
             type="select"
             display="radio"
             label="Optimization">
        <option value="default" selected="true">Default</option>
        <option value="pacbio">Pacbio</option>
        <option value="ont">Oxford Nanopore</option>
        <option value="hic">Hi-C</option>
      </param>
      <when value="default"/>
      <when value="pacbio">
        <expand macro="reference_genome_input"/>
      </when>
      <when value="ont">
        <expand macro="reference_genome_input"/>
      </when>
      <when value="hic"/>
    </conditional>

    <!-- Toggles referenced by the output filters for the two optional datasets -->
    <param name="output_phased"
           type="boolean"
           checked="true"
           label="Output phased VCF file?"
           help="Output variant calls on the haplotype assembly"/>
    <param name="output_fragments"
           type="boolean"
           label="Output fragments file?"
           help="Output fragments collected by extractHAIRS"/>

    <!-- Insert-size limits forwarded to extractHAIRS by the command section -->
    <section name="advanced" title="Advanced parameters">
      <param argument="--maxIS"
             type="integer"
             optional="true"
             value="1000"
             min="0"
             label="Maximum insert size"
             help="Maximum insert size for a paired-end read to be considered as a single fragment for phasing"/>
      <param argument="--minIS"
             type="integer"
             optional="true"
             value="0"
             min="0"
             label="Minimum insert size"
             help="Minimum insert size for a paired-end read to be considered as a single fragment for phasing"/>
    </section>
  </inputs>

  <outputs>
    <!-- Always produced: the haplotype block file written by HAPCUT2 -->
    <data name="haplotype"
          format="txt"
          from_work_dir="haplotype.out"
          label="${tool.name} on ${on_string}: Haplotype block"/>

    <!-- Optional: kept only when the output_phased toggle is set -->
    <data name="haplotype_phased"
          format="vcf"
          from_work_dir="haplotype.out.phased.VCF"
          label="${tool.name} on ${on_string}: Phased haplotype VCF">
      <filter>output_phased</filter>
    </data>

    <!-- Optional: the extractHAIRS fragment file, kept only when output_fragments is set -->
    <data name="frags"
          format="txt"
          from_work_dir="frags.dat"
          label="${tool.name} on ${on_string}: Fragments">
      <filter>output_fragments</filter>
    </data>
  </outputs>

  <tests>
    <!-- Defaults: both optional outputs switched off, so only the haplotype block file is expected -->
    <test expect_num_outputs="1">
      <param name="input_bam" ftype="bam" value="input.bam"/>
      <param name="input_vcf" ftype="vcf" value="input.vcf"/>
      <param name="output_fragments" value="0"/>
      <param name="output_phased" value="0"/>
      <conditional name="optimization">
        <param name="choice" value="default"/>
      </conditional>
      <output name="haplotype" ftype="txt" file="output_haplotype.out"/>
    </test>

    <!-- Defaults with all outputs -->
    <test expect_num_outputs="3">
      <param name="input_bam" ftype="bam" value="input.bam"/>
      <param name="input_vcf" ftype="vcf" value="input.vcf"/>
      <param name="output_fragments" value="1"/>
      <param name="output_phased" value="1"/>
      <conditional name="optimization">
        <param name="choice" value="default"/>
      </conditional>
      <output name="frags" ftype="txt" file="output_frag.dat"/>
      <output name="haplotype" ftype="txt" file="output_haplotype.out"/>
      <output name="haplotype_phased" ftype="vcf" file="output_haplotype.out.phased.vcf"/>
    </test>

    <!-- Hi-C optimization -->
    <!-- TODO(review): this test still selects "default", so it repeats the test above and the
         "hic" branch of the command is never exercised. Switching the value to "hic" most
         likely needs Hi-C specific expected files, which is why the value is left unchanged here. -->
    <test expect_num_outputs="3">
      <param name="input_bam" ftype="bam" value="input.bam"/>
      <param name="input_vcf" ftype="vcf" value="input.vcf"/>
      <param name="output_fragments" value="1"/>
      <param name="output_phased" value="1"/>
      <conditional name="optimization">
        <param name="choice" value="default"/>
      </conditional>
      <output name="frags" ftype="txt" file="output_frag.dat"/>
      <output name="haplotype" ftype="txt" file="output_haplotype.out"/>
      <output name="haplotype_phased" ftype="vcf" file="output_haplotype.out.phased.vcf"/>
    </test>

    <!-- Pacbio optimization with ref genome -->
    <!-- The reference_genome conditional is nested inside "optimization" to mirror the
         $optimization.reference_genome.* paths read by the command section. -->
    <test expect_num_outputs="3">
      <param name="input_bam" ftype="bam" value="input.bam"/>
      <param name="input_vcf" ftype="vcf" value="input.vcf"/>
      <param name="output_fragments" value="1"/>
      <param name="output_phased" value="1"/>
      <conditional name="optimization">
        <param name="choice" value="pacbio"/>
        <conditional name="reference_genome">
          <param name="source" value="history"/>
          <param name="fasta" ftype="fasta" value="ref.fasta"/>
        </conditional>
      </conditional>
      <output name="frags" ftype="txt" file="output_frag.dat"/>
      <output name="haplotype" ftype="txt" file="output_haplotype.out"/>
      <output name="haplotype_phased" ftype="vcf" file="output_haplotype.out.phased.vcf"/>
    </test>

    <!-- Oxford nanopore optimization with ref genome -->
    <!-- The selector parameter is named "choice"; the previous name "optimization" matched no
         input, so the ONT branch was never selected.
         NOTE(review): the expected files are the ones shared with the other tests; confirm that
         ONT mode reproduces them on this test data. -->
    <test expect_num_outputs="3">
      <param name="input_bam" ftype="bam" value="input.bam"/>
      <param name="input_vcf" ftype="vcf" value="input.vcf"/>
      <param name="output_fragments" value="1"/>
      <param name="output_phased" value="1"/>
      <conditional name="optimization">
        <param name="choice" value="ont"/>
        <conditional name="reference_genome">
          <param name="source" value="history"/>
          <param name="fasta" ftype="fasta" value="ref.fasta"/>
        </conditional>
      </conditional>
      <output name="frags" ftype="txt" file="output_frag.dat"/>
      <output name="haplotype" ftype="txt" file="output_haplotype.out"/>
      <output name="haplotype_phased" ftype="vcf" file="output_haplotype.out.phased.vcf"/>
    </test>
  </tests>

  <help><![CDATA[
.. class:: infomark

*NOTE: At this time HapCUT2 is for diploid organisms only and can assemble haplotypes for one individual at a time. VCF input should contain variants and genotypes for a single diploid individual.*

.. class:: infomark

*NOTE: At this time HapCUT2 on Galaxy cannot be used for 10X Genomics sequencing data.*


**What it does**

HapCUT2 is a maximum-likelihood-based tool for assembling haplotypes from DNA sequence reads, designed to "just work" with excellent speed and accuracy. Previously described haplotype assembly methods are specialized for specific read technologies or protocols, with slow or inaccurate performance on others. With this in mind, HapCUT2 is designed for speed and accuracy across diverse sequencing technologies, including but not limited to:

- NGS short reads (Illumina HiSeq)
- single-molecule long reads (PacBio and Oxford Nanopore)
- Linked-Reads (e.g. 10X Genomics, stLFR or TELL-seq)
- proximity-ligation (Hi-C) reads
- high-coverage sequencing (>40x coverage-per-SNP) using above technologies
- combinations of the above technologies (e.g. scaffold long reads with Hi-C reads)


**Inputs**

Input data should come from a single diploid individual, with reads mapped to a reference genome.

1. BAM file with reads mapped to reference genome

2. VCF file with variant calls against reference genome

*Using linked reads (10X Genomics, stLFR etc)?* Additional preparation is required: `see here <https://github.com/vibansal/HapCUT2/blob/master/linkedreads.md>`_


**Outputs**

- ``haplotype.out``: `phased block file <https://github.com/vibansal/HapCUT2/blob/master/outputformat.md>`_
- ``haplotype.out.phased.vcf``: (optional) phased VCF file
- ``Fragments``: (optional) An intermediate file containing alignment fragments with haplotype information

See `HapCUT2 on GitHub <https://github.com/vibansal/HapCUT2>`_ for more detailed information.

  ]]></help>
  <!-- Publication to cite when using this tool -->
  <citations>
    <citation type="doi">10.1101/gr.213462.116</citation>
  </citations>
  <!-- The creator macro is not defined in this file -->
  <expand macro="creator"/>
</tool>