vgp_split_agp: split_agp.xml comparison

comparison split_agp.xml @ 0:fc30c955f2ac draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/vgp_processcuration commit c25e877636f68656a0005883efb0f03b5ffd6b0c

author	iuc
date	Wed, 07 Jan 2026 12:48:27 +0000
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:fc30c955f2ac
+<tool id="vgp_split_agp" name="VGP Split AGP" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
+<description>Correct AGP files and split haplotypes</description>
+<macros>
+<import>macros.xml</import>
+</macros>
+<expand macro="xrefs"/>
+<expand macro="requirements"/>
+<!-- Invokes split_agp on the selected FASTA (-f) and AGP (-a) datasets and passes './' as
+     the -o argument; the outputs section of this tool then collects result files by
+     relative path via from_work_dir. Job failure is detected from the process exit code. -->
+<command detect_errors="exit_code"><![CDATA[
+split_agp
+-f '$fasta'
+-a '$agp'
+-o './'
+]]></command>
+<inputs>
+<!-- Assembly sequences: a FASTA dataset, plain or gzip-compressed. -->
+<param name="fasta"
+       type="data"
+       format="fasta,fasta.gz"
+       label="Assembly FASTA file"
+       help="Input assembly FASTA file containing both haplotypes."/>
+<!-- Curated AGP, supplied as a tabular dataset. -->
+<param name="agp"
+       type="data"
+       format="tabular"
+       label="Curated AGP file"
+       help="Input curated AGP file created in PretextView with haplotype markers (Hap_1/H1 and Hap_2/H2)."/>
+</inputs>
+<outputs>
+<!-- Every output is a tabular dataset picked up from a fixed relative path in the job
+     working directory. Labels for the two haplotypes are kept word-for-word parallel so
+     the paired datasets are easy to match up in the history. -->
+<!-- Corrected AGP, collected from corrected.agp -->
+<data name="corrected_agp" format="tabular" from_work_dir="corrected.agp"
+label="${tool.name} on ${on_string}: Corrected AGP"/>
+<!-- Haplotype 1 outputs -->
+<data name="hap1_agp" format="tabular" from_work_dir="Hap_1/hap1.agp" label="${tool.name} on ${on_string}: Hap1 AGP"/>
+<data name="hap1_unlocs_no_hapdups" format="tabular" from_work_dir="Hap_1/hap.unlocs.no_hapdups.agp" label="${tool.name} on ${on_string}: Hap1 Unlocs without haplotigs AGP"/>
+<data name="hap1_haplotigs" format="tabular" from_work_dir="Hap_1/haplotigs.agp" label="${tool.name} on ${on_string}: Hap1 Haplotigs AGP"/>
+<!-- Haplotype 2 outputs -->
+<data name="hap2_agp" format="tabular" from_work_dir="Hap_2/hap2.agp" label="${tool.name} on ${on_string}: Hap2 AGP"/>
+<data name="hap2_unlocs_no_hapdups" format="tabular" from_work_dir="Hap_2/hap.unlocs.no_hapdups.agp" label="${tool.name} on ${on_string}: Hap2 Unlocs without haplotigs AGP"/>
+<data name="hap2_haplotigs" format="tabular" from_work_dir="Hap_2/haplotigs.agp" label="${tool.name} on ${on_string}: Hap2 Haplotigs AGP"/>
+</outputs>
+<tests>
+<!-- Single end-to-end case: gzip-compressed FASTA plus curated AGP in, all seven
+     outputs compared against stored expected files. -->
+<test expect_num_outputs="7">
+<!-- inputs -->
+<param name="fasta" ftype="fasta.gz" value="test_input.fasta.gz"/>
+<param name="agp" ftype="tabular" value="test_input.agp"/>
+<!-- corrected AGP -->
+<output name="corrected_agp" ftype="tabular" file="expected_corrected.agp"/>
+<!-- haplotype 1 -->
+<output name="hap1_agp" ftype="tabular" file="expected_hap1.agp"/>
+<output name="hap1_unlocs_no_hapdups" ftype="tabular" file="expected_hap1_unlocs_no_hapdups.agp"/>
+<output name="hap1_haplotigs" ftype="tabular" file="expected_hap1_haplotigs.agp"/>
+<!-- haplotype 2 -->
+<output name="hap2_agp" ftype="tabular" file="expected_hap2.agp"/>
+<output name="hap2_unlocs_no_hapdups" ftype="tabular" file="expected_hap2_unlocs_no_hapdups.agp"/>
+<output name="hap2_haplotigs" ftype="tabular" file="expected_hap2_haplotigs.agp"/>
+</test>
+</tests>
+<!-- User-facing help, written as reStructuredText inside a CDATA section.
+     NOTE(review): everything inside CDATA is literal character data, so the
+     <expand macro="help_common"/> line near the end of this section is not an XML
+     element; presumably it is not macro-expanded and shows up verbatim in the rendered
+     help. TODO confirm against macros.xml, and if so switch to a token or move the
+     expand outside the CDATA section. -->
+<help><![CDATA[
+**What it does**
+split_agp corrects AGP files for sequence length discrepancies, splits haplotypes into separate files,
+assigns unlocalized sequences, and removes haplotig duplications.
+This tool performs three sequential operations:
+1. **AGPcorrect**: Validates sequence lengths from FASTA against AGP coordinates, adjusting start/end positions
+2. **hap_split**: Segregates lines containing haplotype markers into separate directories (Hap_1 and Hap_2)
+3. **unloc**: Processes metadata tags, renames unloc scaffolds with sequential numbering, removes haplotigs
+**Inputs**
+- **Assembly FASTA file**: FASTA file containing both haplotypes (may be gzip-compressed)
+- **Curated AGP file**: Tab-delimited AGP file with haplotype markers (Hap_1/H1 or Hap_2/H2)
+**Outputs**
+- **Corrected AGP**: Length-validated AGP file with corrected coordinates
+- **Hap1 AGP**: Haplotype 1 AGP file
+- **Hap1 Unlocs No Hapdups**: Haplotype 1 AGP with unlocalized sequences assigned and haplotigs removed
+- **Hap1 Haplotigs**: Removed duplicate haplotigs from Haplotype 1
+- **Hap2 AGP**: Haplotype 2 AGP file
+- **Hap2 Unlocs No Hapdups**: Haplotype 2 AGP with unlocalized sequences assigned and haplotigs removed
+- **Hap2 Haplotigs**: Removed duplicate haplotigs from Haplotype 2
+**Input Naming Requirements**
+⚠️ **Important**: Scaffolds in the input FASTA must follow this naming convention:
+- Haplotype 1: H1.scaffold_X
+- Haplotype 2: H2.scaffold_X
+This naming pattern is required before manual curation in PretextView.
+**Next Steps**
+After running split_agp, use the gfastats tool to sort each haplotype:
+1. Sort Haplotype 1: gfastats with Hap1 Unlocs No Hapdups AGP
+2. Sort Haplotype 2: gfastats with Hap2 Unlocs No Hapdups AGP
+Then proceed to the chromosome_assignment tool to assign chromosome-level names.
+.. class:: infomark
+**More Information**
+This tool is part of the VGP ProcessCuration pipeline for preparing curated genome assemblies for submission.
+<expand macro="help_common"/>
+]]></help>
+<expand macro="citations"/>
+</tool>

Mercurial > repos > iuc > vgp_split_agp

comparison split_agp.xml @ 0:fc30c955f2ac draft default tip