Mercurial > repos > iuc > vgp_split_agp

diff split_agp.xml @ 0:fc30c955f2ac draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/vgp_processcuration commit c25e877636f68656a0005883efb0f03b5ffd6b0c
author: iuc
date: Wed, 07 Jan 2026 12:48:27 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/split_agp.xml	Wed Jan 07 12:48:27 2026 +0000
@@ -0,0 +1,100 @@
+<tool id="vgp_split_agp" name="VGP Split AGP" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
+    <description>Correct AGP files and split haplotypes</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <expand macro="requirements"/>
+    <!-- Invokes split_agp with the FASTA (-f) and AGP (-a) datasets; -o './' targets the current
+         directory, which is where the from_work_dir paths of the outputs are resolved.
+         A non-zero exit code marks the job as failed. -->
+    <command detect_errors="exit_code"><![CDATA[
+        split_agp -f '$fasta' -a '$agp' -o './'
+    ]]></command>
+    <inputs>
+        <!-- Assembly sequences; both plain and gzip-compressed FASTA datasets are selectable. -->
+        <param name="fasta"
+               type="data"
+               format="fasta,fasta.gz"
+               label="Assembly FASTA file"
+               help="Input assembly FASTA file containing both haplotypes."/>
+        <!-- Curated AGP, selected from tabular datasets. -->
+        <param name="agp"
+               type="data"
+               format="tabular"
+               label="Curated AGP file"
+               help="Input curated AGP file created in PretextView with haplotype markers (Hap_1/H1 and Hap_2/H2)."/>
+    </inputs>
+    <outputs>
+        <!-- Every output is collected from a fixed path relative to the job working directory. -->
+        <data name="corrected_agp" format="tabular" from_work_dir="corrected.agp"
+            label="${tool.name} on ${on_string}: Corrected AGP"/>
+
+        <!-- Haplotype 1 outputs, collected from the Hap_1/ subdirectory -->
+        <data name="hap1_agp" format="tabular" from_work_dir="Hap_1/hap1.agp" label="${tool.name} on ${on_string}: Hap1 AGP"/>
+        <data name="hap1_unlocs_no_hapdups" format="tabular" from_work_dir="Hap_1/hap.unlocs.no_hapdups.agp" label="${tool.name} on ${on_string}: Hap1 Unlocs without haplotigs AGP"/>
+        <data name="hap1_haplotigs" format="tabular" from_work_dir="Hap_1/haplotigs.agp" label="${tool.name} on ${on_string}: Hap1 Haplotigs AGP"/>
+
+        <!-- Haplotype 2 outputs, collected from the Hap_2/ subdirectory; the unlocs and haplotigs
+             files use the same file names as for haplotype 1, only the directory differs -->
+        <data name="hap2_agp" format="tabular" from_work_dir="Hap_2/hap2.agp" label="${tool.name} on ${on_string}: Hap2 AGP"/>
+        <data name="hap2_unlocs_no_hapdups" format="tabular" from_work_dir="Hap_2/hap.unlocs.no_hapdups.agp" label="${tool.name} on ${on_string}: Hap2 Unlocs without haplotigs AGP"/>
+        <data name="hap2_haplotigs" format="tabular" from_work_dir="Hap_2/haplotigs.agp" label="${tool.name} on ${on_string}: Hap2 Haplotigs AGP"/>
+    </outputs>
+    <tests>
+        <!-- One end-to-end run on a gzip-compressed FASTA plus a curated AGP; each of the seven
+             outputs is compared with its stored expected file. -->
+        <test expect_num_outputs="7">
+            <param name="fasta" ftype="fasta.gz" value="test_input.fasta.gz"/>
+            <param name="agp" ftype="tabular" value="test_input.agp"/>
+            <output name="corrected_agp" ftype="tabular" file="expected_corrected.agp"/>
+            <output name="hap1_agp" ftype="tabular" file="expected_hap1.agp"/>
+            <output name="hap1_unlocs_no_hapdups" ftype="tabular" file="expected_hap1_unlocs_no_hapdups.agp"/>
+            <output name="hap1_haplotigs" ftype="tabular" file="expected_hap1_haplotigs.agp"/>
+            <output name="hap2_agp" ftype="tabular" file="expected_hap2.agp"/>
+            <output name="hap2_unlocs_no_hapdups" ftype="tabular" file="expected_hap2_unlocs_no_hapdups.agp"/>
+            <output name="hap2_haplotigs" ftype="tabular" file="expected_hap2_haplotigs.agp"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+split_agp corrects AGP files for sequence length discrepancies, splits haplotypes into separate files,
+assigns unlocalized sequences, and removes haplotig duplications.
+
+This tool performs three sequential operations:
+
+1. **AGPcorrect**: Validates sequence lengths from FASTA against AGP coordinates, adjusting start/end positions
+2. **hap_split**: Segregates lines containing haplotype markers into separate directories (Hap_1 and Hap_2)
+3. **unloc**: Processes metadata tags, renames unloc scaffolds with sequential numbering, removes haplotigs
+
+**Inputs**
+
+- **Assembly FASTA file**: FASTA file containing both haplotypes (may be gzip-compressed)
+- **Curated AGP file**: Tab-delimited AGP file with haplotype markers (Hap_1/H1 or Hap_2/H2)
+
+**Outputs**
+
+- **Corrected AGP**: Length-validated AGP file with corrected coordinates
+- **Hap1 AGP**: Haplotype 1 AGP file
+- **Hap1 Unlocs No Hapdups**: Haplotype 1 AGP with unlocalized sequences assigned and haplotigs removed
+- **Hap1 Haplotigs**: Removed duplicate haplotigs from Haplotype 1
+- **Hap2 AGP**: Haplotype 2 AGP file
+- **Hap2 Unlocs No Hapdups**: Haplotype 2 AGP with unlocalized sequences assigned and haplotigs removed
+- **Hap2 Haplotigs**: Removed duplicate haplotigs from Haplotype 2
+
+**Input Naming Requirements**
+
+⚠️ **Important**: Scaffolds in the input FASTA must follow this naming convention:
+
+- Haplotype 1: H1.scaffold_X
+- Haplotype 2: H2.scaffold_X
+
+This naming pattern is required before manual curation in PretextView.
+
+**Next Steps**
+
+After running split_agp, use the gfastats tool to sort each haplotype:
+
+1. Sort Haplotype 1: gfastats with Hap1 Unlocs No Hapdups AGP
+2. Sort Haplotype 2: gfastats with Hap2 Unlocs No Hapdups AGP
+
+Then proceed to the chromosome_assignment tool to assign chromosome-level names.
+
+.. class:: infomark
+
+**More Information**
+
+This tool is part of the VGP ProcessCuration pipeline for preparing curated genome assemblies for submission.
+
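+.. NOTE(review): the next line is inside a CDATA section, so it is plain text rather than an XML element and is shown to users verbatim instead of being expanded as a macro. If macros.xml provides this shared help text as a token, reference that token here instead (to be confirmed against macros.xml).
+
+<expand macro="help_common"/>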
+    ]]></help>
+    <expand macro="citations"/>
+</tool>