Mercurial > repos > iuc > vgp_split_agp
diff split_agp.xml @ 0:fc30c955f2ac draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/vgp_processcuration commit c25e877636f68656a0005883efb0f03b5ffd6b0c
| author | iuc |
|---|---|
| date | Wed, 07 Jan 2026 12:48:27 +0000 |
| parents | |
| children |
line wrap: on
line diff
<!--
    Galaxy tool wrapper for the split_agp command line program.

    The wrapper passes one assembly FASTA and one curated AGP file to
    split_agp, asks it to write into the job working directory, and then
    collects seven AGP files from fixed relative paths as tool outputs.

    The @TOOL_VERSION@ and @PROFILE@ tokens and the xrefs / requirements /
    citations macros are not defined in this file; they are presumably
    provided by the imported macros.xml (TODO confirm).
-->
<tool id="vgp_split_agp" name="VGP Split AGP" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
    <description>Correct AGP files and split haplotypes</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <!-- A non-zero exit code is the only failure signal (detect_errors="exit_code").
         The output directory is './', so every from_work_dir path in the outputs
         section is relative to the job working directory. -->
    <command detect_errors="exit_code"><![CDATA[
        split_agp
        -f '$fasta'
        -a '$agp'
        -o './'
    ]]></command>
    <inputs>
        <!-- Plain or gzip-compressed FASTA is accepted. -->
        <param name="fasta" type="data" format="fasta,fasta.gz" label="Assembly FASTA file"
               help="Input assembly FASTA file containing both haplotypes." />
        <param name="agp" type="data" format="tabular" label="Curated AGP file"
               help="Input curated AGP file created in PretextView with haplotype markers (Hap_1/H1 and Hap_2/H2)." />
    </inputs>
    <!-- Seven outputs: one corrected AGP plus three AGP files for each haplotype,
         collected from the Hap_1/ and Hap_2/ sub-directories. The Hap1 and Hap2
         labels are kept parallel so history items differ only by haplotype number. -->
    <outputs>
        <data name="corrected_agp" format="tabular" from_work_dir="corrected.agp"
              label="${tool.name} on ${on_string}: Corrected AGP"/>

        <!-- Haplotype 1 outputs -->
        <data name="hap1_agp" format="tabular" from_work_dir="Hap_1/hap1.agp" label="${tool.name} on ${on_string}: Hap1 AGP"/>
        <data name="hap1_unlocs_no_hapdups" format="tabular" from_work_dir="Hap_1/hap.unlocs.no_hapdups.agp" label="${tool.name} on ${on_string}: Hap1 Unlocs without haplotigs AGP"/>
        <data name="hap1_haplotigs" format="tabular" from_work_dir="Hap_1/haplotigs.agp" label="${tool.name} on ${on_string}: Hap1 Haplotigs AGP"/>

        <!-- Haplotype 2 outputs -->
        <data name="hap2_agp" format="tabular" from_work_dir="Hap_2/hap2.agp" label="${tool.name} on ${on_string}: Hap2 AGP"/>
        <data name="hap2_unlocs_no_hapdups" format="tabular" from_work_dir="Hap_2/hap.unlocs.no_hapdups.agp" label="${tool.name} on ${on_string}: Hap2 Unlocs without haplotigs AGP"/>
        <data name="hap2_haplotigs" format="tabular" from_work_dir="Hap_2/haplotigs.agp" label="${tool.name} on ${on_string}: Hap2 Haplotigs AGP"/>
    </outputs>
    <tests>
        <!-- Single end-to-end test: gzip-compressed FASTA plus AGP in, all seven
             outputs compared against their expected files. -->
        <test expect_num_outputs="7">
            <param name="fasta" value="test_input.fasta.gz" ftype="fasta.gz"/>
            <param name="agp" value="test_input.agp" ftype="tabular"/>
            <output name="corrected_agp" file="expected_corrected.agp" ftype="tabular"/>
            <output name="hap1_agp" file="expected_hap1.agp" ftype="tabular"/>
            <output name="hap1_unlocs_no_hapdups" file="expected_hap1_unlocs_no_hapdups.agp" ftype="tabular"/>
            <output name="hap1_haplotigs" file="expected_hap1_haplotigs.agp" ftype="tabular"/>
            <output name="hap2_agp" file="expected_hap2.agp" ftype="tabular"/>
            <output name="hap2_unlocs_no_hapdups" file="expected_hap2_unlocs_no_hapdups.agp" ftype="tabular"/>
            <output name="hap2_haplotigs" file="expected_hap2_haplotigs.agp" ftype="tabular"/>
        </test>
    </tests>
    <!-- The help body is reStructuredText and starts at column 0 on purpose:
         leading indentation would be interpreted as a block quote.
         NOTE(review): the <expand macro="help_common"/> line near the end sits
         inside CDATA, so it is character data rather than an element and is
         presumably shown verbatim instead of being expanded. Confirm, and
         replace it with a token from macros.xml if one is available. -->
    <help><![CDATA[
**What it does**

split_agp corrects AGP files for sequence length discrepancies, splits haplotypes into separate files,
assigns unlocalized sequences, and removes haplotig duplications.

This tool performs three sequential operations:

1. **AGPcorrect**: Validates sequence lengths from FASTA against AGP coordinates, adjusting start/end positions
2. **hap_split**: Segregates lines containing haplotype markers into separate directories (Hap_1 and Hap_2)
3. **unloc**: Processes metadata tags, renames unloc scaffolds with sequential numbering, removes haplotigs

**Inputs**

- **Assembly FASTA file**: FASTA file containing both haplotypes (may be gzip-compressed)
- **Curated AGP file**: Tab-delimited AGP file with haplotype markers (Hap_1/H1 or Hap_2/H2)

**Outputs**

- **Corrected AGP**: Length-validated AGP file with corrected coordinates
- **Hap1 AGP**: Haplotype 1 AGP file
- **Hap1 Unlocs No Hapdups**: Haplotype 1 AGP with unlocalized sequences assigned and haplotigs removed
- **Hap1 Haplotigs**: Removed duplicate haplotigs from Haplotype 1
- **Hap2 AGP**: Haplotype 2 AGP file
- **Hap2 Unlocs No Hapdups**: Haplotype 2 AGP with unlocalized sequences assigned and haplotigs removed
- **Hap2 Haplotigs**: Removed duplicate haplotigs from Haplotype 2

**Input Naming Requirements**

⚠️ **Important**: Scaffolds in the input FASTA must follow this naming convention:

- Haplotype 1: H1.scaffold_X
- Haplotype 2: H2.scaffold_X

This naming pattern is required before manual curation in PretextView.

**Next Steps**

After running split_agp, use the gfastats tool to sort each haplotype:

1. Sort Haplotype 1: gfastats with Hap1 Unlocs No Hapdups AGP
2. Sort Haplotype 2: gfastats with Hap2 Unlocs No Hapdups AGP

Then proceed to the chromosome_assignment tool to assign chromosome-level names.

.. class:: infomark

**More Information**

This tool is part of the VGP ProcessCuration pipeline for preparing curated genome assemblies for submission.

<expand macro="help_common"/>
    ]]></help>
    <expand macro="citations"/>
</tool>
