comparison split_agp.xml @ 0:fc30c955f2ac draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/vgp_processcuration commit c25e877636f68656a0005883efb0f03b5ffd6b0c
author iuc
date Wed, 07 Jan 2026 12:48:27 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:fc30c955f2ac
1 <tool id="vgp_split_agp" name="VGP Split AGP" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
2 <description>Correct AGP files and split haplotypes</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="xrefs"/>
7 <expand macro="requirements"/>
<!-- Runs split_agp on the assembly FASTA and the curated AGP, writing its results into the job
     working directory ('./'), which is where the from_work_dir outputs are picked up from.
     detect_errors="exit_code": a non-zero exit status marks the job as failed. -->
8 <command detect_errors="exit_code"><![CDATA[
9 split_agp
10 -f '$fasta'
11 -a '$agp'
12 -o './'
13 ]]></command>
<!-- Two input datasets: the combined two-haplotype assembly (plain or gzip-compressed FASTA)
     and the PretextView-curated AGP carrying the Hap_1/H1 and Hap_2/H2 markers. -->
14 <inputs>
15 <param name="fasta" type="data" format="fasta,fasta.gz" label="Assembly FASTA file"
16 help="Input assembly FASTA file containing both haplotypes." />
17 <param name="agp" type="data" format="tabular" label="Curated AGP file"
18 help="Input curated AGP file created in PretextView with haplotype markers (Hap_1/H1 and Hap_2/H2)." />
19 </inputs>
<!-- Seven tabular AGP outputs collected from the job working directory: the corrected AGP at the
     top level, plus three files from each of the Hap_1/ and Hap_2/ sub-directories.
     The labels of the two haplotypes are kept word-for-word parallel so that paired datasets
     are named consistently in the history. -->
20 <outputs>
21 <data name="corrected_agp" format="tabular" from_work_dir="corrected.agp"
22 label="${tool.name} on ${on_string}: Corrected AGP"/>
23
24 <!-- Haplotype 1 outputs -->
25 <data name="hap1_agp" format="tabular" from_work_dir="Hap_1/hap1.agp" label="${tool.name} on ${on_string}: Hap1 AGP"/>
26 <data name="hap1_unlocs_no_hapdups" format="tabular" from_work_dir="Hap_1/hap.unlocs.no_hapdups.agp" label="${tool.name} on ${on_string}: Hap1 Unlocs without haplotigs AGP"/>
27 <data name="hap1_haplotigs" format="tabular" from_work_dir="Hap_1/haplotigs.agp" label="${tool.name} on ${on_string}: Hap1 Haplotigs AGP"/>
28
29 <!-- Haplotype 2 outputs -->
30 <data name="hap2_agp" format="tabular" from_work_dir="Hap_2/hap2.agp" label="${tool.name} on ${on_string}: Hap2 AGP"/>
31 <data name="hap2_unlocs_no_hapdups" format="tabular" from_work_dir="Hap_2/hap.unlocs.no_hapdups.agp" label="${tool.name} on ${on_string}: Hap2 Unlocs without haplotigs AGP"/>
32 <data name="hap2_haplotigs" format="tabular" from_work_dir="Hap_2/haplotigs.agp" label="${tool.name} on ${on_string}: Hap2 Haplotigs AGP"/>
33 </outputs>
<!-- Single end-to-end test: feeds a gzip-compressed FASTA and its AGP to the tool and compares
     each of the seven declared outputs against an expected file from the test data. -->
34 <tests>
35 <test expect_num_outputs="7">
36 <param name="fasta" value="test_input.fasta.gz" ftype="fasta.gz"/>
37 <param name="agp" value="test_input.agp" ftype="tabular"/>
38 <output name="corrected_agp" file="expected_corrected.agp" ftype="tabular"/>
39 <output name="hap1_agp" file="expected_hap1.agp" ftype="tabular"/>
40 <output name="hap1_unlocs_no_hapdups" file="expected_hap1_unlocs_no_hapdups.agp" ftype="tabular"/>
41 <output name="hap1_haplotigs" file="expected_hap1_haplotigs.agp" ftype="tabular"/>
42 <output name="hap2_agp" file="expected_hap2.agp" ftype="tabular"/>
43 <output name="hap2_unlocs_no_hapdups" file="expected_hap2_unlocs_no_hapdups.agp" ftype="tabular"/>
44 <output name="hap2_haplotigs" file="expected_hap2_haplotigs.agp" ftype="tabular"/>
45 </test>
46 </tests>
<!-- User-facing help, written in reStructuredText (lists must be separated from the preceding
     paragraph by a blank line, otherwise they render as running text).
     NOTE(review): the <expand macro="help_common"/> line near the end sits inside CDATA, so the
     macro processor treats it as plain text and it is displayed literally. If macros.xml offers
     the shared help as a token, reference that token instead. TODO confirm against macros.xml. -->
47 <help><![CDATA[
48 **What it does**
49
50 split_agp corrects AGP files for sequence length discrepancies, splits haplotypes into separate files,
51 assigns unlocalized sequences, and removes haplotig duplications.
52
53 This tool performs three sequential operations:
54
55 1. **AGPcorrect**: Validates sequence lengths from FASTA against AGP coordinates, adjusting start/end positions
56 2. **hap_split**: Segregates lines containing haplotype markers into separate directories (Hap_1 and Hap_2)
57 3. **unloc**: Processes metadata tags, renames unloc scaffolds with sequential numbering, removes haplotigs
58
59 **Inputs**
60
61 - **Assembly FASTA file**: FASTA file containing both haplotypes (may be gzip-compressed)
62 - **Curated AGP file**: Tab-delimited AGP file with haplotype markers (Hap_1/H1 or Hap_2/H2)
63
64 **Outputs**
65
66 - **Corrected AGP**: Length-validated AGP file with corrected coordinates
67 - **Hap1 AGP**: Haplotype 1 AGP file
68 - **Hap1 Unlocs No Hapdups**: Haplotype 1 AGP with unlocalized sequences assigned and haplotigs removed
69 - **Hap1 Haplotigs**: Removed duplicate haplotigs from Haplotype 1
70 - **Hap2 AGP**: Haplotype 2 AGP file
71 - **Hap2 Unlocs No Hapdups**: Haplotype 2 AGP with unlocalized sequences assigned and haplotigs removed
72 - **Hap2 Haplotigs**: Removed duplicate haplotigs from Haplotype 2
73
74 **Input Naming Requirements**
75
76 ⚠️ **Important**: Scaffolds in the input FASTA must follow this naming convention:

77 - Haplotype 1: H1.scaffold_X
78 - Haplotype 2: H2.scaffold_X
79
80 This naming pattern is required before manual curation in PretextView.
81
82 **Next Steps**
83
84 After running split_agp, use the gfastats tool to sort each haplotype:
85
86 1. Sort Haplotype 1: gfastats with Hap1 Unlocs No Hapdups AGP
87 2. Sort Haplotype 2: gfastats with Hap2 Unlocs No Hapdups AGP
88
89 Then proceed to the chromosome_assignment tool to assign chromosome-level names.
90
91 .. class:: infomark
92
93 **More Information**
94
95 This tool is part of the VGP ProcessCuration pipeline for preparing curated genome assemblies for submission.
96
97 <expand macro="help_common"/>
98 ]]></help>
99 <expand macro="citations"/>
100 </tool>