diff chromosome_assignment.xml @ 0:9b52f4e2093b draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/vgp_processcuration commit c25e877636f68656a0005883efb0f03b5ffd6b0c
author iuc
date Wed, 07 Jan 2026 12:48:42 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chromosome_assignment.xml	Wed Jan 07 12:48:42 2026 +0000
@@ -0,0 +1,95 @@
+<tool id="vgp_chromosome_assignment" name="VGP Chromosome Assignment" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
+    <description>Assign chromosome names to scaffolds</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Results are written into a directory in the job working directory,
+        ## so create it first; the outputs are picked up from it via from_work_dir.
+        mkdir -p output_dir
+
+        ## Only start the assignment when the directory could be created
+        && chromosome_assignment
+            -a '$agp'
+            -f '$fasta'
+            -o output_dir
+    ]]></command>
+    <inputs>
+        <param name="agp" type="data" format="tabular"
+            label="Haplotype AGP file" help="Input haplotype AGP file without haplotig duplications."/>
+        <param name="fasta" type="data" format="fasta"
+            label="Sorted FASTA file" help="Input sorted FASTA file."/>
+    </inputs>
+    <outputs>
+        <data name="inter_chr" format="tabular" label="${tool.name} on ${on_string}: Chromosome Mapping Table"
+            from_work_dir="output_dir/inter_chr.tsv"/>
+        <data name="chr_level_fasta" format="fasta" label="${tool.name} on ${on_string}: Chromosome-level FASTA"
+            from_work_dir="output_dir/hap.chr_level.fa"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="2">
+            <param name="agp" ftype="tabular" value="test_hap1_unlocs_no_hapdups.agp"/>
+            <param name="fasta" ftype="fasta" value="test_hap1_sorted.fa"/>
+            <output name="inter_chr" ftype="tabular" file="expected_hap1_inter_chr.tsv"/>
+            <output name="chr_level_fasta" ftype="fasta" file="expected_hap1_chr_level.fa"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+chromosome_assignment substitutes scaffold identifiers with chromosome assignments, generating chromosome-level
+sequences and mapping tables.
+
+The tool processes AGP metadata to:
+
+1. Identify sex chromosomes (X, Y, W, Z) and regular chromosomes
+2. Filter autosomal scaffolds and assign sequential ``SUPER_`` identifiers
+3. Rename sex-linked scaffolds with ``SUPER_X/Y/W/Z`` prefixes
+4. Handle unlocalized contigs by replacing parent scaffold names with chromosomal assignments
+5. Generate documentation mapping original names to new names
+
+**Inputs**
+
+- **Haplotype AGP file**: Tab-delimited AGP file with chromosome assignment metadata (typically hap.unlocs.no_hapdups.agp from split_agp)
+- **Sorted FASTA file**: Sorted sequence file containing scaffolds/contigs (typically sorted using gfastats)
+
+**Outputs**
+
+- **Chromosome Mapping Table (inter_chr.tsv)**: Tab-separated file documenting all scaffold-to-chromosome name transformations
+- **Chromosome-level FASTA**: FASTA file with sequences renamed to chromosome-level assignments
+
+**Workflow Context**
+
+This tool is typically run twice in the VGP curation pipeline, once for each haplotype:
+
+1. Run on Haplotype 1: Use Hap1 AGP and Hap1 sorted FASTA
+2. Run on Haplotype 2: Use Hap2 AGP and Hap2 sorted FASTA
+
+**Input Preparation**
+
+Before running this tool:
+
+1. Run split_agp to split haplotypes and correct AGP files
+2. Use gfastats to sort each haplotype with its corresponding AGP file:
+   ``gfastats hap1.fa -a hap1_unlocs_no_hapdups.agp -o hap1.sorted.fa`` (haplotype 1) and
+   ``gfastats hap2.fa -a hap2_unlocs_no_hapdups.agp -o hap2.sorted.fa`` (haplotype 2)
+
+**Next Steps**
+
+After running chromosome_assignment on both haplotypes:
+
+1. Run MashMap to align the two chromosome-level haplotypes
+2. Use sak_generation with the two inter_chr.tsv files and MashMap output to generate SAK instructions
+
+.. class:: infomark
+
+**More Information**
+
+This tool is part of the VGP ProcessCuration pipeline for preparing curated genome assemblies for submission.
+
+@HELP_COMMON@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>