view ppanggolin_all.xml @ 7:72179c3fa871 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ppanggolin commit 29098ae57dcc42db8f9adf321d31ed37fb999d17
author iuc
date Mon, 24 Nov 2025 12:55:56 +0000
parents d2b0073ef8d6
children
line wrap: on
line source

<tool id="ppanggolin_all" name="PPanGGOLiN all" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0">
    <description>generates a partitioned pangenome</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>

    <command detect_errors="exit_code"><![CDATA[
        ## NOTE(review): "re" is not referenced directly in this block; presumably the
        ## ORGANISM_LIST token expanded from macros.xml relies on it. Confirm before removing.
        #import re

        ## Names of the optional outputs requested by the user, as a plain Python list.
        ## The select is optional, so it may hold no value at all. Going through str()
        ## keeps the membership tests below safe in that case (the list then matches
        ## none of the option names), mirroring the guard used by the output filters.
        #set $selected_outputs = str($advanced_pangenome_optional_files).split(",")

        ## Working directories: one for the ppanggolin results, one for the genome list.
        mkdir -p ./tmp_ppanggolin/all &&
        mkdir -p ./tmp_ppanggolin/organism_list &&

        ## Token defined in macros.xml. It is expected to leave the genome list at
        ## ./tmp_ppanggolin/organism_list/organism.list (the path consumed below).
        @ORGANISM_LIST@

        ppanggolin all

        ## The same list file is handed over as sequences or as annotations,
        ## depending on the type of the input datasets.
        #if $extension_input_files == "fasta":
            --fasta
        #elif $extension_input_files == "genbank":
            --anno
        #end if
        ./tmp_ppanggolin/organism_list/organism.list

        --output ./tmp_ppanggolin/all
        --force
        --cpu "\${GALAXY_SLOTS:-4}"
        --disable_prog_bar

        --coverage $coverage
        --identity $identity
        --translation_table $translation_table

        ## Only forwarded when the user supplied a value.
        #if str($nb_of_partitions) != "":
            --nb_of_partitions $nb_of_partitions
        #end if

        $do_defrag

        ## Text summary of the pangenome that was just built.
        && ppanggolin info
        --pangenome ./tmp_ppanggolin/all/pangenome.h5
        > ./tmp_ppanggolin/all/ppanggolin_info.txt

        ## Optional outputs: each file is copied to its Galaxy dataset only when requested.

        ## Modules, spots and RGP borders
        #if "output_functional_modules" in $selected_outputs:
            && cat ./tmp_ppanggolin/all/functional_modules.tsv > '${functional_modules}'
        #end if
        #if "output_modules_RGP_lists" in $selected_outputs:
            && cat ./tmp_ppanggolin/all/modules_RGP_lists.tsv > '${modules_RGP_lists}'
        #end if
        #if "output_modules_spots" in $selected_outputs:
            && cat ./tmp_ppanggolin/all/modules_spots.tsv > '${modules_spots}'
        #end if
        #if "output_modules_in_genomes" in $selected_outputs:
            && cat ./tmp_ppanggolin/all/modules_in_genomes.tsv > '${modules_in_genomes}'
        #end if
        #if "output_modules_summary" in $selected_outputs:
            && cat ./tmp_ppanggolin/all/modules_summary.tsv > '${modules_summary}'
        #end if
        #if "output_spot_borders" in $selected_outputs:
            && cat ./tmp_ppanggolin/all/spot_borders.tsv > '${spot_borders}'
        #end if
        #if "output_spots" in $selected_outputs:
            && cat ./tmp_ppanggolin/all/spots.tsv > '${spots}'
        #end if
        #if "output_summarize_spots" in $selected_outputs:
            && cat ./tmp_ppanggolin/all/summarize_spots.tsv > '${summarize_spots}'
        #end if
        #if "output_border_protein_genes" in $selected_outputs:
            && cat ./tmp_ppanggolin/all/border_protein_genes.fasta > '${border_protein_genes}'
        #end if

        ## Plots
        #if "output_tile_plot" in $selected_outputs:
            && cat ./tmp_ppanggolin/all/tile_plot.html > '${tile_plot}'
        #end if
        #if "output_Ushaped_plot" in $selected_outputs:
            && cat ./tmp_ppanggolin/all/Ushaped_plot.html > '${Ushaped_plot}'
        #end if

        ## Pangenome graphs
        #if "output_pangenomeGraph_json" in $selected_outputs:
            && cat ./tmp_ppanggolin/all/pangenomeGraph.json > '${pangenomeGraph_json}'
        #end if
        #if "output_pangenomeGraph_gexf" in $selected_outputs:
            && cat ./tmp_ppanggolin/all/pangenomeGraph.gexf > '${pangenomeGraph_gexf}'
        #end if
        #if "output_pangenomeGraph_light_gexf" in $selected_outputs:
            && cat ./tmp_ppanggolin/all/pangenomeGraph_light.gexf > '${pangenomeGraph_light_gexf}'
        #end if

        ## Basic tables
        #if "output_matrix" in $selected_outputs:
            && cat ./tmp_ppanggolin/all/matrix.csv > '${matrix}'
        #end if
        #if "output_mean_persistent_duplication" in $selected_outputs:
            && cat ./tmp_ppanggolin/all/mean_persistent_duplication.tsv > '${mean_persistent_duplication}'
        #end if
        #if "output_gene_families" in $selected_outputs:
            && cat ./tmp_ppanggolin/all/gene_families.tsv > '${gene_families}'
        #end if
        #if "output_gene_presence_absence" in $selected_outputs:
            && cat ./tmp_ppanggolin/all/gene_presence_absence.Rtab > '${gene_presence_absence}'
        #end if

        ## Outputs that are always produced.
        && cat ./tmp_ppanggolin/all/ppanggolin_info.txt > '${ppanggolin_info}'
        && cat ./tmp_ppanggolin/all/regions_of_genomic_plasticity.tsv > '${regions_of_genomic_plasticity}'
        && cat ./tmp_ppanggolin/all/pangenome.h5 > '${pangenome_h5}'
        && cat ./tmp_ppanggolin/all/genomes_statistics.tsv > '${genomes_statistics}'
    ]]></command>

    <inputs>
        <!-- Parameters supplied by macros; their definitions live in macros.xml. -->
        <expand macro="inputs_genomes" min="2" extratexthelp=" Processing of at least 15 genomes files is recommended." />
        <expand macro="inputs_identity_coverage_do_defrag"/>
        <expand macro="inputs_nb_of_partitions"/>
        <expand macro="inputs_translation_table"/>

        <!--
            Selection of the optional result files to add to the history.
            Every option is checked by default; the option values are the keys
            tested by the command template and by the output filters.
        -->
        <param name="advanced_pangenome_optional_files"
               type="select"
               multiple="true"
               optional="true"
               display="checkboxes"
               label="Add the following pangenome output files in the Galaxy history">
            <!-- Basic files -->
            <option value="output_gene_presence_absence" selected="true">Gene presence absence</option>
            <option value="output_gene_families" selected="true">Gene families</option>
            <option value="output_mean_persistent_duplication" selected="true">Mean persistent duplication</option>
            <option value="output_matrix" selected="true">Matrix</option>
            <!-- Plot files -->
            <option value="output_Ushaped_plot" selected="true">Ushaped plot</option>
            <option value="output_tile_plot" selected="true">Tile plot</option>
            <!-- Graph files -->
            <option value="output_pangenomeGraph_light_gexf" selected="true">PanGenome Graph Light (Gexf)</option>
            <option value="output_pangenomeGraph_gexf" selected="true">PanGenome Graph (Gexf)</option>
            <option value="output_pangenomeGraph_json" selected="true">PanGenome Graph (Json)</option>
            <!-- Advanced files -->
            <option value="output_summarize_spots" selected="true">Summarize spots</option>
            <option value="output_spots" selected="true">Spots</option>
            <option value="output_spot_borders" selected="true">Spot borders</option>
            <option value="output_border_protein_genes" selected="true">Border protein genes</option>
            <option value="output_modules_summary" selected="true">Modules summary</option>
            <option value="output_modules_in_genomes" selected="true">Modules in genomes</option>
            <option value="output_modules_spots" selected="true">Modules spots</option>
            <option value="output_modules_RGP_lists" selected="true">Modules RGP lists</option>
            <option value="output_functional_modules" selected="true">Functional modules</option>
        </param>
    </inputs>

    <outputs>
        <!--
            Optional outputs. Each one is created only when its key is checked in
            the "advanced_pangenome_optional_files" select; the leading truthiness
            test in every filter covers the case where nothing is checked.
        -->

        <!-- Modules, spots and RGP borders -->
        <data name="functional_modules" format="tsv" label="PPanGGOLiN all on ${on_string}: Functional modules">
            <filter>advanced_pangenome_optional_files and "output_functional_modules" in advanced_pangenome_optional_files</filter>
        </data>
        <data name="modules_RGP_lists" format="tsv" label="PPanGGOLiN all on ${on_string}: Modules RGP lists">
            <filter>advanced_pangenome_optional_files and "output_modules_RGP_lists" in advanced_pangenome_optional_files</filter>
        </data>
        <data name="modules_spots" format="tsv" label="PPanGGOLiN all on ${on_string}: Modules spots">
            <filter>advanced_pangenome_optional_files and "output_modules_spots" in advanced_pangenome_optional_files</filter>
        </data>
        <data name="modules_in_genomes" format="tsv" label="PPanGGOLiN all on ${on_string}: Modules in genomes">
            <filter>advanced_pangenome_optional_files and "output_modules_in_genomes" in advanced_pangenome_optional_files</filter>
        </data>
        <data name="modules_summary" format="tsv" label="PPanGGOLiN all on ${on_string}: Modules summary">
            <filter>advanced_pangenome_optional_files and "output_modules_summary" in advanced_pangenome_optional_files</filter>
        </data>
        <data name="border_protein_genes" format="fasta" label="PPanGGOLiN all on ${on_string}: Border protein genes">
            <filter>advanced_pangenome_optional_files and "output_border_protein_genes" in advanced_pangenome_optional_files</filter>
        </data>
        <data name="spot_borders" format="tsv" label="PPanGGOLiN all on ${on_string}: Spot borders">
            <filter>advanced_pangenome_optional_files and "output_spot_borders" in advanced_pangenome_optional_files</filter>
        </data>
        <data name="spots" format="tsv" label="PPanGGOLiN all on ${on_string}: Spots">
            <filter>advanced_pangenome_optional_files and "output_spots" in advanced_pangenome_optional_files</filter>
        </data>
        <data name="summarize_spots" format="tsv" label="PPanGGOLiN all on ${on_string}: Summarized spots">
            <filter>advanced_pangenome_optional_files and "output_summarize_spots" in advanced_pangenome_optional_files</filter>
        </data>

        <!-- Pangenome graphs -->
        <data name="pangenomeGraph_json" format="json" label="PPanGGOLiN all on ${on_string}: PanGenome Graph (JSON)">
            <filter>advanced_pangenome_optional_files and "output_pangenomeGraph_json" in advanced_pangenome_optional_files</filter>
        </data>
        <data name="pangenomeGraph_gexf" format="xml" label="PPanGGOLiN all on ${on_string}: PanGenome Graph (GEXF)">
            <filter>advanced_pangenome_optional_files and "output_pangenomeGraph_gexf" in advanced_pangenome_optional_files</filter>
        </data>
        <data name="pangenomeGraph_light_gexf" format="xml" label="PPanGGOLiN all on ${on_string}: PanGenome Graph Light (GEXF)">
            <filter>advanced_pangenome_optional_files and "output_pangenomeGraph_light_gexf" in advanced_pangenome_optional_files</filter>
        </data>

        <!-- Plots -->
        <data name="tile_plot" format="html" label="PPanGGOLiN all on ${on_string}: Tile plot">
            <filter>advanced_pangenome_optional_files and "output_tile_plot" in advanced_pangenome_optional_files</filter>
        </data>
        <data name="Ushaped_plot" format="html" label="PPanGGOLiN all on ${on_string}: U-shaped plot">
            <filter>advanced_pangenome_optional_files and "output_Ushaped_plot" in advanced_pangenome_optional_files</filter>
        </data>

        <!-- Basic tables -->
        <data name="matrix" format="csv" label="PPanGGOLiN all on ${on_string}: Matrix">
            <filter>advanced_pangenome_optional_files and "output_matrix" in advanced_pangenome_optional_files</filter>
        </data>
        <data name="mean_persistent_duplication" format="tsv" label="PPanGGOLiN all on ${on_string}: Mean persistent duplication">
            <filter>advanced_pangenome_optional_files and "output_mean_persistent_duplication" in advanced_pangenome_optional_files</filter>
        </data>
        <data name="gene_families" format="tsv" label="PPanGGOLiN all on ${on_string}: Gene families">
            <filter>advanced_pangenome_optional_files and "output_gene_families" in advanced_pangenome_optional_files</filter>
        </data>
        <data name="gene_presence_absence" format="tsv" label="PPanGGOLiN all on ${on_string}: Gene presence/absence matrix">
            <filter>advanced_pangenome_optional_files and "output_gene_presence_absence" in advanced_pangenome_optional_files</filter>
        </data>

        <!-- Outputs without a filter: always added to the history. -->
        <data name="ppanggolin_info" format="txt" label="PPanGGOLiN all on ${on_string}: PPanGGOLiN info"/>
        <data name="regions_of_genomic_plasticity" format="tsv" label="PPanGGOLiN all on ${on_string}: Regions of genomic plasticity"/>
        <data name="pangenome_h5" format="h5" label="PPanGGOLiN all on ${on_string}: PanGenome HDF5 file"/>
        <data name="genomes_statistics" format="tsv" label="PPanGGOLiN all on ${on_string}: Genome statistics"/>
    </outputs>

    <tests>
        <!-- Test 1: five gzipped FASTA genomes, default output selection (22 datasets expected). -->
        <test expect_num_outputs="22">
            <param name="nb_of_partitions" value="3"/>
            <param name="coverage" value="0.8"/>
            <param name="identity" value="0.8"/>
            <param name="translation_table" value="1"/>
            <param name="genomes" value="fasta/AP028611_984801_1194801.fasta.gz,fasta/CP107038_1022972_1232972.fasta.gz,fasta/CP113115_1290693_1440693.fasta.gz,fasta/LN831051_1254175_1464175.fasta.gz,fasta/NC_012467_959209_1169209.fasta.gz" ftype="fasta"/>
            <output name="genomes_statistics">
                <assert_contents>
                    <has_text text="#soft_core="/>
                    <has_text text="#duplication_margin="/>
                </assert_contents>
            </output>
            <output name="regions_of_genomic_plasticity">
                <assert_contents>
                    <has_text text="region"/>
                </assert_contents>
            </output>
            <output name="ppanggolin_info">
                <assert_contents>
                    <has_text text="Content:"/>
                </assert_contents>
            </output>
            <output name="Ushaped_plot">
                <assert_contents>
                    <has_text text="html"/>
                </assert_contents>
            </output>
        </test>

        <!-- Test 2: the same five genomes provided as gzipped GenBank files. -->
        <test expect_num_outputs="22">
            <param name="nb_of_partitions" value="3"/>
            <param name="coverage" value="0.8"/>
            <param name="identity" value="0.8"/>
            <param name="translation_table" value="1"/>
            <param name="genomes" value="genbank/AP028611_984801_1194801.gb.gz,genbank/CP107038_1022972_1232972.gb.gz,genbank/CP113115_1290693_1440693.gb.gz,genbank/LN831051_1254175_1464175.gb.gz,genbank/NC_012467_959209_1169209.gb.gz" ftype="genbank"/>
            <output name="genomes_statistics">
                <assert_contents>
                    <has_text text="#soft_core="/>
                    <has_text text="#duplication_margin="/>
                </assert_contents>
            </output>
            <output name="regions_of_genomic_plasticity">
                <assert_contents>
                    <has_text text="region"/>
                </assert_contents>
            </output>
            <output name="ppanggolin_info">
                <assert_contents>
                    <has_text text="Content:"/>
                </assert_contents>
            </output>
        </test>

        <!-- Test 3: GenBank and FASTA files mixed in one run, all declared as genbank; the job is expected to fail. -->
        <test expect_failure="true">
            <param name="nb_of_partitions" value="3"/>
            <param name="coverage" value="0.8"/>
            <param name="identity" value="0.8"/>
            <param name="translation_table" value="1"/>
            <param name="genomes" value="genbank/AP028611_984801_1194801.gb.gz,genbank/CP107038_1022972_1232972.gb.gz,genbank/CP113115_1290693_1440693.gb.gz,fasta/AP028611_984801_1194801.fasta.gz,fasta/CP107038_1022972_1232972.fasta.gz,fasta/CP113115_1290693_1440693.fasta.gz" ftype="genbank"/>
        </test>
    </tests>
    
    <help><![CDATA[
    
        PPanGGOLiN_ (Gautreau et al. 2020) is a software suite used to create and manipulate prokaryotic pangenomes from a set of either assembled 
        genomic DNA sequences or provided genome annotations. PPanGGOLiN builds pangenomes through a graphical model and a statistical method to partition gene 
        families in persistent, shell and cloud genomes. It integrates both information on protein-coding genes and their genomic neighborhood to build a graph 
        of gene families where each node is a gene family, and each edge is a relation of genetic contiguity. 

        The ``ppanggolin all`` command generates a partitioned pangenome graph with predicted RGPs (regions of genomic plasticity), spots and modules. Please see the documentation_ on how parameters can be tuned for this command.

        .. _PPanGGOLiN: https://github.com/labgem/PPanGGOLiN
        .. _documentation: https://ppanggolin.readthedocs.io/en/latest/user/QuickUsage/quickAnalyses.html
        
    ]]></help>

    <expand macro="citation"/>
    
</tool>