annotate PanExplorer.xml @ 19:b8ea3d2945b5 draft

Uploaded
author dereeper
date Mon, 27 Jan 2025 13:02:48 +0000
parents 716821f47909
children e23149f1f55c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
17
db83eb7c8f78 Uploaded
dereeper
parents: 16
diff changeset
1 <tool id="pangenome_explorer" name="PanExplorer2" version="2.0">
2
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
2 <description> Bacterial pan-genome analysis </description>
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
3 <requirements>
7
2c65d4257fe6 Uploaded
dereeper
parents: 6
diff changeset
4 <requirement type="package" version="1.30">perl-yaml</requirement>
2c65d4257fe6 Uploaded
dereeper
parents: 6
diff changeset
5 <requirement type="package" version="3.8.7">singularity</requirement>
18
716821f47909 Uploaded
dereeper
parents: 17
diff changeset
6 <container type="docker">docker.io/dereeper/panexplorer_workflow:latest</container>
2
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
7 </requirements>
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
8
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
9 <command><![CDATA[
7
2c65d4257fe6 Uploaded
dereeper
parents: 6
diff changeset
10
16
1e0698511c9f Uploaded
dereeper
parents: 15
diff changeset
11 export PANEX_PATH=/usr/local/bin/PanExplorer_workflow;
14
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
12
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
13 #if str($mode.mode) == "accessions":
16
1e0698511c9f Uploaded
dereeper
parents: 15
diff changeset
14 perl ${__tool_directory__}/generateConfig.pl 'None' '$input' config.yaml 'None';
14
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
15 #else if str($mode.mode) == "genbanks":
16
1e0698511c9f Uploaded
dereeper
parents: 15
diff changeset
16 perl ${__tool_directory__}/generateConfig.pl '$private_genomes' 'None' config.yaml 'None';
14
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
17 #else if str($mode.mode) == "fasta":
16
1e0698511c9f Uploaded
dereeper
parents: 15
diff changeset
18 perl ${__tool_directory__}/generateConfig.pl '$private_genomes' 'None' config.yaml '$private_genomes_fasta';
14
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
19 #end if
2
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
20
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
21 cat config.yaml >$logfile;
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
22
19
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
23 touch $vcf;
2
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
24
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
25 #if $software=="pgap"
19
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
26 snakemake --cores 1 --config identity=$min_identity -s \$PANEX_PATH/Snakemake_files/Snakefile_wget_PGAP_heatmap_upset_COG >>$logfile 2>&1;
2
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
27 #else if $software=="roary"
19
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
28 snakemake --cores 1 --config identity=$min_identity -s \$PANEX_PATH/Snakemake_files/Snakefile_wget_roary_heatmap_upset_COG >>$logfile 2>&1;
2
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
29 #else if $software=="orthofinder"
19
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
30 snakemake --cores 1 --config identity=$min_identity -s \$PANEX_PATH/Snakemake_files/Snakefile_orthofinder_heatmap_upset >>$logfile 2>&1;
2
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
31 #else if $software=="cactus"
19
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
32 snakemake --cores 1 --config identity=$min_identity -s \$PANEX_PATH/Snakemake_files/Snakefile_wget_cactus_heatmap_upset_COG2 >>$logfile 2>&1;
2
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
33 #else if $software=="pggb"
19
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
34 snakemake --cores 1 --config identity=$min_identity -s \$PANEX_PATH/Snakemake_files/Snakefile_wget_pggb_heatmap_upset_COG >>$logfile 2>&1;
2
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
35 #else
19
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
36 snakemake --cores 1 --config identity=$min_identity -s \$PANEX_PATH/Snakemake_files/Snakefile_wget_panacota_heatmap_upset_COG >>$logfile 2>&1;
2
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
37 #end if
19
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
38
2
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
39
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
40 ]]></command>
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
41
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
42
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
43 <inputs>
14
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
44 <conditional name="mode">
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
45 <param name = "mode" type="select" label="What is your inputs?">
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
46 <option value="accessions">Prokaryote genomes: List of Genbank assembly accessions (GCA)</option>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
47 <option value="genbanks">Prokaryote genomes: Genbank files</option>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
48 <option value="fasta">Eukaryote genomes: FASTA + GFF files</option>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
49 </param>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
50 <when value="accessions">
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
51 <param name="input" type="text" multiple="true" label="List of genbank identifiers" help="Coma separated list (ex: GCA_000007385.1,GCA_000010025.1,GCA_000019585.2)"/>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
52 <param type="select" name="software" label="Choose the pan-genome software">
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
53 <option value="roary">Roary</option>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
54 <option value="panacota">PanACoTA</option>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
55 <option value="pggb">PanGenome Graph Builder (PGGB)</option>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
56 </param>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
57 </when>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
58 <when value="genbanks">
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
59 <param name="private_genomes" type="data" format="zip" label="Zip of genbank files" optional="true"/>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
60 <param type="select" name="software" label="Choose the pan-genome software">
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
61 <option value="roary">Roary</option>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
62 <option value="panacota">PanACoTA</option>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
63 <option value="pggb">PanGenome Graph Builder (PGGB)</option>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
64 </param>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
65 </when>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
66 <when value="fasta">
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
67 <param name="private_genomes_fasta" type="data" format="zip" label="Zip of Fasta files" optional="true"/>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
68 <param name="private_genomes" type="data" format="zip" label="Zip of GFF files" optional="true"/>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
69 <param type="select" name="software" label="Choose the pan-genome software">
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
70 <option value="orthofinder">OrthoFinder</option>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
71 <option value="cactus">Minigraph-Cactus</option>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
72 <option value="pggb">PanGenome Graph Builder (PGGB)</option>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
73 </param>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
74 </when>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
75 </conditional>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
76
2
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
77 <param name="min_identity" type="text" value="80" label="Minimum percentage identity for BlastP" />
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
78 </inputs>
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
79
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
80 <outputs>
19
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
81 <data from_work_dir="outputs/pav_matrix.tsv" format="txt" name="output" label="Pangenome presence absence matrix"/>
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
82 <data from_work_dir="outputs/heatmap.svg.complete.pdf.distance_matrix.hclust.newick" format="newick" name="njtree" label="PanBased NJ tree"/>
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
83 <data from_work_dir="outputs/genomes/genes.txt" format="txt" name="genes" label="Genes"/>
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
84 <data from_work_dir="outputs/cog_output.txt" format="txt" name="cogfile" label="COG assignation"/>
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
85 <data from_work_dir="outputs/GCskew.txt" format="txt" name="gcfile" label="GC_percent"/>
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
86 <data from_work_dir="outputs/upsetr.svg" format="svg" name="upset" label="Upset Diagram"/>
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
87 <data from_work_dir="outputs/heatmap.svg.gz" format="svg" name="heatmap" label="Presence Absence Heatmap"/>
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
88 <data from_work_dir="outputs/heatmap.svg.heatmap_plotly.html" format="html" name="heatmap_html" label="Presence Absence Heatmap interactive"/>
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
89 <data from_work_dir="outputs/cog_stats.txt" format="tabular" name="outcog_stat" label="COG category counts"/>
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
90 <data from_work_dir="outputs/cog_stats2.txt" format="tabular" name="outcog_stat2" label="COG category 2 counts"/>
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
91 <data from_work_dir="outputs/cog_of_clusters.txt" format="tabular" name="outcog_clusters" label="COG of clusters"/>
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
92 <data from_work_dir="outputs/fastani.out.matrix.complete" format="tabular" name="fastani" label="ANI" />
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
93 <data from_work_dir="outputs/fastani.out.svg" format="svg" name="ani_svg" label="ANI heatmap" />
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
94 <data from_work_dir="outputs/rarefaction_curves.txt" format="txt" name="rarefaction_curves" label="Rarefaction curves data"/>
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
95 <data from_work_dir="outputs/rarefaction_curves.svg" format="svg" name="rarefaction_curves_svg" label="Rarefaction curves"/>
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
96 <data from_work_dir="outputs/heaps.tsv" format="txt" name="heaps" label="Heaps law alpha"/>
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
97 <data from_work_dir="outputs/heatmap.svg.complete.pdf.distance_matrix.txt" format="txt" name="distance_matrix" label="Accessory based distance matrix"/>
b8ea3d2945b5 Uploaded
dereeper
parents: 18
diff changeset
98 <data from_work_dir="outputs/all_genomes.vcf" format="vcf" name="vcf" label="VCF file"/>
2
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
99 <data format="txt" name="logfile" label="Logfile"/>
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
100 <data format="txt" name="roary_log" label="Roary Logfile"/>
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
101 </outputs>
14
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
102 <tests>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
103 <test>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
104 <param name="input" value="GCA_000007385.1,GCA_000010025.1,GCA_000019585.2"/>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
105 <param name="min_identity" value="80"/>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
106 <param name="software" value="panacota"/>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
107 <param name="private_genomes" value=""/>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
108 <param name="private_genomes_fasta" value=""/>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
109 <output name="distance_matrix" value="Accessory_based_distance_matrix.txt"/>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
110 <output name="fastani" value="ANI.txt"/>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
111 </test>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
112 </tests>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
113 <help>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
114
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
115 PanExplorer
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
116 ===========
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
117
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
118 The PanExplorer workflow is a Snakemake workflow that can be run in the backend of the PanExplorer web application.
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
119
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
120 Homepage: https://panexplorer.southgreen.fr/
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
121
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
122 It performs a pan-genome analysis of published and annotated bacterial genomes, using one of several tools that can be invoked: Roary, PGAP, PanACoTA.
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
123
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
124 Pangenome graph builders have recently been added to the pipeline: Minigraph-Cactus and PGGB (PanGenome Graph Builder).
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
125
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
126 It provides a presence/absence matrix of genes, an UpSetR diagram summarizing the matrix information, and a COG assignment summary for each strain.
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
127
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
128 Please visit the GitHub page for the PanExplorer workflow at: https://github.com/SouthGreenPlatform/PanExplorer_workflow
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
129
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
130
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
131 Inputs
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
132 ------
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
133
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
134 Inputs can be provided as one of the following:
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
135
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
136 * **List of genbank assembly identifiers** comma-separated (ex: GCA_000007385.1,GCA_000010025.1,GCA_000019585.2)
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
137 * **Zip of genbank files** They must include the gene annotation and the complete sequence data
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
138 * **Zip of FASTA file of genomes + Zip of GFF annotation files**: In order to make the association between sequence and annotation, they must be named with the same basename as follows: genome1.fasta, genome1.gff, myspeciesXXX.fasta, myspeciesXXX.gff...
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
139
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
140
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
141
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
142 Outputs
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
143 -------
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
144
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
145 Among the outputs:
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
146
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
147 * **Pangenome presence absence matrix** Pangene presence/absence matrix indicating the PAV (Presence Absence Variation) of clustered genes.
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
148 * **PanBased NJ tree** Distance tree based on PAV data
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
149 * **Heaps law alpha** Estimate of whether the pan-genome is open or closed, based on a Heaps law model.
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
150 * **Rarefaction curves** A rarefaction curve is the cumulative number of gene clusters observed as more and more genomes are considered.
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
151 * **ANI** Average Nucleotide Identity between genomes
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
152 * **ANI heatmap** image as SVG
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
153 * **VCF file** If a pan-genome graph tool has been selected, this is a VCF of the variations among all samples.
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
154
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
155
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
156
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
157 </help>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
158 <citations>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
159 <citation type="doi">10.1093/bioinformatics/btac504</citation>
5a5c9a6b047b Uploaded
dereeper
parents: 13
diff changeset
160 </citations>
2
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
161
97e4e3e818b6 Uploaded
dereeper
parents:
diff changeset
162 </tool>