Mercurial > repos > fubar > egapx_runner
annotate egapx_runner.xml @ 6:a7304162d737 draft
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
author | fubar |
---|---|
date | Sun, 04 Aug 2024 02:30:36 +0000 |
parents | 6effccc966d0 |
children | 9c778770514f |
rev | line source |
---|---|
1
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
1 <tool name="egapx_runner" id="egapx_runner" version="6.0.1" profile="22.05"> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
2 <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay--> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
3 <!--Created by toolfactory@galaxy.org at 03/08/2024 10:40:32 using the Galaxy Tool Factory.--> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
4 <description>Runs egapx</description> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
5 <requirements> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
6 <requirement version="3.12.3" type="package">python</requirement> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
7 <requirement version="24.04.4-0" type="package">nextflow</requirement> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
8 <requirement version="6.0.1" type="package">pyyaml</requirement> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
9 </requirements> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
10 <version_command><![CDATA[echo "6.0.1"]]></version_command> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
11 <command><![CDATA[mkdir -p ./egapx_config && |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
12 #set econfigfile = $econfig + '.config' |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
13 cp '$__tool_directory__/ui/assets/config/executor/$econfigfile' ./egapx_config/ && |
5
6effccc966d0
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
fubar
parents:
4
diff
changeset
|
14 python '$__tool_directory__/ui/egapx.py' '$yamlconfig' -e '$econfig' -o 'egapx_out']]></command> |
1
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
15 <inputs> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
16 <param name="yamlconfig" type="data" optional="false" label="egapx configuration yaml file to execute" help="" format="yaml,txt" multiple="false"/> |
4
6592ae57bb8b
planemo upload for repository https://github.com/ncbi/egapx commit cb2d8304fde9fad4348296c3a51b7992ac5b83bb
fubar
parents:
2
diff
changeset
|
17 <param name="econfig" type="select" label="Workflow run configuration to suit the machine in use" help="Docker minimal will run the sample minimal dustmite yaml"> |
6
a7304162d737
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents:
5
diff
changeset
|
18 <option value="docker_minimal">Docker_minimal: supports only the minimal dust mite example yaml using 6GB and 4 cores</option> |
a7304162d737
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents:
5
diff
changeset
|
19 <option value="singularity">Singularity: requires at least 128GB ram and 32 cores. 256GB and 64 cores recommended</option> |
a7304162d737
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents:
5
diff
changeset
|
20 <option value="docker">Docker: requires at least 128GB ram and 32 cores. 256GB and 64 cores recommended</option> |
1
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
21 </param> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
22 </inputs> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
23 <outputs> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
24 <collection name="egapx_out" type="list" label="Outputs from egapx"> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
25 <discover_datasets pattern="__name_and_ext__" directory="egapx_out" visible="false"/> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
26 </collection> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
27 </outputs> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
28 |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
29 |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
30 <tests> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
31 <test> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
32 <output_collection name="egapx_out" count="8"/> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
33 <param name="yamlconfig" value="yamlconfig_sample"/> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
34 <param name="econfig" value="docker_minimal"/> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
35 </test> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
36 </tests> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
37 |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
38 |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
39 |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
40 <help><![CDATA[ |
2
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
41 Galaxy tool wrapping the Eukaryotic Genome Annotation Pipeline (EGAPx) |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
42 ================================================================================================= |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
43 |
6
a7304162d737
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents:
5
diff
changeset
|
44 .. class:: warningmark |
a7304162d737
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents:
5
diff
changeset
|
45 |
5
6effccc966d0
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
fubar
parents:
4
diff
changeset
|
46 **Proof of concept: a quick hack to run a NF workflow inside a specialised Galaxy tool wrapper** |
6effccc966d0
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
fubar
parents:
4
diff
changeset
|
47 |
6
a7304162d737
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents:
5
diff
changeset
|
48 EGAPx is a big, complicated Nextflow workflow, challenging and costly to re-implement **properly**, requiring dozens of new tools and replicating a lot of |
a7304162d737
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents:
5
diff
changeset
|
49 complicated *groovy* workflow logic. |
5
6effccc966d0
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
fubar
parents:
4
diff
changeset
|
50 |
6
a7304162d737
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents:
5
diff
changeset
|
51 It is also very new and under rapid development. Investing developer effort in a re-implementation, and keeping it up to date as EGAPx changes, may be an *inefficient use of developer resources*. |
2
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
52 |
5
6effccc966d0
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
fubar
parents:
4
diff
changeset
|
53 This wrapper is designed to allow measuring how *inefficient* it is in terms of computing resource utilisation, in comparison to the developer effort |
6effccc966d0
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
fubar
parents:
4
diff
changeset
|
54 required to convert Nextflow DSL into Galaxy tools and workflow logic. Balancing these competing requirements is a fundamental Galaxy challenge. |
6effccc966d0
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
fubar
parents:
4
diff
changeset
|
55 |
6effccc966d0
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
fubar
parents:
4
diff
changeset
|
56 |
6
a7304162d737
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents:
5
diff
changeset
|
57 EGAPx requires very substantial resources to run with real data. *128GB and 32 cores* are the minimum requirement; *256GB and 64 cores* are recommended. |
2
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
58 |
6
a7304162d737
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents:
5
diff
changeset
|
59 A special minimal example that can be run in 6GB with 4 cores is provided as a yaml configuration and is used for the tool test. |
2
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
60 |
6
a7304162d737
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents:
5
diff
changeset
|
61 In this implementation, the user must supply a yaml configuration file as initial proof of concept. |
a7304162d737
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents:
5
diff
changeset
|
62 History inputs and even a yaml editor might be provided in future. |
2
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
63 |
5
6effccc966d0
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
fubar
parents:
4
diff
changeset
|
64 The NF workflow to tool model tested here may be applicable to other NF workflows that take a single configuration yaml. |
2
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
65 |
6
a7304162d737
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents:
5
diff
changeset
|
66 .. class:: warningmark |
a7304162d737
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents:
5
diff
changeset
|
67 |
a7304162d737
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents:
5
diff
changeset
|
68 The computational resource cost of typing the wrong SRA identifiers into a tool form is potentially enormous with this tool! |
a7304162d737
planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents:
5
diff
changeset
|
69 |
2
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
70 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
71 Sample yaml configurations |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
72 =========================== |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
73 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
74 YAML sample configurations can be uploaded into your Galaxy history from the `EGAPx github repository <https://github.com/ncbi/egapx/tree/main/examples/>`_. |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
75 The simplest possible example is shown below. It can be copied and pasted into a new history dataset using the upload tool. |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
76 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
77 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
78 *./examples/input_D_farinae_small.yaml* is included in the examples linked above. RNA-seq data is provided as URIs to the read FASTA files. |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
79 These FASTA files are a sampling of the reads from the complete SRA read files to expedite testing. |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
80 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
81 :: |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
82 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
83 genome: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
84 taxid: 6954 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
85 reads: |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
86 - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/data/Dermatophagoides_farinae_small/SRR8506572.1 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
87 - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/data/Dermatophagoides_farinae_small/SRR8506572.2 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
88 - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/data/Dermatophagoides_farinae_small/SRR9005248.1 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
89 - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/data/Dermatophagoides_farinae_small/SRR9005248.2 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
90 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
91 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
92 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
93 Purpose |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
94 ======== |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
95 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
96 **This is not intended for production** |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
97 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
98 Just a proof of concept. |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
99 It is possibly too inefficient to be useful although it may turn out not to be a problem if run on a dedicated workstation. |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
100 At least the efficiency can now be more easily estimated. |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
101 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
102 This tool is not recommended for public deployment because of the resource demands. |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
103 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
104 EGAPx Overview |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
105 =============== |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
106 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
107 .. image:: $PATH_TO_IMAGES/Pipeline_sm_ncRNA_CAGE_80pct.png |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
108 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
109 **Warning:** |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
110 The current version is an alpha release with limited features and organism scope to collect initial feedback on execution. Outputs are not yet complete and not intended for production use. Please open a GitHub `Issue <https://github.com/ncbi/egapx/issues>`_ if you encounter any problems with EGAPx. You can also write to cgr@nlm.nih.gov to give us your feedback or if you have any questions. |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
111 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
112 EGAPx is the publicly accessible version of the updated NCBI `Eukaryotic Genome Annotation Pipeline <https://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/>`_. |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
113 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
114 EGAPx takes an assembly fasta file, a taxid of the organism, and RNA-seq data. Based on the taxid, EGAPx will pick protein sets and HMM models. The pipeline runs ``miniprot`` to align protein sequences, and ``STAR`` to align RNA-seq to the assembly. Protein alignments and RNA-seq read alignments are then passed to ``Gnomon`` for gene prediction. In the first step of ``Gnomon``, the short alignments are chained together into putative gene models. In the second step, these predictions are further supplemented by *ab-initio* predictions based on HMM models. The final annotation for the input assembly is produced as a ``gff`` file. |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
115 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
116 **Security Notice:** |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
117 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
118 EGAPx has dependencies in and outside of its execution path that include several thousand files from the `NCBI C++ toolkit <https://www.ncbi.nlm.nih.gov/toolkit>`_, and more than a million total lines of code. Static Application Security Testing has shown a small number of verified buffer overrun security vulnerabilities. Users should consult with their organizational security team on risk and if there is concern, consider mitigating options like running via VM or cloud instance. |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
119 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
120 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
121 *To specify an array of NCBI SRA datasets in yaml* |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
122 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
123 :: |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
124 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
125 reads: |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
126 - SRR8506572 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
127 - SRR9005248 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
128 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
129 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
130 *To specify an SRA entrez query* |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
131 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
132 :: |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
133 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
134 reads: 'txid6954[Organism] AND biomol_transcript[properties] NOT SRS024887[Accession] AND (SRR8506572[Accession] OR SRR9005248[Accession] )' |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
135 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
136 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
137 **Note:** Both the above examples will have more RNA-seq data than the ``input_D_farinae_small.yaml`` example. To make sure the entrez query does not produce a large number of SRA runs, please run it first at the `NCBI SRA page <https://www.ncbi.nlm.nih.gov/sra>`_. If there are too many SRA runs, then select a few of them and list them in the input yaml. |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
138 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
139 Output |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
140 ======= |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
141 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
142 EGAPx output will appear as a collection in the user history. The main annotation file is called *accept.gff*. |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
143 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
144 :: |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
145 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
146 accept.gff |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
147 annot_builder_output |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
148 nextflow.log |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
149 run.report.html |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
150 run.timeline.html |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
151 run.trace.txt |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
152 run_params.yaml |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
153 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
154 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
155 The ``nextflow.log`` file captures information about every process and its work directory. ``run_params.yaml`` has all the parameters that were used in the EGAPx run. More information about process time and resources can be found in the other ``run.*`` files. |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
156 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
157 **Intermediate files** |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
158 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
159 In the log, each line denotes a process that completed in the workflow. The first column (*e.g.* ``[96/621c4b]``) is the subdirectory where the intermediate output files and logs are found for the process on the same line, *i.e.*, ``egapx:miniprot:run_miniprot``. To see the intermediate files for that process, go to the work directory path that you supplied and traverse to the subdirectory ``96/621c4b``: |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
160 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
161 :: |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
162 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
163 $ aws s3 ls s3://temp_datapath/D_farinae/96/ |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
164 PRE 06834b76c8d7ceb8c97d2ccf75cda4/ |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
165 PRE 621c4ba4e6e87a4d869c696fe50034/ |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
166 $ aws s3 ls s3://temp_datapath/D_farinae/96/621c4ba4e6e87a4d869c696fe50034/ |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
167 PRE output/ |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
168 2024-03-27 11:19:18 0 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
169 2024-03-27 11:19:28 6 .command.begin |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
170 2024-03-27 11:20:24 762 .command.err |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
171 2024-03-27 11:20:26 762 .command.log |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
172 2024-03-27 11:20:23 0 .command.out |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
173 2024-03-27 11:19:18 13103 .command.run |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
174 2024-03-27 11:19:18 129 .command.sh |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
175 2024-03-27 11:20:24 276 .command.trace |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
176 2024-03-27 11:20:25 1 .exitcode |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
177 $ aws s3 ls s3://temp_datapath/D_farinae/96/621c4ba4e6e87a4d869c696fe50034/output/ |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
178 2024-03-27 11:20:24 17127134 aligns.paf |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
179 |
a3b158471bd3
planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents:
1
diff
changeset
|
180 |
1
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
181 ]]></help> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
182 <citations> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
183 <citation type="doi">10.1093/bioinformatics/bts573</citation> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
184 </citations> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
185 </tool> |
c8e1543546f8
planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff
changeset
|
186 |