annotate getSequenceInfo.xml @ 0:19ae17458c14 draft default tip

Uploaded
author dcouvin
date Wed, 15 Sep 2021 21:35:09 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
1 <tool id="getseqinfo" name="getSequenceInfo tool" version="1.0.1">
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
2 <description>extracts sequence data and specific information from GenBank</description>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
3
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
4 <requirements>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
5 <requirement type="package" version="1.7.2">perl-bioperl</requirement>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
6 <requirement type="package" version="2.32">perl-archive-tar</requirement>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
7 <requirement type="package" version="0.45">perl-file-copy-recursive</requirement>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
8 <requirement type="package" version="2.16">perl-file-path</requirement>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
9 <requirement type="package" version="2.79">perl-net-ftp</requirement>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
10 <requirement type="package" version="2.064">perl-io-uncompress-gunzip</requirement>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
11 <requirement type="package" version="6.15">perl-lwp-simple</requirement>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
12 <requirement type="package" version="1.38_03">perl-posix</requirement>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
13 </requirements>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
14
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
15 <!--<command interpreter="bash">
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
16 ./getSequenceInfo.sh genbank $availableKingdom $assemblylevel $species $quantity $output1
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
17 perl $directory/getSequenceInfo.pl -directory $dir -k $availableKingdoms -l $assemblylevel -s $species -n $quantity -o $output
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
18
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
19 mv results/folder/result/summary.html $summary_html &&
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
20 mv results/folder/result/summary.xls $summary_tsv &&
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
21
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
22 (mv results/folder/result/Assemby/*.fna ./assembly/ 2> /dev/null || true) &&
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
23
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
24 (mv results/folder/result/GenBank/*.gbk ./genbanks/ 2> /dev/null || true)
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
25
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
26 var1="genbank_";var2=$availableKingdoms_assembly_summary.txt"
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
27
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
28 if [ -f "./results/result/${keyword}_folder/${keyword}.fasta" ]; then
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
29 mv ./results/result/${keyword}_folder/${keyword}.fasta ./assembly/${keyword}.fna;
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
30 fi
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
31
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
32
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
33 </command>-->
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
34
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
35 <command detect_errors="aggressive"><![CDATA[
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
36 perl $__tool_directory__/getSequenceInfo.pl -k "$availableKingdoms"
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
37 #if $search == "1"
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
38 -taxid "$species"
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
39 #else if $search == "0"
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
40 -s "$species"
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
41 #end if
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
42 -level "$assemblyLevel"
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
43 #if $component == "" and $keyword != ''
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
44 -c "$keyword"
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
45 #else if $component != "" and $keyword == ""
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
46 -c "$component"
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
47 #else if $component != "" and $keyword != ""
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
48 -c "${component},${keyword}"
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
49 #end if
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
50 -n $quantity -date "$date"
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
51 -path "${__tool_directory__}/genbank_${availableKingdoms}_assembly_summary.txt" -o ./results &&
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
52 mkdir ./assembly/ &&
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
53 mkdir ./genbanks/ &&
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
54 mkdir ./reports/ &&
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
55 mv ./results/result/summary.xls $summary_tsv &&
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
56 mv ./results/result/GenBank/*.gbff ./genbanks/ &&
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
57 mv ./results/result/Assembly/*.fna ./assembly/ &&
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
58 mv ./results/result/Report/*.txt ./reports/ &&
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
59 if [ -f "./results/result/${component}_folder/${component}.fasta" ]; then
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
60 mv ./results/result/${component}_folder/${component}.fasta ./assembly/${component}.fna;
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
61 fi &&
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
62 if [ -f "./results/result/${keyword}_folder/${keyword}.fasta" ]; then
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
63 mv ./results/result/${keyword}_folder/${keyword}.fasta ./assembly/${keyword}.fna;
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
64 fi
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
65 && rm -r ./results/
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
66
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
67 ]]></command>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
68
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
69
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
70 <inputs>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
71
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
72 <param name="availableKingdoms" type="select" label="Select the kingdom:">
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
73 <option value="bacteria">bacteria</option>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
74 <option value="protozoa">protozoa</option>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
75 <option value="viral">viral</option>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
76 </param>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
77
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
78 <param name="search" type="select" display="radio" label="Choose between Species (organism's name) or NCBI Taxonomy ID (TaxID)">
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
79 <option value="0">Species</option>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
80 <option value="1">TaxID</option>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
81 </param>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
82 <param name="species" type="text" area="false" label="Species or NCBI TaxID (eg. Escherichia coli):"/>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
83
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
84 <param name="assemblyLevel" type="select" label="Assembly level:">
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
85 <option value="Complete Genome">Complete Genome</option>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
86 <option value="Complete Genome,Chromosome,Scaffold,Contig">All</option>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
87 </param>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
88
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
89 <param name="component" type="select" label="Sequence component:">
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
90 <option value=""></option>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
91 <option value="plasmid">Plasmid</option>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
92 <option value="chromosome">Chromosome</option>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
93 <!--<option value="keyword"><param name="keyword" type="text" area="false" label="OR use a keyword:"/></option>-->
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
94 </param>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
95 <param name="keyword" type="text" area="false" label="OR use a keyword (sequences having a description containing this keyword will be downloaded in a separate file):"/>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
96
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
97 <param name="quantity" type="integer" value="3" label="Number of assemblies:"/> <!-- integer, not text: the command passes this unquoted as "-n $quantity", so Galaxy must reject empty or non-numeric input before the command line is built -->
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
98 <param name="date" type="text" value="0000-00-00" area="false" label="From Release date (yyyy-mm-dd):"/>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
99
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
100
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
101 </inputs>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
102
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
103 <outputs>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
104 <collection name="output" type="list" label="${tool.name}: GenBank files">
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
105 <discover_datasets pattern="(?P&lt;name&gt;.+)\.gbff$" directory="./genbanks" format="genbank"/>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
106 </collection>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
107 <collection name="outputreport" type="list" label="${tool.name}: Report files">
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
108 <discover_datasets pattern="(?P&lt;name&gt;.+)\.txt$" directory="./reports" format="txt"/>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
109 </collection>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
110 <collection name="outputfasta" type="list" label="${tool.name}: FASTA files">
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
111 <discover_datasets pattern="(?P&lt;name&gt;.+)\.fna$" directory="./assembly" format="fasta"/>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
112 </collection>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
113
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
114 <!--<collection name="assemblies" type="list" label="${tool.name} on ${input.element_identifier}: Assembly">
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
115 <discover_datasets pattern="__name_and_ext__" directory="assembly" />
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
116 </collection>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
117 <collection name="genbanks" type="list" label="${tool.name} on ${input.element_identifier}: GenBank">
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
118 <discover_datasets pattern="__name_and_ext__" directory="./genbanks" />
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
119 </collection>-->
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
120 <!--<data format="html" name="summary_html" label="summary.html"/>-->
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
121 <data format="tabular" name="summary_tsv" label="${tool.name}: summary.tsv"/>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
122 </outputs>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
123
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
124
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
125 <help><![CDATA[
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
126 This tool downloads sequences in FASTA or GenBank format and retrieves specific information (such as country, PubMed ID, host, ...).
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
127
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
128 Please note that the NCBI assembly_summary files used to download the sequences were uploaded on September 6th, 2021.
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
129
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
130 To get more recent assembly_summary files, please use the command line or the GUI version of the tool.
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
131
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
132 GitHub: https://github.com/karubiotools/getSequenceInfo
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
133 ]]></help>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
134
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
135 </tool>
19ae17458c14 Uploaded
dcouvin
parents:
diff changeset
136