<?xml version="1.0" encoding="UTF-8"?>
<!--
    Galaxy wrapper around getSequenceInfo.pl.

    The command runs the Perl script shipped in the tool directory against one
    of the bundled genbank_KINGDOM_assembly_summary.txt files, writes
    everything below ./results, and then sorts the produced files into
    ./genbanks, ./assembly and ./reports so that the discover_datasets rules
    in the outputs section can pick them up as three list collections. The
    summary table is moved directly onto the summary_tsv dataset.
-->
<tool id="getseqinfo" name="getSequenceInfo tool" version="1.0.1">
    <description>allows to extract sequence data and specific information from GenBank </description>

    <requirements>
        <requirement type="package" version="1.7.2">perl-bioperl</requirement>
        <requirement type="package" version="2.32">perl-archive-tar</requirement>
        <requirement type="package" version="0.45">perl-file-copy-recursive</requirement>
        <requirement type="package" version="2.16">perl-file-path</requirement>
        <requirement type="package" version="2.79">perl-net-ftp</requirement>
        <requirement type="package" version="2.064">perl-io-uncompress-gunzip</requirement>
        <requirement type="package" version="6.15">perl-lwp-simple</requirement>
        <requirement type="package" version="1.38_03">perl-posix</requirement>
    </requirements>

    <!-- Legacy command kept for reference only; it is not used by Galaxy. -->
    <!--<command interpreter="bash">
    ./getSequenceInfo.sh genbank $availableKingdom $assemblylevel $species $quantity $output1
    perl $directory/getSequenceInfo.pl -directory $dir -k $availableKingdoms -l $assemblylevel -s $species -n $quantity -o $output

    mv results/folder/result/summary.html $summary_html &&
    mv results/folder/result/summary.xls $summary_tsv &&

    (mv results/folder/result/Assemby/*.fna ./assembly/ 2> /dev/null || true) &&

    (mv results/folder/result/GenBank/*.gbk ./genbanks/ 2> /dev/null || true)

    var1="genbank_";var2=$availableKingdoms_assembly_summary.txt"

    if [ -f "./results/result/${keyword}_folder/${keyword}.fasta" ]; then
    mv ./results/result/${keyword}_folder/${keyword}.fasta ./assembly/${keyword}.fna;
    fi


    </command>-->

    <command detect_errors="aggressive"><![CDATA[
        ## Every substituted parameter is single-quoted so that values
        ## containing spaces reach the script and mv as a single argument.
        perl '$__tool_directory__/getSequenceInfo.pl' -k '$availableKingdoms'
        ## The same text field carries either a species name or a TaxID;
        ## the radio selection decides which option it is passed with.
        #if $search == "1"
            -taxid '$species'
        #else if $search == "0"
            -s '$species'
        #end if
        -level '$assemblyLevel'
        ## -c receives the selected component, the keyword, or both joined
        ## by a comma; it is omitted when neither is set.
        #if $component == "" and $keyword != ''
            -c '$keyword'
        #else if $component != "" and $keyword == ""
            -c '$component'
        #else if $component != "" and $keyword != ""
            -c '${component},${keyword}'
        #end if
        -n '$quantity' -date '$date'
        -path '${__tool_directory__}/genbank_${availableKingdoms}_assembly_summary.txt' -o ./results &&
        ## Directories scanned by the discover_datasets rules of the outputs.
        mkdir ./assembly/ ./genbanks/ ./reports/ &&
        mv ./results/result/summary.xls '$summary_tsv' &&
        ## NOTE(review): each glob move below fails the whole job when nothing
        ## matches; confirm that an empty result is meant to be an error.
        mv ./results/result/GenBank/*.gbff ./genbanks/ &&
        mv ./results/result/Assembly/*.fna ./assembly/ &&
        mv ./results/result/Report/*.txt ./reports/ &&
        ## Optional per-component and per-keyword FASTA files are published
        ## with the FASTA collection when they exist.
        if [ -f './results/result/${component}_folder/${component}.fasta' ]; then
            mv './results/result/${component}_folder/${component}.fasta' './assembly/${component}.fna';
        fi &&
        if [ -f './results/result/${keyword}_folder/${keyword}.fasta' ]; then
            mv './results/result/${keyword}_folder/${keyword}.fasta' './assembly/${keyword}.fna';
        fi
        && rm -r ./results/
    ]]></command>

    <inputs>
        <!-- The selected value also picks the bundled assembly summary file. -->
        <param name="availableKingdoms" type="select" label="Select the kingdom:">
            <option value="bacteria">bacteria</option>
            <option value="protozoa">protozoa</option>
            <option value="viral">viral</option>
        </param>

        <!-- 0 passes the species field with -s, 1 passes it with -taxid. -->
        <param name="search" type="select" display="radio" label="Choose between Species (organism's name) or NCBI Taxonomy ID (TaxID)">
            <option value="0">Species</option>
            <option value="1">TaxID</option>
        </param>
        <param name="species" type="text" area="false" label="Species or NCBI TaxID (eg. Escherichia coli):"/>

        <param name="assemblyLevel" type="select" label="Assembly level:">
            <option value="Complete Genome">Complete Genome</option>
            <option value="Complete Genome,Chromosome,Scaffold,Contig">All</option>
        </param>

        <!-- The empty option means no component filter is requested. -->
        <param name="component" type="select" label="Sequence component:">
            <option value=""></option>
            <option value="plasmid">Plasmid</option>
            <option value="chromosome">Chromosome</option>
            <!--<option value="keyword"><param name="keyword" type="text" area="false" label="OR use a keyword:"/></option>-->
        </param>
        <param name="keyword" type="text" area="false" label="OR use a keyword (sequences having a description containing this keyword will be downloaded in a separate file):"/>

        <!-- NOTE(review): quantity is free text; an integer parameter may fit better, confirm what the script accepts for -n. -->
        <param name="quantity" type="text" area="false" value="3" label="Number of assemblies:"/>
        <param name="date" type="text" value="0000-00-00" area="false" label="From Release date (yyyy-mm-dd):"/>
    </inputs>

    <outputs>
        <!--
            The named group of each pattern must be written with escaped angle
            brackets: a literal less-than sign is not allowed inside an XML
            attribute value.
        -->
        <collection name="output" type="list" label="${tool.name}: GenBank files">
            <discover_datasets pattern="(?P&lt;name&gt;.+)\.gbff$" directory="./genbanks" format="genbank"/>
        </collection>
        <collection name="outputreport" type="list" label="${tool.name}: Report files">
            <discover_datasets pattern="(?P&lt;name&gt;.+)\.txt$" directory="./reports" format="txt"/>
        </collection>
        <collection name="outputfasta" type="list" label="${tool.name}: FASTA files">
            <discover_datasets pattern="(?P&lt;name&gt;.+)\.fna$" directory="./assembly" format="fasta"/>
        </collection>

        <!--<collection name="assemblies" type="list" label="${tool.name} on ${input.element_identifier}: Assembly">
            <discover_datasets pattern="__name_and_ext__" directory="assembly" />
        </collection>
        <collection name="genbanks" type="list" label="${tool.name} on ${input.element_identifier}: GenBank">
            <discover_datasets pattern="__name_and_ext__" directory="./genbanks" />
        </collection>-->
        <!--<data format="html" name="summary_html" label="summary.html"/>-->
        <data format="tabular" name="summary_tsv" label="${tool.name}: summary.tsv"/>
    </outputs>

    <help><![CDATA[
This tool allows to download sequences in FASTA or GenBank format, and retrieve specific information (such as country, pubmed ID, host, ...).

Please note that NCBI assembly_summary files used to download the sequences have been uploaded on September 6th, 2021.

To get more recent assembly_summary files, please use the command line or the GUI version of the tool.

GitHub: https://github.com/karubiotools/getSequenceInfo
    ]]></help>

</tool>