annotate blasttoolssearch/blasttoolssearch.xml @ 1:5687b8f1ad69 draft

fix datatype for summary
author fubar
date Wed, 19 Jul 2023 05:48:53 +0000
parents ee581a90a85e
children e213ae40f480
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
1 <tool name="blasttoolssearch" id="blasttoolssearch" version="3.0">
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
2 <!--Source in git at: https://github.com/fubar2/galaxy-->
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
3 <!--Created by toolfactory@galaxy.org at 19/07/2023 12:39:19 using the Galaxy Tool Factory.-->
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
4 <description>Runs a legacy Java jar called blasttools from https://github.com/schmidda/blast-tools/tree/master</description>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
5 <requirements>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
6 <requirement type="package">csvtk</requirement>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
7 <requirement type="package">openjdk</requirement>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
8 </requirements>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
9 <stdio>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
10 <exit_code range="1:" level="fatal"/>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
11 </stdio>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
12 <version_command><![CDATA[echo "3.0"]]></version_command>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
13 <command><![CDATA[bash
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
14 $runme
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
15 $blastn_search_outputs
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
16 $__tool_directory__/BlastTools.jar
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
17 $summary_viruses_viroids
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
18 ]]></command>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
19 <configfiles>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
20 <configfile name="runme"><![CDATA[#raw
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
21
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
22
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
23 ## eResearch Office, QUT
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
24 ## Created: 31 March 2021
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
25 ## Last modified: 28 September 2022
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
26 ## Script: Processes Galaxy Australia generated blastN outputs to summarise and report hits to REGULATED and ENDEMIC viruses/viroids.
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
27 ## Usage: ./run_VirReport_Summary.sh
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
28 ## changed to accept a single input file name passed as $1
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
29 ## Ross Lazarus for a ToolFactory wrapper for Roberto Barrero
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
30 ## July 18 2023
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
31
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
32 dataPath=${PWD}
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
33
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
34 # Requirement: One or more GA-VSD .tabular outputs need to be in the folder where the command above (Usage) is executed.
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
35 # The script will look for all files with the suffix *.tabular
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
36
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
37 #Processing tabular files
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
38 file=$1
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
39
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
40 var=$(basename $file)
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
41
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
42 #STEP0: fetch Top 1 Hits
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
43 cat $file | awk '{print $1}' | sort | uniq > ${var}.top1.ids
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
44 for i in `cat ${var}.top1.ids`
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
45 do
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
46 echo "fetching top hits..." $i;
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
47 grep $i $file | head -1 >> ${var}.top1Hits.txt;
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
48 done
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
49
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
50 #STEP1: modify the columns of Galaxy Australia (GA) blast output to the format expected by the BlastTools.jar tool
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
51 ###### namely: qseqid sgi sacc length pident mismatch gapopen qstart qend qlen sstart send slen sstrand evalue bitscore qcovhsp stitle staxids qseq sseq sseqid qcovs qframe sframe
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
52 cat ${var}.top1Hits.txt |csvtk cut -H -t -f 1,19,20,4,3,5,6,7,8,17,9,10,18,22,11,12,24,21,25,15,16,2,23,13,14 | sed 's/ /_/g' > ${var}.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
53
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
54 #STEP2: summarise the GA blastN files
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
55 java -jar $2 -t blastn ${var}.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
56 #filter virus/viroid/endo
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
57 cat summary_${var}.txt | grep "virus\|viroid\|endo" > summary_${var}_filtered.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
58
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
59 #STEP3: fetch unique names from Blast summary reports
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
60 cat summary_${var}_filtered.txt | awk '{print $7}' | awk -F "|" '{print $2}'| sort | uniq | sed 's/Species://' > ${var}_uniq.ids
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
61
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
62 #STEP4: retrieve the best hit for each virus/viroid
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
63 echo "processing top hits ..."
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
64 touch ${var}_filtered.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
65 for id in `cat ${var}_uniq.ids`
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
66 do
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
67 #print on the screen the name of the virus/viroids to search
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
68 #echo "fetching species matches ..." $id
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
69
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
70 #fetch the virus name on the summary_blastn file by selecting the longest alignment (column 3) and highest genome coverage (column 5)
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
71 grep $id summary_${var}.txt | sort -k3,3nr -k5,5nr | head -1 >> ${var}_filtered.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
72 done
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
73
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
74 #print the header of the initial summary_blastn file
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
75 cat summary_${var}.txt | head -1 > header
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
76 #report 1
1
5687b8f1ad69 fix datatype for summary
fubar
parents: 0
diff changeset
77 echo "#" > $3
5687b8f1ad69 fix datatype for summary
fubar
parents: 0
diff changeset
78 cat header ${var}_filtered.txt >> $3
0
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
79
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
80 #end raw]]></configfile>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
81 </configfiles>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
82 <inputs>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
83 <param name="blastn_search_outputs" type="data" optional="false" label="blastn_search_outputs" help="" format="tabular" multiple="false"/>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
84 </inputs>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
85 <outputs>
1
5687b8f1ad69 fix datatype for summary
fubar
parents: 0
diff changeset
86 <data name="summary_viruses_viroids" format="tabular" label="summary_viruses_viroids" hidden="false"/>
0
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
87 </outputs>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
88 <tests>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
89 <test>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
90 <output name="summary_viruses_viroids" value="summary_viruses_viroids_sample" compare="diff" lines_diff="0"/>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
91 <param name="blastn_search_outputs" value="blastn_search_outputs_sample"/>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
92 </test>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
93 </tests>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
94 <help><![CDATA[
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
95
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
96 **What it Does**
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
97
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
98 Wraps https://github.com/schmidda/blast-tools/tree/master as a Galaxy tool, as a demonstration for Roberto Barrero.
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
99
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
100
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
101
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
102 ------
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
103
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
104
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
105 Script::
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
106
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
107 ## eResearch Office, QUT
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
108 ## Created: 31 March 2021
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
109 ## Last modified: 28 September 2022
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
110 ## Script: Processes Galaxy Australia generated blastN outputs to summarise and report hits to REGULATED and ENDEMIC viruses/viroids.
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
111 ## Usage: ./run_VirReport_Summary.sh
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
112 ## changed to accept a single input file name passed as $1
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
113 ## Ross Lazarus for a ToolFactory wrapper for Roberto Barrero
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
114 ## July 18 2023
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
115 dataPath=${PWD}
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
116 # Requirement: One or more GA-VSD .tabular outputs need to be in the folder where the command above (Usage) is executed.
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
117 # The script will look for all files with the suffix *.tabular
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
118 #Processing tabular files
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
119 file=$1
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
120 var=$(basename $file)
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
121 #STEP0: fetch Top 1 Hits
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
122 cat $file | awk '{print $1}' | sort | uniq > ${var}.top1.ids
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
123 for i in `cat ${var}.top1.ids`
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
124 do
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
125 echo "fetching top hits..." $i;
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
126 grep $i $file | head -1 >> ${var}.top1Hits.txt;
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
127 done
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
128 #STEP1: modify the columns of Galaxy Australia (GA) blast output to the format expected by the BlastTools.jar tool
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
129 ###### namely: qseqid sgi sacc length pident mismatch gapopen qstart qend qlen sstart send slen sstrand evalue bitscore qcovhsp stitle staxids qseq sseq sseqid qcovs qframe sframe
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
130 cat ${var}.top1Hits.txt |csvtk cut -H -t -f 1,19,20,4,3,5,6,7,8,17,9,10,18,22,11,12,24,21,25,15,16,2,23,13,14 | sed 's/ /_/g' > ${var}.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
131 #STEP2: summarise the GA blastN files
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
132 #java -jar /mnt/c/Users/lelwala/HTS/BlastTools.jar -t blastn ${var}.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
133 java -jar $2 -t blastn ${var}.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
134 #filter virus/viroid/endo
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
135 cat summary_${var}.txt | grep "virus\|viroid\|endo" > summary_${var}_filtered.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
136 #STEP3: fetch unique names from Blast summary reports
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
137 cat summary_${var}_filtered.txt | awk '{print $7}' | awk -F "|" '{print $2}'| sort | uniq | sed 's/Species://' > ${var}_uniq.ids
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
138 #STEP4: retrieve the best hit for each virus/viroid
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
139 echo "processing top hits ..."
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
140 touch ${var}_filtered.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
141 for id in `cat ${var}_uniq.ids`
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
142 do
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
143 #print on the screen the name of the virus/viroids to search
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
144 #echo "fetching species matches ..." $id
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
145 #fetch the virus name on the summary_blastn file by selecting the longest alignment (column 3) and highest genome coverage (column 5)
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
146 grep $id summary_${var}.txt | sort -k3,3nr -k5,5nr | head -1 >> ${var}_filtered.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
147 done
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
148 #print the header of the initial summary_blastn file
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
149 cat summary_${var}.txt | head -1 > header
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
150 #report 1
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
151 cat header ${var}_filtered.txt > $3
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
152 #removing intermediate files
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
153 rm summary_${var}.txt ${var}.txt ${var}.top1.ids ${var}_uniq.ids summary_${var}_filtered.txt header* ${var}_filtered.txt *top1Hits.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
154
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
155 ]]></help>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
156 <citations>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
157 <citation type="doi">10.1093/bioinformatics/bts573</citation>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
158 </citations>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
159 </tool>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
160