annotate blasttoolssearch/blasttoolssearch.xml @ 0:ee581a90a85e draft

Uploaded initial version
author fubar
date Wed, 19 Jul 2023 04:34:01 +0000
parents
children 5687b8f1ad69
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
1 <tool name="blasttoolssearch" id="blasttoolssearch" version="3.0">
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
2 <!--Source in git at: https://github.com/fubar2/galaxy-->
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
3 <!--Created by toolfactory@galaxy.org at 19/07/2023 12:39:19 using the Galaxy Tool Factory.-->
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
4 <description>Runs a legacy Java jar called blasttools from https://github.com/schmidda/blast-tools/tree/master</description>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
5 <requirements>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
6 <requirement type="package">csvtk</requirement>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
7 <requirement type="package">openjdk</requirement>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
8 </requirements>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
9 <stdio>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
10 <exit_code range="1:" level="fatal"/>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
11 </stdio>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
12 <version_command><![CDATA[echo "3.0"]]></version_command>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
13 <command><![CDATA[bash
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
14 $runme
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
15 $blastn_search_outputs
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
16 $__tool_directory__/BlastTools.jar
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
17 $summary_viruses_viroids
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
18 ]]></command>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
19 <configfiles>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
20 <configfile name="runme"><![CDATA[#raw
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
21
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
22
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
23 ## eResearch Office, QUT
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
24 ## Created: 31 March 2021
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
25 ## Last modified: 28 September 2022
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
26 ## Script: Processes Galaxy Australia generated blastN outputs to summarise and report hits to REGULATED and ENDEMIC viruses/viroids.
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
27 ## Usage: ./run_VirReport_Summary.sh
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
28 ## changed to accept a single input file name passed as $1
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
29 ## Ross Lazarus for a ToolFactory wrapper for Roberto Barrero
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
30 ## July 18 2023
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
31
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
32 dataPath=${PWD}
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
33
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
34 # Requirement: One or more GA-VSD .tabular outputs need to be in the folder where the command above (Usage) is executed.
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
35 # The original script looked for all files with the suffix *.tabular; this version processes only the single file passed as $1
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
36
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
37 #Processing tabular files
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
38 file=$1
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
39
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
40 var=$(basename $file)
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
41
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
42 #STEP0: fetch Top 1 Hits
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
43 cat $file | awk '{print $1}' | sort | uniq > ${var}.top1.ids
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
44 for i in `cat ${var}.top1.ids`
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
45 do
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
46 echo "fetching top hits..." $i;
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
47 grep $i $file | head -1 >> ${var}.top1Hits.txt;
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
48 done
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
49
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
50 #STEP1: modify the columns of Galaxy Australia (GA) blast output to the expected format by the BlastTools.jar tool
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
51 ###### namely: qseqid sgi sacc length pident mismatch gapopen qstart qend qlen sstart send slen sstrand evalue bitscore qcovhsp stitle staxids qseq sseq sseqid qcovs qframe sframe
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
52 cat ${var}.top1Hits.txt |csvtk cut -H -t -f 1,19,20,4,3,5,6,7,8,17,9,10,18,22,11,12,24,21,25,15,16,2,23,13,14 | sed 's/ /_/g' > ${var}.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
53
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
54 #STEP2: summarise the GA blastN files
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
55 java -jar $2 -t blastn ${var}.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
56 #filter virus/viroid/endo
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
57 cat summary_${var}.txt | grep "virus\|viroid\|endo" > summary_${var}_filtered.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
58
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
59 #STEP3: fetch unique names from Blast summary reports
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
60 cat summary_${var}_filtered.txt | awk '{print $7}' | awk -F "|" '{print $2}'| sort | uniq | sed 's/Species://' > ${var}_uniq.ids
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
61
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
62 #STEP4: retrieve the best hit for each virus/viroid
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
63 echo "processing top hits ..."
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
64 touch ${var}_filtered.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
65 for id in `cat ${var}_uniq.ids`
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
66 do
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
67 #print on the screen the name of the virus/viroids to search
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
68 #echo "fetching species matches ..." $id
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
69
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
70 #fetch the virus name on the summary_blastn file by selecting the longest alignment (column 3) and highest genome coverage (column 5)
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
71 grep $id summary_${var}.txt | sort -k3,3nr -k5,5nr | head -1 >> ${var}_filtered.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
72 done
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
73
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
74 #print the header of the initial summary_blastn file
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
75 cat summary_${var}.txt | head -1 > header
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
76 #report 1
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
77 cat header ${var}_filtered.txt > $3
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
78
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
79 #end raw]]></configfile>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
80 </configfiles>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
81 <inputs>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
82 <param name="blastn_search_outputs" type="data" optional="false" label="blastn_search_outputs" help="" format="tabular" multiple="false"/>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
83 </inputs>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
84 <outputs>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
85 <data name="summary_viruses_viroids" format="txt" label="summary_viruses_viroids" hidden="false"/>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
86 </outputs>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
87 <tests>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
88 <test>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
89 <output name="summary_viruses_viroids" value="summary_viruses_viroids_sample" compare="diff" lines_diff="0"/>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
90 <param name="blastn_search_outputs" value="blastn_search_outputs_sample"/>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
91 </test>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
92 </tests>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
93 <help><![CDATA[
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
94
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
95 **What it Does**
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
96
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
97 Wraps https://github.com/schmidda/blast-tools/tree/master as a Galaxy tool as a demonstration for Roberto Barrero
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
98
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
99
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
100
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
101 ------
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
102
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
103
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
104 Script::
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
105
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
106 ## eResearch Office, QUT
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
107 ## Created: 31 March 2021
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
108 ## Last modified: 28 September 2022
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
109 ## Script: Processes Galaxy Australia generated blastN outputs to summarise and report hits to REGULATED and ENDEMIC viruses/viroids.
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
110 ## Usage: ./run_VirReport_Summary.sh
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
111 ## changed to accept a single input file name passed as $1
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
112 ## Ross Lazarus for a ToolFactory wrapper for Roberto Barrero
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
113 ## July 18 2023
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
114 dataPath=${PWD}
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
115 # Requirement: One or more GA-VSD .tabular outputs need to be in the folder where the command above (Usage) is executed.
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
116 # The original script looked for all files with the suffix *.tabular; this version processes only the single file passed as $1
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
117 #Processing tabular files
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
118 file=$1
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
119 var=$(basename $file)
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
120 #STEP0: fetch Top 1 Hits
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
121 cat $file | awk '{print $1}' | sort | uniq > ${var}.top1.ids
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
122 for i in `cat ${var}.top1.ids`
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
123 do
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
124 echo "fetching top hits..." $i;
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
125 grep $i $file | head -1 >> ${var}.top1Hits.txt;
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
126 done
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
127 #STEP1: modify the columns of Galaxy Australia (GA) blast output to the expected format by the BlastTools.jar tool
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
128 ###### namely: qseqid sgi sacc length pident mismatch gapopen qstart qend qlen sstart send slen sstrand evalue bitscore qcovhsp stitle staxids qseq sseq sseqid qcovs qframe sframe
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
129 cat ${var}.top1Hits.txt |csvtk cut -H -t -f 1,19,20,4,3,5,6,7,8,17,9,10,18,22,11,12,24,21,25,15,16,2,23,13,14 | sed 's/ /_/g' > ${var}.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
130 #STEP2: summarise the GA blastN files
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
131 #java -jar /mnt/c/Users/lelwala/HTS/BlastTools.jar -t blastn ${var}.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
132 java -jar $2 -t blastn ${var}.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
133 #filter virus/viroid/endo
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
134 cat summary_${var}.txt | grep "virus\|viroid\|endo" > summary_${var}_filtered.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
135 #STEP3: fetch unique names from Blast summary reports
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
136 cat summary_${var}_filtered.txt | awk '{print $7}' | awk -F "|" '{print $2}'| sort | uniq | sed 's/Species://' > ${var}_uniq.ids
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
137 #STEP4: retrieve the best hit for each virus/viroid
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
138 echo "processing top hits ..."
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
139 touch ${var}_filtered.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
140 for id in `cat ${var}_uniq.ids`
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
141 do
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
142 #print on the screen the name of the virus/viroids to search
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
143 #echo "fetching species matches ..." $id
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
144 #fetch the virus name on the summary_blastn file by selecting the longest alignment (column 3) and highest genome coverage (column 5)
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
145 grep $id summary_${var}.txt | sort -k3,3nr -k5,5nr | head -1 >> ${var}_filtered.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
146 done
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
147 #print the header of the initial summary_blastn file
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
148 cat summary_${var}.txt | head -1 > header
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
149 #report 1
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
150 cat header ${var}_filtered.txt > $3
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
151 #removing intermediate files
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
152 rm summary_${file}.txt ${file}.txt ${file}.top1.ids ${file}_uniq.ids summary_${file}_filtered.txt header* ${var}_filtered.txt *top1Hits.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
153
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
154 ]]></help>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
155 <citations>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
156 <citation type="doi">10.1093/bioinformatics/bts573</citation>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
157 </citations>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
158 </tool>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
159