Mercurial > repos > fubar > blasttools_search_test
comparison blasttoolssearch/blasttoolssearch.xml @ 0:ee581a90a85e draft
Uploaded initial version
author | fubar |
---|---|
date | Wed, 19 Jul 2023 04:34:01 +0000 |
parents | |
children | 5687b8f1ad69 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ee581a90a85e |
---|---|
1 <tool name="blasttoolssearch" id="blasttoolssearch" version="3.0"> | |
2 <!--Source in git at: https://github.com/fubar2/galaxy--> | |
3 <!--Created by toolfactory@galaxy.org at 19/07/2023 12:39:19 using the Galaxy Tool Factory.--> | |
4 <description>Runs a legacy Java jar called blasttools from https://github.com/schmidda/blast-tools/tree/master</description> | |
5 <requirements> | |
6 <requirement type="package">csvtk</requirement> <!-- the runme script calls "csvtk cut" to reorder the blast columns --> | |
7 <requirement type="package">openjdk</requirement> <!-- the runme script calls "java -jar"; presumably this package supplies java (confirm) --> | |
8 </requirements> | |
9 <stdio> | |
10 <exit_code range="1:" level="fatal"/> <!-- an exit status of 1 or higher from the command is reported as a fatal error --> | |
11 </stdio> | |
12 <version_command><![CDATA[echo "3.0"]]></version_command> | |
13 <!-- Runs the runme configfile with bash. Arguments in order: blastn tabular input, BlastTools.jar from the tool directory, report output. Each path is single-quoted so the shell passes it as one word. --><command><![CDATA[bash | |
14 '$runme' | |
15 '$blastn_search_outputs' | |
16 '$__tool_directory__/BlastTools.jar' | |
17 '$summary_viruses_viroids' | |
18 ]]></command> | |
19 <configfiles> | |
20 <configfile name="runme"><![CDATA[#raw | |
21 ## runme: reduces one blastN tabular file to a report holding the best summary line for each virus/viroid name. | |
22 ## Arguments: $1 = blastN tabular input, $2 = path to BlastTools.jar, $3 = report file to write. | |
23 ## eResearch Office, QUT | |
24 ## Created: 31 March 2021 | |
25 ## Last modified: 28 September 2022 | |
26 ## Script: Processes Galaxy Australia generated blastN outputs to summarise and report hits to REGULATED and ENDEMIC viruses/viroids. | |
27 ## Usage: bash runme input.tabular BlastTools.jar report.txt | |
28 ## changed to accept a single input file name passed as $1 | |
29 ## Ross Lazarus for a ToolFactory wrapper for Robert Barrero | |
30 ## July 18 2023 | |
31 | |
32 # All intermediate files are created in the current working directory. | |
33 | |
34 #Processing tabular files | |
35 file=$1 | |
36 # Intermediate file names are derived from the base name of the input file. | |
37 var=$(basename "$file") | |
38 | |
39 #STEP0: fetch Top 1 Hits (the first input line for each distinct value of column 1) | |
40 awk '{print $1}' "$file" | sort | uniq > "${var}.top1.ids" | |
41 # The loop below appends, so begin with an empty hits file; this also guarantees the file exists when no ids are found. | |
42 : > "${var}.top1Hits.txt" | |
43 for i in $(cat "${var}.top1.ids") | |
44 do | |
45 echo "fetching top hits..." "$i"; | |
46 # Compare column 1 as a string: a plain grep would also pick lines that merely contain the id (e.g. contig_1 inside contig_10). | |
47 awk -v id="$i" '($1 "") == (id "") {print; exit}' "$file" >> "${var}.top1Hits.txt"; | |
48 done | |
49 | |
50 #STEP1: modify the columns of Galaxy Australia (GA) blast output to the expected format by the BlastTools.jar tool | |
51 ###### namely: qseqid sgi sacc length pident mismatch gapopen qstart qend qlen sstart send slen sstrand evalue bitscore qcovhsp stitle staxids qseq sseq sseqid qcovs qframe sframe | |
52 csvtk cut -H -t -f 1,19,20,4,3,5,6,7,8,17,9,10,18,22,11,12,24,21,25,15,16,2,23,13,14 < "${var}.top1Hits.txt" | sed 's/ /_/g' > "${var}.txt" | |
53 | |
54 #STEP2: summarise the GA blastN files | |
55 java -jar "$2" -t blastn "${var}.txt" | |
56 # Every later step reads summary_${var}.txt; if the jar did not leave it behind, fail with a non-zero status instead of writing an empty report. | |
57 if [ ! -f "summary_${var}.txt" ]; then echo "BlastTools.jar did not produce summary_${var}.txt" >&2; exit 1; fi | |
58 #filter virus/viroid/endo | |
59 grep "virus\|viroid\|endo" "summary_${var}.txt" > "summary_${var}_filtered.txt" | |
60 | |
61 #STEP3: fetch unique names from Blast summary reports | |
62 awk '{print $7}' "summary_${var}_filtered.txt" | awk -F "|" '{print $2}' | sort | uniq | sed 's/Species://' > "${var}_uniq.ids" | |
63 #STEP4: retrieve the best hit for each virus/viroid | |
64 echo "processing top hits ..." | |
65 : > "${var}_filtered.txt" | |
66 for id in $(cat "${var}_uniq.ids") | |
67 do | |
68 #print on the screen the name of the virus/viroids to search | |
69 #echo "fetching species matches ..." $id | |
70 #fetch the virus name on the summary_blastn file by selecting the longest alignment (column 3) and highest genome coverage (column 5) | |
71 # -F matches the name as literal text rather than as a regular expression; -- protects names that start with a dash. | |
72 grep -F -- "$id" "summary_${var}.txt" | sort -k3,3nr -k5,5nr | head -1 >> "${var}_filtered.txt" | |
73 done | |
74 #print the header of the initial summary_blastn file | |
75 head -1 "summary_${var}.txt" > header | |
76 #report 1 | |
77 cat header "${var}_filtered.txt" > "$3" | |
78 | |
79 #end raw]]></configfile> | |
80 </configfiles> | |
81 <inputs> | |
82 <param name="blastn_search_outputs" type="data" optional="false" label="blastn_search_outputs" help="" format="tabular" multiple="false"/> <!-- one required tabular dataset; the command passes its path as the first argument of the runme script --> | |
83 </inputs> | |
84 <outputs> | |
85 <data name="summary_viruses_viroids" format="txt" label="summary_viruses_viroids" hidden="false"/> <!-- the command passes this path as the third argument of the runme script, which writes the final report to it --> | |
86 </outputs> | |
87 <tests> | |
88 <test> | |
89 <output name="summary_viruses_viroids" value="summary_viruses_viroids_sample" compare="diff" lines_diff="0"/> <!-- diff comparison against the sample file with zero differing lines allowed --> | |
90 <param name="blastn_search_outputs" value="blastn_search_outputs_sample"/> <!-- sample file supplied as the tool's only input --> | |
91 </test> | |
92 </tests> | |
93 <help><![CDATA[ | |
94 | |
95 **What it Does** | |
96 | |
97 Wraps https://github.com/schmidda/blast-tools/tree/master as a Galaxy tool as a demonstration for Roberto Barrero | |
98 | |
99 | |
100 | |
101 ------ | |
102 | |
103 | |
104 Script:: | |
105 | |
106 ## eResearch Office, QUT | |
107 ## Created: 31 March 2021 | |
108 ## Last modified: 28 September 2022 | |
109 ## Script: Processes Galaxy Australia generated blastN outputs to summarise and report hits to REGULATED and ENDEMIC viruses/viroids. | |
110 ## Usage: ./run_VirReport_Summary.sh | |
111 ## changed to accept a single input file name passed as $1 | |
112 ## Ross Lazarus for a ToolFactory wrapper for Robert Barrero | |
113 ## July 18 2023 | |
114 dataPath=${PWD} | |
115 # Requirement: One or more GA-VSD .tabular outputs need to be in the folder where the command above (Usage) is executed. | |
116 # The script will look for all files with the suffix *.tabular | |
117 #Processing tabular files | |
118 file=$1 | |
119 var=$(basename $file) | |
120 #STEP0: fetch Top 1 Hits | |
121 cat $file | awk '{print $1}' | sort | uniq > ${var}.top1.ids | |
122 for i in `cat ${var}.top1.ids` | |
123 do | |
124 echo "fetching top hits..." $i; | |
125 grep $i $file | head -1 >> ${var}.top1Hits.txt; | |
126 done | |
127 #STEP1: modify the columns of Galaxy Australia (GA) blast output to the expected format by the BlastTools.jar tool | |
128 ###### namely: qseqid sgi sacc length pident mismatch gapopen qstart qend qlen sstart send slen sstrand evalue bitscore qcovhsp stitle staxids qseq sseq sseqid qcovs qframe sframe | |
129 cat ${var}.top1Hits.txt |csvtk cut -H -t -f 1,19,20,4,3,5,6,7,8,17,9,10,18,22,11,12,24,21,25,15,16,2,23,13,14 | sed 's/ /_/g' > ${var}.txt | |
130 #STEP2: summarise the GA blastN files | |
131 #java -jar /mnt/c/Users/lelwala/HTS/BlastTools.jar -t blastn ${var}.txt | |
132 java -jar $2 -t blastn ${var}.txt | |
133 #filter virus/viroid/endo | |
134 cat summary_${var}.txt | grep "virus\|viroid\|endo" > summary_${var}_filtered.txt | |
135 #STEP3: fetch unique names from Blast summary reports | |
136 cat summary_${var}_filtered.txt | awk '{print $7}' | awk -F "|" '{print $2}'| sort | uniq | sed 's/Species://' > ${var}_uniq.ids | |
137 #STEP4: retrieve the best hit for each virus/viroid | |
138 echo "processing top hits ..." | |
139 touch ${var}_filtered.txt | |
140 for id in `cat ${var}_uniq.ids` | |
141 do | |
142 #print on the screen the name of the virus/viroids to search | |
143 #echo "fetching species matches ..." $id | |
144 #fetch the virus name on the summary_blastn file by selecting the longest alignment (column 3) and highest genome coverage (column 5) | |
145 grep $id summary_${var}.txt | sort -k3,3nr -k5,5nr | head -1 >> ${var}_filtered.txt | |
146 done | |
147 #print the header of the initial summary_blastn file | |
148 cat summary_${var}.txt | head -1 > header | |
149 #report 1 | |
150 cat header ${var}_filtered.txt > $3 | |
151 #removing intermediate files | |
152 rm summary_${var}.txt ${var}.txt ${var}.top1.ids ${var}_uniq.ids summary_${var}_filtered.txt header* ${var}_filtered.txt *top1Hits.txt | |
153 | |
154 ]]></help> | |
155 <citations> | |
156 <citation type="doi">10.1093/bioinformatics/bts573</citation> | |
157 </citations> | |
158 </tool> | |
159 |