annotate blasttoolssearch/blasttoolssearch.xml @ 4:2051ee2bedc4 draft

Uploaded
author fubar
date Wed, 19 Jul 2023 06:45:26 +0000
parents a6593725b728
children 3a499f3ed69c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
1 <tool name="blasttoolssearch" id="blasttoolssearch" version="3.0">
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
2 <!--Source in git at: https://github.com/fubar2/galaxy-->
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
3 <!--Created by toolfactory@galaxy.org at 19/07/2023 12:39:19 using the Galaxy Tool Factory.-->
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
4 <description>Runs a legacy Java jar called blasttools from https://github.com/schmidda/blast-tools/tree/master</description>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
5 <requirements>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
6 <requirement type="package">csvtk</requirement>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
7 <requirement type="package">openjdk</requirement>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
8 </requirements>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
9 <stdio>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
10 <exit_code range="1:" level="fatal"/>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
11 </stdio>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
12 <version_command><![CDATA[echo "3.0"]]></version_command>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
13 <!--Runs the generated runme script as: bash runme blastn_input BlastTools.jar summary_output; every path is single-quoted for the shell--><command><![CDATA[bash
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
14 '$runme'
2
e213ae40f480 retry with echo -n so no newline
fubar
parents: 1
diff changeset
15 '$blastn_search_outputs'
0
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
16 '$__tool_directory__/BlastTools.jar'
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
17 '$summary_viruses_viroids'
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
18 ]]></command>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
19 <configfiles>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
20 <configfile name="runme"><![CDATA[#raw
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
21
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
22
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
23 ## eResearch Office, QUT
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
24 ## Created: 31 March 2021
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
25 ## Last modified: 28 September 2022
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
26 ## Script: Processes Galaxy Australia generated blastN outputs to summarise and report hits to REGULATED and ENDEMIC viruses/viroids.
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
27 ## Usage: bash run_VirReport_Summary.sh <blastn_tabular_input> <path_to_BlastTools.jar> <summary_output_file>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
28 ## changed to accept a single input file name passed as $1
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
29 ## Ross Lazarus for a ToolFactory wrapper for Roberto Barrero
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
30 ## July 18 2023
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
31
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
32 dataPath=${PWD}
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
33
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
34 # Requirement: One or more GA-VSD .tabular outputs need to be in the folder where the command above (Usage) is executed.
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
35 # The original script looked for all files with the suffix *.tabular; this version processes only the single file passed as $1
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
36
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
37 #Processing tabular files
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
38 file="$1"   # first script argument: the blastn tabular file to summarise
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
39 # var is the input's base name; it is used to build the name of every intermediate file below
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
40 var=$(basename -- "$file")
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
41
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
42 #STEP0: fetch Top 1 Hits - for every distinct query id (column 1) keep the first line of the input that belongs to it
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
43 awk 'NF {print $1}' "$file" | sort | uniq > "${var}.top1.ids"   #NF skips blank lines so no empty id is listed
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
44 while IFS= read -r i   #read ids line by line so they are never word-split or glob-expanded
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
45 do
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
46 echo "fetching top hits..." "$i";
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
47 awk -v id="$i" '($1 "") == (id "") {print; exit}' "$file" >> "${var}.top1Hits.txt";   #exact string match on column 1 (a plain grep would treat the id as a regex and match it anywhere in the line)
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
48 done < "${var}.top1.ids"
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
49
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
50 #STEP1: rearrange the Galaxy Australia (GA) blast output columns into the layout expected by the BlastTools.jar tool, then replace every space with an underscore
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
51 ###### resulting column order: qseqid sgi sacc length pident mismatch gapopen qstart qend qlen sstart send slen sstrand evalue bitscore qcovhsp stitle staxids qseq sseq sseqid qcovs qframe sframe
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
52 csvtk cut -H -t -f 1,19,20,4,3,5,6,7,8,17,9,10,18,22,11,12,24,21,25,15,16,2,23,13,14 < ${var}.top1Hits.txt | sed 's/ /_/g' > ${var}.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
53
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
54 #STEP2: summarise the GA blastN files with the BlastTools jar (its path is the second script argument); the steps below read its output from summary_${var}.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
55 java -jar "$2" -t blastn "${var}.txt"
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
56 [ -f "summary_${var}.txt" ] || { echo "ERROR: BlastTools.jar did not write summary_${var}.txt" >&2; exit 1; }   #stop with a non-zero exit code instead of silently producing an empty report
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
57 grep "virus\|viroid\|endo" "summary_${var}.txt" > "summary_${var}_filtered.txt"   #filter virus/viroid/endo
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
58
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
59 #STEP3: list the unique names found in the filtered Blast summary: second "|"-separated piece of column 7, with the "Species:" label removed
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
60 awk '{ split($7, piece, "|"); print piece[2] }' < summary_${var}_filtered.txt | sort | uniq | sed 's/Species://' > ${var}_uniq.ids
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
61
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
62 #STEP4: retrieve the best hit for each virus/viroid name listed in ${var}_uniq.ids
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
63 echo "processing top hits ..."
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
64 touch "${var}_filtered.txt"
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
65 while IFS= read -r id   #read names line by line so they are never word-split or glob-expanded
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
66 do
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
67 #skip empty names (summary lines whose column 7 has no second "|"-separated piece)
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
68 [ -n "$id" ] || continue
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
69
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
70 #select the summary_blastn lines whose name (derived from column 7 exactly as in STEP3) equals $id, then keep the one with the longest alignment (column 3) and highest genome coverage (column 5)
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
71 awk -v id="$id" '{ split($7, piece, "|"); name = piece[2]; sub(/Species:/, "", name) } (name "") == (id "")' "summary_${var}.txt" | sort -k3,3nr -k5,5nr | head -1 >> "${var}_filtered.txt"
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
72 done < "${var}_uniq.ids"
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
73
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
74 #save the header (first line) of the initial summary_blastn file
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
75 head -n 1 "summary_${var}.txt" > header
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
76 #report 1: the header followed by the selected best hits, written to the output path given as the third script argument
3
a6593725b728 Uploaded
fubar
parents: 2
diff changeset
77 cat header "${var}_filtered.txt" > "$3"
0
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
78
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
79 #end raw]]></configfile>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
80 </configfiles>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
81 <inputs>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
82 <param name="blastn_search_outputs" type="data" optional="false" label="blastn_search_outputs" help="" format="tabular" multiple="false"/>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
83 </inputs>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
84 <outputs>
1
5687b8f1ad69 fix datatype for summary
fubar
parents: 0
diff changeset
85 <data name="summary_viruses_viroids" format="tabular" label="summary_viruses_viroids" hidden="false"/>
0
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
86 </outputs>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
87 <tests>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
88 <test>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
89 <output name="summary_viruses_viroids" value="summary_viruses_viroids_sample" compare="diff" lines_diff="0"/>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
90 <param name="blastn_search_outputs" value="blastn_search_outputs_sample"/>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
91 </test>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
92 </tests>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
93 <help><![CDATA[
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
94
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
95 **What it Does**
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
96
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
97 Wraps https://github.com/schmidda/blast-tools/tree/master as a Galaxy tool, written as a demonstration for Roberto Barrero
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
98
2
e213ae40f480 retry with echo -n so no newline
fubar
parents: 1
diff changeset
99
0
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
100
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
101 ]]></help>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
102 <citations>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
103 <citation type="doi">10.1093/bioinformatics/bts573</citation>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
104 </citations>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
105 </tool>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
106