annotate blasttoolssearch/blasttoolssearch.xml @ 6:c23d0b047de9 draft

Uploaded
author fubar
date Wed, 19 Jul 2023 23:39:38 +0000
parents 3a499f3ed69c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
3a499f3ed69c Uploaded
fubar
parents: 4
diff changeset
1 <tool name="blasttoolssearch" id="blasttoolssearch" version="3.1">
0
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
2 <!--Source in git at: https://github.com/fubar2/galaxy-->
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
3 <!--Created by toolfactory@galaxy.org at 19/07/2023 12:39:19 using the Galaxy Tool Factory.-->
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
4 <description>Runs a legacy Java jar called blasttools from https://github.com/schmidda/blast-tools/tree/master</description>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
5 <requirements>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
6 <requirement type="package">csvtk</requirement>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
7 <requirement type="package">openjdk</requirement>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
8 </requirements>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
9 <stdio>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
10 <exit_code range="1:" level="fatal"/>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
11 </stdio>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
12 <version_command><![CDATA[echo "3.0"]]></version_command>
5
3a499f3ed69c Uploaded
fubar
parents: 4
diff changeset
13 <command><![CDATA[
3a499f3ed69c Uploaded
fubar
parents: 4
diff changeset
14 mkdir -p files &&
6
c23d0b047de9 Uploaded
fubar
parents: 5
diff changeset
15 ln -sf '$blastn_search_outputs' files/\$(basename '$blastn_search_outputs') &&
5
3a499f3ed69c Uploaded
fubar
parents: 4
diff changeset
16 bash $runme ./files $__tool_directory__/BlastTools.jar $summary_viruses_viroids
0
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
17 ]]></command>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
18 <configfiles>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
19 <configfile name="runme"><![CDATA[#raw
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
20
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
21
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
22 ## eResearch Office, QUT
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
23 ## Created: 31 March 2021
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
24 ## Last modified: 28 September 2022
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
25 ## Script: Processes Galaxy Australia generated blastN outputs to summarise and report hits to REGULATED and ENDEMIC viruses/viroids.
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
26 ## Usage: ./run_VirReport_Summary.sh
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
27 ## changed to process the input file linked into ./files (that directory is passed as $1, but the loop below reads files/ directly)
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
28 ## Ross Lazarus for a ToolFactory wrapper for Roberto Barrero
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
29 ## July 18 2023
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
30
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
31 dataPath=${PWD}
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
32
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
33 # Requirement: One or more GA-VSD .tabular outputs need to be in the folder where the command above (Usage) is executed.
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
34 # The script will look for all files matching files/*.* (originally all files with the suffix *.tabular)
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
35
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
36 #Processing tabular files
5
3a499f3ed69c Uploaded
fubar
parents: 4
diff changeset
37 for file in files/*.*
3a499f3ed69c Uploaded
fubar
parents: 4
diff changeset
38 do
0
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
39 var=$(basename $file)
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
40
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
41 #STEP0: fetch Top 1 Hits
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
42 cat $file | awk '{print $1}' | sort | uniq > ${var}.top1.ids
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
43 for i in `cat ${var}.top1.ids`
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
44 do
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
45 echo "fetching top hits..." $i;
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
46 grep $i $file | head -1 >> ${var}.top1Hits.txt;
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
47 done
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
48
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
49 #STEP1: modify the columns of the Galaxy Australia (GA) blast output to the format expected by the BlastTools.jar tool
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
50 ###### namely: qseqid sgi sacc length pident mismatch gapopen qstart qend qlen sstart send slen sstrand evalue bitscore qcovhsp stitle staxids qseq sseq sseqid qcovs qframe sframe
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
51 cat ${var}.top1Hits.txt |csvtk cut -H -t -f 1,19,20,4,3,5,6,7,8,17,9,10,18,22,11,12,24,21,25,15,16,2,23,13,14 | sed 's/ /_/g' > ${var}.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
52
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
53 #STEP2: summarise the GA blastN files
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
54 java -jar $2 -t blastn ${var}.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
55 #filter virus/viroid/endo
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
56 cat summary_${var}.txt | grep "virus\|viroid\|endo" > summary_${var}_filtered.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
57
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
58 #STEP3: fetch unique names from Blast summary reports
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
59 cat summary_${var}_filtered.txt | awk '{print $7}' | awk -F "|" '{print $2}'| sort | uniq | sed 's/Species://' > ${var}_uniq.ids
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
60
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
61 #STEP4: retrieve the best hit for each virus/viroid
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
62 echo "processing top hits ..."
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
63 touch ${var}_filtered.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
64 for id in `cat ${var}_uniq.ids`
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
65 do
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
66 #print on the screen the name of the virus/viroids to search
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
67 #echo "fetching species matches ..." $id
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
68
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
69 #fetch the virus name from the summary_blastn file by selecting the longest alignment (column 3) and highest genome coverage (column 5)
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
70 grep $id summary_${var}.txt | sort -k3,3nr -k5,5nr | head -1 >> ${var}_filtered.txt
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
71 done
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
72
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
73 #print the header of the initial summary_blastn file
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
74 cat summary_${var}.txt | head -1 > header
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
75 #report 1
3
a6593725b728 Uploaded
fubar
parents: 2
diff changeset
76 cat header ${var}_filtered.txt > $3
5
3a499f3ed69c Uploaded
fubar
parents: 4
diff changeset
77 done
0
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
78 #end raw]]></configfile>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
79 </configfiles>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
80 <inputs>
6
c23d0b047de9 Uploaded
fubar
parents: 5
diff changeset
81 <param name="blastn_search_outputs" type="data" optional="false" label="blastn_search_outputs" help="" format="tabular" multiple="false"/>
0
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
82 </inputs>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
83 <outputs>
1
5687b8f1ad69 fix datatype for summary
fubar
parents: 0
diff changeset
84 <data name="summary_viruses_viroids" format="tabular" label="summary_viruses_viroids" hidden="false"/>
0
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
85 </outputs>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
86 <tests>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
87 <test>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
88 <output name="summary_viruses_viroids" value="summary_viruses_viroids_sample" compare="diff" lines_diff="0"/>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
89 <param name="blastn_search_outputs" value="blastn_search_outputs_sample"/>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
90 </test>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
91 </tests>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
92 <help><![CDATA[
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
93
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
94 **What it Does**
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
95
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
96 Wraps https://github.com/schmidda/blast-tools/tree/master as a Galaxy tool as a demonstration for Roberto Barrero
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
97
2
e213ae40f480 retry with echo -n so no newline
fubar
parents: 1
diff changeset
98
0
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
99
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
100 ]]></help>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
101 <citations>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
102 <citation type="doi">10.1093/bioinformatics/bts573</citation>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
103 </citations>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
104 </tool>
ee581a90a85e Uploaded initial version
fubar
parents:
diff changeset
105