diff blast_tools_search/blasttoolssearch.xml @ 8:186734f1d63c draft default tip

Replace plotly_blast_tool content with blasttools_search. :(
author fubar
date Fri, 04 Aug 2023 01:57:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blast_tools_search/blasttoolssearch.xml	Fri Aug 04 01:57:51 2023 +0000
@@ -0,0 +1,165 @@
+<tool name="blasttoolssearch" id="blasttoolssearch" version="3.0">
+  <!--Source in git at: https://github.com/fubar2/galaxy_tf_overlay-->
+  <!--Created by toolfactory@galaxy.org at 04/08/2023 10:36:33 using the Galaxy Tool Factory.-->
+  <description>Runs a legacy Java jar called blasttools from https://github.com/schmidda/blast-tools/tree/master</description>
+  <requirements>
+    <requirement version="0.26.0" type="package">csvtk</requirement>
+    <requirement version="11.0.13" type="package">openjdk</requirement>
+  </requirements>
+  <stdio>
+    <exit_code range="1:" level="fatal"/>
+  </stdio>
+  <version_command><![CDATA[echo "3.0"]]></version_command>
+  <command><![CDATA[bash $runme "$blastn_search_outputs" "$__tool_directory__/BlastTools.jar" "$summary_viruses_viroids" "$all_blasttools_output"]]></command>
+  <configfiles>
+    <configfile name="runme"><![CDATA[#raw
+
+## eResearch Office, QUT
+## Created:  31 March 2021
+## Last modified: 28 September 2022
+## Script: Processes Galaxy Australia generated blastN outputs to summarise and report hits to REGULATED and ENDEMIC viruses/viroids.
+## Usage: ./run_VirReport_Summary.sh
+## changed to accept a single input file name passed as $1
+## Ross Lazarus for a ToolFactory wrapper for Robert Barrero
+## July 18 2023
+
+
+# Requirement: One or more GA-VSD .tabular outputs need to be in the folder where the command above (Usage)is executed.
+# The script will Look for all files with the suffix *.tabular
+
+#Processing tabular files
+file=$1
+    var=$(basename $file)
+
+    #STEP1: modify the columns of Galaxy Australia (GA) blast output to the expected format by the BlastTools.jar tool
+    ######  namely: qseqid sgi sacc length pident mismatch gapopen qstart qend qlen sstart send slen sstrand evalue bitscore qcovhsp stitle staxids qseq sseq sseqid qcovs qframe sframe
+    cat $file |csvtk cut -H -t -f 1,19,20,4,3,5,6,7,8,17,9,10,18,22,11,12,24,21,25,15,16,2,23,13,14 | sed 's/ /_/g' > ${var}_all.txt
+    java -jar $2 -t blastn ${var}_all.txt
+    cat summary_${var}_all.txt | grep "virus\|viroid\|endo" > $4
+
+
+    #STEP0: fetch Top 1 Hits
+    cat $file | awk '{print $1}' | sort | uniq > ${var}.top1.ids
+    for i in `cat ${var}.top1.ids`
+      do
+        echo "fetching top hits..." $i 1>&2 ;
+        grep $i $file | head -1 >> ${var}.top1Hits.txt;
+      done
+
+    #STEP1: modify the columns of Galaxy Australia (GA) blast output to the expected format by the BlastTools.jar tool
+    ######  namely: qseqid sgi sacc length pident mismatch gapopen qstart qend qlen sstart send slen sstrand evalue bitscore qcovhsp stitle staxids qseq sseq sseqid qcovs qframe sframe
+    cat ${var}.top1Hits.txt |csvtk cut -H -t -f 1,19,20,4,3,5,6,7,8,17,9,10,18,22,11,12,24,21,25,15,16,2,23,13,14 | sed 's/ /_/g' > ${var}.txt
+
+    #STEP2: summarise the GA blastN files
+    java -jar $2 -t blastn ${var}.txt
+    #filter virus/viroid/endo
+    cat summary_${var}.txt | grep "virus\|viroid\|endo" > summary_${var}_filtered.txt
+
+    #STEP3: fetch unique names from Blast summary reports
+    cat summary_${var}_filtered.txt | awk '{print $7}' | awk -F "|" '{print $2}'| sort | uniq | sed 's/Species://' > ${var}_uniq.ids
+
+    #STEP4: retrieve the best hit for each virus/viroid
+    echo "processing top hits ..." 1>&2
+    touch ${var}_filtered.txt
+    for id in `cat ${var}_uniq.ids`
+      do
+        #print on the screen the name of the virus/viroids to search
+        #echo "fetching species matches ..." $id  1>&2
+
+        #fetch the virus name on the summary_blastn file by selecting the longest alignment (column 3) and highest genome coverage (column 5)
+        grep $id summary_${var}.txt | sort -k3,3nr -k5,5nr | head -1 >> ${var}_filtered.txt
+      done
+
+    #print the header of the inital summary_blastn file
+    cat summary_${var}.txt | head -1 > header
+    #report 1
+    cat header ${var}_filtered.txt > $3
+
+
+
+
+
+#end raw]]></configfile>
+  </configfiles>
+  <inputs>
+    <param name="blastn_search_outputs" type="data" optional="false" label="blastn_search_outputs" help="Nucleotide blast search output from a Galaxy blast search" format="tabular" multiple="false"/>
+  </inputs>
+  <outputs>
+    <data name="summary_viruses_viroids" format="tabular" label="summary_viruses_viroids" hidden="false"/>
+    <data name="all_blasttools_output" format="tabular" label="all_blasttools_output" hidden="false"/>
+  </outputs>
+  <tests>
+    <test>
+      <output name="summary_viruses_viroids" value="summary_viruses_viroids_sample" compare="diff" lines_diff="0"/>
+      <output name="all_blasttools_output" value="all_blasttools_output_sample" compare="diff" lines_diff="0"/>
+      <param name="blastn_search_outputs" value="blastn_search_outputs_sample"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+
+**What it Does**
+
+Wraps https://github.com/schmidda/blast-tools/tree/master as a Galaxy tool as a demonstration for Roberto Barrero
+
+ 
+
+------
+
+
+Script::
+
+    ## eResearch Office, QUT
+    ## Created:  31 March 2021
+    ## Last modified: 28 September 2022
+    ## Script: Processes Galaxy Australia generated blastN outputs to summarise and report hits to REGULATED and ENDEMIC viruses/viroids.
+    ## Usage: ./run_VirReport_Summary.sh
+    ## changed to accept a single input file name passed as $1
+    ## Ross Lazarus for a ToolFactory wrapper for Robert Barrero
+    ## July 18 2023
+    # Requirement: One or more GA-VSD .tabular outputs need to be in the folder where the command above (Usage)is executed.
+    # The script will Look for all files with the suffix *.tabular
+    #Processing tabular files
+    file=$1
+        var=$(basename $file)
+        #STEP1: modify the columns of Galaxy Australia (GA) blast output to the expected format by the BlastTools.jar tool
+        ######  namely: qseqid sgi sacc length pident mismatch gapopen qstart qend qlen sstart send slen sstrand evalue bitscore qcovhsp stitle staxids qseq sseq sseqid qcovs qframe sframe
+        cat $file |csvtk cut -H -t -f 1,19,20,4,3,5,6,7,8,17,9,10,18,22,11,12,24,21,25,15,16,2,23,13,14 | sed 's/ /_/g' > $ {var}_all.txt
+        java -jar $2 -t blastn $ {var}_all.txt
+        cat summary_$ {var}_all.txt | grep "virus\|viroid\|endo" > $4
+        #STEP0: fetch Top 1 Hits
+        cat $file | awk '{print $1}' | sort | uniq > $ {var}.top1.ids
+        for i in `cat $ {var}.top1.ids`
+          do
+            echo "fetching top hits..." $i 1>&2 ;
+            grep $i $file | head -1 >> $ {var}.top1Hits.txt;
+          done
+        #STEP1: modify the columns of Galaxy Australia (GA) blast output to the expected format by the BlastTools.jar tool
+        ######  namely: qseqid sgi sacc length pident mismatch gapopen qstart qend qlen sstart send slen sstrand evalue bitscore qcovhsp stitle staxids qseq sseq sseqid qcovs qframe sframe
+        cat $ {var}.top1Hits.txt |csvtk cut -H -t -f 1,19,20,4,3,5,6,7,8,17,9,10,18,22,11,12,24,21,25,15,16,2,23,13,14 | sed 's/ /_/g' > $ {var}.txt
+        #STEP2: summarise the GA blastN files
+        java -jar $2 -t blastn $ {var}.txt
+        #filter virus/viroid/endo
+        cat summary_$ {var}.txt | grep "virus\|viroid\|endo" > summary_$ {var}_filtered.txt
+        #STEP3: fetch unique names from Blast summary reports
+        cat summary_$ {var}_filtered.txt | awk '{print $7}' | awk -F "|" '{print $2}'| sort | uniq | sed 's/Species://' > $ {var}_uniq.ids
+        #STEP4: retrieve the best hit for each virus/viroid
+        echo "processing top hits ..." 1>&2
+        touch $ {var}_filtered.txt
+        for id in `cat $ {var}_uniq.ids`
+          do
+            #print on the screen the name of the virus/viroids to search
+            #echo "fetching species matches ..." $id  1>&2
+            #fetch the virus name on the summary_blastn file by selecting the longest alignment (column 3) and highest genome coverage (column 5)
+            grep $id summary_$ {var}.txt | sort -k3,3nr -k5,5nr | head -1 >> $ {var}_filtered.txt
+          done
+        #print the header of the inital summary_blastn file
+        cat summary_$ {var}.txt | head -1 > header
+        #report 1
+        cat header $ {var}_filtered.txt > $3
+
+]]></help>
+  <citations>
+    <citation type="doi">10.1093/bioinformatics/bts573</citation>
+  </citations>
+</tool>
+