Mercurial > repos > fubar > blasttools_search_test

diff blasttoolssearch/blasttoolssearch.xml @ 4:2051ee2bedc4 draft
Uploaded
author: fubar
date: Wed, 19 Jul 2023 06:45:26 +0000
parents: a6593725b728
children: 3a499f3ed69c
--- a/blasttoolssearch/blasttoolssearch.xml	Wed Jul 19 06:06:07 2023 +0000
+++ b/blasttoolssearch/blasttoolssearch.xml	Wed Jul 19 06:45:26 2023 +0000
@@ -98,59 +98,6 @@
 
 
 
-------
-
-
-Script::
-
-    ## eResearch Office, QUT
-    ## Created:  31 March 2021
-    ## Last modified: 28 September 2022
-    ## Script: Processes Galaxy Australia generated blastN outputs to summarise and report hits to REGULATED and ENDEMIC viruses/viroids.
-    ## Usage: ./run_VirReport_Summary.sh
-    ## changed to accept a single input file name passed as $1
-    ## Ross Lazarus for a ToolFactory wrapper for Robert Barrero
-    ## July 18 2023
-    dataPath=${PWD}
-    # Requirement: One or more GA-VSD .tabular outputs need to be in the folder where the command above (Usage) is executed.
-    # The script will look for all files with the suffix *.tabular
-    #Processing tabular files
-    file=$1
-        var=$(basename $file)
-        #STEP0: fetch Top 1 Hits
-        cat $file | awk '{print $1}' | sort | uniq > ${var}.top1.ids
-        for i in `cat ${var}.top1.ids`
-          do
-            echo "fetching top hits..." $i;
-            grep $i $file | head -1 >> ${var}.top1Hits.txt;
-          done
-        #STEP1: modify the columns of Galaxy Australia (GA) blast output to the expected format by the BlastTools.jar tool
-        ######  namely: qseqid sgi sacc length pident mismatch gapopen qstart qend qlen sstart send slen sstrand evalue bitscore qcovhsp stitle staxids qseq sseq sseqid qcovs qframe sframe
-        cat ${var}.top1Hits.txt |csvtk cut -H -t -f 1,19,20,4,3,5,6,7,8,17,9,10,18,22,11,12,24,21,25,15,16,2,23,13,14 | sed 's/ /_/g' > ${var}.txt
-        #STEP2: summarise the GA blastN files
-        #java -jar /mnt/c/Users/lelwala/HTS/BlastTools.jar -t blastn ${var}.txt
-        java -jar $3 -t blastn ${var}.txt
-        #filter virus/viroid/endo
-        cat summary_${var}.txt | grep "virus\|viroid\|endo" > summary_${var}_filtered.txt
-        #STEP3: fetch unique names from Blast summary reports
-        cat summary_${var}_filtered.txt | awk '{print $7}' | awk -F "|" '{print $2}'| sort | uniq | sed 's/Species://' > ${var}_uniq.ids
-        #STEP4: retrieve the best hit for each virus/viroid
-        echo "processing top hits ..."
-        touch ${var}_filtered.txt
-        for id in `cat ${var}_uniq.ids`
-          do
-            #print on the screen the name of the virus/viroids to search
-            #echo "fetching species matches ..." $id
-            #fetch the virus name on the summary_blastn file by selecting the longest alignment (column 3) and highest genome coverage (column 5)
-            grep $id summary_${var}.txt | sort -k3,3nr -k5,5nr | head -1 >> ${var}_filtered.txt
-          done
-        #print the header of the initial summary_blastn file
-        cat summary_${var}.txt | head -1 > header
-        #report 1
-        cat header ${var}_filtered.txt > $2
-        #removing intermediate files
-        rm summary_${file}.txt ${file}.txt ${file}.top1.ids ${file}_uniq.ids summary_${file}_filtered.txt header* ${var}_filtered.txt *top1Hits.txt
-
 ]]></help>
   <citations>
     <citation type="doi">10.1093/bioinformatics/bts573</citation>
author	fubar
date	Wed, 19 Jul 2023 06:45:26 +0000
parents	a6593725b728
children	3a499f3ed69c