diff BlastParser_and_hits.py @ 6:78c34df2dd8d draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit 3048cdbea989bc7d28326bf9479fc3010ff8b33c
author drosofff
date Tue, 02 Feb 2016 11:38:51 -0500
parents a0dec1a0f2ef
children 1991c830504a
line wrap: on
line diff
--- a/BlastParser_and_hits.py	Fri Jan 15 12:51:19 2016 -0500
+++ b/BlastParser_and_hits.py	Tue Feb 02 11:38:51 2016 -0500
@@ -21,6 +21,7 @@
     the_parser.add_argument('--filter_term_out', action="store", type=str, default="", help="exclude the specified term from the subject list")
     the_parser.add_argument('--al_sequences', action="store", type=str, help="sequences that have been blast aligned")
     the_parser.add_argument('--un_sequences', action="store", type=str, help="sequences that have not been blast aligned")
+    the_parser.add_argument('--dataset_name', action="store", type=str, default="", help="the name of the dataset that has been parsed, to be reported in the output")
     args = the_parser.parse_args()
     if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ):
         the_parser.error('argument(s) missing, call the -h option of the script')
@@ -129,7 +130,7 @@
         leftCoordinate = 1
     return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity)
     
-def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out="", mode="verbose"):
+def outputParsing (dataset_name, F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out="", mode="verbose"):
     def filter_results (results, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out=""):
         for subject in results.keys():
             if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov:
@@ -160,9 +161,10 @@
             blasted_transcripts.append(transcript)
     blasted_transcripts = list( set( blasted_transcripts))
     if mode == "verbose":
-        print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n"
+        print >>F, "--- %s ---" % (dataset_name)
+        print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore"
         for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True):
-            print >> F, "#\n# %s" % subject
+            print >> F, " \n# %s" % subject
             print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"])
             print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"])
             print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"])
@@ -181,6 +183,7 @@
                     info = "\t".join(info)
                     print >> F, info
     else:
+        print >>F, "--- %s ---" % (dataset_name)
         print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tBest Bit Score\tMean Bit Score"
         for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True):
             line = []
@@ -219,7 +222,7 @@
     results = defaultdict(dict)
     for subject in Xblastdict:
         results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"]  = subjectCoverage(fastadict, Xblastdict, subject, args.flanking)
-    blasted_transcripts = outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict,
+    blasted_transcripts = outputParsing (args.dataset_name, args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict,
                                         filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore,
                                         filter_meanScore=args.filter_meanScore, filter_term_in=args.filter_term_in,
                                         filter_term_out=args.filter_term_out, mode=args.mode)