Mercurial > repos > drosofff > msp_blastparser_and_hits
diff BlastParser_and_hits.py @ 6:78c34df2dd8d draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit 3048cdbea989bc7d28326bf9479fc3010ff8b33c
author | drosofff |
---|---|
date | Tue, 02 Feb 2016 11:38:51 -0500 |
parents | a0dec1a0f2ef |
children | 1991c830504a |
line wrap: on
line diff
```diff
--- a/BlastParser_and_hits.py	Fri Jan 15 12:51:19 2016 -0500
+++ b/BlastParser_and_hits.py	Tue Feb 02 11:38:51 2016 -0500
@@ -21,6 +21,7 @@
     the_parser.add_argument('--filter_term_out', action="store", type=str, default="", help="exclude the specified term from the subject list")
     the_parser.add_argument('--al_sequences', action="store", type=str, help="sequences that have been blast aligned")
     the_parser.add_argument('--un_sequences', action="store", type=str, help="sequences that have not been blast aligned")
+    the_parser.add_argument('--dataset_name', action="store", type=str, default="", help="the name of the dataset that has been parsed, to be reported in the output")
     args = the_parser.parse_args()
     if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ):
         the_parser.error('argument(s) missing, call the -h option of the script')
@@ -129,7 +130,7 @@
         leftCoordinate = 1
     return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity)
 
-def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out="", mode="verbose"):
+def outputParsing (dataset_name, F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out="", mode="verbose"):
     def filter_results (results, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out=""):
         for subject in results.keys():
             if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov:
@@ -160,9 +161,10 @@
             blasted_transcripts.append(transcript)
     blasted_transcripts = list( set( blasted_transcripts))
     if mode == "verbose":
-        print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n"
+        print >>F, "--- %s ---" % (dataset_name)
+        print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore"
         for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True):
-            print >> F, "#\n# %s" % subject
+            print >> F, " \n# %s" % subject
             print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"])
             print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"])
             print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"])
@@ -181,6 +183,7 @@
                 info = "\t".join(info)
                 print >> F, info
     else:
+        print >>F, "--- %s ---" % (dataset_name)
         print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tBest Bit Score\tMean Bit Score"
         for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True):
             line = []
@@ -219,7 +222,7 @@
     results = defaultdict(dict)
     for subject in Xblastdict:
         results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking)
-    blasted_transcripts = outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict,
+    blasted_transcripts = outputParsing (args.dataset_name, args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict,
                                         filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore,
                                         filter_meanScore=args.filter_meanScore, filter_term_in=args.filter_term_in,
                                         filter_term_out=args.filter_term_out, mode=args.mode)
```