Mercurial > repos > drosofff > msp_blastparser_and_hits
changeset 4:60b6bd959929 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit e842488e979d8a00b9646061573355cb427bc89c
author | drosofff |
---|---|
date | Fri, 15 Jan 2016 12:29:30 -0500 |
parents | 8f5d48294f70 |
children | a0dec1a0f2ef |
files | BlastParser_and_hits.py BlastParser_and_hits.xml |
diffstat | 2 files changed, 35 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/BlastParser_and_hits.py Mon Oct 19 12:13:12 2015 -0400 +++ b/BlastParser_and_hits.py Fri Jan 15 12:29:30 2016 -0500 @@ -1,5 +1,5 @@ #!/usr/bin/python -# blastn blastx parser revised debugged: 3-4-2015. Commit issue. +# blastn tblastn blastx parser revised 14-1-2016. # drosofff@gmail.com import sys @@ -17,6 +17,8 @@ the_parser.add_argument('--filter_relativeCov', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)") the_parser.add_argument('--filter_maxScore', action="store", type=float, default=0, help="filter out best BitScores below the specified float number") the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out mean BitScores below the specified float number") + the_parser.add_argument('--filter_term_in', action="store", type=str, default="", help="select the specified term in the subject list") + the_parser.add_argument('--filter_term_out', action="store", type=str, default="", help="exclude the specified term from the subject list") the_parser.add_argument('--al_sequences', action="store", type=str, help="sequences that have been blast aligned") the_parser.add_argument('--un_sequences', action="store", type=str, help="sequences that have not been blast aligned") args = the_parser.parse_args() @@ -127,21 +129,40 @@ leftCoordinate = 1 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) -def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, mode="verbose"): +def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out="", mode="verbose"): + def filter_results (results, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out=""): + print "###", filter_term_in + for subject in results.keys(): + if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov: + del results[subject] + continue + if results[subject]["maxBitScores"]<filter_maxScore: + del results[subject] + continue + if results[subject]["meanBitScores"]<filter_meanScore: + del results[subject] + continue + if filter_term_in in subject: + pass + else: + del results[subject] + continue + if filter_term_out and filter_term_out in subject: + del results[subject] + continue + return results + F= open(F, "w") Fasta=open(Fasta, "w") blasted_transcripts = [] + filter_results (results, filter_relativeCov, filter_maxScore, filter_meanScore, filter_term_in, filter_term_out) for subject in results: - if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: - continue for transcript in Xblastdict[subject]: blasted_transcripts.append(transcript) blasted_transcripts = list( set( blasted_transcripts)) if mode == "verbose": print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): - if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: - continue print >> F, "#\n# %s" % subject print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"]) print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"]) @@ -163,8 +184,6 @@ else: print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tBest Bit Score\tMean Bit Score" for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): - if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: - continue line = [] line.append(subject) line.append(results[subject]["subjectLength"]) @@ -203,7 +222,8 @@ results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) blasted_transcripts = outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, - filter_meanScore=args.filter_meanScore, mode=args.mode) + filter_meanScore=args.filter_meanScore, filter_term_in=args.filter_term_in, + filter_term_out=args.filter_term_out, mode=args.mode) dispatch_sequences (fastadict, blasted_transcripts, args.al_sequences, args.un_sequences) -if __name__=="__main__": __main__() +if __name__=="__main__": __main__() \ No newline at end of file
--- a/BlastParser_and_hits.xml Mon Oct 19 12:13:12 2015 -0400 +++ b/BlastParser_and_hits.xml Fri Jan 15 12:29:30 2016 -0500 @@ -1,4 +1,4 @@ -<tool id="BlastParser_and_hits" name="Parse blast output and compile hits" version="2.3.2"> +<tool id="BlastParser_and_hits" name="Parse blast output and compile hits" version="2.3.3"> <description>for virus discovery</description> <requirements></requirements> <command interpreter="python"> @@ -14,6 +14,8 @@ --filter_relativeCov $additional_filters.filter_relativeCov --filter_maxScore $additional_filters.filter_maxScore --filter_meanScore $additional_filters.filter_meanScore + --filter_term_in "$additional_filters.filter_term_in" + --filter_term_out "$additional_filters.filter_term_out" #end if --al_sequences $al_sequences --un_sequences $un_sequences @@ -38,6 +40,8 @@ <param name="filter_relativeCov" type="float" value="0" max="1" label="Minimum Relative Subject Coverage" help=""/> <param name="filter_maxScore" type="float" value="0" label="Minimum maximum BitScore" help=""/> <param name="filter_meanScore" type="float" value="0" label="Minimum mean BitScore" help=""/> + <param name="filter_term_in" type="text" value="" label="filter the subject list with a keyword" help=""/> + <param name="filter_term_out" type="text" value="" label="filter the subject list excluding a keyword" help=""/> </when> </conditional> </inputs>