Mercurial > repos > drosofff > msp_blastparser_and_hits
diff BlastParser_and_hits.py @ 2:bb0d4cd765c5 draft
planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit 6dee2ab33610e7724e9423cc09818bcbbf11ea82
author | drosofff |
---|---|
date | Tue, 29 Sep 2015 06:32:31 -0400 |
parents | 1964514aabde |
children | 8f5d48294f70 |
line wrap: on
line diff
--- a/BlastParser_and_hits.py Mon Sep 14 12:18:46 2015 -0400 +++ b/BlastParser_and_hits.py Tue Sep 29 06:32:31 2015 -0400 @@ -130,6 +130,13 @@ def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, mode="verbose"): F= open(F, "w") Fasta=open(Fasta, "w") + blasted_transcripts = [] + for subject in results: + if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: + continue + for transcript in Xblastdict[subject]: + blasted_transcripts.append(transcript) + blasted_transcripts = list( set( blasted_transcripts)) if mode == "verbose": print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): @@ -172,18 +179,14 @@ print >> Fasta, "" # final carriage return for the sequence F.close() Fasta.close() + return blasted_transcripts -def sort_sequences (fastadict, blastdict, matched_sequences, unmatched_sequences): +def dispatch_sequences (fastadict, blasted_transcripts, matched_sequences, unmatched_sequences): '''to output the sequences that matched and did not matched in the blast''' - blasted_transcripts = [] - for subject in blastdict: - for transcript in blastdict[subject]: - blasted_transcripts.append(transcript) - blasted_transcripts = list( set( blasted_transcripts)) F_matched = open (matched_sequences, "w") F_unmatched = open (unmatched_sequences, "w") for transcript in fastadict: - if transcript in blasted_transcripts: + if transcript in blasted_transcripts: # le list of blasted_transcripts is generated by the outputParsing function print >> F_matched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) ) else: print >> F_unmatched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) ) @@ -195,11 +198,12 @@ args = Parser() fastadict = getfasta (args.sequences) Xblastdict = getblast (args.blast) - sort_sequences (fastadict, Xblastdict, args.al_sequences, args.un_sequences) results = defaultdict(dict) for subject in Xblastdict: results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) - outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, - filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, - filter_meanScore=args.filter_meanScore, mode=args.mode) + blasted_transcripts = outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, + filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, + filter_meanScore=args.filter_meanScore, mode=args.mode) + dispatch_sequences (fastadict, blasted_transcripts, args.al_sequences, args.un_sequences) + if __name__=="__main__": __main__()