# HG changeset patch # User drosofff # Date 1443522751 14400 # Node ID bb0d4cd765c533880dfc15fa1481543445817773 # Parent 1964514aabdeb365d94d80038abdfb2c80a32124 planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit 6dee2ab33610e7724e9423cc09818bcbbf11ea82 diff -r 1964514aabde -r bb0d4cd765c5 BlastParser_and_hits.py --- a/BlastParser_and_hits.py Mon Sep 14 12:18:46 2015 -0400 +++ b/BlastParser_and_hits.py Tue Sep 29 06:32:31 2015 -0400 @@ -130,6 +130,13 @@ def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, mode="verbose"): F= open(F, "w") Fasta=open(Fasta, "w") + blasted_transcripts = [] + for subject in results: + if results[subject]["RelativeSubjectCoverage"]>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): @@ -172,18 +179,14 @@ print >> Fasta, "" # final carriage return for the sequence F.close() Fasta.close() + return blasted_transcripts -def sort_sequences (fastadict, blastdict, matched_sequences, unmatched_sequences): +def dispatch_sequences (fastadict, blasted_transcripts, matched_sequences, unmatched_sequences): '''to output the sequences that matched and did not matched in the blast''' - blasted_transcripts = [] - for subject in blastdict: - for transcript in blastdict[subject]: - blasted_transcripts.append(transcript) - blasted_transcripts = list( set( blasted_transcripts)) F_matched = open (matched_sequences, "w") F_unmatched = open (unmatched_sequences, "w") for transcript in fastadict: - if transcript in blasted_transcripts: + if transcript in blasted_transcripts: # the list of blasted_transcripts is generated by the outputParsing function print >> F_matched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) ) else: print >> F_unmatched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) ) @@ 
-195,11 +198,12 @@ args = Parser() fastadict = getfasta (args.sequences) Xblastdict = getblast (args.blast) - sort_sequences (fastadict, Xblastdict, args.al_sequences, args.un_sequences) results = defaultdict(dict) for subject in Xblastdict: results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) - outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, - filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, - filter_meanScore=args.filter_meanScore, mode=args.mode) + blasted_transcripts = outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, + filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, + filter_meanScore=args.filter_meanScore, mode=args.mode) + dispatch_sequences (fastadict, blasted_transcripts, args.al_sequences, args.un_sequences) + if __name__=="__main__": __main__() diff -r 1964514aabde -r bb0d4cd765c5 BlastParser_and_hits.xml --- a/BlastParser_and_hits.xml Mon Sep 14 12:18:46 2015 -0400 +++ b/BlastParser_and_hits.xml Tue Sep 29 06:32:31 2015 -0400 @@ -1,4 +1,4 @@ - + for virus discovery @@ -66,7 +66,7 @@ **What it does** -Parse blast outputs for viruses genome assembly. Outputs analysis and hit sequences for further assembly. Output also the contig sequences which have or not been blast aligned with the indicated cut-off +Parse blast outputs for viruses genome assembly. Outputs analysis and hit sequences for further assembly. Also outputs the contig sequences which have or have not been blast aligned, taking into account all indicated cut-offs