comparison BlastParser_and_hits.py @ 2:bb0d4cd765c5 draft

planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit 6dee2ab33610e7724e9423cc09818bcbbf11ea82
author drosofff
date Tue, 29 Sep 2015 06:32:31 -0400
parents 1964514aabde
children 8f5d48294f70
comparison
equal deleted inserted replaced
1:1964514aabde 2:bb0d4cd765c5
128 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) 128 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity)
129 129
130 def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, mode="verbose"): 130 def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, mode="verbose"):
131 F= open(F, "w") 131 F= open(F, "w")
132 Fasta=open(Fasta, "w") 132 Fasta=open(Fasta, "w")
133 blasted_transcripts = []
134 for subject in results:
135 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore:
136 continue
137 for transcript in Xblastdict[subject]:
138 blasted_transcripts.append(transcript)
139 blasted_transcripts = list( set( blasted_transcripts))
133 if mode == "verbose": 140 if mode == "verbose":
134 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" 141 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n"
135 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): 142 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True):
136 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: 143 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore:
137 continue 144 continue
170 for header in results[subject]["HitDic"]: 177 for header in results[subject]["HitDic"]:
171 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) ) 178 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) )
172 print >> Fasta, "" # final carriage return for the sequence 179 print >> Fasta, "" # final carriage return for the sequence
173 F.close() 180 F.close()
174 Fasta.close() 181 Fasta.close()
182 return blasted_transcripts
175 183
176 def sort_sequences (fastadict, blastdict, matched_sequences, unmatched_sequences): 184 def dispatch_sequences (fastadict, blasted_transcripts, matched_sequences, unmatched_sequences):
177 '''to output the sequences that matched and did not matched in the blast''' 185 '''to output the sequences that matched and did not matched in the blast'''
178 blasted_transcripts = []
179 for subject in blastdict:
180 for transcript in blastdict[subject]:
181 blasted_transcripts.append(transcript)
182 blasted_transcripts = list( set( blasted_transcripts))
183 F_matched = open (matched_sequences, "w") 186 F_matched = open (matched_sequences, "w")
184 F_unmatched = open (unmatched_sequences, "w") 187 F_unmatched = open (unmatched_sequences, "w")
185 for transcript in fastadict: 188 for transcript in fastadict:
186 if transcript in blasted_transcripts: 189 if transcript in blasted_transcripts: # le list of blasted_transcripts is generated by the outputParsing function
187 print >> F_matched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) ) 190 print >> F_matched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) )
188 else: 191 else:
189 print >> F_unmatched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) ) 192 print >> F_unmatched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) )
190 F_matched.close() 193 F_matched.close()
191 F_unmatched.close() 194 F_unmatched.close()
193 196
194 def __main__ (): 197 def __main__ ():
195 args = Parser() 198 args = Parser()
196 fastadict = getfasta (args.sequences) 199 fastadict = getfasta (args.sequences)
197 Xblastdict = getblast (args.blast) 200 Xblastdict = getblast (args.blast)
198 sort_sequences (fastadict, Xblastdict, args.al_sequences, args.un_sequences)
199 results = defaultdict(dict) 201 results = defaultdict(dict)
200 for subject in Xblastdict: 202 for subject in Xblastdict:
201 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) 203 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking)
202 outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, 204 blasted_transcripts = outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict,
203 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, 205 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore,
204 filter_meanScore=args.filter_meanScore, mode=args.mode) 206 filter_meanScore=args.filter_meanScore, mode=args.mode)
207 dispatch_sequences (fastadict, blasted_transcripts, args.al_sequences, args.un_sequences)
208
205 if __name__=="__main__": __main__() 209 if __name__=="__main__": __main__()