comparison BlastParser_and_hits.py @ 6:78c34df2dd8d draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit 3048cdbea989bc7d28326bf9479fc3010ff8b33c
author drosofff
date Tue, 02 Feb 2016 11:38:51 -0500
parents a0dec1a0f2ef
children 1991c830504a
comparison
equal deleted inserted replaced
5:a0dec1a0f2ef 6:78c34df2dd8d
19 the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out mean BitScores below the specified float number") 19 the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out mean BitScores below the specified float number")
20 the_parser.add_argument('--filter_term_in', action="store", type=str, default="", help="select the specified term in the subject list") 20 the_parser.add_argument('--filter_term_in', action="store", type=str, default="", help="select the specified term in the subject list")
21 the_parser.add_argument('--filter_term_out', action="store", type=str, default="", help="exclude the specified term from the subject list") 21 the_parser.add_argument('--filter_term_out', action="store", type=str, default="", help="exclude the specified term from the subject list")
22 the_parser.add_argument('--al_sequences', action="store", type=str, help="sequences that have been blast aligned") 22 the_parser.add_argument('--al_sequences', action="store", type=str, help="sequences that have been blast aligned")
23 the_parser.add_argument('--un_sequences', action="store", type=str, help="sequences that have not been blast aligned") 23 the_parser.add_argument('--un_sequences', action="store", type=str, help="sequences that have not been blast aligned")
24 the_parser.add_argument('--dataset_name', action="store", type=str, default="", help="the name of the dataset that has been parsed, to be reported in the output")
24 args = the_parser.parse_args() 25 args = the_parser.parse_args()
25 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ): 26 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ):
26 the_parser.error('argument(s) missing, call the -h option of the script') 27 the_parser.error('argument(s) missing, call the -h option of the script')
27 if not args.flanking: 28 if not args.flanking:
28 args.flanking = 0 29 args.flanking = 0
127 leftCoordinate -= FlankingValue 128 leftCoordinate -= FlankingValue
128 else: 129 else:
129 leftCoordinate = 1 130 leftCoordinate = 1
130 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) 131 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity)
131 132
132 def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out="", mode="verbose"): 133 def outputParsing (dataset_name, F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out="", mode="verbose"):
133 def filter_results (results, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out=""): 134 def filter_results (results, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out=""):
134 for subject in results.keys(): 135 for subject in results.keys():
135 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov: 136 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov:
136 del results[subject] 137 del results[subject]
137 continue 138 continue
158 for subject in results: 159 for subject in results:
159 for transcript in Xblastdict[subject]: 160 for transcript in Xblastdict[subject]:
160 blasted_transcripts.append(transcript) 161 blasted_transcripts.append(transcript)
161 blasted_transcripts = list( set( blasted_transcripts)) 162 blasted_transcripts = list( set( blasted_transcripts))
162 if mode == "verbose": 163 if mode == "verbose":
163 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" 164 print >>F, "--- %s ---" % (dataset_name)
165 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore"
164 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): 166 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True):
165 print >> F, "#\n# %s" % subject 167 print >> F, " \n# %s" % subject
166 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"]) 168 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"])
167 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"]) 169 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"])
168 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"]) 170 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"])
169 print >> F, "# Best Bit Score: %s" % (results[subject]["maxBitScores"]) 171 print >> F, "# Best Bit Score: %s" % (results[subject]["maxBitScores"])
170 print >> F, "# Mean Bit Score: %s" % (results[subject]["meanBitScores"]) 172 print >> F, "# Mean Bit Score: %s" % (results[subject]["meanBitScores"])
179 info = [transcript] + [percentIdentity, alignLenght, subjectStart, subjectEnd, queryCov, Eval, BitScore] 181 info = [transcript] + [percentIdentity, alignLenght, subjectStart, subjectEnd, queryCov, Eval, BitScore]
180 info = [str(i) for i in info] 182 info = [str(i) for i in info]
181 info = "\t".join(info) 183 info = "\t".join(info)
182 print >> F, info 184 print >> F, info
183 else: 185 else:
186 print >>F, "--- %s ---" % (dataset_name)
184 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tBest Bit Score\tMean Bit Score" 187 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tBest Bit Score\tMean Bit Score"
185 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): 188 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True):
186 line = [] 189 line = []
187 line.append(subject) 190 line.append(subject)
188 line.append(results[subject]["subjectLength"]) 191 line.append(results[subject]["subjectLength"])
217 fastadict = getfasta (args.sequences) 220 fastadict = getfasta (args.sequences)
218 Xblastdict = getblast (args.blast) 221 Xblastdict = getblast (args.blast)
219 results = defaultdict(dict) 222 results = defaultdict(dict)
220 for subject in Xblastdict: 223 for subject in Xblastdict:
221 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) 224 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking)
222 blasted_transcripts = outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, 225 blasted_transcripts = outputParsing (args.dataset_name, args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict,
223 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, 226 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore,
224 filter_meanScore=args.filter_meanScore, filter_term_in=args.filter_term_in, 227 filter_meanScore=args.filter_meanScore, filter_term_in=args.filter_term_in,
225 filter_term_out=args.filter_term_out, mode=args.mode) 228 filter_term_out=args.filter_term_out, mode=args.mode)
226 dispatch_sequences (fastadict, blasted_transcripts, args.al_sequences, args.un_sequences) 229 dispatch_sequences (fastadict, blasted_transcripts, args.al_sequences, args.un_sequences)
227 230