Mercurial > repos > artbio > blast_unmatched
comparison blast_unmatched.py @ 0:f3b63b59a1ea draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
| author | artbio |
|---|---|
| date | Tue, 03 Oct 2017 07:19:17 -0400 |
| parents | |
| children | 50c1fa95a076 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:f3b63b59a1ea |
|---|---|
| 1 #!/usr/bin/env python3 | |
| 2 | |
| 3 import optparse | |
| 4 | |
| 5 | |
def parse_options():
    """
    Parse the options given to the script.

    Reads the command line through ``optparse`` and returns the parsed
    options object, which carries the attributes ``fasta_file``,
    ``blast_file`` and ``output_file``.

    Exits through ``parser.error`` (usage message, exit status 2) when a
    positional argument is supplied or when one of the three options is
    missing.
    """
    parser = optparse.OptionParser(description='Get unmatched blast queries')
    # Help texts use implicit string concatenation rather than a backslash
    # continuation inside the literal, which would embed the source
    # indentation (or no separator at all) in the displayed help.
    parser.add_option('-f', '--fasta', dest='fasta_file',
                      help='Query fasta file '
                           'used during blast')
    parser.add_option('-b', '--blast', dest='blast_file',
                      help='Blast tabular '
                           'output (queries in 1rst column)')
    parser.add_option('-o', '--output', dest='output_file',
                      help='Output file name')
    (options, args) = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments')
    # optparse has no notion of a required option: an omitted one is left
    # as None and would only fail later when passed to open().
    required = (('--fasta', options.fasta_file),
                ('--blast', options.blast_file),
                ('--output', options.output_file))
    missing = [flag for flag, value in required if value is None]
    if missing:
        parser.error('Missing required option(s): ' + ', '.join(missing))
    return options
| 20 | |
def get_matched(blast_file):
    """
    Get a dictionary of all the queries that got a match.

    ``blast_file`` is the path of a tab-separated file; the first field of
    each line is taken as a query id.

    Returns a dict whose keys are the query ids found (each mapped to 1),
    so that membership can be tested with ``in``.
    """
    matched = dict()
    # The context manager closes the handle even if reading fails, and
    # iterating the handle streams the file instead of loading it whole.
    with open(blast_file, 'r') as blast_file_handle:
        for line in blast_file_handle:
            # Strip the line terminator first: on a line without any tab
            # the first field would otherwise keep its trailing newline.
            query_id = line.rstrip('\r\n').split('\t', 1)[0]
            if query_id:
                matched[query_id] = 1
    return matched
| 33 | |
def get_unmatched(output_file, fasta_file, matched):
    """
    Compare matched queries to the query fasta file and write the
    unmatched records to the output.

    ``output_file`` is the path to write, ``fasta_file`` the path to read,
    and ``matched`` a container of query ids supporting ``in``.

    A record starts at a line beginning with '>' and runs until the next
    such line. A record is copied verbatim to ``output_file`` when its
    header (see the slice below) is not in ``matched``. Lines that come
    before the first header are not written.
    """
    # The input is opened first so that a missing fasta file does not
    # leave a freshly truncated output behind; both handles are closed by
    # the context manager even if an error occurs part-way through.
    with open(fasta_file, 'r') as fasta_file_handle, \
            open(output_file, 'w') as output_file_handle:
        unmatched = False
        for line in fasta_file_handle:
            if line.startswith('>'):
                # qid are 100chars long in blast
                # NOTE(review): [1:100] keeps at most 99 characters after
                # the '>' - confirm against the blast truncation length.
                subline = line[1:100].rstrip()
                unmatched = subline not in matched
            if unmatched:
                output_file_handle.write(line)
| 53 | |
def __main__():
    """
    Script entry point.

    Parses the command line, collects the query ids present in the blast
    file, then writes the fasta records whose id is not among them to the
    output file.
    """
    opts = parse_options()
    matched = get_matched(opts.blast_file)
    get_unmatched(opts.output_file, opts.fasta_file, matched)


# Run only when executed as a script. Testing the function call itself
# ("if __main__():") would run the whole pipeline on import as well, and
# the guarded body would never execute because __main__() returns None.
if __name__ == '__main__':
    __main__()
