Mercurial > repos > artbio > blast_unmatched
annotate blast_unmatched.py @ 0:f3b63b59a1ea draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
author | artbio |
---|---|
date | Tue, 03 Oct 2017 07:19:17 -0400 |
parents | |
children | 50c1fa95a076 |
rev | line source |
---|---|
0
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
2 |
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
3 import optparse |
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
4 |
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
5 |
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
def parse_options():
    """
    Parse the options given to the script.

    Arguments are read from ``sys.argv`` by ``optparse``.

    Returns:
        The ``optparse`` values object exposing ``fasta_file``,
        ``blast_file`` and ``output_file``.

    Exits through ``parser.error`` (usage message, ``SystemExit``) when a
    positional argument is supplied or when one of the three options is
    missing.
    """
    parser = optparse.OptionParser(description='Get unmatched blast queries')
    # The help texts rely on implicit literal concatenation. A backslash
    # continuation inside the literal joins the two halves without a space
    # (or with the continuation line's indentation embedded in the text).
    parser.add_option('-f', '--fasta', dest='fasta_file',
                      help='Query fasta file '
                           'used during blast')
    parser.add_option('-b', '--blast', dest='blast_file',
                      help='Blast tabular '
                           'output (queries in 1rst column)')
    parser.add_option('-o', '--output', dest='output_file',
                      help='Output file name')
    (options, args) = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments')
    # All three paths are opened later on; report a missing one here with a
    # usage message instead of failing later inside open(None).
    required = (
        ('--fasta', options.fasta_file),
        ('--blast', options.blast_file),
        ('--output', options.output_file),
    )
    for flag, value in required:
        if value is None:
            parser.error('Missing required option %s' % flag)
    return options
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
20 |
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
def get_matched(blast_file):
    """
    Get a dictionary of all the queries that got a match.

    Args:
        blast_file: path of a tab-separated file whose first column holds
            the query identifier.

    Returns:
        A dictionary whose keys are the query identifiers found in the
        first column; every value is ``1`` (the dictionary is only used
        for membership tests).
    """
    matched = dict()
    # The context manager closes the file even if reading fails; iterating
    # the handle streams the file instead of loading it whole.
    with open(blast_file, 'r') as blast_file_handle:
        for line in blast_file_handle:
            # Drop the line terminator so a single-column line does not
            # yield an identifier ending in a newline.
            line = line.rstrip('\r\n')
            if not line:
                # Blank lines carry no query identifier.
                continue
            query_id = line.split('\t', 1)[0]
            matched[query_id] = 1
    return matched
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
33 |
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
def get_unmatched(output_file, fasta_file, matched):
    """
    Compare matched queries to the query fasta file and write the
    unmatched records to the output.

    A record starts at a line beginning with ``>`` and runs until the next
    such line. The record is copied to ``output_file`` when its header
    (text after ``>``, cut by the ``[1:100]`` slice and right-stripped) is
    not a key of ``matched``.

    Args:
        output_file: path of the file to write (created or truncated).
        fasta_file: path of the query fasta file to read.
        matched: container of matched query identifiers; only ``in`` is
            used on it.

    Returns:
        None. The result is the content written to ``output_file``.
    """
    # The output is opened first, as before, so it is created or truncated
    # even when the fasta file cannot be opened. Both handles are closed
    # on any exit path.
    with open(output_file, 'w') as output_file_handle, \
            open(fasta_file, 'r') as fasta_file_handle:
        unmatched = False
        # Stream the fasta file instead of loading every line into memory.
        for line in fasta_file_handle:
            if line.startswith('>'):
                # qid are 100chars long in blast
                # NOTE(review): this slice keeps at most 99 characters
                # after '>' -- confirm against the blast output used.
                subline = line[1:100].rstrip()
                unmatched = subline not in matched
            # Header and sequence lines share the flag of their record.
            if unmatched:
                output_file_handle.write(line)
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
53 |
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
def __main__():
    """
    Run the script: parse the command line, collect the matched query
    identifiers from the blast file, then write the unmatched fasta
    records to the output file.
    """
    options = parse_options()
    matched_queries = get_matched(options.blast_file)
    get_unmatched(
        output_file=options.output_file,
        fasta_file=options.fasta_file,
        matched=matched_queries,
    )
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
58 |
f3b63b59a1ea
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 5bd801feb838592fbb1f6dd68b5f1a480042da40
artbio
parents:
diff
changeset
|
# Run the tool only when the file is executed as a script. Testing the
# module name (rather than calling __main__() in the condition) keeps a
# plain import free of side effects.
if __name__ == '__main__':
    __main__()