Mercurial > repos > artbio > cherry_pick_fasta
annotate cherry_pick_fasta.py @ 7:6c0aefd9fee3 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit 849d6d2087dadb81f1b790e3bcb5bda40c3c83af
author | artbio |
---|---|
date | Thu, 29 Dec 2022 11:53:05 +0000 |
parents | d8fa616a228a |
children |
rev | line source |
---|---|
3
c282a8a47dd9
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents:
2
diff
changeset
|
1 import argparse |
7
6c0aefd9fee3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit 849d6d2087dadb81f1b790e3bcb5bda40c3c83af
artbio
parents:
6
diff
changeset
|
2 from collections import defaultdict |
0
e3aee4ba49c6
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit a5e865d017e0434dae013565929ad5e6e5129fd3
artbio
parents:
diff
changeset
|
3 |
e3aee4ba49c6
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit a5e865d017e0434dae013565929ad5e6e5129fd3
artbio
parents:
diff
changeset
|
4 |
e3aee4ba49c6
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit a5e865d017e0434dae013565929ad5e6e5129fd3
artbio
parents:
diff
changeset
|
def Parser(argv=None):
    """Build the command-line parser and return the parsed arguments.

    Parameters:
        argv: optional list of argument strings to parse. When None (the
            default) argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` holding ``input``, ``searchfor``,
        ``mode``, ``query_string``, ``query_file`` and ``output``.
    """
    the_parser = argparse.ArgumentParser(
        description='Cherry pick fasta sequences')
    the_parser.add_argument('--input', action='store', type=str,
                            help='input fasta file')
    # Only 'with' and 'without' are handled by the main function.
    the_parser.add_argument('--searchfor', action='store', type=str,
                            choices=['with', 'without'],
                            help='with: keep the matching sequences; '
                                 'without: keep the other sequences')
    the_parser.add_argument('--mode', action='store', type=str,
                            choices=['exact', 'includes'],
                            default='includes', help='exact or includes')
    # Help texts use implicit string concatenation so that no source
    # indentation ends up inside the message.
    the_parser.add_argument('--query-string', dest='query_string',
                            action='store', type=str,
                            help='headers containing the string will be '
                                 'extracted or excluded as well as the '
                                 'corresponding sequence')
    the_parser.add_argument('--query-file', dest='query_file',
                            action='store', type=str,
                            help='headers containing any of the strings '
                                 'provided in the text file (1 string per '
                                 'line) will be extracted or excluded as '
                                 'well as the corresponding sequence')
    the_parser.add_argument('--output', action='store', type=str,
                            help='output fasta file')
    args = the_parser.parse_args(argv)
    return args
e3aee4ba49c6
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit a5e865d017e0434dae013565929ad5e6e5129fd3
artbio
parents:
diff
changeset
|
29 |
e3aee4ba49c6
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit a5e865d017e0434dae013565929ad5e6e5129fd3
artbio
parents:
diff
changeset
|
30 |
3
c282a8a47dd9
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents:
2
diff
changeset
|
def _header_has_keyword(header, keywords, sizes):
    """Return True if any substring of header is one of the keywords.

    Only substrings whose length is in sizes are examined, so the cost is
    independent of the number of keywords.
    """
    for size in sizes:
        # An empty range results when the keyword is longer than the header.
        for start in range(len(header) - size + 1):
            if header[start:start + size] in keywords:
                return True
    return False


def parse_fasta_dict(query, fasta_dict, mode):
    """Select the fasta records whose header matches the query.

    Parameters:
        query: a single query string or a list of query strings.
        fasta_dict: dictionary mapping fasta headers to sequences.
        mode: 'includes' keeps a record when its header contains at least
            one query string; 'exact' keeps a record when its header is
            equal to one of the query strings.

    Returns:
        A new dictionary with the selected records, in the iteration order
        of fasta_dict.

    Raises:
        ValueError: if mode is neither 'includes' nor 'exact'.
    """
    if not isinstance(query, list):
        query = [query]
    keywords = set(query)
    if mode == 'includes':
        # Scan the header substrings of each distinct query length and look
        # them up in the keyword set, instead of indexing every k-mer of
        # every header in memory.
        sizes = sorted({len(word) for word in keywords})
        return {header: sequence
                for header, sequence in fasta_dict.items()
                if _header_has_keyword(header, keywords, sizes)}
    if mode == 'exact':
        return {header: sequence
                for header, sequence in fasta_dict.items()
                if header in keywords}
    raise ValueError("unknown mode %r (expected 'includes' or 'exact')" % mode)
1
ea8fde9c6f82
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit f527add7e7bace30b8bc67524ff1da1bf920ec29"
artbio
parents:
0
diff
changeset
|
75 |
ea8fde9c6f82
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit f527add7e7bace30b8bc67524ff1da1bf920ec29"
artbio
parents:
0
diff
changeset
|
76 |
3
c282a8a47dd9
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents:
2
diff
changeset
|
def complement_fasta_dict(fasta_dict, subfasta_dict):
    """Return the records of fasta_dict whose header is not in subfasta_dict.

    Parameters:
        fasta_dict: dictionary mapping fasta headers to sequences.
        subfasta_dict: dictionary whose keys are the headers to leave out.

    Returns:
        A new dictionary with the remaining records, in the iteration order
        of fasta_dict.
    """
    # Dictionary membership is a hash lookup, so this stays linear in the
    # number of records instead of scanning a list for every header.
    return {header: sequence
            for header, sequence in fasta_dict.items()
            if header not in subfasta_dict}
1
ea8fde9c6f82
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit f527add7e7bace30b8bc67524ff1da1bf920ec29"
artbio
parents:
0
diff
changeset
|
83 |
ea8fde9c6f82
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit f527add7e7bace30b8bc67524ff1da1bf920ec29"
artbio
parents:
0
diff
changeset
|
84 |
ea8fde9c6f82
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit f527add7e7bace30b8bc67524ff1da1bf920ec29"
artbio
parents:
0
diff
changeset
|
def getquerylist(file):
    """Read the query strings from a text file, one query per line.

    Trailing whitespace (including the newline) is removed from each line.
    Lines that are empty after stripping are skipped: an empty query is a
    substring of every header and would select all records in 'includes'
    mode.

    Parameters:
        file: path of the text file to read.

    Returns:
        The list of query strings, in file order.
    """
    # The context manager closes the file as soon as it has been read.
    with open(file, 'r') as handle:
        stripped_lines = (line.rstrip() for line in handle)
        return [entry for entry in stripped_lines if entry]
ea8fde9c6f82
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit f527add7e7bace30b8bc67524ff1da1bf920ec29"
artbio
parents:
0
diff
changeset
|
90 |
ea8fde9c6f82
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit f527add7e7bace30b8bc67524ff1da1bf920ec29"
artbio
parents:
0
diff
changeset
|
91 |
3
c282a8a47dd9
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents:
2
diff
changeset
|
def buid_fasta_dict(fasta):
    """Load a fasta file into a dictionary mapping headers to sequences.

    A record starts at a line beginning with '>'; the rest of that line
    (without the '>') is the header, and the following lines up to the
    next record are concatenated into the sequence. A '>' character found
    elsewhere than at the start of a line is kept as ordinary text. Lines
    located before the first header are ignored. When a header occurs
    several times, the last record wins.

    Parameters:
        fasta: path of the fasta file to read.

    Returns:
        The dictionary of sequences keyed by header.
    """
    seq_dict = dict()
    header = None
    chunks = []
    # Stream the file line by line; the context manager closes it on exit.
    with open(fasta, 'r') as handle:
        for line in handle:
            line = line.rstrip('\n')
            if line.startswith('>'):
                if header is not None:
                    seq_dict[header] = ''.join(chunks)
                header = line[1:]
                chunks = []
            elif header is not None:
                chunks.append(line)
    # Store the last record, which is not followed by another header.
    if header is not None:
        seq_dict[header] = ''.join(chunks)
    return seq_dict
c282a8a47dd9
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents:
2
diff
changeset
|
105 |
c282a8a47dd9
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents:
2
diff
changeset
|
106 |
c282a8a47dd9
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents:
2
diff
changeset
|
def write_fasta_result(fasta_dict, file):
    """Write the records of fasta_dict to a fasta file.

    Records are written in sorted header order. Each sequence is wrapped
    into lines of at most 60 characters; an empty sequence is written as a
    single empty line.

    Parameters:
        fasta_dict: dictionary mapping fasta headers to sequences.
        file: path of the output file, which is overwritten.
    """
    width = 60
    with open(file, 'w') as handle:
        for name in sorted(fasta_dict):
            residues = fasta_dict[name]
            # Slice the sequence into consecutive fixed-width pieces; fall
            # back to one empty piece so that an empty sequence still
            # yields its (blank) sequence line.
            pieces = [residues[start:start + width]
                      for start in range(0, len(residues), width)] or ['']
            handle.write('>%s\n' % name)
            for piece in pieces:
                handle.write('%s\n' % piece)
0
e3aee4ba49c6
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit a5e865d017e0434dae013565929ad5e6e5129fd3
artbio
parents:
diff
changeset
|
119 |
e3aee4ba49c6
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit a5e865d017e0434dae013565929ad5e6e5129fd3
artbio
parents:
diff
changeset
|
120 |
3
c282a8a47dd9
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents:
2
diff
changeset
|
def __main__():
    """Run the command-line workflow.

    Parses the arguments, selects the fasta records whose header matches
    the query (or, with '--searchfor without', the other records) and
    writes them to the output file.

    Raises:
        SystemExit: with an explanatory message when no query is provided
            or when the '--searchfor' value is not supported.
    """
    args = Parser()
    if args.query_string:
        query = args.query_string
    elif args.query_file:
        query = getquerylist(args.query_file)
    else:
        # Without this guard, "query" would be unbound further down.
        raise SystemExit('error: provide a query with --query-string '
                         'or --query-file')
    if args.searchfor not in ('with', 'without'):
        # Without this guard, no result would be bound before writing.
        raise SystemExit("error: --searchfor must be 'with' or 'without', "
                         'got %r' % args.searchfor)
    fasta_dict = buid_fasta_dict(args.input)
    fasta_result_dict = parse_fasta_dict(query, fasta_dict, args.mode)
    if args.searchfor == 'without':
        fasta_result_dict = complement_fasta_dict(fasta_dict,
                                                  fasta_result_dict)
    write_fasta_result(fasta_result_dict, args.output)
c282a8a47dd9
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents:
2
diff
changeset
|
136 |
c282a8a47dd9
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents:
2
diff
changeset
|
137 |
c282a8a47dd9
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents:
2
diff
changeset
|
# Run the command-line workflow only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    __main__()