Mercurial > repos > artbio > cherry_pick_fasta
comparison cherry_pick_fasta.py @ 7:6c0aefd9fee3 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit 849d6d2087dadb81f1b790e3bcb5bda40c3c83af
author | artbio |
---|---|
date | Thu, 29 Dec 2022 11:53:05 +0000 |
parents | d8fa616a228a |
children |
comparison
equal
deleted
inserted
replaced
6:d8fa616a228a | 7:6c0aefd9fee3 |
---|---|
1 import argparse | 1 import argparse |
2 from collections import defaultdict | |
2 | 3 |
3 | 4 |
4 def Parser(): | 5 def Parser(): |
5 the_parser = argparse.ArgumentParser( | 6 the_parser = argparse.ArgumentParser( |
6 description='Cherry pick fasta sequences') | 7 description='Cherry pick fasta sequences') |
26 args = the_parser.parse_args() | 27 args = the_parser.parse_args() |
27 return args | 28 return args |
28 | 29 |
29 | 30 |
30 def parse_fasta_dict(query, fasta_dict, mode): | 31 def parse_fasta_dict(query, fasta_dict, mode): |
32 | |
31 if not isinstance(query, list): | 33 if not isinstance(query, list): |
32 query = [query] | 34 query = [query] |
35 | |
36 def kmers(string, ksize, index): | |
37 if ksize > len(string): | |
38 return | |
39 for i in range(len(string) - ksize + 1): | |
40 kmer = string[i:i+ksize] | |
41 index[kmer].append(string) | |
42 | |
43 def consult_index(word, index): | |
44 accumulator = [] | |
45 print(len(index[word])) | |
46 for title in index[word]: | |
47 accumulator.append(title) | |
48 print(len(accumulator)) | |
49 for title in set(accumulator): | |
50 print(title) | |
51 | |
33 accumulator = [] | 52 accumulator = [] |
34 if mode == 'includes': | 53 if mode == 'includes': |
35 for seq_id in fasta_dict: | 54 kmersizes = set([len(word) for word in query]) |
36 for string in query: | 55 index = defaultdict(list) |
37 if string in seq_id: | 56 for size in kmersizes: |
38 accumulator.append(seq_id) | 57 for header in fasta_dict: |
39 continue | 58 kmers(header, size, index) |
59 for keyword in query: | |
60 for header in index[keyword]: | |
61 accumulator.append(header) | |
62 accumulator = set(accumulator) | |
63 res_dict = {k: fasta_dict[k] for k in fasta_dict if k in accumulator} | |
64 return res_dict | |
40 elif mode == 'exact': | 65 elif mode == 'exact': |
41 for seq_id in fasta_dict: | 66 for keyword in query: |
42 for string in query: | 67 try: |
43 if string == seq_id: | 68 len(fasta_dict[keyword]) |
44 accumulator.append(seq_id) | 69 accumulator.append(keyword) |
45 continue | 70 except KeyError: |
46 res_dict = {k: fasta_dict[k] for k in fasta_dict if k in accumulator} | 71 pass |
47 return res_dict | 72 accumulator = set(accumulator) |
73 res_dict = {k: fasta_dict[k] for k in fasta_dict if k in accumulator} | |
74 return res_dict | |
48 | 75 |
49 | 76 |
50 def complement_fasta_dict(fasta_dict, subfasta_dict): | 77 def complement_fasta_dict(fasta_dict, subfasta_dict): |
51 fasta_ids = list(fasta_dict.keys()) | 78 fasta_ids = list(fasta_dict.keys()) |
52 subfasta_ids = list(subfasta_dict.keys()) | 79 subfasta_ids = list(subfasta_dict.keys()) |