Mercurial > repos > artbio > cherry_pick_fasta
comparison cherry_pick_fasta.py @ 1:ea8fde9c6f82 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit f527add7e7bace30b8bc67524ff1da1bf920ec29"
author | artbio |
---|---|
date | Wed, 09 Oct 2019 18:48:17 -0400 |
parents | e3aee4ba49c6 |
children | 321cad0eb507 |
comparison
legend: equal, deleted, inserted, replaced
0:e3aee4ba49c6 | 1:ea8fde9c6f82 |
---|---|
10 def Parser(): | 10 def Parser(): |
11 the_parser = argparse.ArgumentParser( | 11 the_parser = argparse.ArgumentParser( |
12 description="Cherry pick fasta sequences") | 12 description="Cherry pick fasta sequences") |
13 the_parser.add_argument('--input', action="store", type=str, | 13 the_parser.add_argument('--input', action="store", type=str, |
14 help="input fasta file") | 14 help="input fasta file") |
15 the_parser.add_argument('--searchfor', action="store", type=str, | |
16 help="with, without, or withlist, withoutlist") | |
15 the_parser.add_argument('--query-string', dest="query_string", | 17 the_parser.add_argument('--query-string', dest="query_string", |
16 action="store", type=str, | 18 action="store", type=str, |
17 help="header containing the string will be\ | 19 help="headers containing the string will be \ |
18 extracted as well as the corresponding\ | 20 extracted or excluded as well as the \ |
19 sequence") | 21 corresponding sequence") |
22 the_parser.add_argument('--query-file', dest="query_file", | |
23 action="store", type=str, | |
24 help="headers containing any of the strings provided in the \ | |
25 text file (1 string per line) will be \ | |
26 extracted or excluded as well as the \ | |
27 corresponding sequence") | |
28 | |
20 the_parser.add_argument( | 29 the_parser.add_argument( |
21 '--output', action="store", type=str, help="output fasta file") | 30 '--output', action="store", type=str, help="output fasta file") |
22 args = the_parser.parse_args() | 31 args = the_parser.parse_args() |
23 return args | 32 return args |
24 | 33 |
25 | 34 |
35 def parse_fasta_with(query, FastaListe): | |
36 if not isinstance(query, list): | |
37 query = [query] | |
38 accumulator = [] | |
39 for sequence in FastaListe: | |
40 for string in query: | |
41 if string in sequence: | |
42 accumulator.append(sequence) | |
43 continue | |
44 return accumulator | |
45 | |
46 | |
47 def complement_fasta(fullfasta, subfasta): | |
48 return list(set(fullfasta) - set(subfasta)) | |
49 | |
50 | |
51 def getquerylist(file): | |
52 querylist = [] | |
53 for line in open(file, 'r'): | |
54 querylist.append(line.rstrip()) | |
55 return querylist | |
56 | |
57 | |
26 def __main__(): | 58 def __main__(): |
27 """ main function """ | 59 """ main function """ |
28 args = Parser() | 60 args = Parser() |
29 search_term = args.query_string | 61 searchterm = args.query_string |
30 CrudeFasta = open(args.input, "r").read() | 62 CrudeFasta = open(args.input, "r").read() |
31 Output = open(args.output, "w") | 63 Output = open(args.output, "w") |
32 FastaListe = CrudeFasta.split(">") | 64 FastaListe = CrudeFasta.split(">")[1:] |
33 for sequence in FastaListe: | 65 if args.query_string: |
34 if search_term in sequence: | 66 if args.searchfor == 'with': |
35 Output.write(">%s\n" % sequence.rstrip()) | 67 contList = parse_fasta_with(searchterm, FastaListe) |
68 contFasta = ">%s" % ">".join(contList) | |
69 Output.write(contFasta) | |
70 elif args.searchfor == 'without': | |
71 notcontList = complement_fasta(FastaListe, | |
72 parse_fasta_with(searchterm, | |
73 FastaListe)) | |
74 notcontFasta = ">%s" % ">".join(notcontList) | |
75 Output.write(notcontFasta) | |
76 if args.query_file: | |
77 searchlist = getquerylist(args.query_file) | |
78 if args.searchfor == 'with': | |
79 contList = parse_fasta_with(searchlist, FastaListe) | |
80 contFasta = ">%s" % ">".join(contList) | |
81 Output.write(contFasta) | |
82 elif args.searchfor == 'without': | |
83 notcontList = complement_fasta(FastaListe, parse_fasta_with( | |
84 searchlist, FastaListe)) | |
85 notcontFasta = ">%s" % ">".join(notcontList) | |
86 Output.write(notcontFasta) | |
36 Output.close() | 87 Output.close() |
37 | 88 |
38 | 89 |
39 if __name__ == "__main__": | 90 if __name__ == "__main__": |
40 __main__() | 91 __main__() |