comparison cherry_pick_fasta.py @ 1:ea8fde9c6f82 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit f527add7e7bace30b8bc67524ff1da1bf920ec29"
author artbio
date Wed, 09 Oct 2019 18:48:17 -0400
parents e3aee4ba49c6
children 321cad0eb507
comparison
equal deleted inserted replaced
0:e3aee4ba49c6 1:ea8fde9c6f82
def Parser():
    """Define the command-line interface and parse the process arguments.

    Returns:
        The argparse namespace produced by ``parse_args()``, with the
        attributes ``input``, ``searchfor``, ``query_string``,
        ``query_file`` and ``output``. Options that are not given on the
        command line are ``None``.
    """
    the_parser = argparse.ArgumentParser(
        description="Cherry pick fasta sequences")
    the_parser.add_argument('--input', action="store", type=str,
                            help="input fasta file")
    the_parser.add_argument('--searchfor', action="store", type=str,
                            help="with, without, or withlist, withoutlist")
    # Help texts are built from adjacent string literals rather than
    # backslash continuations inside one literal, so that the source
    # indentation does not end up embedded in the help message.
    the_parser.add_argument('--query-string', dest="query_string",
                            action="store", type=str,
                            help="headers containing the string will be "
                                 "extracted or excluded as well as the "
                                 "corresponding sequence")
    the_parser.add_argument('--query-file', dest="query_file",
                            action="store", type=str,
                            help="headers containing any of the strings "
                                 "provided in the text file (1 string per "
                                 "line) will be extracted or excluded as "
                                 "well as the corresponding sequence")
    the_parser.add_argument(
        '--output', action="store", type=str, help="output fasta file")
    args = the_parser.parse_args()
    return args
24 33
25 34
def parse_fasta_with(query, FastaListe):
    """Select the records that contain at least one of the query strings.

    Args:
        query: a single search string, or a list of search strings.
        FastaListe: iterable of record strings to filter.

    Returns:
        A list of the matching records, in input order. A record that
        matches several query strings is returned only once.

    The test is a plain substring test against each record as a whole.
    NOTE(review): when a record holds both header and sequence text, the
    sequence lines are searched too - confirm that this is intended.
    """
    if not isinstance(query, list):
        query = [query]
    # any() stops at the first hit, so a record matching several query
    # strings is kept once instead of once per matching string.
    return [sequence for sequence in FastaListe
            if any(string in sequence for string in query)]
45
46
def complement_fasta(fullfasta, subfasta):
    """Return the records of fullfasta that do not appear in subfasta.

    Args:
        fullfasta: iterable of all records.
        subfasta: iterable of the records to leave out.

    Returns:
        A list of the remaining records, in the order they have in
        fullfasta. Records that occur several times in fullfasta are
        all kept.
    """
    # The set is only used for membership tests. Filtering the original
    # list keeps the input order, which a set difference would not.
    excluded = set(subfasta)
    return [sequence for sequence in fullfasta if sequence not in excluded]
49
50
def getquerylist(file):
    """Read the query strings from a text file, one per line.

    Args:
        file: path of the text file to read.

    Returns:
        A list of the lines with trailing whitespace removed, in file
        order. Lines that are empty after stripping are left out.
    """
    # The context manager closes the handle even if reading fails.
    with open(file, 'r') as handle:
        stripped = (line.rstrip() for line in handle)
        # An empty string is a substring of every string, so a blank line
        # used as a query would match every record; skip such lines.
        return [line for line in stripped if line]
56
57
def __main__():
    """Filter the input fasta file and write the selected records.

    The input is split on ">" into records (header line plus sequence
    lines); any text before the first ">" is dropped. For each query
    source given on the command line (--query-string, then --query-file),
    the records containing a query string are kept when --searchfor is
    'with', or removed when it is 'without', and the result is written to
    the output file. Any other --searchfor value writes nothing.
    """
    args = Parser()
    with open(args.input, "r") as handle:
        FastaListe = handle.read().split(">")[1:]
    # Each query source is processed independently and appended to the
    # output, in the same order as the two options are documented.
    queries = []
    if args.query_string:
        queries.append(args.query_string)
    if args.query_file:
        queries.append(getquerylist(args.query_file))
    with open(args.output, "w") as Output:
        for query in queries:
            selected = parse_fasta_with(query, FastaListe)
            if args.searchfor == 'without':
                selected = complement_fasta(FastaListe, selected)
            elif args.searchfor != 'with':
                continue
            # Writing record by record emits nothing (rather than a lone
            # ">") for an empty selection, and ends every record with a
            # newline so that a record can never run into the next header.
            Output.writelines(
                ">%s\n" % record.rstrip() for record in selected)


if __name__ == "__main__":
    __main__()