comparison format_fasta_hairpins.py @ 10:de227b7307cf draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
author artbio
date Sun, 29 Apr 2018 18:57:13 -0400
parents
children b045c30fb768
comparison
equal deleted inserted replaced
9:2a08a6eb471c 10:de227b7307cf
1 import argparse
2 import gzip
3
4
def Parser():
    """Parse the command-line options of the hairpin formatter.

    Returns:
        argparse.Namespace: carries the string attributes
        ``hairpins_path``, ``output`` and ``basename``.

    All three options are mandatory. Leaving one out used to hand ``None``
    to ``gzip.open``/``open``/``in`` further down and end in a traceback;
    argparse now reports the missing option and exits instead.
    """
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument(
        '--hairpins_path', action="store", type=str, required=True,
        # The value is opened with gzip.open(), so it is a file path and
        # not a base url.
        help="path to the gzipped hairpin fasta file. "
             "ex: mirbase/22/hairpin.fa.gz")
    the_parser.add_argument(
        '--output', action="store", type=str, required=True,
        help="parsed hairpin output in fasta format")
    the_parser.add_argument(
        '--basename', action="store", type=str, required=True,
        help="genome basename of the parsed fasta")
    return the_parser.parse_args()
18
19
def get_fasta_dic(gzipfile):
    '''
    Read a gzip-compressed fasta file into a dictionary.

    gzipfile value example : 'mirbase/22/hairpin.fa.gz'

    Returns a dict mapping the first word of each header line (text after
    ">") to the concatenation of the sequence lines that follow it.

    Blank lines and surrounding whitespace (including "\\r" from CRLF
    files) are ignored. Records lacking a name or a sequence are skipped,
    so an empty file yields an empty dict. When a name occurs several
    times the last record wins.
    '''
    item_dic = {}
    current_item = ''
    stringlist = []
    # Text mode decodes once at the I/O boundary instead of once per line.
    with gzip.open(gzipfile, 'rt', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # blank line: nothing to index, and line[0] would fail
                continue
            if line.startswith(">"):
                # dump the sequence of the previous item
                if current_item and stringlist:
                    item_dic[current_item] = "".join(stringlist)
                # take first word of item; a bare ">" gives no name
                words = line[1:].split()
                current_item = words[0] if words else ''
                stringlist = []
            else:
                stringlist.append(line)
    # for the last item, under the same rule as the previous ones
    if current_item and stringlist:
        item_dic[current_item] = "".join(stringlist)
    return item_dic
41
42
def convert_and_print_hairpins(gzipfile, basename, fasta_output):
    """Write the hairpins whose name contains *basename* as a fasta file.

    The records of *gzipfile* are read with get_fasta_dic(); those whose
    header name contains the substring *basename* are kept, every "u"/"U"
    in their sequence is replaced by "t"/"T", and they are written to
    *fasta_output* sorted by name, one header line and one sequence line
    per record.
    """
    all_records = get_fasta_dic(gzipfile)
    u_to_t = str.maketrans("uU", "tT")
    selected = {
        name: sequence.translate(u_to_t)
        for name, sequence in all_records.items()
        if basename in name
    }
    with open(fasta_output, "w") as handle:
        # names are unique dict keys, so sorting the pairs sorts by name
        handle.writelines(
            '>%s\n%s\n' % record for record in sorted(selected.items()))
53
54
def main(hairpins_path, basename, outfile):
    """Run the conversion of *hairpins_path* into the fasta file *outfile*.

    Only forwards its arguments to convert_and_print_hairpins().
    """
    convert_and_print_hairpins(
        gzipfile=hairpins_path,
        basename=basename,
        fasta_output=outfile,
    )
57
58
if __name__ == "__main__":
    # Script entry point: read the command-line options, then convert.
    cli_args = Parser()
    main(
        hairpins_path=cli_args.hairpins_path,
        basename=cli_args.basename,
        outfile=cli_args.output,
    )