Mercurial > repos > artbio > mircounts
view format_fasta_hairpins.py @ 14:c163574c246f draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 51dc6c56c7d95fc229ffee958354211cd454fd36"
author | artbio |
---|---|
date | Sun, 09 May 2021 17:10:07 +0000 |
parents | b045c30fb768 |
children |
line wrap: on
line source
import argparse
import gzip


def Parser():
    '''
    Parse the command line and return the resulting argparse namespace.

    The namespace carries three string attributes: hairpins_path, output
    and basename. None of them is declared as required, so an omitted
    option is returned as None.
    '''
    the_parser = argparse.ArgumentParser()
    # NOTE(review): the help text mentions a "BASE url", but main() hands
    # this value unchanged to gzip.open(); it looks like a path to a
    # gzipped fasta file is expected -- confirm before rewording.
    the_parser.add_argument(
        '--hairpins_path', action="store", type=str,
        help="BASE url. ex: /pub/mirbase/22/")
    the_parser.add_argument(
        '--output', action="store", type=str,
        help="parsed hairpin output in fasta format")
    the_parser.add_argument(
        '--basename', action="store", type=str,
        help="genome basename of the parsed fasta")
    args = the_parser.parse_args()
    return args


def get_fasta_dic(gzipfile):
    '''
    Read a gzip-compressed fasta file into a {name: sequence} dictionary.

    gzipfile value example : 'mirbase/22/hairpin.fa.gz'

    The name of a record is the first whitespace-delimited word that
    follows ">" on its header line. The sequence is the concatenation of
    every line found between that header and the next one.

    Blank lines are skipped and both LF and CRLF line endings are
    accepted. A record without a name or without any sequence line is
    dropped, wherever it sits in the file. When a name occurs more than
    once, the last complete record replaces the previous ones.
    '''
    item_dic = {}
    current_item = ''
    stringlist = []
    with gzip.open(gzipfile, 'rb') as f:
        for raw_line in f:
            # Strip CR as well as LF so files with Windows line endings
            # do not leak "\r" into the sequences.
            line = raw_line.decode('utf-8').rstrip('\r\n')
            if not line:
                # Blank lines carry no data; indexing them used to raise.
                continue
            if line.startswith('>'):
                # dump the sequence of the previous item
                if current_item and stringlist:
                    item_dic[current_item] = ''.join(stringlist)
                # take first word of the header; a bare ">" has none
                words = line[1:].split()
                current_item = words[0] if words else ''
                stringlist = []
            else:
                stringlist.append(line)
    # The last record has no following header to trigger its dump; apply
    # the same completeness rule as for the records before it.
    if current_item and stringlist:
        item_dic[current_item] = ''.join(stringlist)
    return item_dic


def convert_and_print_hairpins(gzipfile, basename, fasta_output):
    '''
    Write the records whose name contains basename as a DNA fasta file.

    gzipfile is parsed with get_fasta_dic(). Every record whose name
    contains basename as a substring is kept, its "u"/"U" characters are
    replaced by "t"/"T" (case is preserved), and the kept records are
    written to fasta_output sorted by name, as one header line followed
    by one sequence line each. fasta_output is overwritten if it exists.
    '''
    raw_fasta_dict = get_fasta_dic(gzipfile)
    parsed_fasta_dict = {
        head: sequence.replace('u', 't').replace('U', 'T')
        for head, sequence in raw_fasta_dict.items()
        if basename in head
    }
    with open(fasta_output, "w") as output:
        for head in sorted(parsed_fasta_dict):
            output.write('>%s\n%s\n' % (head, parsed_fasta_dict[head]))


def main(hairpins_path, basename, outfile):
    '''
    Script entry point: convert hairpins_path into outfile for basename.
    '''
    convert_and_print_hairpins(hairpins_path, basename, outfile)


if __name__ == "__main__":
    args = Parser()
    main(args.hairpins_path, args.basename, args.output)