mircounts: mature_mir_gff_translation.py comparison

comparison mature_mir_gff_translation.py @ 5:9ea96a02c416 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 04980585c257ab5f8eb5d10de007316c47c5d1ce

author	artbio
date	Tue, 05 Sep 2017 06:33:16 -0400
parents	6b8adacd4750
children	3f62272192f9

comparison

Legend: equal, deleted, inserted, replaced

-:da1aa7de2b19
+:9ea96a02c416
-#!/usr/bin/env python
+import argparse
-import argparse
+from datetime import datetime
 def Parser():
 the_parser = argparse.ArgumentParser()
 the_parser.add_argument(
 help="output GFF3 file with converted mature mir coordinates")
 args = the_parser.parse_args()
 return args
-GFF3_header = '''##gff-version 3
+def get_gff_header(gff_input_file):
-##generated by mature_mir_gff_translation.py
+string_list = []
-#
+for line in open(gff_input_file, "r"):
-# Chromosomal coordinates of microRNAs ** relative to the hairpin precursors **
+if line[0] == '#':
-# microRNAs:               miRBase current_version
+string_list.append(line)
-# genome-build-id:         check http://mirbase.org/
+string_list.append('# generated by mature_mir_gff_translation.py %s\n#\n' %
-#
+str(datetime.now()))
-# Hairpin precursor sequences have type "miRNA_primary_transcript".
+return ''.join(string_list)
-# Note, these sequences do not represent the full primary transcript,
-# rather a predicted stem-loop portion that includes the precursor
-# miRNA. Mature sequences have type "miRNA".
-#
-'''
 def load_gff_in_dict(gff_input_file):
 '''
 Reads the gff3 file and return a dictionary of dictionaries
 gff_dict[ID]["end"] = gff_fields[4]
 gff_dict[ID]["score"] = gff_fields[5]
 gff_dict[ID]["strand"] = gff_fields[6]
 gff_dict[ID]["phase"] = gff_fields[7]
 gff_dict[ID]["attributes"] = gff_fields[8]
-if "Derives_from" in gff_dict[ID]["attributes"]:
+if "erives_from" in gff_dict[ID]["attributes"]:
 parent_primary_transcript = gff_dict[ID]["attributes"].split(
-"Derives_from=")[1]
+"erives_from=")[1]
 parent_primary_transcript = gff_dict[parent_primary_transcript][
 "attributes"].split("Name=")[1]
 gff_dict[ID]["attributes"] = "%s;Parent_mir_Name=%s" % (
 gff_dict[ID]["attributes"], parent_primary_transcript)
 return gff_dict
-def genome_to_mir_gff(gff_dict, output):
+def genome_to_mir_gff(gff_dict, output, header):
 '''
 Converts seqid field from chromosome to item Name
 Then converts coordinates relative to "miRNA_primary_transcript"
 Note that GFF files are 1-based coordinates
 '''
 for key in gff_dict:
 name = gff_dict[key]["attributes"].split("Name=")[1].split(";")[0]
 gff_dict[key]["seqid"] = name
-if "Derives_from=" in gff_dict[key]["attributes"]:
+if "erives_from=" in gff_dict[key]["attributes"]:
 parent_ID = gff_dict[key]["attributes"].split(
-"Derives_from=")[1].split(";")[0]
+"erives_from=")[1].split(";")[0]
 gff_dict[key]["start"] = str(int(gff_dict[key]["start"])-int(
 gff_dict[parent_ID]["start"])+1)
 gff_dict[key]["end"] = str(int(gff_dict[key]["end"])-int(
 gff_dict[parent_ID]["start"])+1)
 hairpins = {}
 else:
 matures[key] = gff_dict[key]["attributes"].split(
 "Name=")[1].split(
 ";")[0]
 with open(output, "w") as output:
-output.write(GFF3_header)
+output.write(header)
 for ID in sorted(hairpins, key=hairpins.get):
 output.write("\t".join([gff_dict[ID]["seqid"],
 gff_dict[ID]["source"], gff_dict[ID]["type"],
 gff_dict[ID]["start"], gff_dict[ID]["end"],
 gff_dict[ID]["score"], gff_dict[ID]["strand"],
 output.write("\n")
 def main(infile, outfile):
 gff_dict = load_gff_in_dict(infile)
-genome_to_mir_gff(gff_dict, outfile)
+genome_to_mir_gff(gff_dict, outfile, get_gff_header(infile))
 if __name__ == "__main__":
 args = Parser()
 main(args.input, args.output)

Mercurial > repos > artbio > mircounts

comparison mature_mir_gff_translation.py @ 5:9ea96a02c416 draft