comparison mature_mir_gff_translation.py @ 13:b045c30fb768 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af48e9f6df2717ffd3731a974be1ec36e4eff779"
author artbio
date Fri, 18 Oct 2019 19:18:50 -0400
parents de227b7307cf
children
comparison
equal deleted inserted replaced
12:6d3e98cba73a 13:b045c30fb768
8 '--gff_path', action="store", type=str, 8 '--gff_path', action="store", type=str,
9 help="path to miRBase GFF3 file") 9 help="path to miRBase GFF3 file")
10 the_parser.add_argument( 10 the_parser.add_argument(
11 '--output', action="store", type=str, 11 '--output', action="store", type=str,
12 help="output GFF3 file with converted mature mir coordinates") 12 help="output GFF3 file with converted mature mir coordinates")
13 the_parser.add_argument(
14 '--basename', action="store", type=str,
15 help="basename of the parsed gff file returned")
16 args = the_parser.parse_args() 13 args = the_parser.parse_args()
17 return args 14 return args
18 15
19 16
20 def convert_and_print_gff(gff_input_file, output): 17 def convert_and_print_gff(gff_input_file, output):
41 gff_dict[ID]["premir_name"] = gff_fields[8].split( 38 gff_dict[ID]["premir_name"] = gff_fields[8].split(
42 "Name=")[1].split(";")[0] 39 "Name=")[1].split(";")[0]
43 gff_dict[ID]["primary"] = line[:-1] 40 gff_dict[ID]["primary"] = line[:-1]
44 gff_dict[ID]["miRNAs"] = [] 41 gff_dict[ID]["miRNAs"] = []
45 elif gff_fields[2] == "miRNA": 42 elif gff_fields[2] == "miRNA":
43 if "_" in ID:
44 continue
46 parent_ID = gff_fields[8].split("erives_from=")[1] 45 parent_ID = gff_fields[8].split("erives_from=")[1]
47 gff_dict[parent_ID]["miRNAs"].append(line[:-1]) 46 gff_dict[parent_ID]["miRNAs"].append(line[:-1])
48 # Now reorganise features and recalculate coordinates of premirs and mirs 47 # Now reorganise features and recalculate coordinates of premirs and mirs
49 gff_list = [] 48 gff_list = []
50 for ID in sorted(gff_dict, key=lambda x: (gff_dict[x]['premir_name'])): 49 for ID in sorted(gff_dict, key=lambda x: (gff_dict[x]['premir_name'])):
68 source, type, newstart, newend, score, strand, 67 source, type, newstart, newend, score, strand,
69 phase, attributes)) 68 phase, attributes))
70 # ensure their is only 2 child miRNAs at best 69 # ensure their is only 2 child miRNAs at best
71 if len(gff_dict[ID]["miRNAs"]) > 2: 70 if len(gff_dict[ID]["miRNAs"]) > 2:
72 gff_dict[ID]["miRNAs"] = gff_dict[ID]["miRNAs"][:2] 71 gff_dict[ID]["miRNAs"] = gff_dict[ID]["miRNAs"][:2]
73 # sort child miRNAs 5p first 3p second 72 # sort child miRNAs 5p first 3p second,
74 if gff_dict[ID]["miRNAs"][0].find('5p') == -1: 73 # if there are two miR mature at least !
74 if len(gff_dict[ID]["miRNAs"]) > 1 and \
75 gff_dict[ID]["miRNAs"][0].find('5p') == -1:
75 gff_dict[ID]["miRNAs"] = gff_dict[ID]["miRNAs"][::-1] 76 gff_dict[ID]["miRNAs"] = gff_dict[ID]["miRNAs"][::-1]
76 for mir in gff_dict[ID]["miRNAs"]: 77 for mir in gff_dict[ID]["miRNAs"]:
77 mir_fields = mir.split('\t') 78 mir_fields = mir.split('\t')
78 mir_seqid = mir_fields[8].split("Name=")[1].split(";")[0] 79 mir_seqid = mir_fields[8].split("Name=")[1].split(";")[0]
79 mir_source = mir_fields[1] 80 mir_source = mir_fields[1]