annotate mature_mir_gff_translation.py @ 3:6b8adacd4750 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit fa65a844f9041a83767f5305ab360abfdf68f59f
author artbio
date Wed, 26 Jul 2017 19:15:08 -0400
parents da29af78a960
children 9ea96a02c416
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
da29af78a960 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
1 #!/usr/bin/env python
da29af78a960 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
2
da29af78a960 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
3 import argparse
da29af78a960 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
4
da29af78a960 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
5
da29af78a960 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
def Parser():
    '''
    Builds the command line interface and parses sys.argv.
    Returns the argparse namespace with two string attributes:
    "input" (miRBase GFF3 file to read) and "output" (GFF3 file to write).
    Both are optional for argparse and default to None when omitted.
    '''
    cli = argparse.ArgumentParser()
    # (flag, help text) pairs; both options are plain stored strings
    options = (
        ('--input', "input miRBase GFF3 file"),
        ('--output',
         "output GFF3 file with converted mature mir coordinates"),
    )
    for flag, description in options:
        cli.add_argument(flag, action="store", type=str, help=description)
    return cli.parse_args()
da29af78a960 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
15
3
6b8adacd4750 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit fa65a844f9041a83767f5305ab360abfdf68f59f
artbio
parents: 0
diff changeset
16
6b8adacd4750 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit fa65a844f9041a83767f5305ab360abfdf68f59f
artbio
parents: 0
diff changeset
# Comment block written verbatim at the top of the translated GFF3 output.
# The final empty item makes the joined text end with a newline.
GFF3_header = "\n".join([
    '##gff-version 3',
    '##generated by mature_mir_gff_translation.py',
    '#',
    '# Chromosomal coordinates of microRNAs ** relative to the hairpin precursors **',
    '# microRNAs: miRBase current_version',
    '# genome-build-id: check http://mirbase.org/',
    '#',
    '# Hairpin precursor sequences have type "miRNA_primary_transcript".',
    '# Note, these sequences do not represent the full primary transcript,',
    '# rather a predicted stem-loop portion that includes the precursor',
    '# miRNA. Mature sequences have type "miRNA".',
    '#',
    '',
])
da29af78a960 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
30
3
6b8adacd4750 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit fa65a844f9041a83767f5305ab360abfdf68f59f
artbio
parents: 0
diff changeset
31
0
da29af78a960 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
def load_gff_in_dict(gff_input_file):
    '''
    Reads the gff3 file and return a dictionary of dictionaries
    with keys equal to standard gff3 fields (9):
    seqid, source, type, start, end, score, strand, phase, attributes.
    All values are kept as strings, as read from the file.
    Note that the key of the primary dictionary is the ID
    (value of the "ID=" tag in the attributes column).

    Lines starting with "#" and blank lines are skipped.
    Records carrying a "Derives_from=" tag get an extra
    ";Parent_mir_Name=<Name of the parent record>" appended to their
    attributes. Parents are resolved once the whole file is loaded, so
    a record may be listed before the record it derives from.

    Raises IndexError on a data line with fewer than 9 columns or
    without "ID=" tag, and KeyError when a "Derives_from=" tag points
    to an ID which is absent from the file.
    '''
    gff_dict = {}
    # the context manager guarantees the input handle is closed
    with open(gff_input_file, "r") as gff_handle:
        for line in gff_handle:
            # strip only the line terminator: the last line of a file may
            # have none, and slicing off one character would corrupt it
            line = line.rstrip("\r\n")
            if not line.strip() or line.startswith("#"):
                continue
            gff_fields = line.split("\t")
            ID = gff_fields[8].split("ID=")[1].split(";")[0]
            gff_dict[ID] = {
                "seqid": gff_fields[0],
                "source": gff_fields[1],
                "type": gff_fields[2],
                "start": gff_fields[3],
                "end": gff_fields[4],
                "score": gff_fields[5],
                "strand": gff_fields[6],
                "phase": gff_fields[7],
                "attributes": gff_fields[8],
            }
    # second pass: every potential parent is now loaded
    for ID in gff_dict:
        attributes = gff_dict[ID]["attributes"]
        if "Derives_from=" not in attributes:
            continue
        # cut at ";" so that the tags do not have to be the last ones
        parent_ID = attributes.split("Derives_from=")[1].split(";")[0]
        try:
            parent_attributes = gff_dict[parent_ID]["attributes"]
        except KeyError:
            raise KeyError("%s derives from %s which is not in %s" % (
                ID, parent_ID, gff_input_file))
        parent_name = parent_attributes.split("Name=")[1].split(";")[0]
        gff_dict[ID]["attributes"] = "%s;Parent_mir_Name=%s" % (
            attributes, parent_name)
    return gff_dict
3
6b8adacd4750 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit fa65a844f9041a83767f5305ab360abfdf68f59f
artbio
parents: 0
diff changeset
62
0
da29af78a960 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
63
da29af78a960 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
def _attribute_value(attributes, tag):
    '''Returns the value of the first "<tag>=" found in a gff3 attributes string'''
    return attributes.split(tag + "=")[1].split(";")[0]


def genome_to_mir_gff(gff_dict, output):
    '''
    Converts seqid field from chromosome to item Name
    Then converts coordinates relative to "miRNA_primary_transcript"
    Note that GFF files are 1-based coordinates

    gff_dict is the dictionary of records keyed by ID (string values for
    the 9 gff3 fields); it is modified in place. output is the path of
    the GFF3 file to write: GFF3_header first, then each
    miRNA_primary_transcript (sorted by Name) immediately followed by
    the records deriving from it (sorted by Name, descending).
    Records which derive from no written hairpin are not written.
    '''
    gff_columns = ("seqid", "source", "type", "start", "end",
                   "score", "strand", "phase", "attributes")
    for key in gff_dict:
        record = gff_dict[key]
        record["seqid"] = _attribute_value(record["attributes"], "Name")
        if "Derives_from=" in record["attributes"]:
            parent_ID = _attribute_value(record["attributes"], "Derives_from")
            # NOTE(review): the offset is taken from the parent start
            # whatever the strand - confirm this is intended for hairpins
            # located on the minus strand
            parent_start = int(gff_dict[parent_ID]["start"])
            record["start"] = str(int(record["start"]) - parent_start + 1)
            record["end"] = str(int(record["end"]) - parent_start + 1)
    hairpins = {}
    matures = {}
    # treats miRNA_primary_transcript coordinates
    # in a second loop to avoid errors in conversion
    # (the loop above still needs the genomic start of the hairpins)
    for key in gff_dict:
        record = gff_dict[key]
        name = _attribute_value(record["attributes"], "Name")
        if record["type"] == "miRNA_primary_transcript":
            record["end"] = str(int(record["end"]) - int(record["start"]) + 1)
            record["start"] = '1'
            # now, do a dict[ID]=Name but only for miRNA_primary_transcript
            hairpins[key] = name
        else:
            matures[key] = name
    # Group the derived records under the exact ID of their parent, in the
    # order they must be written. An exact match on Derives_from avoids
    # attaching a record to a hairpin whose ID is merely a substring of its
    # attributes, and replaces a scan of all matures for every hairpin.
    matures_by_parent = {}
    for mature_ID in sorted(matures, key=matures.get, reverse=True):
        attributes = gff_dict[mature_ID]["attributes"]
        if "Derives_from=" in attributes:
            parent_ID = _attribute_value(attributes, "Derives_from")
            matures_by_parent.setdefault(parent_ID, []).append(mature_ID)
    with open(output, "w") as gff_out:
        gff_out.write(GFF3_header)
        for hairpin_ID in sorted(hairpins, key=hairpins.get):
            written_IDs = [hairpin_ID] + matures_by_parent.get(hairpin_ID, [])
            for ID in written_IDs:
                gff_out.write("\t".join(
                    [gff_dict[ID][column] for column in gff_columns]))
                gff_out.write("\n")
da29af78a960 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
116
da29af78a960 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
117
da29af78a960 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
def main(infile, outfile):
    '''
    Loads the GFF3 file infile with load_gff_in_dict and hands the
    resulting dictionary to genome_to_mir_gff, which writes outfile.
    '''
    genome_to_mir_gff(load_gff_in_dict(infile), outfile)
da29af78a960 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
121
da29af78a960 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
122
da29af78a960 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
artbio
parents:
diff changeset
if __name__ == "__main__":
    # script entry point: parse --input / --output, then run the translation
    cli_args = Parser()
    main(cli_args.input, cli_args.output)