Mercurial > repos > petr-novak > dante
comparison dante_gff_output_filtering.py @ 17:1a766f9f623d draft
Uploaded
author | petr-novak |
---|---|
date | Mon, 16 Sep 2019 03:54:45 -0400 |
parents | 3151a72a6671 |
children | 1eabd42e00ef |
comparison
equal
deleted
inserted
replaced
16:0e820310d4dc | 17:1a766f9f623d |
---|---|
118 xminimals_all = [] | 118 xminimals_all = [] |
119 xmaximals_all = [] | 119 xmaximals_all = [] |
120 domains_all = [] | 120 domains_all = [] |
121 start = True | 121 start = True |
122 for line in gff_all: | 122 for line in gff_all: |
123 attributes = line.rstrip().split("\t")[-1] | 123 gff_line = parse_gff_line(line) |
124 classification = attributes.split(";")[1].split("=")[1] | 124 classification = gff_line['attributes']['Final_Classification'] |
125 orig_class_dict[classification] += 1 | 125 orig_class_dict[classification] += 1 |
126 ## ambiguous domains filtered out automatically | 126 ## ambiguous domains filtered out automatically |
127 if classification != configuration.AMBIGUOUS_TAG: | 127 if classification != configuration.AMBIGUOUS_TAG: |
128 gff_line = parse_gff_line(line) | |
129 al_identity = float(gff_line['attributes']['Identity']) | 128 al_identity = float(gff_line['attributes']['Identity']) |
130 al_similarity = float(gff_line['attributes']['Similarity']) | 129 al_similarity = float(gff_line['attributes']['Similarity']) |
131 al_length = float(gff_line['attributes']['Relat_Length']) | 130 al_length = float(gff_line['attributes']['Relat_Length']) |
132 relat_interrupt = float(gff_line['attributes']['Relat_Interruptions']) | 131 relat_interrupt = float(gff_line['attributes']['Relat_Interruptions']) |
133 db_len_proportion = float(gff_line['attributes']['Hit_to_DB_Length']) | 132 db_len_proportion = float(gff_line['attributes']['Hit_to_DB_Length']) |
134 dom_type = gff_line['attributes']['Final_Classification'] | 133 dom_type = gff_line['attributes']['Name'] |
135 seq_id = gff_line['seqid'] | 134 seq_id = gff_line['seqid'] |
136 xminimal = int(gff_line['start']) | 135 xminimal = int(gff_line['start']) |
137 xmaximal = int(gff_line['end']) | 136 xmaximal = int(gff_line['end']) |
138 c1 = al_identity >= TH_IDENTITY | 137 c1 = al_identity >= TH_IDENTITY |
139 c2 = al_similarity >= TH_SIMILARITY | 138 c2 = al_similarity >= TH_SIMILARITY |