comparison dante_gff_output_filtering.py @ 17:1a766f9f623d draft

Uploaded
author petr-novak
date Mon, 16 Sep 2019 03:54:45 -0400
parents 3151a72a6671
children 1eabd42e00ef
comparison
equal deleted inserted replaced
16:0e820310d4dc 17:1a766f9f623d
118 xminimals_all = [] 118 xminimals_all = []
119 xmaximals_all = [] 119 xmaximals_all = []
120 domains_all = [] 120 domains_all = []
121 start = True 121 start = True
122 for line in gff_all: 122 for line in gff_all:
123 attributes = line.rstrip().split("\t")[-1] 123 gff_line = parse_gff_line(line)
124 classification = attributes.split(";")[1].split("=")[1] 124 classification = gff_line['attributes']['Final_Classification']
125 orig_class_dict[classification] += 1 125 orig_class_dict[classification] += 1
126 ## ambiguous domains filtered out automatically 126 ## ambiguous domains filtered out automatically
127 if classification != configuration.AMBIGUOUS_TAG: 127 if classification != configuration.AMBIGUOUS_TAG:
128 gff_line = parse_gff_line(line)
129 al_identity = float(gff_line['attributes']['Identity']) 128 al_identity = float(gff_line['attributes']['Identity'])
130 al_similarity = float(gff_line['attributes']['Similarity']) 129 al_similarity = float(gff_line['attributes']['Similarity'])
131 al_length = float(gff_line['attributes']['Relat_Length']) 130 al_length = float(gff_line['attributes']['Relat_Length'])
132 relat_interrupt = float(gff_line['attributes']['Relat_Interruptions']) 131 relat_interrupt = float(gff_line['attributes']['Relat_Interruptions'])
133 db_len_proportion = float(gff_line['attributes']['Hit_to_DB_Length']) 132 db_len_proportion = float(gff_line['attributes']['Hit_to_DB_Length'])
134 dom_type = gff_line['attributes']['Final_Classification'] 133 dom_type = gff_line['attributes']['Name']
135 seq_id = gff_line['seqid'] 134 seq_id = gff_line['seqid']
136 xminimal = int(gff_line['start']) 135 xminimal = int(gff_line['start'])
137 xmaximal = int(gff_line['end']) 136 xmaximal = int(gff_line['end'])
138 c1 = al_identity >= TH_IDENTITY 137 c1 = al_identity >= TH_IDENTITY
139 c2 = al_similarity >= TH_SIMILARITY 138 c2 = al_similarity >= TH_SIMILARITY