comparison dante_gff_output_filtering.py @ 22:1eabd42e00ef draft

Uploaded
author petr-novak
date Fri, 03 Apr 2020 07:27:59 -0400
parents 1a766f9f623d
children
comparison
equal deleted inserted replaced
21:65a6fb89495d 22:1eabd42e00ef
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
2 2 import sys
3 import time 3 import time
4 import configuration 4 import configuration
5 import os 5 import os
6 import textwrap 6 import textwrap
7 import subprocess 7 import subprocess
37 count_comment += 1 37 count_comment += 1
38 return count_comment 38 return count_comment
39 39
40 40
41 def write_info(filt_dom_tmp, FILT_DOM_GFF, orig_class_dict, filt_class_dict, 41 def write_info(filt_dom_tmp, FILT_DOM_GFF, orig_class_dict, filt_class_dict,
42 dom_dict, version_lines): 42 dom_dict, version_lines, TH_IDENTITY, TH_SIMILARITY,
43 TH_LENGTH, TH_INTERRUPT, TH_LEN_RATIO, SELECTED_DOM):
43 ''' 44 '''
44 Write domains statistics in beginning of filtered GFF 45 Write domains statistics in beginning of filtered GFF
45 ''' 46 '''
46 with open(FILT_DOM_GFF, "w") as filt_gff: 47 with open(FILT_DOM_GFF, "w") as filt_gff:
47 for line in version_lines: 48 for line in version_lines:
48 filt_gff.write(line) 49 filt_gff.write(line)
50 filt_gff.write(("##Filtering thresholdss: min identity: {}, min similarity: {},"
51 " min relative alingment length: {}, max interuptions(stop or "
52 "frameshift): {}, max relative alignment length: {}, selected"
53 " domains: {} \n").format(TH_IDENTITY,
54 TH_SIMILARITY,
55 TH_LENGTH,
56 TH_INTERRUPT,
57 TH_LEN_RATIO,
58 SELECTED_DOM))
49 filt_gff.write("##CLASSIFICATION\tORIGINAL_COUNTS\tFILTERED_COUNTS\n") 59 filt_gff.write("##CLASSIFICATION\tORIGINAL_COUNTS\tFILTERED_COUNTS\n")
50 if not orig_class_dict: 60 if not orig_class_dict:
51 filt_gff.write("##NO DOMAINS CLASSIFICATIONS\n") 61 filt_gff.write("##NO DOMAINS CLASSIFICATIONS\n")
52 for classification in sorted(orig_class_dict.keys()): 62 for classification in sorted(orig_class_dict.keys()):
53 if classification in filt_class_dict.keys(): 63 if classification in filt_class_dict.keys():
157 xminimals.append(xminimal) 167 xminimals.append(xminimal)
158 xmaximals.append(xmaximal) 168 xmaximals.append(xmaximal)
159 domains.append(dom_type) 169 domains.append(dom_type)
160 path = os.path.dirname(os.path.realpath(__file__)) 170 path = os.path.dirname(os.path.realpath(__file__))
161 write_info(filt_dom_tmp, FILT_DOM_GFF, orig_class_dict, filt_class_dict, 171 write_info(filt_dom_tmp, FILT_DOM_GFF, orig_class_dict, filt_class_dict,
162 dom_dict, version_lines) 172 dom_dict, version_lines, TH_IDENTITY, TH_SIMILARITY,
173 TH_LENGTH, TH_INTERRUPT, TH_LEN_RATIO, SELECTED_DOM)
163 os.unlink(filt_dom_tmp.name) 174 os.unlink(filt_dom_tmp.name)
164 xminimals_all.append(xminimals) 175 xminimals_all.append(xminimals)
165 xmaximals_all.append(xmaximals) 176 xmaximals_all.append(xmaximals)
166 domains_all.append(domains) 177 domains_all.append(domains)
167 return xminimals_all, xmaximals_all, domains_all, seq_ids_all 178 return xminimals_all, xmaximals_all, domains_all, seq_ids_all