Mercurial > repos > petr-novak > dante
comparison dante_gff_output_filtering.py @ 22:1eabd42e00ef draft
Uploaded
author | petr-novak |
---|---|
date | Fri, 03 Apr 2020 07:27:59 -0400 |
parents | 1a766f9f623d |
children |
comparison
equal
deleted
inserted
replaced
21:65a6fb89495d | 22:1eabd42e00ef |
---|---|
1 #!/usr/bin/env python3 | 1 #!/usr/bin/env python3 |
2 | 2 import sys |
3 import time | 3 import time |
4 import configuration | 4 import configuration |
5 import os | 5 import os |
6 import textwrap | 6 import textwrap |
7 import subprocess | 7 import subprocess |
37 count_comment += 1 | 37 count_comment += 1 |
38 return count_comment | 38 return count_comment |
39 | 39 |
40 | 40 |
41 def write_info(filt_dom_tmp, FILT_DOM_GFF, orig_class_dict, filt_class_dict, | 41 def write_info(filt_dom_tmp, FILT_DOM_GFF, orig_class_dict, filt_class_dict, |
42 dom_dict, version_lines): | 42 dom_dict, version_lines, TH_IDENTITY, TH_SIMILARITY, |
43 TH_LENGTH, TH_INTERRUPT, TH_LEN_RATIO, SELECTED_DOM): | |
43 ''' | 44 ''' |
44 Write domains statistics in beginning of filtered GFF | 45 Write domains statistics in beginning of filtered GFF |
45 ''' | 46 ''' |
46 with open(FILT_DOM_GFF, "w") as filt_gff: | 47 with open(FILT_DOM_GFF, "w") as filt_gff: |
47 for line in version_lines: | 48 for line in version_lines: |
48 filt_gff.write(line) | 49 filt_gff.write(line) |
50 filt_gff.write(("##Filtering thresholdss: min identity: {}, min similarity: {}," | |
51 " min relative alingment length: {}, max interuptions(stop or " | |
52 "frameshift): {}, max relative alignment length: {}, selected" | |
53 " domains: {} \n").format(TH_IDENTITY, | |
54 TH_SIMILARITY, | |
55 TH_LENGTH, | |
56 TH_INTERRUPT, | |
57 TH_LEN_RATIO, | |
58 SELECTED_DOM)) | |
49 filt_gff.write("##CLASSIFICATION\tORIGINAL_COUNTS\tFILTERED_COUNTS\n") | 59 filt_gff.write("##CLASSIFICATION\tORIGINAL_COUNTS\tFILTERED_COUNTS\n") |
50 if not orig_class_dict: | 60 if not orig_class_dict: |
51 filt_gff.write("##NO DOMAINS CLASSIFICATIONS\n") | 61 filt_gff.write("##NO DOMAINS CLASSIFICATIONS\n") |
52 for classification in sorted(orig_class_dict.keys()): | 62 for classification in sorted(orig_class_dict.keys()): |
53 if classification in filt_class_dict.keys(): | 63 if classification in filt_class_dict.keys(): |
157 xminimals.append(xminimal) | 167 xminimals.append(xminimal) |
158 xmaximals.append(xmaximal) | 168 xmaximals.append(xmaximal) |
159 domains.append(dom_type) | 169 domains.append(dom_type) |
160 path = os.path.dirname(os.path.realpath(__file__)) | 170 path = os.path.dirname(os.path.realpath(__file__)) |
161 write_info(filt_dom_tmp, FILT_DOM_GFF, orig_class_dict, filt_class_dict, | 171 write_info(filt_dom_tmp, FILT_DOM_GFF, orig_class_dict, filt_class_dict, |
162 dom_dict, version_lines) | 172 dom_dict, version_lines, TH_IDENTITY, TH_SIMILARITY, |
173 TH_LENGTH, TH_INTERRUPT, TH_LEN_RATIO, SELECTED_DOM) | |
163 os.unlink(filt_dom_tmp.name) | 174 os.unlink(filt_dom_tmp.name) |
164 xminimals_all.append(xminimals) | 175 xminimals_all.append(xminimals) |
165 xmaximals_all.append(xmaximals) | 176 xmaximals_all.append(xmaximals) |
166 domains_all.append(domains) | 177 domains_all.append(domains) |
167 return xminimals_all, xmaximals_all, domains_all, seq_ids_all | 178 return xminimals_all, xmaximals_all, domains_all, seq_ids_all |