# HG changeset patch # User petr-novak # Date 1585913279 14400 # Node ID 1eabd42e00efcc75ce5cc74aa4817326ab2098fc # Parent 65a6fb89495d7abfb9ae608235c7fb6e6adb0bd6 Uploaded diff -r 65a6fb89495d -r 1eabd42e00ef dante.xml --- a/dante.xml Tue Sep 24 08:12:01 2019 -0400 +++ b/dante.xml Fri Apr 03 07:27:59 2020 -0400 @@ -1,4 +1,4 @@ - + Tool for annotation of transposable elements based on the similarity to conserved protein domains database. last @@ -26,6 +26,14 @@ --classification \${REXDB}/${db_type}_class --scoring_matrix ${scoring_matrix} + && + python3 ${__tool_directory__}/dante_gff_output_filtering.py --dom_gff ${DomGff} + --domains_prot_seq ${Domains_filtered} --domains_filtered ${DomGff_filtered} + --output_dir . + --selected_dom All --th_identity 0.35 + --th_similarity 0.45 --th_length 0.8 + --interruptions 3 --max_len_proportion 1.2 + --element_type '' #if str($input_type.input_type_selector) == "aln" && @@ -100,7 +108,9 @@ - + + + iterative == "Yes" @@ -122,7 +132,7 @@ - THIS IS A PRIMARY OUTPUT THAT SHOULD UNDERGO FURTHER QUALITY FILTERING TO GET RID OFF POTENTIAL FALSE POSITIVE DOMAINS + **WHAT IT DOES** diff -r 65a6fb89495d -r 1eabd42e00ef dante_gff_output_filtering.py --- a/dante_gff_output_filtering.py Tue Sep 24 08:12:01 2019 -0400 +++ b/dante_gff_output_filtering.py Fri Apr 03 07:27:59 2020 -0400 @@ -1,5 +1,5 @@ #!/usr/bin/env python3 - +import sys import time import configuration import os @@ -39,13 +39,23 @@ def write_info(filt_dom_tmp, FILT_DOM_GFF, orig_class_dict, filt_class_dict, - dom_dict, version_lines): + dom_dict, version_lines, TH_IDENTITY, TH_SIMILARITY, + TH_LENGTH, TH_INTERRUPT, TH_LEN_RATIO, SELECTED_DOM): ''' Write domains statistics in beginning of filtered GFF ''' with open(FILT_DOM_GFF, "w") as filt_gff: for line in version_lines: filt_gff.write(line) + filt_gff.write(("##Filtering thresholdss: min identity: {}, min similarity: {}," + " min relative alingment length: {}, max interuptions(stop or " + "frameshift): {}, max relative alignment length: {}, selected" + " domains: {} \n").format(TH_IDENTITY, + TH_SIMILARITY, + TH_LENGTH, + TH_INTERRUPT, + TH_LEN_RATIO, + SELECTED_DOM)) filt_gff.write("##CLASSIFICATION\tORIGINAL_COUNTS\tFILTERED_COUNTS\n") if not orig_class_dict: filt_gff.write("##NO DOMAINS CLASSIFICATIONS\n") @@ -159,7 +169,8 @@ domains.append(dom_type) path = os.path.dirname(os.path.realpath(__file__)) write_info(filt_dom_tmp, FILT_DOM_GFF, orig_class_dict, filt_class_dict, - dom_dict, version_lines) + dom_dict, version_lines, TH_IDENTITY, TH_SIMILARITY, + TH_LENGTH, TH_INTERRUPT, TH_LEN_RATIO, SELECTED_DOM) os.unlink(filt_dom_tmp.name) xminimals_all.append(xminimals) xmaximals_all.append(xmaximals) diff -r 65a6fb89495d -r 1eabd42e00ef dante_gff_output_filtering.xml --- a/dante_gff_output_filtering.xml Tue Sep 24 08:12:01 2019 -0400 +++ b/dante_gff_output_filtering.xml Fri Apr 03 07:27:59 2020 -0400 @@ -1,4 +1,4 @@ - + Tool for filtering of gff3 output from DANTE. Filtering can be performed based domain type and alignment quality. diff -r 65a6fb89495d -r 1eabd42e00ef tool_dependencies.xml --- a/tool_dependencies.xml Tue Sep 24 08:12:01 2019 -0400 +++ b/tool_dependencies.xml Fri Apr 03 07:27:59 2020 -0400 @@ -1,7 +1,7 @@ - + prepare rexdb database for dante