Mercurial > repos > petr-novak > dante
changeset 22:1eabd42e00ef draft
Uploaded
author | petr-novak |
---|---|
date | Fri, 03 Apr 2020 07:27:59 -0400 |
parents | 65a6fb89495d |
children | e2bbc79f0fac |
files | dante.xml dante_gff_output_filtering.py dante_gff_output_filtering.xml tool_dependencies.xml |
diffstat | 4 files changed, 29 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/dante.xml Tue Sep 24 08:12:01 2019 -0400 +++ b/dante.xml Fri Apr 03 07:27:59 2020 -0400 @@ -1,4 +1,4 @@ -<tool id="dante" name="Domain based ANnotation of Transposable Elements - DANTE" version="1.0.0"> +<tool id="dante" name="Domain based ANnotation of Transposable Elements - DANTE" version="1.1.0"> <description> Tool for annotation of transposable elements based on the similarity to conserved protein domains database. </description> <requirements> <requirement type="package">last</requirement> @@ -26,6 +26,14 @@ --classification \${REXDB}/${db_type}_class --scoring_matrix ${scoring_matrix} + && + python3 ${__tool_directory__}/dante_gff_output_filtering.py --dom_gff ${DomGff} + --domains_prot_seq ${Domains_filtered} --domains_filtered ${DomGff_filtered} + --output_dir . + --selected_dom All --th_identity 0.35 + --th_similarity 0.45 --th_length 0.8 + --interruptions 3 --max_len_proportion 1.2 + --element_type '' #if str($input_type.input_type_selector) == "aln" && @@ -100,7 +108,9 @@ </inputs> <outputs> - <data format="gff3" name="DomGff" label="DANTE on ${on_string}" /> + <data format="gff3" name="DomGff" label="DANTE on ${on_string}, full output" /> + <data format="gff3" name="DomGff_filtered" label="DANTE on ${on_string}, filtered output" /> + <data format="fasta" name="Domains_filtered" label="DANTE on ${on_string}, protein domains, filtered output" /> <data format="gff3" name="DomGff2" label="DANTE on ${on_string}: 2nd pass"> <filter>iterative == "Yes" </filter> </data> @@ -122,7 +132,7 @@ <help> - THIS IS A PRIMARY OUTPUT THAT SHOULD UNDERGO FURTHER QUALITY FILTERING TO GET RID OFF POTENTIAL FALSE POSITIVE DOMAINS + **WHAT IT DOES**
--- a/dante_gff_output_filtering.py Tue Sep 24 08:12:01 2019 -0400 +++ b/dante_gff_output_filtering.py Fri Apr 03 07:27:59 2020 -0400 @@ -1,5 +1,5 @@ #!/usr/bin/env python3 - +import sys import time import configuration import os @@ -39,13 +39,23 @@ def write_info(filt_dom_tmp, FILT_DOM_GFF, orig_class_dict, filt_class_dict, - dom_dict, version_lines): + dom_dict, version_lines, TH_IDENTITY, TH_SIMILARITY, + TH_LENGTH, TH_INTERRUPT, TH_LEN_RATIO, SELECTED_DOM): ''' Write domains statistics in beginning of filtered GFF ''' with open(FILT_DOM_GFF, "w") as filt_gff: for line in version_lines: filt_gff.write(line) + filt_gff.write(("##Filtering thresholdss: min identity: {}, min similarity: {}," + " min relative alingment length: {}, max interuptions(stop or " + "frameshift): {}, max relative alignment length: {}, selected" + " domains: {} \n").format(TH_IDENTITY, + TH_SIMILARITY, + TH_LENGTH, + TH_INTERRUPT, + TH_LEN_RATIO, + SELECTED_DOM)) filt_gff.write("##CLASSIFICATION\tORIGINAL_COUNTS\tFILTERED_COUNTS\n") if not orig_class_dict: filt_gff.write("##NO DOMAINS CLASSIFICATIONS\n") @@ -159,7 +169,8 @@ domains.append(dom_type) path = os.path.dirname(os.path.realpath(__file__)) write_info(filt_dom_tmp, FILT_DOM_GFF, orig_class_dict, filt_class_dict, - dom_dict, version_lines) + dom_dict, version_lines, TH_IDENTITY, TH_SIMILARITY, + TH_LENGTH, TH_INTERRUPT, TH_LEN_RATIO, SELECTED_DOM) os.unlink(filt_dom_tmp.name) xminimals_all.append(xminimals) xmaximals_all.append(xmaximals)
--- a/dante_gff_output_filtering.xml Tue Sep 24 08:12:01 2019 -0400
+++ b/dante_gff_output_filtering.xml Fri Apr 03 07:27:59 2020 -0400
@@ -1,4 +1,4 @@
-<tool id="domains_filter" name="Protein Domains Filter" version="1.0.0">
+<tool id="domains_filter" name="Protein Domains Filter" version="1.0.1">
   <description> Tool for filtering of gff3 output from DANTE. Filtering can be performed based domain type and alignment quality. </description>
   <stdio>
     <regex match="Traceback" source="stderr" level="fatal" description="Unknown error" />
--- a/tool_dependencies.xml Tue Sep 24 08:12:01 2019 -0400
+++ b/tool_dependencies.xml Fri Apr 03 07:27:59 2020 -0400
@@ -1,7 +1,7 @@
 <?xml version="1.0" ?>
 <tool_dependency>
     <package name="rexdb" version="1.0">
-        <repository changeset_revision="c769e54cd079" name="package_rexdb_1_0" owner="petr-novak" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu"/>
+        <repository changeset_revision="ac89c185fbd0" name="package_rexdb_1_0" owner="petr-novak" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu"/>
         <readme>
             prepare rexdb database for dante
         </readme>