changeset 22:1eabd42e00ef draft

Uploaded
author petr-novak
date Fri, 03 Apr 2020 07:27:59 -0400
parents 65a6fb89495d
children e2bbc79f0fac
files dante.xml dante_gff_output_filtering.py dante_gff_output_filtering.xml tool_dependencies.xml
diffstat 4 files changed, 29 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/dante.xml	Tue Sep 24 08:12:01 2019 -0400
+++ b/dante.xml	Fri Apr 03 07:27:59 2020 -0400
@@ -1,4 +1,4 @@
-<tool id="dante" name="Domain based ANnotation of Transposable Elements - DANTE" version="1.0.0">
+<tool id="dante" name="Domain based ANnotation of Transposable Elements - DANTE" version="1.1.0">
   <description> Tool for annotation of transposable elements based on the similarity to conserved protein domains database. </description>
   <requirements>
     <requirement type="package">last</requirement>
@@ -26,6 +26,14 @@
 	  --classification \${REXDB}/${db_type}_class
     --scoring_matrix ${scoring_matrix}
 
+    &amp;&amp;
+    python3 ${__tool_directory__}/dante_gff_output_filtering.py --dom_gff ${DomGff}
+    --domains_prot_seq ${Domains_filtered} --domains_filtered ${DomGff_filtered}
+    --output_dir .
+    --selected_dom All --th_identity 0.35
+    --th_similarity 0.45 --th_length 0.8
+    --interruptions 3 --max_len_proportion 1.2
+    --element_type ''
 
     #if str($input_type.input_type_selector) == "aln"
      &amp;&amp;
@@ -100,7 +108,9 @@
   </inputs>
 
   <outputs>
-    <data format="gff3" name="DomGff"  label="DANTE on ${on_string}" />
+    <data format="gff3" name="DomGff"  label="DANTE on ${on_string}, full output" />
+    <data format="gff3" name="DomGff_filtered"  label="DANTE on ${on_string}, filtered output" />
+    <data format="fasta" name="Domains_filtered"  label="DANTE on ${on_string}, protein domains, filtered output" />
     <data format="gff3" name="DomGff2" label="DANTE on ${on_string}: 2nd pass">
       <filter>iterative == "Yes" </filter>
     </data>
@@ -122,7 +132,7 @@
   <help>
 
 
-    THIS IS A PRIMARY OUTPUT THAT SHOULD UNDERGO FURTHER QUALITY FILTERING TO GET RID OFF POTENTIAL FALSE POSITIVE DOMAINS
+    
 
     **WHAT IT DOES**
 
--- a/dante_gff_output_filtering.py	Tue Sep 24 08:12:01 2019 -0400
+++ b/dante_gff_output_filtering.py	Fri Apr 03 07:27:59 2020 -0400
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-
+import sys
 import time
 import configuration
 import os
@@ -39,13 +39,23 @@
 
 
 def write_info(filt_dom_tmp, FILT_DOM_GFF, orig_class_dict, filt_class_dict,
-               dom_dict, version_lines):
+               dom_dict, version_lines, TH_IDENTITY, TH_SIMILARITY,
+               TH_LENGTH, TH_INTERRUPT, TH_LEN_RATIO, SELECTED_DOM):
     '''
 	Write domains statistics in beginning of filtered GFF
 	'''
     with open(FILT_DOM_GFF, "w") as filt_gff:
         for line in version_lines:
             filt_gff.write(line)
+        filt_gff.write(("##Filtering thresholdss: min identity: {}, min similarity: {},"
+                        " min relative alingment length: {}, max interuptions(stop or "
+                        "frameshift): {}, max relative alignment length: {}, selected"
+                        " domains: {} \n").format(TH_IDENTITY,
+                                                  TH_SIMILARITY,
+                                                  TH_LENGTH,
+                                                  TH_INTERRUPT,
+                                                  TH_LEN_RATIO,
+                                                  SELECTED_DOM))
         filt_gff.write("##CLASSIFICATION\tORIGINAL_COUNTS\tFILTERED_COUNTS\n")
         if not orig_class_dict:
             filt_gff.write("##NO DOMAINS CLASSIFICATIONS\n")
@@ -159,7 +169,8 @@
                     domains.append(dom_type)
     path = os.path.dirname(os.path.realpath(__file__))
     write_info(filt_dom_tmp, FILT_DOM_GFF, orig_class_dict, filt_class_dict,
-               dom_dict, version_lines)
+               dom_dict, version_lines, TH_IDENTITY, TH_SIMILARITY,
+               TH_LENGTH, TH_INTERRUPT, TH_LEN_RATIO, SELECTED_DOM)
     os.unlink(filt_dom_tmp.name)
     xminimals_all.append(xminimals)
     xmaximals_all.append(xmaximals)
--- a/dante_gff_output_filtering.xml	Tue Sep 24 08:12:01 2019 -0400
+++ b/dante_gff_output_filtering.xml	Fri Apr 03 07:27:59 2020 -0400
@@ -1,4 +1,4 @@
-<tool id="domains_filter" name="Protein Domains Filter" version="1.0.0">
+<tool id="domains_filter" name="Protein Domains Filter" version="1.0.1">
   <description> Tool for filtering of gff3 output from DANTE. Filtering can be performed based on domain type and alignment quality. </description>
   <stdio>
     <regex match="Traceback" source="stderr" level="fatal" description="Unknown error" />
--- a/tool_dependencies.xml	Tue Sep 24 08:12:01 2019 -0400
+++ b/tool_dependencies.xml	Fri Apr 03 07:27:59 2020 -0400
@@ -1,7 +1,7 @@
 <?xml version="1.0" ?>
 <tool_dependency>
     <package name="rexdb" version="1.0">
-        <repository changeset_revision="c769e54cd079" name="package_rexdb_1_0" owner="petr-novak" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu"/>
+        <repository changeset_revision="ac89c185fbd0" name="package_rexdb_1_0" owner="petr-novak" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu"/>
         <readme>
       prepare rexdb database for dante
     </readme>