Mercurial > repos > bgruening > antismash
changeset 1:593bb8f5488b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/antismash commit 654a4f3b3a1602cec2510d51fb953fd456427e08
author | bgruening |
---|---|
date | Wed, 07 Feb 2018 06:22:58 -0500 |
parents | 5db064bbb3be |
children | 3f0077c88c16 |
files | antismash.xml readme.rst repository_dependencies.xml static/images/antismash_user_interface.png test-data/ARBH01000003.1.cluster001 test-data/ARBH01000003.1.final test-data/index.2.html test-data/index.html test-data/sequence.fasta test-data/sequence.gb tool-data/antismash.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml |
diffstat | 13 files changed, 1521 insertions(+), 479 deletions(-) [+] |
line wrap: on
line diff
--- a/antismash.xml Tue Jul 15 14:34:55 2014 -0400 +++ b/antismash.xml Wed Feb 07 06:22:58 2018 -0500 @@ -1,253 +1,166 @@ -<tool id="antismash" name="Secondary Metabolites" version="2.0.2.2"> - <description>and Antibiotics Analysis (antiSMASH)</description> +<?xml version='1.0' encoding='utf-8'?> +<tool id="antismash" name="Antismash" version="4.0.2" profile="17.01"> + <description>allows the genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters</description> <requirements> - <requirement type="package" version="3.0">hmmer</requirement> - <requirement type="package">hmmer</requirement> - <requirement type="package" version="2.2.28">blast+</requirement> - <requirement type="package">blast+</requirement> - <requirement type="package" version="3.8.31">muscle</requirement> - <requirement type="package">muscle</requirement> - <requirement type="package" version="1.4.0-post-1">straight.plugin</requirement> - <requirement type="package">straight.plugin</requirement> - <requirement type="package" version="1.62">biopython</requirement> - <requirement type="package">biopython</requirement> - <requirement type="package" version="1.2.6">pyquery</requirement> - <requirement type="package">pyquery</requirement> - <requirement type="package" version="0.1.2">helperlibs</requirement> - <requirement type="package">helperlibs</requirement> - <requirement type="package" version="0.9">cssselect</requirement> - <requirement type="package">cssselect</requirement> - <requirement type="package" version="2.0.2">antismash</requirement> - <requirement type="package">antismash</requirement> - <requirement type="package">glimmer</requirement> + <requirement type="package" version="4.0.2">antismash</requirement> </requirements> - <command> + <version_command>antismash --version</version_command> + <command detect_errors="aggressive"> +<![CDATA[ #import os, glob - #set $outputfolder = $html.files_path + #set $htmloutputfolder = $html.files_path #if 
str($infile.ext) == 'genbank': #set $file_extension = 'gb' #else: - ## TODO add embl as input file - #set $file_extension = 'gb' - #end if - - ln -s $infile #echo 'input_tempfile.' + $file_extension#; - mkdir -p $outputfolder; - run_antismash.py - --cpus "\${GALAXY_SLOTS:-12}" - --enable $types - --input-type 'nucl' - $smcogs - $clusterblast - $subclusterblast - $inclusive - $full_hmmer - $full_blast - $eukaryotic - - - #if str($pfam_database) != "None": - --pfamdir $pfam_database.fields.path - #end if - - ##--debug - - --disable-embl - --outputfolder $outputfolder - - #echo 'input_tempfile.' + $file_extension# - - ## leave out the start and end features, it can be easily replaced with Galaxy tools - ##--from START Start analysis at nucleotide specified - ##--to END - - 2>&1 - - ## - ## shuffling files to create the correct outputs for Galaxy - ## - - ## html output - ; - cp #echo os.path.join($outputfolder, 'index.html')# $html 2> /dev/null - - ## gene clusters - #if 'geneclusterprots_tabular' in str($outputs).split(','): - ; - cp #echo os.path.join($outputfolder, 'geneclusters.txt')# $geneclusterprots_tabular 2> /dev/null - #end if - - #if 'geneclusterprots_fasta' in str($outputs).split(','): - ; - cp #echo os.path.join($outputfolder, '*_genecluster_proteins.fa')# $geneclusterprots_fasta 2> /dev/null - #end if - - - ##SVG images - #if 'archive_svgs' in str($outputs).split(','): - ; - cd #echo os.path.join($outputfolder, 'svg')# - #if $clusterblast: - ; - tar cfz $archive_svgs *_all.svg genecluster* 2> /dev/null - #else: - ; - tar cfz $archive_svgs genecluster* - #end if - #end if - - ##all files in a archive - #if 'archive' in str($outputs).split(','): - ; - cd $outputfolder; - tar cf $archive *.zip 2> /dev/null + #set $file_extension = $infile.ext #end if - ## genbank - #if 'gb' in str($outputs).split(','): - ; - cat #echo os.path.join($outputfolder, '*.gbk')# > $genbank 2> /dev/null - #end if + ln -s '$infile' input_tempfile.$file_extension && + + ## create 
html folder + mkdir -p $htmloutputfolder && + + antismash + --cpus "\${GALAXY_SLOTS:-12}" + --taxon '${taxon}' + --input-type '${input_type}' + ${clusterblast} + ${subclusterblast} + ${smcogs} + ${inclusive} + ${borderpredict} + ${tta} + ${asf} + ${full_hmmer} + + input_tempfile.$file_extension && + + ## copy all content to html folder + cp input_tempfile/index.html '${html}' 2> /dev/null && + cp -r input_tempfile/* '${htmloutputfolder}' + +]]> </command> <inputs> - <param name="infile" type="data" format="genbank" label="Nucleotide sequence file in GenBank format"/> + <param name="infile" type="data" format="genbank,fasta,embl" label="Sequence file in GenBank,EMBL or FASTA format"/> - <param name="eukaryotic" type="select" label="Origin of DNA"> - <option value="" selected="True">Prokaryotic</option> - <option value="--eukaryotic">Eukaryotic</option> + <param argument="--taxon" type="select" label="Origin of DNA"> + <option value="bacteria" selected="True">Bacteria</option> + <option value="fungi">Fungi</option> </param> - <param name="clusterblast" type="boolean" label="BLAST identified clusters against known clusters" - help="(--clusterblast)" - truevalue="--clusterblast" falsevalue="" checked="True" /> - <param name="subclusterblast" type="boolean" label="Subcluster BLAST analysis" - help="(--subclusterblast)" - truevalue="--subclusterblast" falsevalue="" checked="false" /> - <param name="smcogs" type="boolean" label="Analysis of secondary metabolism gene families (smCOGs)" - falsevalue="" truevalue="--smcogs" checked="True" /> - - <param name="full_blast" type="boolean" label="Run a whole-genome BLAST analysis" - help="(--full-blast)" - truevalue="--full-blast" falsevalue="" checked="False" /> - <param name="full_hmmer" type="boolean" label="Run a whole-genome Pfam analysis" - help="(--full-hmmer)" - truevalue="--full-hmmer" falsevalue="" checked="false" /> - - <param name="inclusive" type="boolean" label="Use Cimermancic et al. 
algorithm for cluster detection" - help="(--inclusive)" - truevalue="--inclusive" falsevalue="" checked="false" /> - - <param name="pfam_database" type="select" optional="true" label="Pfam database" help="Pfam Covariance models"> - <options from_file="antismash.loc"> - <column name="value" index="0"/> - <column name="name" index="1"/> - <column name="path" index="2"/> - </options> + <param argument="--input_type" type="select" label="Origin of DNA"> + <option value="nucl" selected="True">Nucleotide</option> + <option value="prot">Amino-acid</option> </param> - <param name="types" type="select" display="checkboxes" multiple="true" label="Gene cluster types to search"> - <option value="t1pks" selected="True">type I polyketide synthases</option> - <option value="t2pks" selected="True">type II polyketide synthases</option> - <option value="t3pks" selected="True">type III polyketide synthases</option> - <option value="t4pks" selected="True">type IV polyketide synthases</option> - <option value="transatpks" selected="True">trans-AT PKS</option> - <option value="nrps" selected="True">nonribosomal peptide synthetases</option> - <option value="terpene" selected="True">terpene synthases</option> - <option value="lantipeptide" selected="True">lantipeptides</option> - <option value="bacteriocin" selected="True">bacteriocins</option> - <option value="blactam" selected="True">beta-lactams</option> - <option value="amglyccycl" selected="True">aminoglycosides / aminocyclitols</option> - <option value="aminocoumarin" selected="True">aminocoumarins</option> - <option value="siderophore" selected="True">siderophores</option> - <option value="ectoine" selected="True">ectoines</option> - <option value="butyrolactone" selected="True">butyrolactones</option> - <option value="indole" selected="True">indoles</option> - <option value="nucleoside" selected="True">nucleosides</option> - <option value="phosphoglycolipid" selected="True">phosphoglycolipids</option> - <option 
value="oligosaccharide" selected="True">oligosaccharides</option> - <option value="furan" selected="True">furans</option> - <option value="hserlactone" selected="True">hserlactones</option> - <option value="thiopeptide" selected="True">thiopeptides</option> - <option value="phenazine" selected="True">phenazines</option> - <option value="phosphonate" selected="True">phosphonates</option> - <option value="other" selected="True">others</option> - </param> + <param argument="--clusterblast" type="boolean" truevalue="--clusterblast" falsevalue="" checked="False" + label="BLAST identified clusters against known clusters" + help="Compare identified clusters against a database of antiSMASH-predicted clusters." /> + <param argument="--subclusterblast" type="boolean" truevalue="--subclusterblast" falsevalue="" checked="True" + label="Subcluster BLAST analysis" + help="Compare identified clusters against known subclusters responsible for synthesising precursors." /> + <param argument="--knownclusterblast" type="boolean" truevalue="--knownclusterblast" falsevalue="" checked="True" + label="KnowCluster BLAST analysis" + help="Compare identified clusters against known gene clusters from the MIBiG database."/> + <param argument="--smcogs" type="boolean" checked="True" truevalue="--smcogs" falsevalue="" + label="Analysis of secondary metabolism gene families (smCOGs)" + help="Look for sec. met. 
clusters of orthologous groups."/> + <param argument="--inclusive" type="boolean" truevalue="--inclusive" falsevalue="" checked="False" + label="Inclusive ClusterFinder algorithm" + help="Use inclusive ClusterFinder algorithm for additional cluster detection."/> + <param argument="--borderpredict" type="boolean" truevalue="--borderpredict" falsevalue="" checked="False" + label="Predict gene cluster borders with ClusterFinder" + help="Use ClusterFinder algorithm to predict gene cluster borders."/> + <param argument="--asf" type="boolean" truevalue="--asf" falsevalue="" checked="True" + label="Run active site finder module" /> + <param argument="--tta" type="boolean" truevalue="--tta" falsevalue="" checked="False" + label="Run TTA codon detection module" /> + <param argument="--full_hmmer" type="boolean" truevalue="--full-hmmer" falsevalue="" checked="False" + label="Run a whole-genome Pfam analysis" /> - <param name="outputs" type="select" multiple="true" label="Additional outputs"> - <option value="geneclusterprots_fasta" selected="True">Gene cluster proteins (FASTA)</option> - <option value="geneclusterprots_tabular">Gene cluster proteins (Tabular)</option> - <option value="archive_svgs">All clusters as image (compressed)</option> - <option value="archive">All files compressed</option> - <option value="gb">Annotated genome (GenBank)</option> + <param name="outputs" type="select" multiple="true" label="Outputs"> + <option value="html" selected="True">HTML file</option> + <option value="all">All results</option> + <option value="embl">EMBL files</option> + <option value="gb">GenBank files</option> + <option value="genecluster_tabular">Gene clusters</option> </param> </inputs> <outputs> - <data format="fasta" name="geneclusterprots_fasta" label="${tool.name} on ${on_string} (Gen Cluster Proteins)"> - <filter>'geneclusterprots_fasta' in outputs</filter> - </data> - <data format="tabular" name="geneclusterprots_tabular" label="${tool.name} on ${on_string} (Gen Cluster 
Proteins)"> - <filter>'geneclusterprots_tabular' in outputs</filter> - </data> - <data format="tar" name="archive" label="${tool.name} on ${on_string} (all files compressed)"> - <filter>'archive' in outputs</filter> - </data> - <data format="tar.gz" name="archive_svgs" label="${tool.name} on ${on_string} (SVG images)"> - <filter>'archive_svgs' in outputs</filter> - </data> - <data format="html" name="html" label="${tool.name} on ${on_string} (html report)"> - <!-- html is default output at any time. - <filter>'html' in outputs</filter> - --> - </data> - <data name="genbank" format="genbank" label="${tool.name} on ${on_string} (genbank)"> - <filter>'gb' in outputs</filter> - </data> + <collection type="list" name="genecluster_tabular" label="${tool.name} on ${on_string} (Gene Cluster)"> + <discover_datasets pattern="(?P<designation>.*)\.txt" directory="input_tempfile" ext="txt" visible="false" /> + <filter>'genecluster_tabular' in outputs</filter> + </collection> + <collection name="genbank" type="list" label="${tool.name} on ${on_string} (GenBank)"> + <discover_datasets pattern="(?P<designation>.*)\.gbk" directory="input_tempfile" ext="genbank" visible="false" /> + <filter>'gb' in outputs</filter> + </collection> + <collection name="embl" type="list" label="${tool.name} on ${on_string} (EMBL)"> + <discover_datasets pattern="(?P<designation>.*)\.gbk" directory="input_tempfile" ext="embl" visible="false" /> + <filter>'embl' in outputs</filter> + </collection> + <collection name="archive" type="list" label="${tool.name} on ${on_string} (all files compressed)"> + <discover_datasets pattern="(?P<designation>.*)\.zip" directory="input_tempfile" ext="zip" visible="false" /> + <filter>'all' in outputs</filter> + </collection> + <data format="html" name="html" label="${tool.name} on ${on_string} (html report)" /> </outputs> - <help> - -.. 
class:: infomark + <tests> + <test> + <param name="infile" value="sequence.fasta"/> + <output name="html" file="index.html"/> + </test> + <test> + <param name="infile" value="sequence.gb"/> + <param name="outputs" value="html,gb"/> + <output_collection name="genbank" type="list"> + <element name="ARBH01000003.1.cluster001" file="ARBH01000003.1.cluster001" ftype="genbank" compare="sim_size" /> + <element name="ARBH01000003.1.final" file="ARBH01000003.1.final" ftype="genbank"/> + </output_collection> + <output name="html" file="index.2.html"/> + </test> + </tests> + <help> +<![CDATA[ **What it does** -antiSMASH allows the rapid genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters in bacterial and fungal genomes. +AntiSMASH allows the rapid genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters in bacterial and fungal genomes. It integrates and cross-links with a large number of in silico secondary metabolite analysis tools that have been published earlier. +antiSMASH is powered by several open source tools: NCBI BLAST+, HMMer 3, Muscle 3, Glimmer 3, FastTree, TreeGraph 2, Indigo-depict, PySVG and JQuery SVG. **Input** -The ideal input for antiSMASH is an annotated nucleotide file in Genbank format. If no annotation is available, -we recommend running your sequence through an annotation pipeline like RAST are the one included in Galaxy. - +The ideal input for antiSMASH is an annotated nucleotide file in Genbank format or EMBL format. +You can either upload a GenBank/EMBL file manually, or simply enter the GenBank/RefSeq accession number of your sequence for antiSMASH to upload it. +If no annotation is available, we recommend running your sequence through an annotation pipeline like RAST to obtain GBK/EMBL files with high-quality annotations. -There are several optional analyses that may or may not be run on your sequence. 
-Highly recommended is the Gene Cluster Blast Comparative Analysis, which runs BlastP using each amino acid sequence from a detected gene cluster as a -query on a large database of predicted protein sequences from secondary metabolite biosynthetic gene clusters, and pools the results to identify -the gene clusters that are most homologous to the gene cluster that was detected in your query nucleotide sequence. - +Alternatively, you can provide a FASTA file containing a single sequence. antiSMASH will generate a preliminary annotation using Prodigal, and use that to run the rest of the analysis. +You can also provide gene annotations in GFF3 format. Input files should be properly formatted. +If you are creating your GBK/EMBL/FASTA file manually, be sure to do so in a plain text editor like Notepad or Emacs, and save your files as "All files (.)", ending with the correct extension (for example ".fasta", ".gbk", or ".embl"). -Also available is the analysis of secondary metabolism gene families (smCOGs). -This analysis attempts to allocate each gene in the detected gene clusters to a secondary metabolism-specific gene -family using profile hidden Markov models specific for the conserved sequence region characteristic of this family. -Additionally, a phylogenetic tree is constructed of each gene together with the (max. 100) sequences of the smCOG seed alignment. +There are several optional analyses that may or may not be run on your sequence. Highly recommended is the Gene Cluster Blast Comparative Analysis, which runs BlastP using each amino acid sequence from a detected gene cluster as a query on a large database of predicted protein sequences from secondary metabolite biosynthetic gene clusters, and pools the results to identify the gene clusters that are most homologous to the gene cluster that was detected in your query nucleotide sequence. +This analysis is selected by default. +Also available is the analysis of secondary metabolism gene families (smCOGs). 
This analysis attempts to allocate each gene in the detected gene clusters to a secondary metabolism-specific gene family using profile hidden Markov models specific for the conserved sequence region characteristic of this family. +Additionally, a phylogenetic tree is constructed of each gene together with the (max. 100) sequences of the smCOG seed alignment. This analysis is selected by default. -For the most thorough genome analysis, we provide genome-wide PFAM HMM analysis of all genes in the genome through modules of the CLUSEAN pipeline. -Of course, some regions important to secondary metabolism may have been missed in the gene cluster identification stage -(e.g. because they represent the biosynthetic pathway of a yet unknown secondary metabolite). -Therefore, when genome-wide PFAM HMM analysis is selected, the PFAM frequencies are also used to find all genome regions in which PFAM domains typical for secondary metabolism are overrepresented. +**Output** - -**References** +The output of the antiSMASH analysis pipeline is organized in an interactive HTML page with SVG graphics, and different parts of the analysis are displayed in different panels for every gene cluster. -Marnix H. Medema, Kai Blin, Peter Cimermancic, Victor de Jager, Piotr Zakrzewski, Michael A. Fischbach, Tilmann Weber, -Rainer Breitling and Eriko Takano (2011). antiSMASH: Rapid identification, annotation and analysis of secondary metabolite biosynthesis gene clusters. Nucleic Acids Research, doi: 10.1093/nar/gkr466. - -http://antismash.secondarymetabolites.org/help.html +In the upper right, a small list of buttons offers further functionality. The house-shaped button will get you back to the antiSMASH start page. +The question-mark button will get you to this help page. The exclamation-mark button leads to a page with information about antiSMASH. 
+The downward-pointing arrow will open a menu offering to download the complete set of results from the antiSMASH run, a summary Excel file, and the summary EMBL/GenBank output file. +The EMBL/GenBank file can be viewed in a genome browser such as Artemis. - -Bjoern A. Gruening: https://github.com/bgruening/galaxytools/tree/master/antismash - - </help> +]]> + </help> + <citations> + <citation type="doi">10.1093/nar/gkv437</citation> + </citations> </tool>
--- a/readme.rst Tue Jul 15 14:34:55 2014 -0400 +++ b/readme.rst Wed Feb 07 06:22:58 2018 -0500 @@ -17,28 +17,12 @@ Installation ============ -Galaxy should be able to automatically install the dependencies, i.e. the -'package_hmmer_3_0' or 'package_blast_plus_2_2_28' repository. - -You must tell Galaxy about any system level Pfam databases using the configuration -file antismash.loc. - -You can download the Pfam provided databases as compressed archives from here: - -* ftp://ftp.sanger.ac.uk/pub/databases/Pfam/current_release/ - - -External Data -============= - -The antismash.loc file contains the path to a Pfam database. -For any other tool that also uses a Pfam database *.log file, you can sync or link both *.loc files. - - +Galaxy should be able to automatically install the dependencies using conda History ======= +v0.2 - Antismash update 4.0.2 v0.1 - Initial public release @@ -62,4 +46,3 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -
--- a/repository_dependencies.xml Tue Jul 15 14:34:55 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ -<?xml version="1.0"?> -<repositories description="AntiSmash requires the Galaxy applicable data formats used by Emboss tools, especially genbank."> - <repository changeset_revision="a89163f31369" name="emboss_datatypes" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" /> -</repositories>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ARBH01000003.1.cluster001 Wed Feb 07 06:22:58 2018 -0500 @@ -0,0 +1,309 @@ +LOCUS ARBH01000003 3500 bp DNA linear BCT 22-APR-2013 +DEFINITION Amycolatopsis balhimycina FH 1894 strain DSM 44591 + A3CEDRAFT_scaffold1.1_C3, whole genome shotgun sequence. +ACCESSION ARBH01000003 +VERSION ARBH01000003.1 +KEYWORDS . +SOURCE Amycolatopsis balhimycina FH 1894 + ORGANISM Amycolatopsis balhimycina FH 1894 + Bacteria; Actinobacteria; Pseudonocardiales; Pseudonocardiaceae; + Amycolatopsis. +FEATURES Location/Qualifiers + source 1..3500 + /organism="Amycolatopsis balhimycina FH 1894" + /mol_type="genomic DNA" + /strain="DSM 44591" + /culture_collection="DSM:44591" + /db_xref="taxon:1089545" + cluster 1..3500 + /note="Cluster number: 1" + /note="Detection rule(s) for this cluster type: t1pks: + ((PKS_KS & PKS_AT) or (ene_KS & PKS_AT) or (mod_KS & + PKS_AT) or (hyb_KS & PKS_AT) or (itr_KS & PKS_AT) or + (tra_KS & PKS_AT));" + /note="Monomers prediction: (mal)" + /note="Structure image: structures/genecluster1.png" + /cutoff=20000 + /extension=20000 + /product="t1pks" + /contig_edge="True" + CDS 3..3500 + /aSProdPred="mal" + /note="smCOG: + SMCOG1001:short-chain_dehydrogenase/reductase_SDR (Score: + 64.2; E-value: 2.2e-19);" + /locus_tag="ctg1_1" + /translation="GGVLALPAELDRRGGDRLAAVLSGATGEDQLAIRAAGVFGRRVVR + APAGDRAPARTWTPRGTTLITGGTGTLAPHLARWLAEQGAEHIVLTSRTGAEAPKARQL + LAELGETVEAVACDVTDKAALAALLARLRAEGRTVRNVVHTAAVIELHTLAETDLAAFS + RTVHAKVVGARNLDELLDTDELDAFVLYSSTAGLWGTGAHAAYVAGNAYLHALAAHRRA + RGLRATALSWGIWADDRELGRVDPEQIVRSGLVFMAPELALEGLRRALDDDETALAVAD + LDWERYYPVYTAVRPTLLFDELPEVRRLTEAAAATAATGAGGEFAARLRTLPEAERAHL + LLELVRAEAAAVLGHASADALPEDRAFRDVGFDSVTAVDLRNRISAGTGLTLPATMVFD + HPTPRRLAGFLAATITGSGAVEQAPAVAGVDTGEPVAIIGMACRYPGGANTPERLWDLV + VGGVDAISGFPADRNWPTDALYDPDPDAGGKTYSVQGGFLHEAAEFDPGFFGISPREAL + SMDPQQRLLLETAWEAFERAGIDPHTLRGSGTGTFIGASYQDYTAAVSGAVDNADGHMI + TGSLGSILSGRLSYLFGLEGPAVTLDTACSSSLVAIHLAAQSLRSGESSLALAGGVSVM + 
ATPGAFVGFSRQRALATDGRCKAYSDRADGMTLGEGVGLVLLEKLSDAQRNGHRILAVV + RGSATNQDGASNGMTAPSGPSQQRVIRQALANARLSASEVDVIEGHGTGTALGDPIEAQ + ALLATYGQDRERPLLLGSVKSNIGHTQMASGVAGVIKVVQALRHGLVPKTLHVDEPSTH + VDWSTGSIELPSGSVPWPESGRPRRAGISSFGLSGTNVHTILEQAPEPAAEAGPEPEPG + LVPVPLSGRTEAALRAQAATVLDTLDDGVSPAVLGYSLASTRSAFEHRAVLLAEDHDEL + RRGLAALAGDQPDGGVVRGTVTRGRTAFLFAGQGSQRAGMGRELYERHPVFADALDAVL + GHFDLPRALRDVMWDDDSTALDETGYTQPALFAFEVALFRLLESWGVTPDYLAGHSIGE + IAAAHVAGVLSLADACALVAARGALMQALPSGGAMVSVRGSEADVAGHLGEDVAVAAVN + GPESVVLAGTEDAVLQAAGRLEAAGHKVRRLRVSHAFHSPLMDPVLAEFATVAQGLTYH + " + /sec_met="Type: t1pks" + /sec_met="Domains detected: PP-binding (E-value: 1.3e-15, + bitscore: 44.0, seeds: 164); PKS_AT (E-value: 8.7e-82, + bitscore: 260.9, seeds: 1682); mod_KS (E-value: 9.4e-224, + bitscore: 729.0, seeds: 217); adh_short (E-value: 7.1e-52, + bitscore: 162.1, seeds: 230)" + /sec_met="Kind: biosynthetic" + /sec_met="NRPS/PKS subtype: Type I Modular PKS" + /sec_met="NRPS/PKS Domain: PKS_KR (60-237). E-value: + 1.4e-53. Score: 173.1; Predicted KR activity: active; + Predicted KR stereochemistry: A2;" + /sec_met="NRPS/PKS Domain: ACP (341-412). E-value: 7.8e-31. + Score: 97.8;" + /sec_met="NRPS/PKS Domain: PKS_KS (434-858). E-value: + 3e-179. Score: 587.9;" + /sec_met="NRPS/PKS Domain: PKS_AT (958-1166). E-value: + 4e-80. 
Score: 260.9; Substrate specificity predictions: mal + (PKS signature), mal (Minowa), mal (consensus);" + aSDomain 183..713 + /domain="PKS_KR" + /locus_tag="ctg1_1" + /detection="hmmscan" + /database="nrpspksdomains.hmm" + /evalue="1.40E-53" + /score="173.1" + /translation="GTTLITGGTGTLAPHLARWLAEQGAEHIVLTSRTGAEAPKARQLL + AELGETVEAVACDVTDKAALAALLARLRAEGRTVRNVVHTAAVIELHTLAETDLAAFSR + TVHAKVVGARNLDELLDTDELDAFVLYSSTAGLWGTGAHAAYVAGNAYLHALAAHRRAR + GLRATALSWGIWAD" + /label="ctg1_1_KR1" + /asDomain_id="nrpspksdomains_ctg1_1_KR1" + /specificity="KR activity: active" + /specificity="KR stereochemistry: A2" + CDS_motif 192..284 + /note="NRPS/PKS Motif: PKSI-KR_m1 (e-value: 3.8e-15, + bit-score: 48.4)" + /locus_tag="ctg1_1" + /motif="PKSI-KR_m1" + /database="abmotifs" + /evalue="3.80E-15" + /asDomain_id="nrpspksmotif_ctg1_1_0001" + /detection="hmmscan" + /score="48.4" + /aSTool="pksnrpsmotif" + /translation="LITGGTGTLAPHLARWLAEQGAEHIVLTSRT" + /label="PKSI-KR_m1" + CDS_motif 555..665 + /note="NRPS/PKS Motif: PKSI-KR_m4 (e-value: 1.6e-14, + bit-score: 46.4)" + /locus_tag="ctg1_1" + /motif="PKSI-KR_m4" + /database="abmotifs" + /evalue="1.60E-14" + /asDomain_id="nrpspksmotif_ctg1_1_0002" + /detection="hmmscan" + /score="46.4" + /aSTool="pksnrpsmotif" + /translation="LDAFVLYSSTAGLWGTGAHAAYVAGNAYLHALAAHRR" + /label="PKSI-KR_m4" + aSDomain 1026..1238 + /domain="ACP" + /locus_tag="ctg1_1" + /detection="hmmscan" + /database="nrpspksdomains.hmm" + /evalue="7.80E-31" + /score="97.8" + /translation="LELVRAEAAAVLGHASADALPEDRAFRDVGFDSVTAVDLRNRISA + GTGLTLPATMVFDHPTPRRLAGFLAA" + /asDomain_id="nrpspksdomains_ctg1_1_Xdom01" + aSDomain 1305..2576 + /domain="PKS_KS" + /locus_tag="ctg1_1" + /detection="hmmscan" + /database="nrpspksdomains.hmm" + /evalue="3.00E-179" + /score="587.9" + /translation="VAIIGMACRYPGGANTPERLWDLVVGGVDAISGFPADRNWPTDAL + YDPDPDAGGKTYSVQGGFLHEAAEFDPGFFGISPREALSMDPQQRLLLETAWEAFERAG + IDPHTLRGSGTGTFIGASYQDYTAAVSGAVDNADGHMITGSLGSILSGRLSYLFGLEGP + 
AVTLDTACSSSLVAIHLAAQSLRSGESSLALAGGVSVMATPGAFVGFSRQRALATDGRC + KAYSDRADGMTLGEGVGLVLLEKLSDAQRNGHRILAVVRGSATNQDGASNGMTAPSGPS + QQRVIRQALANARLSASEVDVIEGHGTGTALGDPIEAQALLATYGQDRERPLLLGSVKS + NIGHTQMASGVAGVIKVVQALRHGLVPKTLHVDEPSTHVDWSTGSIELPSGSVPWPESG + RPRRAGISSFGLSGTNVHTILEQAP" + /asDomain_id="nrpspksdomains_ctg1_1_Xdom02" + CDS_motif 1788..1835 + /note="NRPS/PKS Motif: PKSI-KS_m3 (e-value: 2e-06, + bit-score: 19.7)" + /locus_tag="ctg1_1" + /motif="PKSI-KS_m3" + /database="abmotifs" + /evalue="2.00E-06" + /asDomain_id="nrpspksmotif_ctg1_1_0003" + /detection="hmmscan" + /score="19.7" + /aSTool="pksnrpsmotif" + /translation="GPAVTLDTACSSSLVA" + /label="PKSI-KS_m3" + CDS_motif 2211..2255 + /note="NRPS/PKS Motif: PKSI-KS_m5 (e-value: 0.0019, + bit-score: 10.8)" + /locus_tag="ctg1_1" + /motif="PKSI-KS_m5" + /database="abmotifs" + /evalue="1.90E-03" + /asDomain_id="nrpspksmotif_ctg1_1_0004" + /detection="hmmscan" + /score="10.8" + /aSTool="pksnrpsmotif" + /translation="IEGHGTGTALGDPIE" + /label="PKSI-KS_m5" + CDS_motif 2310..2354 + /note="NRPS/PKS Motif: PKSI-KS_m6 (e-value: 0.0011, + bit-score: 11.5)" + /locus_tag="ctg1_1" + /motif="PKSI-KS_m6" + /database="abmotifs" + /evalue="1.10E-03" + /asDomain_id="nrpspksmotif_ctg1_1_0005" + /detection="hmmscan" + /score="11.5" + /aSTool="pksnrpsmotif" + /translation="GSVKSNIGHTQMASG" + /label="PKSI-KS_m6" + CDS_motif 2874..2927 + /note="NRPS/PKS Motif: PKSI-AT-mM_m1 (e-value: 2.3e-06, + bit-score: 19.8)" + /locus_tag="ctg1_1" + /motif="PKSI-AT-mM_m1" + /database="abmotifs" + /evalue="2.30E-06" + /asDomain_id="nrpspksmotif_ctg1_1_0006" + /detection="hmmscan" + /score="19.8" + /aSTool="pksnrpsmotif" + /translation="FLFAGQGSQRAGMGRELY" + /label="PKSI-AT-mM_m1" + aSDomain 2877..3500 + /domain="PKS_AT" + /locus_tag="ctg1_1" + /detection="hmmscan" + /database="nrpspksdomains.hmm" + /evalue="4.00E-80" + /score="260.9" + /translation="LFAGQGSQRAGMGRELYERHPVFADALDAVLGHFDLPRALRDVMW + DDDSTALDETGYTQPALFAFEVALFRLLESWGVTPDYLAGHSIGEIAAAHVAGVLSLAD 
+ ACALVAARGALMQALPSGGAMVSVRGSEADVAGHLGEDVAVAAVNGPESVVLAGTEDAV + LQAAGRLEAAGHKVRRLRVSHAFHSPLMDPVLAEFATVAQGLTYH" + /label="ctg1_1_AT1" + /asDomain_id="nrpspksdomains_ctg1_1_AT1" + /specificity="PKS signature: mal" + /specificity="Minowa: mal" + /specificity="consensus: mal" + CDS_motif 3033..3095 + /note="NRPS/PKS Motif: PKSI-AT-mM_m2 (e-value: 0.0032, + bit-score: 10.9)" + /locus_tag="ctg1_1" + /motif="PKSI-AT-mM_m2" + /database="abmotifs" + /evalue="3.20E-03" + /asDomain_id="nrpspksmotif_ctg1_1_0007" + /detection="hmmscan" + /score="10.9" + /aSTool="pksnrpsmotif" + /translation="DETGYTQPALFAFEVALFRLL" + /label="PKSI-AT-mM_m2" + CDS_motif 3105..3227 + /note="NRPS/PKS Motif: PKSI-AT-M_m3 (e-value: 2.3e-22, + bit-score: 71.3)" + /locus_tag="ctg1_1" + /motif="PKSI-AT-M_m3" + /database="abmotifs" + /evalue="2.30E-22" + /asDomain_id="nrpspksmotif_ctg1_1_0008" + /detection="hmmscan" + /score="71.3" + /aSTool="pksnrpsmotif" + /translation="GVTPDYLAGHSIGEIAAAHVAGVLSLADACALVAARGALMQ" + /label="PKSI-AT-M_m3" + CDS_motif 3312..3350 + /note="NRPS/PKS Motif: PKSI-AT-M_m5 (e-value: 0.00047, + bit-score: 13.4)" + /locus_tag="ctg1_1" + /motif="PKSI-AT-M_m5" + /database="abmotifs" + /evalue="4.70E-04" + /asDomain_id="nrpspksmotif_ctg1_1_0009" + /detection="hmmscan" + /score="13.4" + /aSTool="pksnrpsmotif" + /translation="AAVNGPESVVLAG" + /label="PKSI-AT-M_m5" +ORIGIN + 1 ggggcggcgt gctcgccctg cccgccgagc tggaccgccg cggcggcgac cggctggcgg + 61 ctgtgctgtc cggcgccacc ggcgaagacc agctggccat ccgcgccgcc ggcgtgttcg + 121 gccgccgcgt ggtgcgggcc ccggccggcg accgcgcgcc ggcgcggacc tggaccccgc + 181 gcggcaccac gctgatcacc ggcggcaccg gcaccctggc cccgcacctg gcccgctggc + 241 tggccgagca gggcgccgag cacatcgtgc tgaccagccg caccggcgcc gaggccccga + 301 aggcccggca gctgctggcg gagctgggcg agaccgtcga ggcggtggcc tgcgacgtca + 361 ccgacaaggc ggcgctggcc gccctgctgg cgcggttgcg ggccgagggc cggaccgtgc + 421 ggaacgtggt gcacacggcc gccgtgatcg agctgcacac gctggccgag accgacctgg + 481 ccgcgttctc ccggaccgtg cacgccaagg tggtgggcgc 
gcgcaacctg gacgagctgc + 541 tcgacaccga cgagctcgat gcgttcgtgc tgtactcctc caccgccggc ctgtggggca + 601 ccggcgcgca cgccgcctac gtggccggca acgcgtactt gcacgcgctg gcagcccacc + 661 ggcgcgcccg ggggctgcgg gccaccgcgc tgtcgtgggg catctgggcc gacgaccgcg + 721 aactcggccg ggtcgacccg gagcagatcg tgcgcagcgg cctggtgttc atggcgccgg + 781 agctggcgct ggagggtctg cgccgggccc tggacgacga cgagaccgcg ctggccgtgg + 841 ccgatctgga ctgggagcgg tactacccgg tctacaccgc cgtccggccg acgctgctgt + 901 tcgacgagct gccggaggtg cggcggctca ccgaggccgc cgccgccacg gccgccaccg + 961 gcgccggcgg cgagttcgcc gcccggctgc gcacgctgcc cgaggccgag cgcgcccacc + 1021 tgctcctgga actggtccgg gccgaggccg cggccgtgct gggccacgcg tcggccgacg + 1081 cgctgcccga ggaccgcgcc ttccgcgacg tcggcttcga ctcggtcacc gcggtcgacc + 1141 tgcgcaaccg gatctccgcc ggcaccggcc tgaccctgcc cgccaccatg gtgttcgacc + 1201 acccgacgcc gaggcggctg gccgggttcc tggccgccac gatcaccggc tcgggtgccg + 1261 tcgagcaggc accggccgtg gccggcgtgg acaccggcga gcccgtcgcc atcatcggga + 1321 tggcctgccg ctacccgggt ggcgcgaaca ccccggaacg gttgtgggac ctggtcgtgg + 1381 gcggcgtgga cgccatctcc ggcttcccgg ccgaccgcaa ctggccgacc gacgcgctct + 1441 acgacccgga cccggacgcc ggcggcaaga cctattcggt gcagggcggc ttcctgcacg + 1501 aggcggccga gttcgacccg ggcttcttcg gcatctcgcc gcgggaggca ctgtccatgg + 1561 atccgcagca gcgcctgctg ctggagacgg cgtgggaggc gttcgagcgg gccgggatcg + 1621 acccgcacac gctgcggggc agcggcaccg gcaccttcat cggggccagc taccaggact + 1681 acaccgcggc cgtgtccggc gcggtggaca acgccgacgg ccacatgatc accggctcgc + 1741 tgggcagcat cctgtccggc cggctctcct acctgttcgg gctggagggc ccggcggtca + 1801 ccctggacac cgcctgctcg tcgtcgctgg tcgccatcca cctggccgcg cagtcgctgc + 1861 ggtcggggga gagcagcctg gcgctggccg gcggggtgag cgtgatggcg acgccggggg + 1921 cgttcgtcgg cttctcgcgc cagcgcgcac tggccacgga cggccgttgc aaggcctact + 1981 cggaccgggc cgacggcatg accctcggcg agggcgtcgg cctggtgctg ctggagaagc + 2041 tgtccgacgc gcagcgcaac gggcaccgga tcctggcggt ggtccggggt tcggccacga + 2101 accaggacgg cgcgtccaac ggcatgaccg cgcccagcgg cccgtcccag cagcgggtga + 2161 
tccggcaggc gctggccaac gcgcggctct cggcgtccga ggtggacgtg atcgagggcc + 2221 acggcaccgg caccgcgctg ggcgacccga tcgaggccca ggccctgctg gccacctacg + 2281 gccaggaccg ggaacggccg ctgctgctcg gctcggtgaa gtccaacatc ggccacaccc + 2341 agatggcctc cggcgtggcc ggcgtgatca aggtggtgca ggcgctgcgg cacgggctgg + 2401 tacccaagac gctgcacgtg gacgagccct ccacgcacgt cgactggagc accggctcga + 2461 tcgagctgcc gtccggcagc gtgccgtggc cggagagcgg ccggccgcgc cgggccggta + 2521 tctcgtcctt cgggctgagc ggcacgaacg tgcacaccat cctcgagcag gccccggaac + 2581 cggccgccga agccggcccc gagccggagc ccggcctggt gccggtcccg ctgtccggcc + 2641 ggacggaagc agcgctgcgc gctcaggccg ccaccgtgct ggacaccctg gacgacggcg + 2701 tgtcgccggc cgtgctcggg tactcgctgg cctccacccg gtcggccttc gaacaccgtg + 2761 cggtgctgct ggccgaggac cacgacgaac tgcggcgcgg cctggccgca ctggccggcg + 2821 accagccgga cggcggcgtg gtgcggggca ccgtgacgcg gggccgcacg gcgttcctgt + 2881 tcgccggcca gggcagccag cgggccggga tgggccgcga gctgtacgag cgccacccgg + 2941 tgttcgccga cgcgctggac gcggtgctgg ggcacttcga cctgccccgt gcgctgcggg + 3001 acgtgatgtg ggacgacgat tccacggccc tcgacgagac ggggtacacc cagccggcgt + 3061 tgttcgcctt cgaggtggcg ttgttccggt tgctggagtc gtggggtgtg acgccggatt + 3121 acctggccgg gcattcgatc ggtgagatcg ccgcggcgca cgtggccgga gtgttgtcgc + 3181 tggccgatgc ctgtgcgttg gtcgctgcgc ggggtgcgct gatgcaggcg ctgccgtccg + 3241 gcggggccat ggtttcggtg cgcggctccg aggccgacgt cgccgggcac ctcggcgagg + 3301 acgtcgccgt cgcggcggtc aacgggcccg agtcggtggt gctggccggg accgaggacg + 3361 cggtgctcca ggcggccggc cgcctggagg ccgccggcca caaggtccgc cgcctgcggg + 3421 tcagccacgc cttccactcg cccttgatgg atcccgtgct ggccgagttc gcgacggtgg + 3481 ctcagggcct gacctaccac +//
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ARBH01000003.1.final Wed Feb 07 06:22:58 2018 -0500 @@ -0,0 +1,339 @@ +LOCUS ARBH01000003 3500 bp DNA linear BCT 22-APR-2013 +DEFINITION Amycolatopsis balhimycina FH 1894 strain DSM 44591 + A3CEDRAFT_scaffold1.1_C3, whole genome shotgun sequence. +ACCESSION ARBH01000003 +VERSION ARBH01000003.1 +DBLINK BioProject:PRJNA165347 + BioSample:SAMN02256403 +KEYWORDS WGS; GSC:MIGS:2.1; IMPROVED_HIGH_QUALITY_DRAFT. +SOURCE Amycolatopsis balhimycina FH 1894 + ORGANISM Amycolatopsis balhimycina FH 1894 + Bacteria; Actinobacteria; Pseudonocardiales; Pseudonocardiaceae; + Amycolatopsis. +REFERENCE 1 (bases 1 to 3500) + AUTHORS Klenk,H.-P., Huntemann,M., Han,J., Chen,A., Kyrpides,N., + Mavromatis,K., Markowitz,V., Palaniappan,K., Ivanova,N., + Schaumberg,A., Pati,A., Liolios,K., Nordberg,H.P., Cantor,M.N., + Hua,S.X. and Woyke,T. + TITLE Direct Submission + JOURNAL Submitted (16-APR-2013) DOE Joint Genome Institute, 2800 Mitchell + Drive, Walnut Creek, CA 94598-1698, USA +COMMENT URL -- http://www.jgi.doe.gov + JGI Project ID: 404843 + Source DNA and Organism available from Hans-Peter Klenk + (hpk@dsmz.de) + Source DNA available from Hans-Peter Klenk (hpk@dsmz.de) + Organism available from Hans-Peter Klenk (hpk@dsmz.de) + Contacts: Hans-Peter Klenk (hpk@dsmz.de) + Tanja Woyke (microbe@cuba.jgi-psf.org) + Whole genome sequencing and draft assembly at JGI-PGF + Annotation by JGI-ORNL + The JGI and collaborators endorse the principles for the + distribution and use of large scale sequencing data adopted by the + larger genome sequencing community and urge users of this data to + follow them. It is our intention to publish the work of this + project in a timely fashion and we welcome collaborative + interaction on the project and analysis. + (http://www.genome.gov/page.cfm?pageID=10506376) + Full annotations are available from IMG. + v. 4.24 + Phrap v. 
4.24 +FEATURES Location/Qualifiers + source 1..3500 + /culture_collection="DSM:44591" + /db_xref="taxon:1089545" + /mol_type="genomic DNA" + /organism="Amycolatopsis balhimycina FH 1894" + /strain="DSM 44591" + cluster 1..3500 + /contig_edge="True" + /cutoff=20000 + /extension=20000 + /note="Cluster number: 1" + /note="Detection rule(s) for this cluster type: t1pks: + ((PKS_KS & PKS_AT) or (ene_KS & PKS_AT) or (mod_KS & + PKS_AT) or (hyb_KS & PKS_AT) or (itr_KS & PKS_AT) or + (tra_KS & PKS_AT));" + /note="Monomers prediction: (mal)" + /note="Structure image: structures/genecluster1.png" + /product="t1pks" + CDS 3..3500 + /aSProdPred="mal" + /locus_tag="ctg1_1" + /note="smCOG: + SMCOG1001:short-chain_dehydrogenase/reductase_SDR (Score: + 64.2; E-value: 2.2e-19);" + /sec_met="Type: t1pks" + /sec_met="Domains detected: PP-binding (E-value: 1.3e-15, + bitscore: 44.0, seeds: 164); PKS_AT (E-value: 8.7e-82, + bitscore: 260.9, seeds: 1682); mod_KS (E-value: 9.4e-224, + bitscore: 729.0, seeds: 217); adh_short (E-value: 7.1e-52, + bitscore: 162.1, seeds: 230)" + /sec_met="Kind: biosynthetic" + /sec_met="NRPS/PKS subtype: Type I Modular PKS" + /sec_met="NRPS/PKS Domain: PKS_KR (60-237). E-value: + 1.4e-53. Score: 173.1; Predicted KR activity: active; + Predicted KR stereochemistry: A2;" + /sec_met="NRPS/PKS Domain: ACP (341-412). E-value: 7.8e-31. + Score: 97.8;" + /sec_met="NRPS/PKS Domain: PKS_KS (434-858). E-value: + 3e-179. Score: 587.9;" + /sec_met="NRPS/PKS Domain: PKS_AT (958-1166). E-value: + 4e-80. 
Score: 260.9; Substrate specificity predictions: mal + (PKS signature), mal (Minowa), mal (consensus);" + /translation="GGVLALPAELDRRGGDRLAAVLSGATGEDQLAIRAAGVFGRRVVR + APAGDRAPARTWTPRGTTLITGGTGTLAPHLARWLAEQGAEHIVLTSRTGAEAPKARQL + LAELGETVEAVACDVTDKAALAALLARLRAEGRTVRNVVHTAAVIELHTLAETDLAAFS + RTVHAKVVGARNLDELLDTDELDAFVLYSSTAGLWGTGAHAAYVAGNAYLHALAAHRRA + RGLRATALSWGIWADDRELGRVDPEQIVRSGLVFMAPELALEGLRRALDDDETALAVAD + LDWERYYPVYTAVRPTLLFDELPEVRRLTEAAAATAATGAGGEFAARLRTLPEAERAHL + LLELVRAEAAAVLGHASADALPEDRAFRDVGFDSVTAVDLRNRISAGTGLTLPATMVFD + HPTPRRLAGFLAATITGSGAVEQAPAVAGVDTGEPVAIIGMACRYPGGANTPERLWDLV + VGGVDAISGFPADRNWPTDALYDPDPDAGGKTYSVQGGFLHEAAEFDPGFFGISPREAL + SMDPQQRLLLETAWEAFERAGIDPHTLRGSGTGTFIGASYQDYTAAVSGAVDNADGHMI + TGSLGSILSGRLSYLFGLEGPAVTLDTACSSSLVAIHLAAQSLRSGESSLALAGGVSVM + ATPGAFVGFSRQRALATDGRCKAYSDRADGMTLGEGVGLVLLEKLSDAQRNGHRILAVV + RGSATNQDGASNGMTAPSGPSQQRVIRQALANARLSASEVDVIEGHGTGTALGDPIEAQ + ALLATYGQDRERPLLLGSVKSNIGHTQMASGVAGVIKVVQALRHGLVPKTLHVDEPSTH + VDWSTGSIELPSGSVPWPESGRPRRAGISSFGLSGTNVHTILEQAPEPAAEAGPEPEPG + LVPVPLSGRTEAALRAQAATVLDTLDDGVSPAVLGYSLASTRSAFEHRAVLLAEDHDEL + RRGLAALAGDQPDGGVVRGTVTRGRTAFLFAGQGSQRAGMGRELYERHPVFADALDAVL + GHFDLPRALRDVMWDDDSTALDETGYTQPALFAFEVALFRLLESWGVTPDYLAGHSIGE + IAAAHVAGVLSLADACALVAARGALMQALPSGGAMVSVRGSEADVAGHLGEDVAVAAVN + GPESVVLAGTEDAVLQAAGRLEAAGHKVRRLRVSHAFHSPLMDPVLAEFATVAQGLTYH + " + aSDomain 183..713 + /asDomain_id="nrpspksdomains_ctg1_1_KR1" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="PKS_KR" + /evalue="1.40E-53" + /label="ctg1_1_KR1" + /locus_tag="ctg1_1" + /score="173.1" + /specificity="KR activity: active" + /specificity="KR stereochemistry: A2" + /translation="GTTLITGGTGTLAPHLARWLAEQGAEHIVLTSRTGAEAPKARQLL + AELGETVEAVACDVTDKAALAALLARLRAEGRTVRNVVHTAAVIELHTLAETDLAAFSR + TVHAKVVGARNLDELLDTDELDAFVLYSSTAGLWGTGAHAAYVAGNAYLHALAAHRRAR + GLRATALSWGIWAD" + CDS_motif 192..284 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_ctg1_1_0001" + /database="abmotifs" + /detection="hmmscan" + 
/evalue="3.80E-15" + /label="PKSI-KR_m1" + /locus_tag="ctg1_1" + /motif="PKSI-KR_m1" + /note="NRPS/PKS Motif: PKSI-KR_m1 (e-value: 3.8e-15, + bit-score: 48.4)" + /score="48.4" + /translation="LITGGTGTLAPHLARWLAEQGAEHIVLTSRT" + CDS_motif 555..665 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_ctg1_1_0002" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.60E-14" + /label="PKSI-KR_m4" + /locus_tag="ctg1_1" + /motif="PKSI-KR_m4" + /note="NRPS/PKS Motif: PKSI-KR_m4 (e-value: 1.6e-14, + bit-score: 46.4)" + /score="46.4" + /translation="LDAFVLYSSTAGLWGTGAHAAYVAGNAYLHALAAHRR" + aSDomain 1026..1238 + /asDomain_id="nrpspksdomains_ctg1_1_Xdom01" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="ACP" + /evalue="7.80E-31" + /locus_tag="ctg1_1" + /score="97.8" + /translation="LELVRAEAAAVLGHASADALPEDRAFRDVGFDSVTAVDLRNRISA + GTGLTLPATMVFDHPTPRRLAGFLAA" + aSDomain 1305..2576 + /asDomain_id="nrpspksdomains_ctg1_1_Xdom02" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="PKS_KS" + /evalue="3.00E-179" + /locus_tag="ctg1_1" + /score="587.9" + /translation="VAIIGMACRYPGGANTPERLWDLVVGGVDAISGFPADRNWPTDAL + YDPDPDAGGKTYSVQGGFLHEAAEFDPGFFGISPREALSMDPQQRLLLETAWEAFERAG + IDPHTLRGSGTGTFIGASYQDYTAAVSGAVDNADGHMITGSLGSILSGRLSYLFGLEGP + AVTLDTACSSSLVAIHLAAQSLRSGESSLALAGGVSVMATPGAFVGFSRQRALATDGRC + KAYSDRADGMTLGEGVGLVLLEKLSDAQRNGHRILAVVRGSATNQDGASNGMTAPSGPS + QQRVIRQALANARLSASEVDVIEGHGTGTALGDPIEAQALLATYGQDRERPLLLGSVKS + NIGHTQMASGVAGVIKVVQALRHGLVPKTLHVDEPSTHVDWSTGSIELPSGSVPWPESG + RPRRAGISSFGLSGTNVHTILEQAP" + CDS_motif 1788..1835 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_ctg1_1_0003" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.00E-06" + /label="PKSI-KS_m3" + /locus_tag="ctg1_1" + /motif="PKSI-KS_m3" + /note="NRPS/PKS Motif: PKSI-KS_m3 (e-value: 2e-06, + bit-score: 19.7)" + /score="19.7" + /translation="GPAVTLDTACSSSLVA" + CDS_motif 2211..2255 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_ctg1_1_0004" + 
/database="abmotifs" + /detection="hmmscan" + /evalue="1.90E-03" + /label="PKSI-KS_m5" + /locus_tag="ctg1_1" + /motif="PKSI-KS_m5" + /note="NRPS/PKS Motif: PKSI-KS_m5 (e-value: 0.0019, + bit-score: 10.8)" + /score="10.8" + /translation="IEGHGTGTALGDPIE" + CDS_motif 2310..2354 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_ctg1_1_0005" + /database="abmotifs" + /detection="hmmscan" + /evalue="1.10E-03" + /label="PKSI-KS_m6" + /locus_tag="ctg1_1" + /motif="PKSI-KS_m6" + /note="NRPS/PKS Motif: PKSI-KS_m6 (e-value: 0.0011, + bit-score: 11.5)" + /score="11.5" + /translation="GSVKSNIGHTQMASG" + CDS_motif 2874..2927 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_ctg1_1_0006" + /database="abmotifs" + /detection="hmmscan" + /evalue="2.30E-06" + /label="PKSI-AT-mM_m1" + /locus_tag="ctg1_1" + /motif="PKSI-AT-mM_m1" + /note="NRPS/PKS Motif: PKSI-AT-mM_m1 (e-value: 2.3e-06, + bit-score: 19.8)" + /score="19.8" + /translation="FLFAGQGSQRAGMGRELY" + aSDomain 2877..3500 + /asDomain_id="nrpspksdomains_ctg1_1_AT1" + /database="nrpspksdomains.hmm" + /detection="hmmscan" + /domain="PKS_AT" + /evalue="4.00E-80" + /label="ctg1_1_AT1" + /locus_tag="ctg1_1" + /score="260.9" + /specificity="PKS signature: mal" + /specificity="Minowa: mal" + /specificity="consensus: mal" + /translation="LFAGQGSQRAGMGRELYERHPVFADALDAVLGHFDLPRALRDVMW + DDDSTALDETGYTQPALFAFEVALFRLLESWGVTPDYLAGHSIGEIAAAHVAGVLSLAD + ACALVAARGALMQALPSGGAMVSVRGSEADVAGHLGEDVAVAAVNGPESVVLAGTEDAV + LQAAGRLEAAGHKVRRLRVSHAFHSPLMDPVLAEFATVAQGLTYH" + CDS_motif 3033..3095 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_ctg1_1_0007" + /database="abmotifs" + /detection="hmmscan" + /evalue="3.20E-03" + /label="PKSI-AT-mM_m2" + /locus_tag="ctg1_1" + /motif="PKSI-AT-mM_m2" + /note="NRPS/PKS Motif: PKSI-AT-mM_m2 (e-value: 0.0032, + bit-score: 10.9)" + /score="10.9" + /translation="DETGYTQPALFAFEVALFRLL" + CDS_motif 3105..3227 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_ctg1_1_0008" + /database="abmotifs" + 
/detection="hmmscan" + /evalue="2.30E-22" + /label="PKSI-AT-M_m3" + /locus_tag="ctg1_1" + /motif="PKSI-AT-M_m3" + /note="NRPS/PKS Motif: PKSI-AT-M_m3 (e-value: 2.3e-22, + bit-score: 71.3)" + /score="71.3" + /translation="GVTPDYLAGHSIGEIAAAHVAGVLSLADACALVAARGALMQ" + CDS_motif 3312..3350 + /aSTool="pksnrpsmotif" + /asDomain_id="nrpspksmotif_ctg1_1_0009" + /database="abmotifs" + /detection="hmmscan" + /evalue="4.70E-04" + /label="PKSI-AT-M_m5" + /locus_tag="ctg1_1" + /motif="PKSI-AT-M_m5" + /note="NRPS/PKS Motif: PKSI-AT-M_m5 (e-value: 0.00047, + bit-score: 13.4)" + /score="13.4" + /translation="AAVNGPESVVLAG" +ORIGIN + 1 ggggcggcgt gctcgccctg cccgccgagc tggaccgccg cggcggcgac cggctggcgg + 61 ctgtgctgtc cggcgccacc ggcgaagacc agctggccat ccgcgccgcc ggcgtgttcg + 121 gccgccgcgt ggtgcgggcc ccggccggcg accgcgcgcc ggcgcggacc tggaccccgc + 181 gcggcaccac gctgatcacc ggcggcaccg gcaccctggc cccgcacctg gcccgctggc + 241 tggccgagca gggcgccgag cacatcgtgc tgaccagccg caccggcgcc gaggccccga + 301 aggcccggca gctgctggcg gagctgggcg agaccgtcga ggcggtggcc tgcgacgtca + 361 ccgacaaggc ggcgctggcc gccctgctgg cgcggttgcg ggccgagggc cggaccgtgc + 421 ggaacgtggt gcacacggcc gccgtgatcg agctgcacac gctggccgag accgacctgg + 481 ccgcgttctc ccggaccgtg cacgccaagg tggtgggcgc gcgcaacctg gacgagctgc + 541 tcgacaccga cgagctcgat gcgttcgtgc tgtactcctc caccgccggc ctgtggggca + 601 ccggcgcgca cgccgcctac gtggccggca acgcgtactt gcacgcgctg gcagcccacc + 661 ggcgcgcccg ggggctgcgg gccaccgcgc tgtcgtgggg catctgggcc gacgaccgcg + 721 aactcggccg ggtcgacccg gagcagatcg tgcgcagcgg cctggtgttc atggcgccgg + 781 agctggcgct ggagggtctg cgccgggccc tggacgacga cgagaccgcg ctggccgtgg + 841 ccgatctgga ctgggagcgg tactacccgg tctacaccgc cgtccggccg acgctgctgt + 901 tcgacgagct gccggaggtg cggcggctca ccgaggccgc cgccgccacg gccgccaccg + 961 gcgccggcgg cgagttcgcc gcccggctgc gcacgctgcc cgaggccgag cgcgcccacc + 1021 tgctcctgga actggtccgg gccgaggccg cggccgtgct gggccacgcg tcggccgacg + 1081 cgctgcccga ggaccgcgcc ttccgcgacg tcggcttcga ctcggtcacc gcggtcgacc + 1141 
tgcgcaaccg gatctccgcc ggcaccggcc tgaccctgcc cgccaccatg gtgttcgacc + 1201 acccgacgcc gaggcggctg gccgggttcc tggccgccac gatcaccggc tcgggtgccg + 1261 tcgagcaggc accggccgtg gccggcgtgg acaccggcga gcccgtcgcc atcatcggga + 1321 tggcctgccg ctacccgggt ggcgcgaaca ccccggaacg gttgtgggac ctggtcgtgg + 1381 gcggcgtgga cgccatctcc ggcttcccgg ccgaccgcaa ctggccgacc gacgcgctct + 1441 acgacccgga cccggacgcc ggcggcaaga cctattcggt gcagggcggc ttcctgcacg + 1501 aggcggccga gttcgacccg ggcttcttcg gcatctcgcc gcgggaggca ctgtccatgg + 1561 atccgcagca gcgcctgctg ctggagacgg cgtgggaggc gttcgagcgg gccgggatcg + 1621 acccgcacac gctgcggggc agcggcaccg gcaccttcat cggggccagc taccaggact + 1681 acaccgcggc cgtgtccggc gcggtggaca acgccgacgg ccacatgatc accggctcgc + 1741 tgggcagcat cctgtccggc cggctctcct acctgttcgg gctggagggc ccggcggtca + 1801 ccctggacac cgcctgctcg tcgtcgctgg tcgccatcca cctggccgcg cagtcgctgc + 1861 ggtcggggga gagcagcctg gcgctggccg gcggggtgag cgtgatggcg acgccggggg + 1921 cgttcgtcgg cttctcgcgc cagcgcgcac tggccacgga cggccgttgc aaggcctact + 1981 cggaccgggc cgacggcatg accctcggcg agggcgtcgg cctggtgctg ctggagaagc + 2041 tgtccgacgc gcagcgcaac gggcaccgga tcctggcggt ggtccggggt tcggccacga + 2101 accaggacgg cgcgtccaac ggcatgaccg cgcccagcgg cccgtcccag cagcgggtga + 2161 tccggcaggc gctggccaac gcgcggctct cggcgtccga ggtggacgtg atcgagggcc + 2221 acggcaccgg caccgcgctg ggcgacccga tcgaggccca ggccctgctg gccacctacg + 2281 gccaggaccg ggaacggccg ctgctgctcg gctcggtgaa gtccaacatc ggccacaccc + 2341 agatggcctc cggcgtggcc ggcgtgatca aggtggtgca ggcgctgcgg cacgggctgg + 2401 tacccaagac gctgcacgtg gacgagccct ccacgcacgt cgactggagc accggctcga + 2461 tcgagctgcc gtccggcagc gtgccgtggc cggagagcgg ccggccgcgc cgggccggta + 2521 tctcgtcctt cgggctgagc ggcacgaacg tgcacaccat cctcgagcag gccccggaac + 2581 cggccgccga agccggcccc gagccggagc ccggcctggt gccggtcccg ctgtccggcc + 2641 ggacggaagc agcgctgcgc gctcaggccg ccaccgtgct ggacaccctg gacgacggcg + 2701 tgtcgccggc cgtgctcggg tactcgctgg cctccacccg gtcggccttc gaacaccgtg + 2761 cggtgctgct ggccgaggac 
cacgacgaac tgcggcgcgg cctggccgca ctggccggcg + 2821 accagccgga cggcggcgtg gtgcggggca ccgtgacgcg gggccgcacg gcgttcctgt + 2881 tcgccggcca gggcagccag cgggccggga tgggccgcga gctgtacgag cgccacccgg + 2941 tgttcgccga cgcgctggac gcggtgctgg ggcacttcga cctgccccgt gcgctgcggg + 3001 acgtgatgtg ggacgacgat tccacggccc tcgacgagac ggggtacacc cagccggcgt + 3061 tgttcgcctt cgaggtggcg ttgttccggt tgctggagtc gtggggtgtg acgccggatt + 3121 acctggccgg gcattcgatc ggtgagatcg ccgcggcgca cgtggccgga gtgttgtcgc + 3181 tggccgatgc ctgtgcgttg gtcgctgcgc ggggtgcgct gatgcaggcg ctgccgtccg + 3241 gcggggccat ggtttcggtg cgcggctccg aggccgacgt cgccgggcac ctcggcgagg + 3301 acgtcgccgt cgcggcggtc aacgggcccg agtcggtggt gctggccggg accgaggacg + 3361 cggtgctcca ggcggccggc cgcctggagg ccgccggcca caaggtccgc cgcctgcggg + 3421 tcagccacgc cttccactcg cccttgatgg atcccgtgct ggccgagttc gcgacggtgg + 3481 ctcagggcct gacctaccac +//
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/index.2.html Wed Feb 07 06:22:58 2018 -0500 @@ -0,0 +1,273 @@ +<!doctype html> +<html> + <head> + <title>ARBH01000003.1 - 1 clusters - antiSMASH results</title> + <link rel="stylesheet" type="text/css" href="css/bacteria.css"> + <meta charset="utf-8"> + </head> + <body> + <div id="header"> + <div class="top-header"> + <img class="antismash-logo" src="images/bacteria_logo.png" alt="antiSMASH"> + <span class="antismash-title"><a class="main-link" href="http://antismash.secondarymetabolites.org/">antibiotics & Secondary Metabolite Analysis SHell</a><br> + <span class="white">Version <span id="antismash-version">4.0.2</span></span> + </span> + <div id="icons"> + <a class="main-link" href="http://antismash.secondarymetabolites.org/"><img src="images/bacteria_home.png" alt="home" title="Go to start page"></a> + <a class="help-link" href="http://antismash.secondarymetabolites.org/#!/help"><img src="images/bacteria_help.png" alt="help" title="Get help using antiSMASH"></a> + <a class="about-link" href="http://antismash.secondarymetabolites.org/#!/about"><img src="images/bacteria_about.png" alt="about" title="About antiSMASH"></a> + <a href="#" id="download"><img src="images/bacteria_download.png" alt="download" title="Download results"></a> + <div id="downloadmenu"> + <ul id="downloadoptions"> + <li><a href="ARBH01000003.1.zip">Download all results</a></li><li><a href="ARBH01000003.1.geneclusters.xls">Download XLS overview file</a></li><li><a href="ARBH01000003.1.final.embl">Download EMBL summary file</a></li><li><a href="ARBH01000003.1.final.gbk">Download GenBank summary file</a></li></ul> + </div> + </div> + </div> + <div id="buttons"> + <span id="cluster-type">Select Gene Cluster:</span> + <ul id="clusterbuttons"> + <li><div class="arrow-left" id="prev-cluster"></div></li> + <li class="clbutton"><a href="#">Overview</a></li> + <li class="clbutton t1pks cluster-1"><a href="#cluster-1">1</a></li><li 
id="last-clbutton"><div class="arrow-right" id="next-cluster"></div></li> + </ul> + </div> + </div> + + <!-- overview page --> + <div class="page" id="overview"> + <h3>Identified secondary metabolite clusters<span id="truncated"></span></h3> + <table id="cluster-overview"> + <thead> + <tr> + <th>Cluster</th> + <th>Type</th> + <th>From</th> + <th>To</th> + <th>Most similar known cluster</th> + <th>MIBiG BGC-ID</th> + </tr> + </thead> + <tbody> + <tr class="separator-row"><td class="separator-text" colspan="2">The following clusters are from record ARBH01000003.1:</td></tr><tr><td class="clbutton t1pks"><a href="#cluster-1">Cluster 1</a></td><td><a href="http://antismash.secondarymetabolites.org/help#t1pks" target="_blank">T1pks</a></td><td class="digits">1</td><td class="digits">3500</td><td>-</td><td>-</td></tr></tbody> + </table> + </div> + + <div class="page" id="cluster-1"><h3>ARBH01000003 - Cluster 1 - T1pks</h3><div class="sidepanel"><div class="structure"><h3>Predicted core structure</h3><a href="images/nostructure_icon.png" target="_new"><img src="images/nostructure_icon.png"></a><div class="as-structure-warning">Rough prediction of core scaffold based on assumed PKS/NRPS colinearity; tailoring reactions not taken into account</div></div><div class="more-details"><h3>Prediction details</h3><dl class="prediction-text"><dt>Monomers prediction:</dt><dd>(mal)</dd><dt>ctg1_1</dt><dd>PKS signature: mal<br></dd><dd>Minowa: mal<br></dd><dd>consensus: mal<br></dd></dl></div></div><div class="content"><div class="description-container"><h3>Gene cluster description</h3><div class="cluster-download"><a href="ARBH01000003.1.cluster001.gbk">Download cluster GenBank file</a></div><div class="description-text">ARBH01000003 - Gene Cluster 1. Type = t1pks. Location: 1 - 3500 nt. 
Click on genes for more information.</div><a class="cluster-rules-header" id="cluster-1-rules-header" href="#cluster-1">Show pHMM detection rules used</a><div class="cluster-rules" id="cluster-1-rules">t1pks: ((PKS_KS & PKS_AT) or (ene_KS & PKS_AT) or (mod_KS & PKS_AT) or (hyb_KS & PKS_AT) or (itr_KS & PKS_AT) or (tra_KS & PKS_AT))<br></div><div id="cluster-1-svg"></div></div><div class="legend"><h4>Legend:</h4><div><div><div class="legend-field legend-type-biosynthetic"></div><div class="legend-label">core biosynthetic genes</div></div><div><div class="legend-field legend-type-biosynthetic-additional"></div><div class="legend-label">additional biosynthetic genes</div></div><div><div class="legend-field legend-type-transport"></div><div class="legend-label">transport-related genes</div></div><div><div class="legend-field legend-type-regulatory"></div><div class="legend-label">regulatory genes</div></div><div><div class="legend-field legend-type-other"></div><div class="legend-label">other genes</div></div></div></div><div class="details"><h3>Detailed annotation</h3><div class="details-svg" id="cluster-1-details-svg"></div></div></div></div><div id="footer"> + <div id="logos"> + <table id="logo-table"> + <tr> + <td> + <img src="images/tueblogo.gif"> + </td> + <td> + <img src="images/ruglogo.gif"> + </td> + <td> + <img src="images/ucsflogo.gif"> + </td> + <td> + <img src="images/wur-logo.png"> + </td> + </tr> + <tr> + <td> + <img src="images/uomlogo.jpg"> + </td> + <td> + <img src="images/dziflogo.png"> + </td> + <td> + <img src="images/cfb-logo.png"> + </td> + <td> + </td> + </tr> + </table> + </div> + <div id="copyright"> + If you have found antiSMASH useful, please <a href="http://antismash.secondarymetabolites.org/about">cite us</a>. 
+ </div> + </div> + + <script src="js/jquery.js"></script> + <script src="js/purl.js"></script> + <script src="js/d3.v2.js"></script> + <script src="js/svgene.js"></script> + <script src="js/jsdomain.js"></script> + <script src="js/clusterblast.js"></script> + <script src="js/domainalign.js"></script> + <script src="geneclusters.js"></script> + <script type="text/javascript"> +function toggle_downloadmenu(event) { + event.preventDefault(); + $("#downloadmenu").fadeToggle("fast", "linear"); +} + +function switch_to_cluster() { + setTimeout(function() { + var url = $.url(); + $(".page").hide(); + $("li.clbutton").removeClass("active"); + var anchor = url.data.attr.fragment; + if (anchor == "") { + anchor = "overview"; + } + $("#" + anchor).show(); + if (anchor != "overview") { + $("li.clbutton." + anchor).addClass("active"); + } + + if (geneclusters[anchor] !== undefined) { + svgene.drawClusters(anchor+"-svg", [geneclusters[anchor]], 20, 700); + } + if ($("#" + anchor + "-details-svg").length > 0) { + jsdomain.drawDomains(anchor+ "-details-svg", details_data[anchor], 40, 700); + } + $("#" + anchor + " .clusterblast-selector").change(); + $("#" + anchor + " .domainalign-selector").change(); + }, 1); +} + +function next_cluster() { + var num_clusters = Object.keys(geneclusters).length; + var url = $.url(); + var anchor = url.data.attr.fragment; + var href = "#" + anchor; + if (anchor == "" || anchor == "overview") { + anchor = "cluster-0"; + } + var cluster_number = parseInt(anchor.split('-')[1]); + var next_cluster_number = cluster_number + 1; + if (next_cluster_number <= num_clusters) { + href = "#cluster-" + next_cluster_number; + } else { + href = "#overview"; + } + window.location.href = href; + switch_to_cluster(); +} + +function previous_cluster() { + var num_clusters = Object.keys(geneclusters).length; + var url = $.url(); + var anchor = url.data.attr.fragment; + var href = "#" + anchor; + if (anchor == "" || anchor == "overview") { + anchor = "cluster-0"; + } 
+ var cluster_number = parseInt(anchor.split('-')[1]); + var prev_cluster_number = cluster_number - 1; + if (prev_cluster_number == 0 ) { + href = "#overview"; + } else if (prev_cluster_number < 0){ + href = "#cluster-" + num_clusters; + } else { + href = "#cluster-" + prev_cluster_number; + } + window.location.href = href; + switch_to_cluster(); +} + +function toggle_cluster_rules(ev) { + ev.preventDefault(); + var id = $(this).attr('id').replace(/-header/, ''); + var rules = $('#' + id); + if (rules.css('display') == "none") { + $(this).text('Hide pHMM detection rules used'); + } else { + $(this).text('Show pHMM detection rules used'); + } + rules.fadeToggle("fast", "linear"); +} + +function map_type_to_desc(type) { + switch(type) { + case "nrps": return "NRPS"; + case "t1pks": return "Type I PKS"; + case "t2pks": return "Type II PKS"; + case "t3pks": return "Type III PKS"; + case "t4pks": return "Type IV PKS"; + default: return type; + } +} + +function copyToClipboard (text) { + window.prompt ("Copy to clipboard: Ctrl+C, Enter", text); +} + +$(document).ready(function() { + + $("#download").click(toggle_downloadmenu); + + $("#next-cluster").click(next_cluster); + $("#prev-cluster").click(previous_cluster); + + $(".clbutton").click(function() { + /* Make sure that even if user missed the link and clicked the + background we still have the correct anchor */ + var href = $(this).children().first().attr('href'); + + if (href === undefined) { + return; + } + window.location.href = href; + + switch_to_cluster(); + }).mouseover(function() { + /* Set the select cluster label text to cluster type */ + var classes = $(this).attr('class').split(' '); + if (classes.length < 2) { + return; + } + if (classes[1] == 'separator') { + return; + } + var cluster_type = map_type_to_desc(classes[1]); + var label = $('#cluster-type'); + label.data("orig_text", label.text()); + label.text(cluster_type + ":"); + }).mouseout(function() { + /* and reset the select cluster label text */ + 
var label = $('#cluster-type'); + label.text(label.data("orig_text")); + }); + + $('.clusterblast-selector').change(function() { + var id = $(this).attr('id').replace('-select', ''); + var url = $(this).val(); + $.get(url, function(data) { + $('#' + id + '-svg').html(data); + clusterblast.init(id + '-svg'); + // id = + }, 'html'); + $('#' + id + '-download').off('click'); + $('#' + id + '-download').click(function () { + var url = $("#" + id + "-select").val(); + window.open(url, '_blank'); + }); + }); + + $('.domainalign-selector').change(function() { + var id = $(this).attr('id').replace('-select', ''); + var url = $(this).val(); + $.get(url, function(data) { + $('#' + id + '-svg').html(data); + domainalign.init(id + '-svg'); + // id = + }, 'html'); + $('#' + id + '-download').off('click'); + $('#' + id + '-download').click(function () { + var url = $("#" + id + "-select").val(); + window.open(url, '_blank'); + }); + }); + + $('.cluster-rules-header').click(toggle_cluster_rules); + + switch_to_cluster(); + +}); + </script> + + </body> +</html> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/index.html Wed Feb 07 06:22:58 2018 -0500 @@ -0,0 +1,273 @@ +<!doctype html> +<html> + <head> + <title>ARBH01000003.1 - 1 clusters - antiSMASH results</title> + <link rel="stylesheet" type="text/css" href="css/bacteria.css"> + <meta charset="utf-8"> + </head> + <body> + <div id="header"> + <div class="top-header"> + <img class="antismash-logo" src="images/bacteria_logo.png" alt="antiSMASH"> + <span class="antismash-title"><a class="main-link" href="http://antismash.secondarymetabolites.org/">antibiotics & Secondary Metabolite Analysis SHell</a><br> + <span class="white">Version <span id="antismash-version">4.0.2</span></span> + </span> + <div id="icons"> + <a class="main-link" href="http://antismash.secondarymetabolites.org/"><img src="images/bacteria_home.png" alt="home" title="Go to start page"></a> + <a class="help-link" href="http://antismash.secondarymetabolites.org/#!/help"><img src="images/bacteria_help.png" alt="help" title="Get help using antiSMASH"></a> + <a class="about-link" href="http://antismash.secondarymetabolites.org/#!/about"><img src="images/bacteria_about.png" alt="about" title="About antiSMASH"></a> + <a href="#" id="download"><img src="images/bacteria_download.png" alt="download" title="Download results"></a> + <div id="downloadmenu"> + <ul id="downloadoptions"> + <li><a href="ARBH01000003.1.zip">Download all results</a></li><li><a href="ARBH01000003.1.geneclusters.xls">Download XLS overview file</a></li><li><a href="ARBH01000003.1.final.embl">Download EMBL summary file</a></li><li><a href="ARBH01000003.1.final.gbk">Download GenBank summary file</a></li></ul> + </div> + </div> + </div> + <div id="buttons"> + <span id="cluster-type">Select Gene Cluster:</span> + <ul id="clusterbuttons"> + <li><div class="arrow-left" id="prev-cluster"></div></li> + <li class="clbutton"><a href="#">Overview</a></li> + <li class="clbutton t1pks cluster-1"><a href="#cluster-1">1</a></li><li 
id="last-clbutton"><div class="arrow-right" id="next-cluster"></div></li> + </ul> + </div> + </div> + + <!-- overview page --> + <div class="page" id="overview"> + <h3>Identified secondary metabolite clusters<span id="truncated"></span></h3> + <table id="cluster-overview"> + <thead> + <tr> + <th>Cluster</th> + <th>Type</th> + <th>From</th> + <th>To</th> + <th>Most similar known cluster</th> + <th>MIBiG BGC-ID</th> + </tr> + </thead> + <tbody> + <tr class="separator-row"><td class="separator-text" colspan="2">The following clusters are from record ARBH01000003.1:</td></tr><tr><td class="clbutton t1pks"><a href="#cluster-1">Cluster 1</a></td><td><a href="http://antismash.secondarymetabolites.org/help#t1pks" target="_blank">T1pks</a></td><td class="digits">1</td><td class="digits">3500</td><td>-</td><td>-</td></tr></tbody> + </table> + </div> + + <div class="page" id="cluster-1"><h3>ARBH01000003.1 - Cluster 1 - T1pks</h3><div class="sidepanel"><div class="structure"><h3>Predicted core structure</h3><a href="images/nostructure_icon.png" target="_new"><img src="images/nostructure_icon.png"></a><div class="as-structure-warning">Rough prediction of core scaffold based on assumed PKS/NRPS colinearity; tailoring reactions not taken into account</div></div><div class="more-details"><h3>Prediction details</h3><dl class="prediction-text"><dt>Monomers prediction:</dt><dd>(mal)</dd><dt>ctg1_1</dt><dd>PKS signature: mal<br></dd><dd>Minowa: mal<br></dd><dd>consensus: mal<br></dd></dl></div></div><div class="content"><div class="description-container"><h3>Gene cluster description</h3><div class="cluster-download"><a href="ARBH01000003.1.cluster001.gbk">Download cluster GenBank file</a></div><div class="description-text">ARBH01000003.1 - Gene Cluster 1. Type = t1pks. Location: 1 - 3500 nt. 
Click on genes for more information.</div><a class="cluster-rules-header" id="cluster-1-rules-header" href="#cluster-1">Show pHMM detection rules used</a><div class="cluster-rules" id="cluster-1-rules">t1pks: ((PKS_KS & PKS_AT) or (ene_KS & PKS_AT) or (mod_KS & PKS_AT) or (hyb_KS & PKS_AT) or (itr_KS & PKS_AT) or (tra_KS & PKS_AT))<br></div><div id="cluster-1-svg"></div></div><div class="legend"><h4>Legend:</h4><div><div><div class="legend-field legend-type-biosynthetic"></div><div class="legend-label">core biosynthetic genes</div></div><div><div class="legend-field legend-type-biosynthetic-additional"></div><div class="legend-label">additional biosynthetic genes</div></div><div><div class="legend-field legend-type-transport"></div><div class="legend-label">transport-related genes</div></div><div><div class="legend-field legend-type-regulatory"></div><div class="legend-label">regulatory genes</div></div><div><div class="legend-field legend-type-other"></div><div class="legend-label">other genes</div></div></div></div><div class="details"><h3>Detailed annotation</h3><div class="details-svg" id="cluster-1-details-svg"></div></div></div></div><div id="footer"> + <div id="logos"> + <table id="logo-table"> + <tr> + <td> + <img src="images/tueblogo.gif"> + </td> + <td> + <img src="images/ruglogo.gif"> + </td> + <td> + <img src="images/ucsflogo.gif"> + </td> + <td> + <img src="images/wur-logo.png"> + </td> + </tr> + <tr> + <td> + <img src="images/uomlogo.jpg"> + </td> + <td> + <img src="images/dziflogo.png"> + </td> + <td> + <img src="images/cfb-logo.png"> + </td> + <td> + </td> + </tr> + </table> + </div> + <div id="copyright"> + If you have found antiSMASH useful, please <a href="http://antismash.secondarymetabolites.org/about">cite us</a>. 
+ </div> + </div> + + <script src="js/jquery.js"></script> + <script src="js/purl.js"></script> + <script src="js/d3.v2.js"></script> + <script src="js/svgene.js"></script> + <script src="js/jsdomain.js"></script> + <script src="js/clusterblast.js"></script> + <script src="js/domainalign.js"></script> + <script src="geneclusters.js"></script> + <script type="text/javascript"> +function toggle_downloadmenu(event) { + event.preventDefault(); + $("#downloadmenu").fadeToggle("fast", "linear"); +} + +function switch_to_cluster() { + setTimeout(function() { + var url = $.url(); + $(".page").hide(); + $("li.clbutton").removeClass("active"); + var anchor = url.data.attr.fragment; + if (anchor == "") { + anchor = "overview"; + } + $("#" + anchor).show(); + if (anchor != "overview") { + $("li.clbutton." + anchor).addClass("active"); + } + + if (geneclusters[anchor] !== undefined) { + svgene.drawClusters(anchor+"-svg", [geneclusters[anchor]], 20, 700); + } + if ($("#" + anchor + "-details-svg").length > 0) { + jsdomain.drawDomains(anchor+ "-details-svg", details_data[anchor], 40, 700); + } + $("#" + anchor + " .clusterblast-selector").change(); + $("#" + anchor + " .domainalign-selector").change(); + }, 1); +} + +function next_cluster() { + var num_clusters = Object.keys(geneclusters).length; + var url = $.url(); + var anchor = url.data.attr.fragment; + var href = "#" + anchor; + if (anchor == "" || anchor == "overview") { + anchor = "cluster-0"; + } + var cluster_number = parseInt(anchor.split('-')[1]); + var next_cluster_number = cluster_number + 1; + if (next_cluster_number <= num_clusters) { + href = "#cluster-" + next_cluster_number; + } else { + href = "#overview"; + } + window.location.href = href; + switch_to_cluster(); +} + +function previous_cluster() { + var num_clusters = Object.keys(geneclusters).length; + var url = $.url(); + var anchor = url.data.attr.fragment; + var href = "#" + anchor; + if (anchor == "" || anchor == "overview") { + anchor = "cluster-0"; + } 
+ var cluster_number = parseInt(anchor.split('-')[1]); + var prev_cluster_number = cluster_number - 1; + if (prev_cluster_number == 0 ) { + href = "#overview"; + } else if (prev_cluster_number < 0){ + href = "#cluster-" + num_clusters; + } else { + href = "#cluster-" + prev_cluster_number; + } + window.location.href = href; + switch_to_cluster(); +} + +function toggle_cluster_rules(ev) { + ev.preventDefault(); + var id = $(this).attr('id').replace(/-header/, ''); + var rules = $('#' + id); + if (rules.css('display') == "none") { + $(this).text('Hide pHMM detection rules used'); + } else { + $(this).text('Show pHMM detection rules used'); + } + rules.fadeToggle("fast", "linear"); +} + +function map_type_to_desc(type) { + switch(type) { + case "nrps": return "NRPS"; + case "t1pks": return "Type I PKS"; + case "t2pks": return "Type II PKS"; + case "t3pks": return "Type III PKS"; + case "t4pks": return "Type IV PKS"; + default: return type; + } +} + +function copyToClipboard (text) { + window.prompt ("Copy to clipboard: Ctrl+C, Enter", text); +} + +$(document).ready(function() { + + $("#download").click(toggle_downloadmenu); + + $("#next-cluster").click(next_cluster); + $("#prev-cluster").click(previous_cluster); + + $(".clbutton").click(function() { + /* Make sure that even if user missed the link and clicked the + background we still have the correct anchor */ + var href = $(this).children().first().attr('href'); + + if (href === undefined) { + return; + } + window.location.href = href; + + switch_to_cluster(); + }).mouseover(function() { + /* Set the select cluster label text to cluster type */ + var classes = $(this).attr('class').split(' '); + if (classes.length < 2) { + return; + } + if (classes[1] == 'separator') { + return; + } + var cluster_type = map_type_to_desc(classes[1]); + var label = $('#cluster-type'); + label.data("orig_text", label.text()); + label.text(cluster_type + ":"); + }).mouseout(function() { + /* and reset the select cluster label text */ + 
var label = $('#cluster-type'); + label.text(label.data("orig_text")); + }); + + $('.clusterblast-selector').change(function() { + var id = $(this).attr('id').replace('-select', ''); + var url = $(this).val(); + $.get(url, function(data) { + $('#' + id + '-svg').html(data); + clusterblast.init(id + '-svg'); + // id = + }, 'html'); + $('#' + id + '-download').off('click'); + $('#' + id + '-download').click(function () { + var url = $("#" + id + "-select").val(); + window.open(url, '_blank'); + }); + }); + + $('.domainalign-selector').change(function() { + var id = $(this).attr('id').replace('-select', ''); + var url = $(this).val(); + $.get(url, function(data) { + $('#' + id + '-svg').html(data); + domainalign.init(id + '-svg'); + // id = + }, 'html'); + $('#' + id + '-download').off('click'); + $('#' + id + '-download').click(function () { + var url = $("#" + id + "-select").val(); + window.open(url, '_blank'); + }); + }); + + $('.cluster-rules-header').click(toggle_cluster_rules); + + switch_to_cluster(); + +}); + </script> + + </body> +</html> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sequence.fasta Wed Feb 07 06:22:58 2018 -0500 @@ -0,0 +1,51 @@ +>ARBH01000003.1 Amycolatopsis balhimycina FH 1894 strain DSM 44591 A3CEDRAFT_scaffold1.1_C3, whole genome shotgun sequence +GGGGCGGCGTGCTCGCCCTGCCCGCCGAGCTGGACCGCCGCGGCGGCGACCGGCTGGCGGCTGTGCTGTC +CGGCGCCACCGGCGAAGACCAGCTGGCCATCCGCGCCGCCGGCGTGTTCGGCCGCCGCGTGGTGCGGGCC +CCGGCCGGCGACCGCGCGCCGGCGCGGACCTGGACCCCGCGCGGCACCACGCTGATCACCGGCGGCACCG +GCACCCTGGCCCCGCACCTGGCCCGCTGGCTGGCCGAGCAGGGCGCCGAGCACATCGTGCTGACCAGCCG +CACCGGCGCCGAGGCCCCGAAGGCCCGGCAGCTGCTGGCGGAGCTGGGCGAGACCGTCGAGGCGGTGGCC +TGCGACGTCACCGACAAGGCGGCGCTGGCCGCCCTGCTGGCGCGGTTGCGGGCCGAGGGCCGGACCGTGC +GGAACGTGGTGCACACGGCCGCCGTGATCGAGCTGCACACGCTGGCCGAGACCGACCTGGCCGCGTTCTC +CCGGACCGTGCACGCCAAGGTGGTGGGCGCGCGCAACCTGGACGAGCTGCTCGACACCGACGAGCTCGAT +GCGTTCGTGCTGTACTCCTCCACCGCCGGCCTGTGGGGCACCGGCGCGCACGCCGCCTACGTGGCCGGCA +ACGCGTACTTGCACGCGCTGGCAGCCCACCGGCGCGCCCGGGGGCTGCGGGCCACCGCGCTGTCGTGGGG +CATCTGGGCCGACGACCGCGAACTCGGCCGGGTCGACCCGGAGCAGATCGTGCGCAGCGGCCTGGTGTTC +ATGGCGCCGGAGCTGGCGCTGGAGGGTCTGCGCCGGGCCCTGGACGACGACGAGACCGCGCTGGCCGTGG +CCGATCTGGACTGGGAGCGGTACTACCCGGTCTACACCGCCGTCCGGCCGACGCTGCTGTTCGACGAGCT +GCCGGAGGTGCGGCGGCTCACCGAGGCCGCCGCCGCCACGGCCGCCACCGGCGCCGGCGGCGAGTTCGCC +GCCCGGCTGCGCACGCTGCCCGAGGCCGAGCGCGCCCACCTGCTCCTGGAACTGGTCCGGGCCGAGGCCG +CGGCCGTGCTGGGCCACGCGTCGGCCGACGCGCTGCCCGAGGACCGCGCCTTCCGCGACGTCGGCTTCGA +CTCGGTCACCGCGGTCGACCTGCGCAACCGGATCTCCGCCGGCACCGGCCTGACCCTGCCCGCCACCATG +GTGTTCGACCACCCGACGCCGAGGCGGCTGGCCGGGTTCCTGGCCGCCACGATCACCGGCTCGGGTGCCG +TCGAGCAGGCACCGGCCGTGGCCGGCGTGGACACCGGCGAGCCCGTCGCCATCATCGGGATGGCCTGCCG +CTACCCGGGTGGCGCGAACACCCCGGAACGGTTGTGGGACCTGGTCGTGGGCGGCGTGGACGCCATCTCC +GGCTTCCCGGCCGACCGCAACTGGCCGACCGACGCGCTCTACGACCCGGACCCGGACGCCGGCGGCAAGA +CCTATTCGGTGCAGGGCGGCTTCCTGCACGAGGCGGCCGAGTTCGACCCGGGCTTCTTCGGCATCTCGCC +GCGGGAGGCACTGTCCATGGATCCGCAGCAGCGCCTGCTGCTGGAGACGGCGTGGGAGGCGTTCGAGCGG +GCCGGGATCGACCCGCACACGCTGCGGGGCAGCGGCACCGGCACCTTCATCGGGGCCAGCTACCAGGACT 
+ACACCGCGGCCGTGTCCGGCGCGGTGGACAACGCCGACGGCCACATGATCACCGGCTCGCTGGGCAGCAT +CCTGTCCGGCCGGCTCTCCTACCTGTTCGGGCTGGAGGGCCCGGCGGTCACCCTGGACACCGCCTGCTCG +TCGTCGCTGGTCGCCATCCACCTGGCCGCGCAGTCGCTGCGGTCGGGGGAGAGCAGCCTGGCGCTGGCCG +GCGGGGTGAGCGTGATGGCGACGCCGGGGGCGTTCGTCGGCTTCTCGCGCCAGCGCGCACTGGCCACGGA +CGGCCGTTGCAAGGCCTACTCGGACCGGGCCGACGGCATGACCCTCGGCGAGGGCGTCGGCCTGGTGCTG +CTGGAGAAGCTGTCCGACGCGCAGCGCAACGGGCACCGGATCCTGGCGGTGGTCCGGGGTTCGGCCACGA +ACCAGGACGGCGCGTCCAACGGCATGACCGCGCCCAGCGGCCCGTCCCAGCAGCGGGTGATCCGGCAGGC +GCTGGCCAACGCGCGGCTCTCGGCGTCCGAGGTGGACGTGATCGAGGGCCACGGCACCGGCACCGCGCTG +GGCGACCCGATCGAGGCCCAGGCCCTGCTGGCCACCTACGGCCAGGACCGGGAACGGCCGCTGCTGCTCG +GCTCGGTGAAGTCCAACATCGGCCACACCCAGATGGCCTCCGGCGTGGCCGGCGTGATCAAGGTGGTGCA +GGCGCTGCGGCACGGGCTGGTACCCAAGACGCTGCACGTGGACGAGCCCTCCACGCACGTCGACTGGAGC +ACCGGCTCGATCGAGCTGCCGTCCGGCAGCGTGCCGTGGCCGGAGAGCGGCCGGCCGCGCCGGGCCGGTA +TCTCGTCCTTCGGGCTGAGCGGCACGAACGTGCACACCATCCTCGAGCAGGCCCCGGAACCGGCCGCCGA +AGCCGGCCCCGAGCCGGAGCCCGGCCTGGTGCCGGTCCCGCTGTCCGGCCGGACGGAAGCAGCGCTGCGC +GCTCAGGCCGCCACCGTGCTGGACACCCTGGACGACGGCGTGTCGCCGGCCGTGCTCGGGTACTCGCTGG +CCTCCACCCGGTCGGCCTTCGAACACCGTGCGGTGCTGCTGGCCGAGGACCACGACGAACTGCGGCGCGG +CCTGGCCGCACTGGCCGGCGACCAGCCGGACGGCGGCGTGGTGCGGGGCACCGTGACGCGGGGCCGCACG +GCGTTCCTGTTCGCCGGCCAGGGCAGCCAGCGGGCCGGGATGGGCCGCGAGCTGTACGAGCGCCACCCGG +TGTTCGCCGACGCGCTGGACGCGGTGCTGGGGCACTTCGACCTGCCCCGTGCGCTGCGGGACGTGATGTG +GGACGACGATTCCACGGCCCTCGACGAGACGGGGTACACCCAGCCGGCGTTGTTCGCCTTCGAGGTGGCG +TTGTTCCGGTTGCTGGAGTCGTGGGGTGTGACGCCGGATTACCTGGCCGGGCATTCGATCGGTGAGATCG +CCGCGGCGCACGTGGCCGGAGTGTTGTCGCTGGCCGATGCCTGTGCGTTGGTCGCTGCGCGGGGTGCGCT +GATGCAGGCGCTGCCGTCCGGCGGGGCCATGGTTTCGGTGCGCGGCTCCGAGGCCGACGTCGCCGGGCAC +CTCGGCGAGGACGTCGCCGTCGCGGCGGTCAACGGGCCCGAGTCGGTGGTGCTGGCCGGGACCGAGGACG +CGGTGCTCCAGGCGGCCGGCCGCCTGGAGGCCGCCGGCCACAAGGTCCGCCGCCTGCGGGTCAGCCACGC +CTTCCACTCGCCCTTGATGGATCCCGTGCTGGCCGAGTTCGCGACGGTGGCTCAGGGCCTGACCTACCAC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sequence.gb Wed Feb 07 06:22:58 2018 -0500 @@ -0,0 +1,146 @@ +LOCUS ARBH01000003 3500 bp DNA linear BCT 22-APR-2013 +DEFINITION Amycolatopsis balhimycina FH 1894 strain DSM 44591 + A3CEDRAFT_scaffold1.1_C3, whole genome shotgun sequence. +ACCESSION ARBH01000003 ARBH01000000 +VERSION ARBH01000003.1 +DBLINK BioProject: PRJNA165347 + BioSample: SAMN02256403 +KEYWORDS WGS; GSC:MIGS:2.1; IMPROVED_HIGH_QUALITY_DRAFT. +SOURCE Amycolatopsis balhimycina FH 1894 + ORGANISM Amycolatopsis balhimycina FH 1894 + Bacteria; Actinobacteria; Pseudonocardiales; Pseudonocardiaceae; + Amycolatopsis. +REFERENCE 1 (bases 1 to 3500) + AUTHORS Klenk,H.-P., Huntemann,M., Han,J., Chen,A., Kyrpides,N., + Mavromatis,K., Markowitz,V., Palaniappan,K., Ivanova,N., + Schaumberg,A., Pati,A., Liolios,K., Nordberg,H.P., Cantor,M.N., + Hua,S.X. and Woyke,T. + TITLE Direct Submission + JOURNAL Submitted (16-APR-2013) DOE Joint Genome Institute, 2800 Mitchell + Drive, Walnut Creek, CA 94598-1698, USA +COMMENT URL -- http://www.jgi.doe.gov + JGI Project ID: 404843 + Source DNA and Organism available from Hans-Peter Klenk + (hpk@dsmz.de) + Source DNA available from Hans-Peter Klenk (hpk@dsmz.de) + Organism available from Hans-Peter Klenk (hpk@dsmz.de) + Contacts: Hans-Peter Klenk (hpk@dsmz.de) + Tanja Woyke (microbe@cuba.jgi-psf.org) + Whole genome sequencing and draft assembly at JGI-PGF + Annotation by JGI-ORNL + The JGI and collaborators endorse the principles for the + distribution and use of large scale sequencing data adopted by the + larger genome sequencing community and urge users of this data to + follow them. It is our intention to publish the work of this + project in a timely fashion and we welcome collaborative + interaction on the project and analysis. + (http://www.genome.gov/page.cfm?pageID=10506376) + Full annotations are available from IMG. 
+ + ##MIGS-Data-START## + investigation_type :: bacteria_archaea + project_name :: Amycolatopsis balhimycina DSM 44591 + collection_date :: Missing + lat_lon :: Missing + depth :: Missing + alt_elev :: Missing + country :: India + num_replicons :: Missing + ref_biomaterial :: DSM 44591 + biotic_relationship :: Free living + rel_to_oxygen :: Aerobe + isol_growth_condt :: Missing + assembly :: Velvet v. 1.1.05; ALLPATHS v. r38445; Phrap + v. 4.24 + finishing_strategy :: Missing + environment :: Soil + trophic_level :: Missing + sequencing_meth :: WGS + GOLD Stamp ID :: Gi10728 + Type Strain :: Yes + Funding Program :: DOE-CSP 2011 + Isolation Site :: Soil + Cell Shape :: Filament-shaped + Motility :: Nonmotile + Sporulation :: Sporulating + Temperature Range :: Mesophile + Gram Staining :: Gram+ + Diseases :: None + ##MIGS-Data-END## + + ##Genome-Assembly-Data-START## + Finishing Goal :: Improved High-Quality Draft + Current Finishing Status :: Improved High-Quality Draft + Assembly Method :: Velvet v. 1.1.05; ALLPATHS v. r38445; + Phrap v. 
4.24 + Genome Coverage :: Unknown + Sequencing Technology :: Illumina GAii + ##Genome-Assembly-Data-END## +FEATURES Location/Qualifiers + source 1..3500 + /organism="Amycolatopsis balhimycina FH 1894" + /mol_type="genomic DNA" + /strain="DSM 44591" + /culture_collection="DSM:44591" + /db_xref="taxon:1089545" +ORIGIN + 1 ggggcggcgt gctcgccctg cccgccgagc tggaccgccg cggcggcgac cggctggcgg + 61 ctgtgctgtc cggcgccacc ggcgaagacc agctggccat ccgcgccgcc ggcgtgttcg + 121 gccgccgcgt ggtgcgggcc ccggccggcg accgcgcgcc ggcgcggacc tggaccccgc + 181 gcggcaccac gctgatcacc ggcggcaccg gcaccctggc cccgcacctg gcccgctggc + 241 tggccgagca gggcgccgag cacatcgtgc tgaccagccg caccggcgcc gaggccccga + 301 aggcccggca gctgctggcg gagctgggcg agaccgtcga ggcggtggcc tgcgacgtca + 361 ccgacaaggc ggcgctggcc gccctgctgg cgcggttgcg ggccgagggc cggaccgtgc + 421 ggaacgtggt gcacacggcc gccgtgatcg agctgcacac gctggccgag accgacctgg + 481 ccgcgttctc ccggaccgtg cacgccaagg tggtgggcgc gcgcaacctg gacgagctgc + 541 tcgacaccga cgagctcgat gcgttcgtgc tgtactcctc caccgccggc ctgtggggca + 601 ccggcgcgca cgccgcctac gtggccggca acgcgtactt gcacgcgctg gcagcccacc + 661 ggcgcgcccg ggggctgcgg gccaccgcgc tgtcgtgggg catctgggcc gacgaccgcg + 721 aactcggccg ggtcgacccg gagcagatcg tgcgcagcgg cctggtgttc atggcgccgg + 781 agctggcgct ggagggtctg cgccgggccc tggacgacga cgagaccgcg ctggccgtgg + 841 ccgatctgga ctgggagcgg tactacccgg tctacaccgc cgtccggccg acgctgctgt + 901 tcgacgagct gccggaggtg cggcggctca ccgaggccgc cgccgccacg gccgccaccg + 961 gcgccggcgg cgagttcgcc gcccggctgc gcacgctgcc cgaggccgag cgcgcccacc + 1021 tgctcctgga actggtccgg gccgaggccg cggccgtgct gggccacgcg tcggccgacg + 1081 cgctgcccga ggaccgcgcc ttccgcgacg tcggcttcga ctcggtcacc gcggtcgacc + 1141 tgcgcaaccg gatctccgcc ggcaccggcc tgaccctgcc cgccaccatg gtgttcgacc + 1201 acccgacgcc gaggcggctg gccgggttcc tggccgccac gatcaccggc tcgggtgccg + 1261 tcgagcaggc accggccgtg gccggcgtgg acaccggcga gcccgtcgcc atcatcggga + 1321 tggcctgccg ctacccgggt ggcgcgaaca ccccggaacg gttgtgggac ctggtcgtgg + 1381 gcggcgtgga 
cgccatctcc ggcttcccgg ccgaccgcaa ctggccgacc gacgcgctct + 1441 acgacccgga cccggacgcc ggcggcaaga cctattcggt gcagggcggc ttcctgcacg + 1501 aggcggccga gttcgacccg ggcttcttcg gcatctcgcc gcgggaggca ctgtccatgg + 1561 atccgcagca gcgcctgctg ctggagacgg cgtgggaggc gttcgagcgg gccgggatcg + 1621 acccgcacac gctgcggggc agcggcaccg gcaccttcat cggggccagc taccaggact + 1681 acaccgcggc cgtgtccggc gcggtggaca acgccgacgg ccacatgatc accggctcgc + 1741 tgggcagcat cctgtccggc cggctctcct acctgttcgg gctggagggc ccggcggtca + 1801 ccctggacac cgcctgctcg tcgtcgctgg tcgccatcca cctggccgcg cagtcgctgc + 1861 ggtcggggga gagcagcctg gcgctggccg gcggggtgag cgtgatggcg acgccggggg + 1921 cgttcgtcgg cttctcgcgc cagcgcgcac tggccacgga cggccgttgc aaggcctact + 1981 cggaccgggc cgacggcatg accctcggcg agggcgtcgg cctggtgctg ctggagaagc + 2041 tgtccgacgc gcagcgcaac gggcaccgga tcctggcggt ggtccggggt tcggccacga + 2101 accaggacgg cgcgtccaac ggcatgaccg cgcccagcgg cccgtcccag cagcgggtga + 2161 tccggcaggc gctggccaac gcgcggctct cggcgtccga ggtggacgtg atcgagggcc + 2221 acggcaccgg caccgcgctg ggcgacccga tcgaggccca ggccctgctg gccacctacg + 2281 gccaggaccg ggaacggccg ctgctgctcg gctcggtgaa gtccaacatc ggccacaccc + 2341 agatggcctc cggcgtggcc ggcgtgatca aggtggtgca ggcgctgcgg cacgggctgg + 2401 tacccaagac gctgcacgtg gacgagccct ccacgcacgt cgactggagc accggctcga + 2461 tcgagctgcc gtccggcagc gtgccgtggc cggagagcgg ccggccgcgc cgggccggta + 2521 tctcgtcctt cgggctgagc ggcacgaacg tgcacaccat cctcgagcag gccccggaac + 2581 cggccgccga agccggcccc gagccggagc ccggcctggt gccggtcccg ctgtccggcc + 2641 ggacggaagc agcgctgcgc gctcaggccg ccaccgtgct ggacaccctg gacgacggcg + 2701 tgtcgccggc cgtgctcggg tactcgctgg cctccacccg gtcggccttc gaacaccgtg + 2761 cggtgctgct ggccgaggac cacgacgaac tgcggcgcgg cctggccgca ctggccggcg + 2821 accagccgga cggcggcgtg gtgcggggca ccgtgacgcg gggccgcacg gcgttcctgt + 2881 tcgccggcca gggcagccag cgggccggga tgggccgcga gctgtacgag cgccacccgg + 2941 tgttcgccga cgcgctggac gcggtgctgg ggcacttcga cctgccccgt gcgctgcggg + 3001 acgtgatgtg ggacgacgat tccacggccc 
tcgacgagac ggggtacacc cagccggcgt + 3061 tgttcgcctt cgaggtggcg ttgttccggt tgctggagtc gtggggtgtg acgccggatt + 3121 acctggccgg gcattcgatc ggtgagatcg ccgcggcgca cgtggccgga gtgttgtcgc + 3181 tggccgatgc ctgtgcgttg gtcgctgcgc ggggtgcgct gatgcaggcg ctgccgtccg + 3241 gcggggccat ggtttcggtg cgcggctccg aggccgacgt cgccgggcac ctcggcgagg + 3301 acgtcgccgt cgcggcggtc aacgggcccg agtcggtggt gctggccggg accgaggacg + 3361 cggtgctcca ggcggccggc cgcctggagg ccgccggcca caaggtccgc cgcctgcggg + 3421 tcagccacgc cttccactcg cccttgatgg atcccgtgct ggccgagttc gcgacggtgg + 3481 ctcagggcct gacctaccac +//
--- a/tool-data/antismash.loc.sample Tue Jul 15 14:34:55 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,16 +0,0 @@ -#This is a sample file distributed with Galaxy that is used to define a -#list of PFAM hmms, using three columns tab separated -#(longer whitespace are TAB characters): -# -#The entries are as follows: -# -#<unique_id> <PFAM name> <path> -# -#Your antismash.loc file should include an entry per line. For example: -# -#pfam01 PFAM 27 08 Aug 2013 /data/0/galaxy_data/antismash/pfam/08_08_2013/ -# -#...etc... -# - -
--- a/tool_data_table_conf.xml.sample Tue Jul 15 14:34:55 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> -<tables> - <!-- Locations of indexes in the AntiSmash Pfam database --> - <table name="antismash_pfam" comment_char="#"> - <columns>value, dbkey, name, path</columns> - <file path="tool-data/antismash.loc" /> - </table> -</tables>
--- a/tool_dependencies.xml Tue Jul 15 14:34:55 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,217 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="blast+" version="2.2.28"> - <repository changeset_revision="23b9ba41ad00" name="package_blast_plus_2_2_28" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" /> - </package> - <package name="hmmer" version="3.0"> - <repository changeset_revision="bae6be83ada5" name="package_hmmer_3_0" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" /> - </package> - <package name="muscle" version="3.8.31"> - <repository changeset_revision="8f20b47725d8" name="package_muscle_3_8_31" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" /> - </package> - <package name="biopython" version="1.62"> - <repository changeset_revision="3e82cbc44886" name="package_biopython_1_62" owner="biopython" toolshed="http://toolshed.g2.bx.psu.edu" /> - </package> - <package name="helperlibs" version="0.1.2"> - <install version="1.0"> - <actions> - <action type="download_by_url">https://pypi.python.org/packages/source/h/helperlibs/helperlibs-0.1.2.tar.gz</action> - <action type="make_directory">$INSTALL_DIR/lib/python</action> - <action type="shell_command"> - export PYTHONPATH=$PYTHONPATH:$INSTALL_DIR/lib/python && - python setup.py install --install-lib $INSTALL_DIR/lib/python --install-scripts $INSTALL_DIR/bin - </action> - <action type="set_environment"> - <environment_variable action="append_to" name="PYTHONPATH">$INSTALL_DIR/lib/python</environment_variable> - <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> - </action> - </actions> - </install> - <readme> - helperlibs python library - </readme> - </package> - <package name="cssselect" version="0.9"> - <install version="1.0"> - <actions> - <action type="download_by_url">https://pypi.python.org/packages/source/c/cssselect/cssselect-0.9.tar.gz</action> - <action type="make_directory">$INSTALL_DIR/lib/python</action> - 
<action type="shell_command"> - export PYTHONPATH=$PYTHONPATH:$INSTALL_DIR/lib/python && - python setup.py install --install-lib $INSTALL_DIR/lib/python --install-scripts $INSTALL_DIR/bin - </action> - <action type="set_environment"> - <environment_variable action="append_to" name="PYTHONPATH">$INSTALL_DIR/lib/python</environment_variable> - <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> - </action> - </actions> - </install> - <readme> - cssselect python library - </readme> - </package> - <package name="pyquery" version="1.2.6"> - <install version="1.0"> - <actions> - <action type="download_by_url">https://github.com/gawel/pyquery/archive/1.2.6.tar.gz</action> - <action type="make_directory">$INSTALL_DIR/lib/python</action> - <action type="shell_command"> - export PYTHONPATH=$PYTHONPATH:$INSTALL_DIR/lib/python && - python setup.py install --install-lib $INSTALL_DIR/lib/python --install-scripts $INSTALL_DIR/bin - </action> - <action type="set_environment"> - <environment_variable action="append_to" name="PYTHONPATH">$INSTALL_DIR/lib/python</environment_variable> - <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> - </action> - </actions> - </install> - <readme> - pyquery python library - </readme> - </package> - <package name="straight.plugin" version="1.4.0-post-1"> - <install version="1.0"> - <actions> - <action type="download_by_url">https://pypi.python.org/packages/source/s/straight.plugin/straight.plugin-1.4.0-post-1.tar.gz</action> - <action type="make_directory">$INSTALL_DIR/lib/python</action> - <action type="shell_command"> - export PYTHONPATH=$PYTHONPATH:$INSTALL_DIR/lib/python && - python setup.py install --install-lib $INSTALL_DIR/lib/python --install-scripts $INSTALL_DIR/bin - </action> - <action type="set_environment"> - <environment_variable action="append_to" name="PYTHONPATH">$INSTALL_DIR/lib/python</environment_variable> - <environment_variable 
action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> - </action> - </actions> - </install> - <readme> - straight.plugin python library - </readme> - </package> - <package name="antismash" version="2.0.2"> - <install version="1.0"> - <actions_group> - <!-- Download the binaries for MUSCLE compatible with 64-bit OSX. --> - <actions architecture="x86_64" os="darwin"> - <action type="download_by_url">https://bitbucket.org/antismash/antismash2/downloads/antiSMASH2.0.2_macosx.zip</action> - <action type="move_directory_files"> - <source_directory>.</source_directory> - <destination_directory>$INSTALL_DIR</destination_directory> - </action> - <action type="chmod"> - <file mode="750">$INSTALL_DIR/run_antismash.py</file> - </action> - <!-- As last step we created an additional output plugin, that generates protein FASTA files from all clusters and - we need to copy this plugin into the correct location. - --> - <action type="download_file">https://raw.github.com/bgruening/galaxytools/master/antismash/src/genecluster_sequence/__init__.py</action> - <action type="make_directory">$INSTALL_DIR/antismash/output_modules/genecluster_sequence/</action> - <action type="move_file"> - <source>__init__.py</source> - <destination>$INSTALL_DIR/antismash/output_modules/genecluster_sequence/</destination> - </action> - <!-- AntiSmash has a prerequirement check that is not useful for us, because we - do not offer the option for genprediction and do not install glimmer and Co. - For that reason deactivate the check. - --> - <action type="shell_command">sed -i 's/check_prereqs(plugins, options) > 0/False/g' $INSTALL_DIR/run_antismash.py</action> - <action type="download_file">https://bitbucket.org/antismash/antismash2/downloads/clusterblast.tar.gz</action> - <action type="shell_command">tar xfvz clusterblast.tar.gz -C $INSTALL_DIR/antismash/generic_modules/clusterblast</action> - </actions> - <!-- Download the binaries for MUSCLE compatible with 64-bit OSX. 
--> - <actions architecture="i386" os="darwin"> - <action type="download_by_url">https://bitbucket.org/antismash/antismash2/downloads/antiSMASH2.0.2_macosx.zip</action> - <action type="move_directory_files"> - <source_directory>.</source_directory> - <destination_directory>$INSTALL_DIR</destination_directory> - </action> - <action type="chmod"> - <file mode="750">$INSTALL_DIR/run_antismash.py</file> - </action> - <!-- As last step we created an additional output plugin, that generates protein FASTA files from all clusters and - we need to copy this plugin into the correct location. - --> - <action type="download_file">https://raw.github.com/bgruening/galaxytools/master/antismash/src/genecluster_sequence/__init__.py</action> - <action type="make_directory">$INSTALL_DIR/antismash/output_modules/genecluster_sequence/</action> - <action type="move_file"> - <source>__init__.py</source> - <destination>$INSTALL_DIR/antismash/output_modules/genecluster_sequence/</destination> - </action> - <!-- AntiSmash has a prerequirement check that is not useful for us, because we - do not offer the option for genprediction and do not install glimmer and Co. - For that reason deactivate the check. - --> - <action type="shell_command">sed -i 's/check_prereqs(plugins, options) > 0/False/g' $INSTALL_DIR/run_antismash.py</action> - <action type="download_file">https://bitbucket.org/antismash/antismash2/downloads/clusterblast.tar.gz</action> - <action type="shell_command">tar xfvz clusterblast.tar.gz -C $INSTALL_DIR/antismash/generic_modules/clusterblast</action> - </actions> - <!-- Download the binaries for AntiSmash compatible with 64-bit Linux. 
--> - <actions architecture="x86_64" os="linux"> - <action target_filename="antismash-2.0.2.tar.bz2" type="download_by_url">https://bitbucket.org/antismash/antismash2/downloads/antismash-2.0.2.x86_64.tar.bz2</action> - <action type="move_directory_files"> - <source_directory>.</source_directory> - <destination_directory>$INSTALL_DIR</destination_directory> - </action> - <action type="chmod"> - <file mode="750">$INSTALL_DIR/run_antismash.py</file> - </action> - <!-- As last step we created an additional output plugin, that generates protein FASTA files from all clusters and - we need to copy this plugin into the correct location. - --> - <action type="download_file">https://raw.github.com/bgruening/galaxytools/master/antismash/src/genecluster_sequence/__init__.py</action> - <action type="make_directory">$INSTALL_DIR/antismash/output_modules/genecluster_sequence/</action> - <action type="move_file"> - <source>__init__.py</source> - <destination>$INSTALL_DIR/antismash/output_modules/genecluster_sequence/</destination> - </action> - <!-- AntiSmash has a prerequirement check that is not useful for us, because we - do not offer the option for genprediction and do not install glimmer and Co. - For that reason deactivate the check. - --> - <action type="shell_command">sed -i 's/check_prereqs(plugins, options) > 0/False/g' $INSTALL_DIR/run_antismash.py</action> - <action type="download_file">https://bitbucket.org/antismash/antismash2/downloads/clusterblast.tar.gz</action> - <action type="shell_command">tar xfvz clusterblast.tar.gz -C $INSTALL_DIR/antismash/generic_modules/clusterblast</action> - </actions> - <!-- Download the binaries for AntiSmash compatible with 32-bit Linux. 
--> - <actions architecture="i386" os="linux"> - <action type="download_by_url">https://bitbucket.org/antismash/antismash2/downloads/antismash-2.0.2.i686.tar.bz2</action> - <action type="move_directory_files"> - <source_directory>.</source_directory> - <destination_directory>$INSTALL_DIR</destination_directory> - </action> - <action type="chmod"> - <file mode="750">$INSTALL_DIR/run_antismash.py</file> - </action> - <!-- As last step we created an additional output plugin, that generates protein FASTA files from all clusters and - we need to copy this plugin into the correct location. - --> - <action type="download_file">https://raw.github.com/bgruening/galaxytools/master/antismash/src/genecluster_sequence/__init__.py</action> - <action type="make_directory">$INSTALL_DIR/antismash/output_modules/genecluster_sequence/</action> - <action type="move_file"> - <source>__init__.py</source> - <destination>$INSTALL_DIR/antismash/output_modules/genecluster_sequence/</destination> - </action> - <!-- AntiSmash has a prerequirement check that is not useful for us, because we - do not offer the option for genprediction and do not install glimmer and Co. - For that reason deactivate the check. - --> - <action type="shell_command">sed -i 's/check_prereqs(plugins, options) > 0/False/g' $INSTALL_DIR/run_antismash.py</action> - <action type="download_file">https://bitbucket.org/antismash/antismash2/downloads/clusterblast.tar.gz</action> - <action type="shell_command">tar xfvz clusterblast.tar.gz -C $INSTALL_DIR/antismash/generic_modules/clusterblast</action> - </actions> - <actions> - - </actions> - <!-- The $PATH environment variable is only set if one of the above <actions> tags resulted in a successful installation. 
--> - <action type="set_environment"> - <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR</environment_variable> - <environment_variable action="append_to" name="PYTHONPATH">$INSTALL_DIR</environment_variable> - </action> - </actions_group> - </install> - <readme> - AntiSmash installation with all dependencies. - Java needs to be installed. - </readme> - </package> -</tool_dependency>