Mercurial > repos > gga > tripal_analysis_load_gff3
changeset 0:5ce8ae1288c1 draft
planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/tripal commit f745b23c84a615bf434d717c8c0e553a012f0268
| author | gga | 
|---|---|
| date | Mon, 11 Sep 2017 05:51:14 -0400 | 
| parents | |
| children | f772d70f80e9 | 
| files | analysis_load_gff3.xml macros.xml test-data/blast.xml test-data/blast2go.gaf test-data/citrus_genome.fasta test-data/interpro.xml test-data/sample.gff3 tripal.py | 
| diffstat | 8 files changed, 1022 insertions(+), 0 deletions(-) [+] | 
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/analysis_load_gff3.xml Mon Sep 11 05:51:14 2017 -0400 @@ -0,0 +1,187 @@ +<?xml version="1.0"?> +<tool id="analysis_load_gff3" profile="16.04" name="Load a GFF3 annotation file" version="@WRAPPER_VERSION@.0"> + <description>into Tripal</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <code file="tripal.py"/> + <expand macro="stdio"/> + <command><![CDATA[ + tmp_dir=`mktemp -d -p "@DATA_DIR@"` && chmod a+rx "\${tmp_dir}" + + && + + cp '${gff3}' "\${tmp_dir}/annotation.gff3" + + && + + @AUTH@ + + tripaille analysis load_gff3 + + --organism_id '${organism}' + + --analysis_id '${analysis}' + + --import_mode '${import_mode}' + + #if str($naming.mode) == 'manual': + --re_mrna '${naming.re_mrna}' + --re_protein '${naming.re_protein}' + #end if + + #if str($advanced.landmark_type): + --landmark_type '${advanced.landmark_type}' + #end if + + #if str($advanced.alt_id_attr): + --alt_id_attr '${advanced.alt_id_attr}' + #end if + + ${advanced.create_organism} + + #if str($target.target_organism): + --target_organism '${target.target_organism}' + #end if + + #if str($target.target_type): + --target_type '${target.target_type}' + #end if + + ${target.target_create} + + "\${tmp_dir}/annotation.gff3" + + && + + echo "Data loaded" > $results + ]]></command> + <inputs> + <param name="gff3" + type="data" + format="gff3" + label="Annotation file" /> + + <param argument="--organism" + type="select" + dynamic_options="list_organisms()" + label="Organism" /> + + <param argument="--analysis" + type="select" + dynamic_options="list_analyses()" + label="Analysis" /> + + <param name="import_mode" + argument="--import_mode" + type="select" + label="Loading method"> + <option value="update" selected="true">Import everything, update when already existing</option> + <option value="add_only">Import only new features</option> + </param> + + <conditional name="naming"> + <param name="mode" + 
type="select" + label="Naming method for proteins" + help="in case your GFF file does not contain polypeptide features"> + <option value="auto">Automatic</option> + <option value="manual">Manual</option> + </param> + <when value="auto"/> + <when value="manual"> + <param name="re_mrna" + argument="--re_mrna" + type="text" + label="Regular expression for the mRNA name" + help="this regex will be used to extract parts of the mRNA feature name"> + <expand macro="sanitized"/> + </param> + + <param name="re_protein" + argument="--re_protein" + type="text" + label="Replacement string for the protein name" + help="will be used to generate the protein name based on the mRNA name"> + <expand macro="sanitized"/> + </param> + </when> + </conditional> + + <section name="advanced" title="Advanced options" expanded="False"> + <param name="landmark_type" + argument="--landmark_type" + type="text" + optional="true" + label="Landmark type" + help="A Sequence Ontology type for the landmark sequences in the GFF file (e.g. 'chromosome'). Will be used to create them if they don't already exist." /> + + <param name="alt_id_attr" + argument="--alt_id_attr" + type="text" + optional="true" + label="ID attribute" + help="Name of the GFF attribute that contains a unique identifier for each feature. Leave empty to use the 'ID' attribute" /> + + <param name="create_organism" + argument="--create_organism" + type="boolean" + checked="false" + truevalue="--create_organism" + falsevalue="" + label="Create organisms specified in 'organism' attribute" + help="If not found, create features referenced in the target attribute." /> + </section> + + <section name="target" title="Target attribute handling" expanded="False"> + <param name="target_organism" + argument="--target_organism" + type="text" + optional="true" + label="Target organism name" + help="Name of organism corresponding to target attribute. Abbreviation or common name as created with the 'Create Organism' tool." 
/> + <param name="target_type" + argument="--target_type" + type="text" + optional="true" + label="Target feature type" + help="Type of features referenced in the target attribute. Should be a Sequence Ontology term." /> + <param name="target_create" + argument="--target_create" + type="boolean" + checked="false" + truevalue="--target_create" + falsevalue="" + label="Create target features" + help="If not found, create features referenced in the target attribute." /> + </section> + </inputs> + <outputs> + <data format="txt" name="results" label="Load GFF3 into Tripal" /> + </outputs> + <tests> + <test expect_failure="true" expect_exit_code="1"> + <param name="gff3" value="sample.gff3" /> + <param name="organism" value="Testus testus" /> + <param name="analysis" value="Annotation xx" /> + <conditional name="naming"> + <param name="mode" value="manual" /> + <param name="re_mrna" value="([a-z]{4}[0-9]+)_rna" /> + <param name="re_protein" value="([a-z]{4}[0-9]+)_prot" /> + </conditional> + + <expand macro="test_result" /> + </test> + </tests> + <help><![CDATA[ + @HELP_OVERVIEW@ + + **Load GFF3** + + With this tool, you can load features from a GFF3 file into the Tripal/Chado database. + + @HELP@ + ]]></help> + <expand macro="citation"/> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Mon Sep 11 05:51:14 2017 -0400 @@ -0,0 +1,215 @@ +<?xml version="1.0"?> +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="2.0.4">python-tripal</requirement> + <yield/> + </requirements> + </xml> + + <xml name="stdio"> + <stdio> + <regex level="fatal" match="Exception:" source="stderr" /> + <regex level="fatal" match="error" source="stderr" /> + <exit_code range="1:" /> + </stdio> + </xml> + + <token name="@WRAPPER_VERSION@">2.0.4</token> + + <xml name="citation"> + <citations> + <citation type="doi">10.1093/database/bat075</citation> + </citations> + </xml> + + <token name="@HELP_OVERVIEW@"><![CDATA[ + **Python-tripal Overview** + + Python-tripal provides several tools allowing to load data into a remote Chado-based Tripal database. + The tripal_rest_api Tripal module must be installed on the remote server to use these tools. + ]]></token> + + <token name="@HELP@"><![CDATA[ + **Useful Links** + + `Tripal project website <http://tripal.info/>`_ + + `Tripal REST API module <http://github.com/abretaud/tripal_rest_api>`_: a Tripal module required to use these galaxy tools + ]]></token> + + <token name="@DATA_DIR@">\$GALAXY_TRIPAL_SHARED_DIR</token> + + <token name="@AUTH@"><![CDATA[ + echo "__default: local" > '.auth.yml' && + echo "local:" >> '.auth.yml' && + echo " tripal_url: \"\$GALAXY_TRIPAL_URL\"" >> '.auth.yml' && + echo " username: \"\$GALAXY_TRIPAL_USER\"" >> '.auth.yml' && + echo " password: \"\$GALAXY_TRIPAL_PASSWORD\"" >> '.auth.yml' && + + TRIPAILLE_GLOBAL_CONFIG_PATH='.auth.yml' + ]]></token> + + <xml name="test_result"> + <assert_stderr> + <has_text text="MissingSchema" /> + </assert_stderr> + </xml> + + <xml name="analysis"> + <section name="analysis" title="Analysis" expanded="True"> + <param name="name" + type="text" + label="The analysis name [mandatory]"> + <expand macro="sanitized"/> + </param> + + <param name="program" + type="text" + 
label="Program name [mandatory]" + help="Name of the program that was used to perform this analysis"> + <expand macro="sanitized"/> + </param> + <param name="program_version" + type="text" + label="Program version [mandatory]" + help="Version of the program that was used to perform this analysis"/> + <param name="source" + type="text" + label="Source name [mandatory]" + help="Where this analysis comes from"> + <expand macro="sanitized"/> + </param> + + <param name="source_version" + argument="--sourceversion" + type="text" + optional="True" + label="Source version" /> + <param name="source_uri" + argument="--sourceuri" + type="text" + optional="True" + label="Source URI" + help="URI where source data was retrieved" /> + <param name="algorithm" + argument="--algorithm" + type="text" + label="Algorithm" + optional="True" + help="Algorithm of the program that was used to perform this analysis" /> + <param name="description" + argument="--description" + type="text" + optional="True" + label="Analysis description"> + <expand macro="sanitized"/> + </param> + <param name="date" + argument="--date_executed" + type="text" + optional="True" + label="Execution date" + help="Format: yyyy-mm-dd Default: Today"> + <validator type="regex" message="Date in YYYY-MM-DD format">^[0-9]{4}-[0-9]{2}-[0-9]{2}$</validator> + </param> + </section> + </xml> + + <xml name="feature_rel"> + <param name="rel_subject_re" + argument="--rel-subject-re" + type="text" + label="Regular expression to extract the unique name of the parent feature" + help="this regex will be applied on the fasta definition line to generate the unique name of the parent feature"> + <expand macro="sanitized"/> + </param> + + <param name="rel_subject_type" + argument="--rel-subject-type" + type="text" + label="Sequence type of the parent" + help="this should be a Sequence Ontology term" /> + </xml> + + <xml name="match_type"> + <param name="match_type" + argument="--match-type" + type="select" + label="Match type for 
already loaded features"> + <option value="uniquename" selected="true">Unique name</option> + <option value="name">Name</option> + </param> + </xml> + + <xml name="matching"> + <section name="matching" title="Feature matching" expanded="True"> + <param name="query_type" + type="text" + label="The feature type of the blast query" + help="It must be a valid Sequence Ontology term. e.g. 'contig', 'gene', 'mRNA', 'polypeptide'" /> + + <param name="query_uniquename" + type="boolean" + checked="false" + truevalue="--query_uniquename" + falsevalue="" + label="Find blast query features using their Unique name" + help="Feature name will be used otherwise" /> + + <param name="query_re" + type="text" + optional="true" + label="Regular expression to extract the feature name from the blast query name" + help="leave empty if the first word in query name is sufficient"> + <expand macro="sanitized"/> + </param> + </section> + </xml> + + <xml name="sanitized"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + <mapping initial="none"> + <add source="'" target="'"'"'"/> + <add source="(" target="\("/> + <add source=")" target="\)"/> + </mapping> + </sanitizer> + </xml> + + <token name="@MATCHING@"><![CDATA[ + #if str($matching.query_re): + --query_re '${matching.query_re}' + #end if + + $matching.query_uniquename + + --query_type '$matching.query_type' + ]]></token> + + <token name="@ANALYSIS@"><![CDATA[ + #if $analysis.algorithm: + --algorithm '$analysis.algorithm' + #end if + #if $analysis.source_version: + --sourceversion '$analysis.source_version' + #end if + #if $analysis.source_uri: + --sourceuri '$analysis.source_uri' + #end if + #if $analysis.description: + --description '$analysis.description' + #end if + #if $analysis.date: + --date_executed '$analysis.date' + #end if + + '$analysis.name' + '$analysis.program' + '$analysis.program_version' + '$analysis.source' + ]]></token> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blast.xml Mon Sep 11 05:51:14 2017 -0400 @@ -0,0 +1,68 @@ +<?xml version="1.0"?> +<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd"> +<BlastOutput> + <BlastOutput_program>blastx</BlastOutput_program> + <BlastOutput_version>blastx 2.2.25 [Feb-01-2011]</BlastOutput_version> + <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~"Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference> + <BlastOutput_db>/scratch/mainlab/data/lib/nr</BlastOutput_db> + <BlastOutput_query-ID>lcl|1_0</BlastOutput_query-ID> + <BlastOutput_query-def>orange1.1g015632m PAC:18136217 (mRNA) Citrus sinensis</BlastOutput_query-def> + <BlastOutput_query-len>2075</BlastOutput_query-len> + <BlastOutput_param> + <Parameters> + <Parameters_matrix>BLOSUM62</Parameters_matrix> + <Parameters_expect>1e-06</Parameters_expect> + <Parameters_gap-open>11</Parameters_gap-open> + <Parameters_gap-extend>1</Parameters_gap-extend> + <Parameters_filter>F</Parameters_filter> + </Parameters> + </BlastOutput_param> + <BlastOutput_iterations> + <Iteration> + <Iteration_iter-num>1</Iteration_iter-num> + <Iteration_query-ID>lcl|1_0</Iteration_query-ID> + <Iteration_query-def>orange1.1g015632m PAC:18136217 (mRNA) Citrus sinensis</Iteration_query-def> + <Iteration_query-len>2075</Iteration_query-len> + <Iteration_hits> + <Hit> + <Hit_num>1</Hit_num> + <Hit_id>gi|224068663|ref|XP_002302794.1|</Hit_id> + <Hit_def>predicted protein [Populus trichocarpa] >gi|222844520|gb|EEE82067.1| predicted protein [Populus trichocarpa]</Hit_def> + <Hit_accession>XP_002302794</Hit_accession> + <Hit_len>409</Hit_len> + <Hit_hsps> + <Hsp> + <Hsp_num>1</Hsp_num> + <Hsp_bit-score>792.727</Hsp_bit-score> + 
<Hsp_score>2046</Hsp_score> + <Hsp_evalue>0</Hsp_evalue> + <Hsp_query-from>559</Hsp_query-from> + <Hsp_query-to>1767</Hsp_query-to> + <Hsp_hit-from>1</Hsp_hit-from> + <Hsp_hit-to>409</Hsp_hit-to> + <Hsp_query-frame>1</Hsp_query-frame> + <Hsp_identity>387</Hsp_identity> + <Hsp_positive>394</Hsp_positive> + <Hsp_gaps>6</Hsp_gaps> + <Hsp_align-len>409</Hsp_align-len> + <Hsp_qseq>MASVSVVPASG------NTVGVDRLPEEMNDMKIRDDKEMEATVVDGNGTEAGHIIVTTIGGKNGQPKQTISYMAERVVGHGSFGVVFQAKCLETGEAVAIKKVLQDKRYKNRELQTMRLLDHPNVVSLKHCFFSTTEKDELYLNLVLEYVPETVHRVIKHHYKMSQRMPLIYVKLYFYQICRALAYIHNTIGVCHRDIKPQNLLVNPHTHQLKLCDFGSAKVLVKGEPNISYICSRYYRAPELIFGATEYTAAIDIWSAGCVLAELLLGQPLFPGESGVDQLVEIIKVLGTPTREEIKCMNPNYTEFKFPQIKAHPWHKIFQKRMPPEAVDLVSRLLQYSPNLRSTALEALIHPFFDELRDPNTRLPNGRFLPPLFNFKPHELKGVPVDMLVKLIPEHARKQCAFLGL</Hsp_qseq> + <Hsp_hseq>MASVSVVPASGLRDTLGNTTGVDKLPEEMNDMKISDDKEMEAAVVDGNGTETGHIIVTTIGGKNGQPKQTISYMAERVVGHGSFGLVFQAKCLETGETVAIKKVLQDKRYKNRELQTMRLLDHPNVVSLKHCFFSTTEKDELYLNLVLEYVPETIHRVIKHYYKMSQRMPLIYVKLYFYQICRALAYIHNSIGVCHRDIKPQNLLVNPHTHQVKLCDFGSAKVLVKGEPNISYICSRYYRAPELIFGATEYTTAIDIWSAGCVLAELLLGQPLFPGESGVDQLVEIIKVLGTPTREEIKCMNPNYTEFKFPQIKAHPWHKIFHKRMPPEAVDLVSRLLQYSPNLRSTALEALIHPFFDELRDPNARLPNGRILPPLFNFKPHELKGVPVEMLVKLIPEHARKQCAFLGL</Hsp_hseq> + <Hsp_midline>MASVSVVPASG NT GVD+LPEEMNDMKI DDKEMEA VVDGNGTE GHIIVTTIGGKNGQPKQTISYMAERVVGHGSFG+VFQAKCLETGE VAIKKVLQDKRYKNRELQTMRLLDHPNVVSLKHCFFSTTEKDELYLNLVLEYVPET+HRVIKH+YKMSQRMPLIYVKLYFYQICRALAYIHN+IGVCHRDIKPQNLLVNPHTHQ+KLCDFGSAKVLVKGEPNISYICSRYYRAPELIFGATEYT AIDIWSAGCVLAELLLGQPLFPGESGVDQLVEIIKVLGTPTREEIKCMNPNYTEFKFPQIKAHPWHKIF KRMPPEAVDLVSRLLQYSPNLRSTALEALIHPFFDELRDPN RLPNGR LPPLFNFKPHELKGVPV+MLVKLIPEHARKQCAFLGL</Hsp_midline> + </Hsp> + </Hit_hsps> + </Hit> + </Iteration_hits> + <Iteration_stat> + <Statistics> + <Statistics_db-num>18996442</Statistics_db-num> + <Statistics_db-len>6510958228</Statistics_db-len> + <Statistics_hsp-len>0</Statistics_hsp-len> + <Statistics_eff-space>0</Statistics_eff-space> + <Statistics_kappa>0.041</Statistics_kappa> + 
<Statistics_lambda>0.267</Statistics_lambda> + <Statistics_entropy>0.14</Statistics_entropy> + </Statistics> + </Iteration_stat> + </Iteration> + </BlastOutput_iterations> +</BlastOutput>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blast2go.gaf Mon Sep 11 05:51:14 2017 -0400 @@ -0,0 +1,6 @@ +!gaf-version: 2.1 + gi|328696447|ref|XP_003240026.1| GO:0016021 + gi|328696447|ref|XP_003240026.1| GO:0006511 + gi|328696447|ref|XP_003240026.1| GO:0030145 + gi|328696447|ref|XP_003240026.1| GO:0004803 + gi|328696447|ref|XP_003240026.1| GO:0004177
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/citrus_genome.fasta Mon Sep 11 05:51:14 2017 -0400 @@ -0,0 +1,10 @@ +>scaffold00001 length=5927163 +TTTTGTATTCTATGTCCTCTGATCTTTATACTTCTTCATTTTGTCTTTGCAAGAACCGGA +ATTATGGGTACATCACAAATTCTCTAGGTGTGACTTGTGTTGTGGGGCCTTTTTTTtACA +TTTCCATATTGCAAGTATTTTTTTGCTACCATTGGTATATTTGTCTGTTAAAATCAATCT +GCTTTCACTTATGTTCGTGCGTTCTTGTTCCCTCGCCTTGCAATTGCATATCTCAAATTA +TCTTTCTTACTTTGATTTAGATGGCCAAGGTTTTAAGCTAACTTTTTACAATGCCAATTT +TTAAATGGTTTTCTAATGCTGTTCAAAGTTGCAGCCTTTACTTCGTATATTTGTCAGGTT +CTGACGGGTGCGGTCGGCGGCGGGGGCTATAGCATGCGGTCTCGAGAGCCGCAAAGAAAA +ATGGGTGGTTTTCCCGGTTTCGGCCATAACTCGTGATCGGGGCCTCCGATTCTGGTTCCG +TTTCGTCCCACGGGACCAGCCGGGCGGGGGCATCGGATTGCAAAAGTCTTTAAATTTGAA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/interpro.xml Mon Sep 11 05:51:14 2017 -0400 @@ -0,0 +1,12 @@ +<interpro_matches> + + <protein id="orange1.1g022799m_6_ORF4" length="79" crc64="D3F0B609CB0B9D28" > + <interpro id="noIPR" name="unintegrated" type="unintegrated"> + <match id="seg" name="seg" dbname="SEG"> + <location start="11" end="17" score="NA" status="?" evidence="Seg" /> + <location start="54" end="78" score="NA" status="?" evidence="Seg" /> + </match> + </interpro> + </protein> + +</interpro_matches>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample.gff3 Mon Sep 11 05:51:14 2017 -0400 @@ -0,0 +1,21 @@ +##gff-version 3 +##sequence-region scaffold00001 4058460 4062210 +scaffold00001 phytozome6 supercontig 1 5927163 . . . Name=scaffold00001;ID=scaffold00001 +scaffold00001 phytozome6 gene 4058460 4062210 . + . ID=orange1.1g015632m.g;Name=orange1.1g015632m.g +scaffold00001 phytozome6 mRNA 4058460 4062210 . + . ID=PAC:18136217;Name=orange1.1g015632m;PACid=18136217;Parent=orange1.1g015632m.g +scaffold00001 phytozome6 five_prime_UTR 4058460 4058898 . + . Parent=PAC:18136217;PACid=18136217 +scaffold00001 phytozome6 five_prime_UTR 4059019 4059074 . + . Parent=PAC:18136217;PACid=18136217 +scaffold00001 phytozome6 five_prime_UTR 4059172 4059234 . + . Parent=PAC:18136217;PACid=18136217 +scaffold00001 phytozome6 CDS 4059235 4059330 . + 0 Parent=PAC:18136217;PACid=18136217 +scaffold00001 phytozome6 CDS 4059422 4059514 . + 0 Parent=PAC:18136217;PACid=18136217 +scaffold00001 phytozome6 CDS 4059600 4059659 . + 0 Parent=PAC:18136217;PACid=18136217 +scaffold00001 phytozome6 CDS 4059790 4060062 . + 0 Parent=PAC:18136217;PACid=18136217 +scaffold00001 phytozome6 CDS 4060285 4060359 . + 0 Parent=PAC:18136217;PACid=18136217 +scaffold00001 phytozome6 CDS 4060480 4060536 . + 0 Parent=PAC:18136217;PACid=18136217 +scaffold00001 phytozome6 CDS 4060625 4060765 . + 0 Parent=PAC:18136217;PACid=18136217 +scaffold00001 phytozome6 CDS 4060857 4060907 . + 0 Parent=PAC:18136217;PACid=18136217 +scaffold00001 phytozome6 CDS 4061250 4061345 . + 0 Parent=PAC:18136217;PACid=18136217 +scaffold00001 phytozome6 CDS 4061417 4061500 . + 0 Parent=PAC:18136217;PACid=18136217 +scaffold00001 phytozome6 CDS 4061617 4061719 . + 0 Parent=PAC:18136217;PACid=18136217 +scaffold00001 phytozome6 CDS 4061823 4061905 . + 2 Parent=PAC:18136217;PACid=18136217 +scaffold00001 phytozome6 three_prime_UTR 4061906 4062210 . + . Parent=PAC:18136217;PACid=18136217
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/tripal.py Mon Sep 11 05:51:14 2017 -0400
"""Helpers that list Tripal organisms, analyses and dbs as option tuples.

Each public ``list_*`` function returns a list of ``(label, id, False)``
tuples built from a Tripal instance configured through the
``GALAXY_TRIPAL_URL``, ``GALAXY_TRIPAL_USER`` and ``GALAXY_TRIPAL_PASSWORD``
environment variables.  Results are kept in a small in-process TTL cache so
that repeated calls within the cache lifetime do not query Tripal again.
"""
import collections
import os
import time

from abc import abstractmethod

try:
    # ``collections.MutableMapping`` was removed in Python 3.10; the ABC
    # lives in ``collections.abc`` on every Python 3 version.
    from collections.abc import MutableMapping
except ImportError:  # Python 2
    from collections import MutableMapping

import tripal


#############################################
#      BEGIN IMPORT OF CACHING LIBRARY      #
#############################################
# This code is licensed under the MIT       #
# License and is a copy of code publicly    #
# available in rev.                         #
# e27332bc82f4e327aedaec17c9b656ae719322ed  #
# of https://github.com/tkem/cachetools/    #
#############################################
class DefaultMapping(MutableMapping):
    """Mutable mapping base whose lookups may fall back to ``__missing__``."""

    __slots__ = ()

    @abstractmethod
    def __contains__(self, key):  # pragma: nocover
        return False

    @abstractmethod
    def __getitem__(self, key):  # pragma: nocover
        if hasattr(self.__class__, '__missing__'):
            return self.__class__.__missing__(self, key)
        else:
            raise KeyError(key)

    def get(self, key, default=None):
        """Return ``self[key]`` if ``key`` is present, else ``default``."""
        if key in self:
            return self[key]
        else:
            return default

    # Private sentinel so that ``None`` remains a valid ``default`` for pop().
    __marker = object()

    def pop(self, key, default=__marker):
        """Remove ``key`` and return its value.

        Returns ``default`` when the key is absent; raises ``KeyError`` when
        the key is absent and no ``default`` was supplied.
        """
        if key in self:
            value = self[key]
            del self[key]
        elif default is self.__marker:
            raise KeyError(key)
        else:
            value = default
        return value

    def setdefault(self, key, default=None):
        """Return ``self[key]``, first storing ``default`` if it is absent."""
        if key in self:
            value = self[key]
        else:
            self[key] = value = default
        return value


DefaultMapping.register(dict)


class _DefaultSize(object):
    """Stand-in size table that reports a size of 1 for every key."""

    def __getitem__(self, _):
        return 1

    def __setitem__(self, _, value):
        assert value == 1

    def pop(self, _):
        return 1


class Cache(DefaultMapping):
    """Mutable mapping to serve as a simple cache or cache base class."""

    # Class-level default; replaced by a real dict per instance only when a
    # custom ``getsizeof`` is supplied (see ``__init__``).
    __size = _DefaultSize()

    def __init__(self, maxsize, missing=None, getsizeof=None):
        if missing:
            self.__missing = missing
        if getsizeof:
            self.__getsizeof = getsizeof
            self.__size = dict()
        self.__data = dict()
        self.__currsize = 0
        self.__maxsize = maxsize

    def __repr__(self):
        return '%s(%r, maxsize=%r, currsize=%r)' % (
            self.__class__.__name__,
            list(self.__data.items()),
            self.__maxsize,
            self.__currsize,
        )

    def __getitem__(self, key):
        try:
            return self.__data[key]
        except KeyError:
            return self.__missing__(key)

    def __setitem__(self, key, value):
        maxsize = self.__maxsize
        size = self.getsizeof(value)
        if size > maxsize:
            raise ValueError('value too large')
        # Evict until the new (or grown) value fits.
        if key not in self.__data or self.__size[key] < size:
            while self.__currsize + size > maxsize:
                self.popitem()
        if key in self.__data:
            diffsize = size - self.__size[key]
        else:
            diffsize = size
        self.__data[key] = value
        self.__size[key] = size
        self.__currsize += diffsize

    def __delitem__(self, key):
        size = self.__size.pop(key)
        del self.__data[key]
        self.__currsize -= size

    def __contains__(self, key):
        return key in self.__data

    def __missing__(self, key):
        value = self.__missing(key)
        try:
            self.__setitem__(key, value)
        except ValueError:
            pass  # value too large
        return value

    def __iter__(self):
        return iter(self.__data)

    def __len__(self):
        return len(self.__data)

    @staticmethod
    def __getsizeof(value):
        return 1

    @staticmethod
    def __missing(key):
        raise KeyError(key)

    @property
    def maxsize(self):
        """The maximum size of the cache."""
        return self.__maxsize

    @property
    def currsize(self):
        """The current size of the cache."""
        return self.__currsize

    def getsizeof(self, value):
        """Return the size of a cache element's value."""
        return self.__getsizeof(value)


class _Link(object):
    """Node of the circular doubly linked list that orders items by expiry."""

    __slots__ = ('key', 'expire', 'next', 'prev')

    def __init__(self, key=None, expire=None):
        self.key = key
        self.expire = expire

    def __reduce__(self):
        return _Link, (self.key, self.expire)

    def unlink(self):
        """Detach this node from its neighbours."""
        next = self.next
        prev = self.prev
        prev.next = next
        next.prev = prev


class _Timer(object):
    """Timer wrapper that freezes the reported time inside a ``with`` block."""

    def __init__(self, timer):
        self.__timer = timer
        self.__nesting = 0

    def __call__(self):
        if self.__nesting == 0:
            return self.__timer()
        else:
            return self.__time

    def __enter__(self):
        if self.__nesting == 0:
            self.__time = time = self.__timer()
        else:
            time = self.__time
        self.__nesting += 1
        return time

    def __exit__(self, *exc):
        self.__nesting -= 1

    def __reduce__(self):
        return _Timer, (self.__timer,)

    def __getattr__(self, name):
        return getattr(self.__timer, name)


class TTLCache(Cache):
    """LRU Cache implementation with per-item time-to-live (TTL) value."""

    def __init__(self, maxsize, ttl, timer=time.time, missing=None,
                 getsizeof=None):
        Cache.__init__(self, maxsize, missing, getsizeof)
        self.__root = root = _Link()
        root.prev = root.next = root
        self.__links = collections.OrderedDict()
        self.__timer = _Timer(timer)
        self.__ttl = ttl

    def __contains__(self, key):
        try:
            link = self.__links[key]  # no reordering
        except KeyError:
            return False
        else:
            return not (link.expire < self.__timer())

    def __getitem__(self, key, cache_getitem=Cache.__getitem__):
        try:
            link = self.__getlink(key)
        except KeyError:
            expired = False
        else:
            expired = link.expire < self.__timer()
        if expired:
            return self.__missing__(key)
        else:
            return cache_getitem(self, key)

    def __setitem__(self, key, value, cache_setitem=Cache.__setitem__):
        with self.__timer as time:
            self.expire(time)
            cache_setitem(self, key, value)
        try:
            link = self.__getlink(key)
        except KeyError:
            self.__links[key] = link = _Link(key)
        else:
            link.unlink()
        # (Re)insert the link at the tail of the expiry list.
        link.expire = time + self.__ttl
        link.next = root = self.__root
        link.prev = prev = root.prev
        prev.next = root.prev = link

    def __delitem__(self, key, cache_delitem=Cache.__delitem__):
        cache_delitem(self, key)
        link = self.__links.pop(key)
        link.unlink()
        if link.expire < self.__timer():
            raise KeyError(key)

    def __iter__(self):
        root = self.__root
        curr = root.next
        while curr is not root:
            # "freeze" time for iterator access
            with self.__timer as time:
                if not (curr.expire < time):
                    yield curr.key
            curr = curr.next

    def __len__(self):
        root = self.__root
        curr = root.next
        time = self.__timer()
        count = len(self.__links)
        while curr is not root and curr.expire < time:
            count -= 1
            curr = curr.next
        return count

    def __setstate__(self, state):
        self.__dict__.update(state)
        root = self.__root
        root.prev = root.next = root
        for link in sorted(self.__links.values(), key=lambda obj: obj.expire):
            link.next = root
            link.prev = prev = root.prev
            prev.next = root.prev = link
        self.expire(self.__timer())

    def __repr__(self, cache_repr=Cache.__repr__):
        with self.__timer as time:
            self.expire(time)
            return cache_repr(self)

    @property
    def currsize(self):
        with self.__timer as time:
            self.expire(time)
            return super(TTLCache, self).currsize

    @property
    def timer(self):
        """The timer function used by the cache."""
        return self.__timer

    @property
    def ttl(self):
        """The time-to-live value of the cache's items."""
        return self.__ttl

    def expire(self, time=None):
        """Remove expired items from the cache."""
        if time is None:
            time = self.__timer()
        root = self.__root
        curr = root.next
        links = self.__links
        cache_delitem = Cache.__delitem__
        while curr is not root and curr.expire < time:
            cache_delitem(self, curr.key)
            del links[curr.key]
            next = curr.next
            curr.unlink()
            curr = next

    def clear(self):
        with self.__timer as time:
            self.expire(time)
            Cache.clear(self)

    def get(self, *args, **kwargs):
        with self.__timer:
            return Cache.get(self, *args, **kwargs)

    def pop(self, *args, **kwargs):
        with self.__timer:
            return Cache.pop(self, *args, **kwargs)

    def setdefault(self, *args, **kwargs):
        with self.__timer:
            return Cache.setdefault(self, *args, **kwargs)

    def popitem(self):
        """Remove and return the `(key, value)` pair least recently used that
        has not already expired.

        """
        with self.__timer as time:
            self.expire(time)
            try:
                key = next(iter(self.__links))
            except StopIteration:
                raise KeyError('%s is empty' % self.__class__.__name__)
            else:
                return (key, self.pop(key))

    # OrderedDict.move_to_end only exists on Python 3; emulate it otherwise.
    if hasattr(collections.OrderedDict, 'move_to_end'):
        def __getlink(self, key):
            value = self.__links[key]
            self.__links.move_to_end(key)
            return value
    else:
        def __getlink(self, key):
            value = self.__links.pop(key)
            self.__links[key] = value
            return value


#############################################
#       END IMPORT OF CACHING LIBRARY       #
#############################################

cache = TTLCache(
    100,  # Up to 100 items
    1 * 60  # 1 minute cache life (TTL is expressed in seconds)
)


def _get_instance():
    """Build a Tripal client from the ``GALAXY_TRIPAL_*`` environment.

    Raises:
        KeyError: if one of ``GALAXY_TRIPAL_URL``, ``GALAXY_TRIPAL_USER`` or
            ``GALAXY_TRIPAL_PASSWORD`` is not set.
    """
    return tripal.TripalInstance(
        os.environ['GALAXY_TRIPAL_URL'],
        os.environ['GALAXY_TRIPAL_USER'],
        os.environ['GALAXY_TRIPAL_PASSWORD']
    )


def _cached_list(cache_key, fetch, *args, **kwargs):
    """Return the cached value for ``cache_key``, fetching it on a miss.

    Args:
        cache_key: key under which the result is stored in ``cache``.
        fetch: callable invoked as ``fetch(ti, *args, **kwargs)`` on a miss,
            where ``ti`` is a fresh instance from ``_get_instance()``.

    Returns:
        The cached or freshly fetched data.
    """
    # A separate "key in cache" test cannot be trusted: the entry may expire
    # or be evicted between the test and the read.  Read directly and treat
    # KeyError as a miss instead.
    try:
        return cache[cache_key]
    except KeyError:
        pass
    # Only build the Tripal client when the data actually has to be fetched.
    data = fetch(_get_instance(), *args, **kwargs)
    cache[cache_key] = data
    return data


def list_organisms(*args, **kwargs):
    """Return ``(label, organism_id, False)`` tuples, cached under 'orgs'."""
    return _cached_list('orgs', _list_organisms, *args, **kwargs)


def _list_organisms(ti, *args, **kwargs):
    """Fetch organisms from ``ti`` and format them as option tuples.

    The label is "<genus> <species>", followed by the infraspecific name in
    parentheses when that field is non-empty.
    """
    orgs_data = []
    for org in ti.organism.get_organisms():
        clean_name = '%s %s' % (org['genus'], org['species'])
        if org['infraspecific_name']:
            clean_name += ' (%s)' % (org['infraspecific_name'])
        orgs_data.append((clean_name, org['organism_id'], False))
    return orgs_data


def list_analyses(*args, **kwargs):
    """Return ``(name, analysis_id, False)`` tuples, cached under 'analyses'."""
    return _cached_list('analyses', _list_analyses, *args, **kwargs)


def _list_analyses(ti, *args, **kwargs):
    """Fetch analyses from ``ti`` and format them as option tuples."""
    return [
        (an['name'], an['analysis_id'], False)
        for an in ti.analysis.get_analyses()
    ]


def list_blastdbs(*args, **kwargs):
    """Return ``(name, db_id, False)`` tuples, cached under 'blastdbs'."""
    return _cached_list('blastdbs', _list_blastdbs, *args, **kwargs)


def _list_blastdbs(ti, *args, **kwargs):
    """Fetch dbs from ``ti`` and format them as option tuples."""
    return [
        (db['name'], db['db_id'], False)
        for db in ti.db.get_dbs()
    ]
