Mercurial > repos > iuc > ncbi_entrez_direct_einfo

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst	Tue Mar 22 22:30:36 2022 +0000
@@ -0,0 +1,17 @@
+Galaxy NCBI Entrez Direct Tools
+===============================
+
+This repo requires a readme as administrators should be very aware of some
+restrictions NCBI places on the use of the Entrez service.
+
+NCBI requests that you please limit large jobs to either weekends or
+between 9:00 PM and 5:00 AM Eastern time during weekdays. This is not a
+request that the Galaxy tool can easily service, so we've included it in
+the disclaimer on every tool quite prominently.
+
+Failure to comply with NCBI's policies may result in a block.
+
+Note that these are *IP* level blocks so the Galaxy tools use a
+concatenation of the administrator's emails, and the user email, in
+hopes that NCBI will contact all relevant parties should their system be
+abused.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/__efetch_build_options.py	Tue Mar 22 22:30:36 2022 +0000
@@ -0,0 +1,225 @@
+#!/usr/bin/env python
+
+# Daniel Blankenberg
+# Creates the options for tool interface
+
+# http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi
+db_list = '''<DbName>pubmed</DbName>
+<DbName>protein</DbName>
+<DbName>nuccore</DbName>
+<DbName>nucleotide</DbName>
+<DbName>nucgss</DbName>
+<DbName>nucest</DbName>
+<DbName>structure</DbName>
+<DbName>genome</DbName>
+<DbName>annotinfo</DbName>
+<DbName>assembly</DbName>
+<DbName>bioproject</DbName>
+<DbName>biosample</DbName>
+<DbName>blastdbinfo</DbName>
+<DbName>books</DbName>
+<DbName>cdd</DbName>
+<DbName>clinvar</DbName>
+<DbName>clone</DbName>
+<DbName>gap</DbName>
+<DbName>gapplus</DbName>
+<DbName>grasp</DbName>
+<DbName>dbvar</DbName>
+<DbName>gene</DbName>
+<DbName>gds</DbName>
+<DbName>geoprofiles</DbName>
+<DbName>homologene</DbName>
+<DbName>medgen</DbName>
+<DbName>mesh</DbName>
+<DbName>ncbisearch</DbName>
+<DbName>nlmcatalog</DbName>
+<DbName>omim</DbName>
+<DbName>orgtrack</DbName>
+<DbName>pmc</DbName>
+<DbName>popset</DbName>
+<DbName>probe</DbName>
+<DbName>proteinclusters</DbName>
+<DbName>pcassay</DbName>
+<DbName>biosystems</DbName>
+<DbName>pccompound</DbName>
+<DbName>pcsubstance</DbName>
+<DbName>pubmedhealth</DbName>
+<DbName>seqannot</DbName>
+<DbName>snp</DbName>
+<DbName>sra</DbName>
+<DbName>taxonomy</DbName>
+<DbName>unigene</DbName>
+<DbName>gencoll</DbName>
+<DbName>gtr</DbName>'''.replace("<DbName>", "").replace("</DbName>", "").split("\n")
+
+
+# Fixed-width table of the formats and modes available per database, split
+# into a list of lines (looks like the format table from the efetch help
+# text - TODO confirm the source).
+#
+# Layout, as relied on by the parsing loop further down:
+#   * a line indented by exactly two spaces starts a section and names a
+#     database, or one of the pseudo-sections "(all)" / "(sequences)";
+#   * every other non-blank line is a row: the format, an optional mode, and
+#     a human-readable description, each in its own fixed-width column.
+# The columns are read by character offset, so the spacing inside this
+# literal must not be changed.
+# NOTE: this name shadows the built-in help() for the rest of the script.
+help = '''  (all)
+                 docsum                      DocumentSummarySet XML
+                 docsum             json     DocumentSummarySet JSON
+                 full                        Same as native except for mesh
+                 uid                         Unique Identifier List
+                 url                         Entrez URL
+                 xml                         Same as -format full -mode xml
+
+  bioproject
+                 native                      BioProject Report
+                 native             xml      RecordSet XML
+
+  biosample
+                 native                      BioSample Report
+                 native             xml      BioSampleSet XML
+
+  biosystems
+                 native             xml      Sys-set XML
+
+  gds
+                 native             xml      RecordSet XML
+                 summary                     Summary
+
+  gene
+                 gene_table                  Gene Table
+                 native                      Gene Report
+                 native             asn.1    Entrezgene ASN.1
+                 native             xml      Entrezgene-Set XML
+                 tabular                     Tabular Report
+
+  homologene
+                 alignmentscores             Alignment Scores
+                 fasta                       FASTA
+                 homologene                  Homologene Report
+                 native                      Homologene List
+                 native             asn.1    HG-Entry ASN.1
+                 native             xml      Entrez-Homologene-Set XML
+
+  mesh
+                 full                        Full Record
+                 native                      MeSH Report
+                 native             xml      RecordSet XML
+
+  nlmcatalog
+                 native                      Full Record
+                 native             xml      NLMCatalogRecordSet XML
+
+  pmc
+                 medline                     MEDLINE
+                 native             xml      pmc-articleset XML
+
+  pubmed
+                 abstract                    Abstract
+                 medline                     MEDLINE
+                 native             asn.1    Pubmed-entry ASN.1
+                 native             xml      PubmedArticleSet XML
+
+  (sequences)
+                 acc                         Accession Number
+                 est                         EST Report
+                 fasta                       FASTA
+                 fasta              xml      TinySeq XML
+                 fasta_cds_aa                FASTA of CDS Products
+                 fasta_cds_na                FASTA of Coding Regions
+                 ft                          Feature Table
+                 gb                          GenBank Flatfile
+                 gb                 xml      GBSet XML
+                 gbc                xml      INSDSet XML
+                 gbwithparts                 GenBank with Contig Sequences
+                 gene_fasta                  FASTA of Gene
+                 gp                          GenPept Flatfile
+                 gp                 xml      GBSet XML
+                 gpc                xml      INSDSet XML
+                 gss                         GSS Report
+                 ipg                         Identical Protein Report
+                 ipg                xml      IPGReportSet XML
+                 native             text     Seq-entry ASN.1
+                 native             xml      Bioseq-set XML
+                 seqid                       Seq-id ASN.1
+
+  snp
+                 chr                         Chromosome Report
+                 docset                      Summary
+                 fasta                       FASTA
+                 flt                         Flat File
+                 native             asn.1    Rs ASN.1
+                 native             xml      ExchangeSet XML
+                 rsr                         RS Cluster Report
+                 ssexemplar                  SS Exemplar List
+
+  sra
+                 native             xml      EXPERIMENT_PACKAGE_SET XML
+                 runinfo            xml      SraRunInfo XML
+
+  structure
+                 mmdb                        Ncbi-mime-asn1 strucseq ASN.1
+                 native                      MMDB Report
+                 native             xml      RecordSet XML
+
+  taxonomy
+                 native                      Taxonomy List
+                 native             xml      TaxaSet XML'''.split("\n")
+
+db = {}
+name = None
+all = "(all)"
+for line in help:
+    if line.strip() and line[2] != ' ':
+        name = line.strip()
+        db[name] = {}
+    elif line.strip():
+        format = line[0:len("                 docsum             ")].strip()
+        mode = line[len("                 docsum             "):len("                 docsum             json     ")].strip()
+        if format not in db[name]:
+            db[name][format] = []
+        db[name][format].append(mode)
+
+for name in db_list:
+    if name not in db:
+        db[name] = {}
+
+db["sequences"] = db["(sequences)"]
+del db["(sequences)"]
+
+print('<conditional name="db">')
+print('    <param name="db" type="select" label="Database" argument="-db">')
+for name in sorted(db.keys()):
+    if name == all:
+        continue
+    print('        <option value="%s">%s</option>' % (name, name))
+print('        <option value="">Manual Entry</option>')
+print('    </param>')
+
+for name in sorted(db.keys()):
+    if name == all:
+        continue
+    my_dict = db[all].copy()
+
+    for format, modes in db[name].items():
+        if format in my_dict:
+            for mode in modes:
+                if mode not in my_dict[format]:
+                    my_dict[format].append(mode)
+        else:
+            my_dict[format] = modes
+    if "" not in my_dict:
+        my_dict[""] = [""]
+    print('    <when value="%s">' % name)
+    print('        <conditional name="format">')
+    print('            <param name="format" type="select" label="Format" argument="-format">')
+    for format in sorted(my_dict.keys()):
+        print('                <option value="%s">%s</option>' % (format, format or "None"))
+    print('            </param>')
+    for format in sorted(my_dict.keys()):
+        print('            <when value="%s">' % format)
+        print('                <param name="mode" type="select" label="Mode" argument="-mode">')
+        if "" not in my_dict[format]:
+            my_dict[format].append("")
+        for mode in sorted(my_dict[format]):
+            print('                    <option value="%s">%s</option>' % (mode, mode or "None"))
+        print('                </param>')
+        print('            </when>')
+    print('        </conditional>')
+    print('    </when>')
+print('    <when value="">')
+print('        <param name="db_manual" type="text" label="Database" argument="-db"/>')
+print('        <param name="format" type="text" label="Format" argument="-format"/>')
+print('        <param name="mode" type="text" label="Mode" argument="-mode"/>')
+print('    </when>')
+print('</conditional>')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/einfo.xml	Tue Mar 22 22:30:36 2022 +0000
@@ -0,0 +1,104 @@
+<!-- Galaxy tool wrapper around the Entrez Direct "einfo" command. -->
+<tool id="ncbi_entrez_direct_einfo" name="NCBI EInfo" version="@TOOL_VERSION@">
+  <description>fetch NCBI database metadata</description>
+  <!-- @TOOL_VERSION@, @ECONTACT@, @DISCLAIMER@ and the "requirements" and
+       "citations" macros used below are all defined in macros.xml. -->
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements"/>
+  <version_command>einfo -version</version_command>
+  <!-- Runs the @ECONTACT@ preamble, then einfo: with -dbs when the target is
+       "dbs", otherwise with -db and the selected database. Standard output is
+       redirected into the output_xml dataset. -->
+  <command detect_errors="exit_code"><![CDATA[
+      @ECONTACT@
+      einfo
+      #if str( $db.target ) == "dbs":
+          -dbs
+      #else:
+          -db "${db.db}"
+      #end if
+      > '${output_xml}'
+      ]]>
+</command>
+  <inputs>
+      <!-- "target" switches between listing the databases and describing one
+           database; the database selector only exists for target "db". -->
+      <conditional name="db">
+          <param name="target" type="select" label="List DBs or Info on a DB">
+              <option value="db" selected="True">DB info</option>
+              <option value="dbs">List of DBs</option>
+          </param>
+          <when value="dbs"/>
+          <when value="db">
+              <param name="db" type="select" label="Choose your DB" argument="-db">
+                  <option value="annotinfo">annotinfo</option>
+                  <option value="assembly">assembly</option>
+                  <option value="bioproject">bioproject</option>
+                  <option value="biosample">biosample</option>
+                  <option value="biosystems">biosystems</option>
+                  <option value="blastdbinfo">blastdbinfo</option>
+                  <option value="books">books</option>
+                  <option value="cdd">cdd</option>
+                  <option value="clinvar">clinvar</option>
+                  <option value="clone">clone</option>
+                  <option value="dbvar">dbvar</option>
+                  <option value="gap">gap</option>
+                  <option value="gapplus">gapplus</option>
+                  <option value="gds">gds</option>
+                  <option value="gencoll">gencoll</option>
+                  <option value="gene">gene</option>
+                  <option value="genome">genome</option>
+                  <option value="geoprofiles">geoprofiles</option>
+                  <option value="grasp">grasp</option>
+                  <option value="gtr">gtr</option>
+                  <option value="homologene">homologene</option>
+                  <option value="medgen">medgen</option>
+                  <option value="mesh">mesh</option>
+                  <option value="ncbisearch">ncbisearch</option>
+                  <option value="nlmcatalog">nlmcatalog</option>
+                  <option value="nuccore">nuccore</option>
+                  <option value="nucest">nucest</option>
+                  <option value="nucgss">nucgss</option>
+                  <option value="nucleotide">nucleotide</option>
+                  <option value="omim">omim</option>
+                  <option value="orgtrack">orgtrack</option>
+                  <option value="pcassay">pcassay</option>
+                  <option value="pccompound">pccompound</option>
+                  <option value="pcsubstance">pcsubstance</option>
+                  <option value="pmc">pmc</option>
+                  <option value="popset">popset</option>
+                  <option value="probe">probe</option>
+                  <option value="protein">protein</option>
+                  <option value="proteinclusters">proteinclusters</option>
+                  <option value="pubmed">pubmed</option>
+                  <option value="pubmedhealth">pubmedhealth</option>
+                  <option value="seqannot">seqannot</option>
+                  <option value="snp">snp</option>
+                  <option value="sra">sra</option>
+                  <option value="structure">structure</option>
+                  <option value="taxonomy">taxonomy</option>
+                  <option value="unigene">unigene</option>
+              </param>
+          </when>
+      </conditional>
+  </inputs>
+  <outputs>
+    <data format="xml" name="output_xml"/>
+  </outputs>
+  <!-- Single test: requests info on the "sra" database and expects the output
+       to match the expression "SRA". The "dbs" target has no test here. -->
+  <tests>
+    <test>
+      <param name="db|target" value="db"/>
+      <param name="db|db" value="sra"/>
+      <output name="output_xml">
+          <assert_contents>
+              <has_text_matching expression="SRA" />
+          </assert_contents>
+      </output>
+    </test>
+  </tests>
+  <help><![CDATA[
+NCBI Entrez EInfo
+=================
+
+Provides the number of records indexed in each field of a given database, the
+date of the last update of the database, and the available links from the
+database to other Entrez databases.
+
+@DISCLAIMER@
+      ]]></help>
+  <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Tue Mar 22 22:30:36 2022 +0000
@@ -0,0 +1,102 @@
+<macros>
+    <!-- Version of the entrez-direct package; einfo.xml also uses this token
+         as the tool version. -->
+    <token name="@TOOL_VERSION@">13.3</token>
+    <!-- Package requirement, pinned to @TOOL_VERSION@. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">entrez-direct</requirement>
+        </requirements>
+    </xml>
+    <!-- Command preamble that registers a contact address with econtact.
+         The address is the comma-separated $__admin_users__ value re-joined
+         with ";", followed by $__user_email__ when that is non-empty.
+         econtact's standard output is discarded, and the trailing ";" lets
+         the actual tool command follow on the same command line. -->
+    <token name="@ECONTACT@"><![CDATA[
+        #set $__contact_email__ = ';'.join( str( $__admin_users__ ).split( ',' ) )
+        #if str( $__user_email__ ):
+            #set $__contact_email__ = $__contact_email__ + ";" + str( $__user_email__ )
+        #end if
+        econtact -email "${ __contact_email__ }" -tool "galaxy_ncbi_entrez_direct" > /dev/null ;
+        ]]>
+    </token>
+    <token name="@DISCLAIMER@"><![CDATA[
+Usage Guidelines and Requirements
+=================================
+
+Frequency, Timing, and Registration of E-utility URL Requests
+-------------------------------------------------------------
+
+In order not to overload the E-utility servers, NCBI recommends that users
+limit large jobs to either weekends or between 9:00 PM and 5:00 AM Eastern time
+during weekdays. Failure to comply with this policy may result in an IP address
+being blocked from accessing NCBI.
+
+Minimizing the Number of Requests
+---------------------------------
+
+If a task requires searching for and/or downloading a large number of
+records, it is much more efficient to use the Entrez History to upload
+and/or retrieve these records in batches rather than using separate
+requests for each record. Please refer to Application 3 in Chapter 3
+for an example. Many thousands of IDs can be uploaded using a single
+EPost request, and several hundred records can be downloaded using one
+EFetch request.
+
+
+Disclaimer and Copyright Issues
+-------------------------------
+
+In accordance with requirements of NCBI's E-Utilities, we must provide
+the following disclaimer:
+
+Please note that abstracts in PubMed may incorporate material that may
+be protected by U.S. and foreign copyright laws. All persons
+reproducing, redistributing, or making commercial use of this
+information are expected to adhere to the terms and conditions asserted
+by the copyright holder. Transmission or reproduction of protected
+items beyond that allowed by fair use (PDF) as defined in the copyright
+laws requires the written permission of the copyright owners. NLM
+provides no legal advice concerning distribution of copyrighted
+materials. Please consult your legal counsel. If you wish to do a large
+data mining project on PubMed data, you can enter into a licensing
+agreement and lease the data for free from NLM.
+
+For more information, the `full disclaimer <https://www.ncbi.nlm.nih.gov/home/about/policies/>`__ is available on
+their website.
+
+Liability
+~~~~~~~~~
+
+For documents and software available from this server, the
+U.S. Government does not warrant or assume any legal liability or
+responsibility for the accuracy, completeness, or usefulness of any
+information, apparatus, product, or process disclosed.
+
+Endorsement
+~~~~~~~~~~~
+
+NCBI does not endorse or recommend any commercial
+products, processes, or services. The views and opinions of authors
+expressed on NCBI's Web sites do not necessarily state or reflect those
+of the U.S. Government, and they may not be used for advertising or
+product endorsement purposes.
+
+External Links
+~~~~~~~~~~~~~~
+
+Some NCBI Web pages may provide links to other Internet
+sites for the convenience of users. NCBI is not responsible for the
+availability or content of these external sites, nor does NCBI endorse,
+warrant, or guarantee the products, services, or information described
+or offered at these other Internet sites. Users cannot assume that the
+external sites will abide by the same Privacy Policy to which NCBI
+adheres. It is the responsibility of the user to examine the copyright
+and licensing restrictions of linked pages and to secure all necessary
+permissions.
+        ]]></token>
+    <!-- BibTeX citation for the Entrez Direct book; einfo.xml pulls it in by
+         expanding the "citations" macro. -->
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">@Book{ncbiEDirect,
+          author = {Jonathan Kans},
+          title = {Entrez Direct: E-utilities on the UNIX Command Line},
+          year = {2013},
+          publisher = {National Center for Biotechnology Information, Bethesda, Maryland},
+          note = {http://www.ncbi.nlm.nih.gov/books/NBK179288/}
+            }</citation>
+        </citations>
+    </xml>
+</macros>