# HG changeset patch
# User artbio
# Date 1510164026 18000
# Node ID 8be88084f89cc6b30ebd8f7172822800b44e6ff0
# Parent  50f5ef3313bb811358f443de4b02c4139415e952
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/fetch_fasta_from_ncbi commit ab45487db8cc69750f92a40d763d51ffac940e25
diff -r 50f5ef3313bb -r 8be88084f89c fetch_fasta_from_NCBI.py
--- a/fetch_fasta_from_NCBI.py	Sun Oct 15 20:13:13 2017 -0400
+++ b/fetch_fasta_from_NCBI.py	Wed Nov 08 13:00:26 2017 -0500
@@ -21,9 +21,9 @@
 queries are 1 sec delayed, to satisfy NCBI guidelines
 (more than what they request)
 """
+import argparse
 import httplib
 import logging
-import optparse
 import re
 import sys
 import time
@@ -55,26 +55,15 @@
         self.count = 0
         self.webenv = ""
         self.query_key = ""
-        self.datetype = options.datetype
-        if options.reldate:
-            self.reldate = options.reldate
-        else:
-            self.reldate = ''
-        if options.mindate:
-            self.mindate = options.mindate
-        else:
-            self.mindate = ''
-        if options.maxdate:
-            self.maxdate = options.maxdate
-        else:
-            self.maxdate = ''
+
+    def dry_run(self):
+        self.get_count_value()
 
     def retrieve(self):
         """
         Retrieve the fasta sequences corresponding to the query
         """
         self.get_count_value()
-
         # If no UIDs are found exit script
         if self.count > 0:
             self.get_uids_list()
@@ -101,8 +90,7 @@
         self.logger.info("for Query: %s and database: %s" %
                          (self.query_string, self.dbname))
         querylog = self.esearch(self.dbname, self.query_string, '', '',
-                                "count", self.datetype, self.reldate,
-                                self.mindate, self.maxdate)
+                                "count")
         self.logger.debug("Query response:")
         for line in querylog:
             self.logger.debug(line.rstrip())
@@ -127,8 +115,7 @@
                          num_batches)
         for n in range(num_batches):
             querylog = self.esearch(self.dbname, self.query_string, n*retmax,
-                                    retmax, '', self.datetype, self.reldate,
-                                    self.mindate, self.maxdate)
+                                    retmax, '')
             for line in querylog:
                 if '<Id>' in line and '</Id>' in line:
                     uid = (line[line.find('<Id>')+len('<Id>'):
@@ -136,19 +123,14 @@
                     self.ids.append(uid)
             self.logger.info("Retrieved %d UIDs" % len(self.ids))
 
-    def esearch(self, db, term, retstart, retmax, rettype, datetype, reldate,
-                mindate, maxdate):
+    def esearch(self, db, term, retstart, retmax, rettype):
         url = self.base + "esearch.fcgi"
         self.logger.debug("url: %s" % url)
         values = {'db': db,
                   'term': term,
                   'rettype': rettype,
                   'retstart': retstart,
-                  'retmax': retmax,
-                  'datetype': datetype,
-                  'reldate': reldate,
-                  'mindate': mindate,
-                  'maxdate': maxdate}
+                  'retmax': retmax}
         data = urllib.urlencode(values)
         self.logger.debug("data: %s" % str(data))
         req = urllib2.Request(url, data)
@@ -225,9 +207,14 @@
             counter += 1
             self.logger.info("Server Transaction Trial:  %s" % (counter))
             try:
+                self.logger.debug("Going to open")
                 response = urllib2.urlopen(req)
+                self.logger.debug("Going to get code")
                 response_code = response.getcode()
+                self.logger.debug("Going to read, the code was : %s",
+                                  str(response_code))
                 fasta = response.read()
+                self.logger.debug("Did all that")
                 response.close()
                 if((response_code != 200) or
                    ("Resource temporarily unavailable" in fasta) or
@@ -348,63 +335,46 @@
 LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
 
 
+def command_parse():
+    parser = argparse.ArgumentParser(description='Retrieve data from NCBI')
+    parser.add_argument('-i', dest='query_string', help='NCBI Query String',
+                        required=True)
+    parser.add_argument('-o', dest='outname', help='output file name')
+    parser.add_argument('-d', dest='dbname', help='database type')
+    parser.add_argument('--count', '-c', dest='count_ids',
+                        action='store_true', default=False,
+                        help='dry run outputting only the number of sequences\
+                        found')
+    parser.add_argument('-l', '--logfile', help='log file (default=stderr)')
+    parser.add_argument('--loglevel', choices=LOG_LEVELS, default='INFO',
+                        help='logging level (default: INFO)')
+    args = parser.parse_args()
+    return args
+
+
 def __main__():
     """ main function """
-    parser = optparse.OptionParser(description='Retrieve data from NCBI')
-    parser.add_option('-i', dest='query_string', help='NCBI Query String')
-    parser.add_option('-o', dest='outname', help='output file name')
-    parser.add_option('-d', dest='dbname', help='database type')
-    parser.add_option('-l', '--logfile', help='log file (default=stderr)')
-    parser.add_option('--datetype', dest='datetype',
-                      choices=['mdat', 'pdat'],
-                      help='Type of date used to limit a search.\
-                            [ mdat(modification date), pdat(publication date)]\
-                            (default=pdat)', default='pdat')
-    parser.add_option('--reldate', dest='reldate',
-                      help='When reldate is set to an integer n, the search\
-                            returns only those items that have a date\
-                            specified by datetype within the last n days.')
-    parser.add_option('--maxdate', dest='maxdate',
-                      help='Date range used to limit a search result by the\
-                            date specified by datetype. These two parameters\
-                            (mindate, maxdate) must be used together to\
-                            specify an arbitrary date range. The general date\
-                            format is YYYY/MM/DD, and these variants are also\
-                            allowed: YYYY, YYYY/MM.')
-    parser.add_option('--mindate', dest='mindate',
-                      help='Date range used to limit a search result by the\
-                            date specified by datetype. These two parameters\
-                            (mindate, maxdate) must be used together to\
-                            specify an arbitrary date range. The general date\
-                            format is YYYY/MM/DD, and these variants are also\
-                            allowed: YYYY, YYYY/MM.')
-    parser.add_option('--loglevel', choices=LOG_LEVELS, default='INFO',
-                      help='logging level (default: INFO)')
-    (options, args) = parser.parse_args()
-    if len(args) > 0:
-        parser.error('Wrong number of arguments')
-    if((options.reldate and options.maxdate) or
-       (options.reldate and options.mindate)):
-        parser.error("You can't mix 'reldate' and 'maxdate', 'mindate'\
-                     parameters")
-    if((options.mindate and not options.maxdate) or
-       (options.maxdate and not options.mindate)):
-        parser.error("mindate and maxdate must be used together")
-
-    log_level = getattr(logging, options.loglevel)
+    args = command_parse()
+    log_level = getattr(logging, args.loglevel)
     kwargs = {'format': LOG_FORMAT,
               'datefmt': LOG_DATEFMT,
               'level': log_level}
-    if options.logfile:
-        kwargs['filename'] = options.logfile
+    if args.logfile:
+        kwargs['filename'] = args.logfile
     logging.basicConfig(**kwargs)
     logger = logging.getLogger('data_from_NCBI')
 
-    E = Eutils(options, logger)
-    try:
-        E.retrieve()
-    except Exception:
-        sys.exit(1)
+    E = Eutils(args, logger)
+    if args.count_ids:
+        try:
+            E.dry_run()
+        except Exception:
+            sys.exit(1)
+    else:
+        try:
+            E.retrieve()
+        except Exception:
+            sys.exit(1)
 
 
 if __name__ == "__main__":
diff -r 50f5ef3313bb -r 8be88084f89c fetch_fasta_from_NCBI.xml
--- a/fetch_fasta_from_NCBI.xml	Sun Oct 15 20:13:13 2017 -0400
+++ b/fetch_fasta_from_NCBI.xml	Wed Nov 08 13:00:26 2017 -0500
@@ -1,16 +1,12 @@
-
+
   
   
 
   
@@ -30,21 +26,12 @@
       
       
     
-    
-        
-        
-            
-                
-                
-            
-            
-            
-        
-        
-    
+    
   
   
-    
+    
+       dry_run == False
+    
     
   
   
@@ -52,16 +39,13 @@
         
         
         
-        
     
     
         
         
-        
-        
-        
-        
-        
+        
+        
+        
     
   
   
diff -r 50f5ef3313bb -r 8be88084f89c test-data/dry_run.log
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dry_run.log	Wed Nov 08 13:00:26 2017 -0500
@@ -0,0 +1,4 @@
+2017-11-08 16:27:09|INFO    |retrieving data from https://eutils.ncbi.nlm.nih.gov/entrez/eutils/
+2017-11-08 16:27:09|INFO    |for Query: Drosophila melanogaster[Organism] AND Gcn5[Title] and database: nuccore
+2017-11-08 16:27:10|INFO    |Found 4 UIDs
+