diff scripts/ReMatCh/modules/seqFromWebTaxon.py @ 3:0cbed1c0a762 draft default tip

planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author cstrittmatter
date Tue, 28 Jan 2020 10:42:31 -0500
parents 965517909457
children
line wrap: on
line diff
--- a/scripts/ReMatCh/modules/seqFromWebTaxon.py	Wed Jan 22 09:10:12 2020 -0500
+++ b/scripts/ReMatCh/modules/seqFromWebTaxon.py	Tue Jan 28 10:42:31 2020 -0500
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # -*- coding: utf-8 -*-
 
@@ -8,41 +8,44 @@
 mickaelsilva
 '''
 
-import urllib2
 import sys
-import urllib
+import urllib.request
+import urllib.parse
 import xml.etree.ElementTree as ET
 import time
 import argparse
 import os
 
 
-def runSeqFromWebTaxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, print_True):
-    print '\n' + 'Searching RunIDs for ' + taxonname
+def run_seq_from_web_taxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, print_True):
+    print('\n' + 'Searching RunIDs for ' + taxonname)
 
-    taxonname = urllib.quote(taxonname)
+    taxonname = urllib.parse.quote(taxonname)
     url = "http://www.ebi.ac.uk/ena/data/view/Taxon%3A" + taxonname + "&display=xml"
     try:
-        content = urllib2.urlopen(url)
+        content = urllib.request.urlopen(url)
         xml = content.read()
         tree = ET.fromstring(xml)
         taxonid = ''
     except:
-        print "Ooops!There might be a problem with the ena service, try later or check if the xml is well formated at " + url
+        print("Ooops!There might be a problem with the ena service, try later or check if the xml is well formated"
+              " at " + url)
         raise
     for child in tree:
         taxonid = child.get('taxId')
     if (taxonid):
-        print "\n" + "Taxon ID found: " + taxonid
-        url = "http://www.ebi.ac.uk/ena/data/warehouse/search?query=%22tax_tree%28" + taxonid + "%29%22&result=read_run&display=xml"
+        print("\n" + "Taxon ID found: " + taxonid)
+        url = "http://www.ebi.ac.uk/ena/data/warehouse/search?query=%22tax_tree%28" + \
+              taxonid + \
+              "%29%22&result=read_run&display=xml"
 
-        content = urllib2.urlopen(url)
+        content = urllib.request.urlopen(url)
         xml = content.read()
         tree = ET.fromstring(xml)
 
         runid = ''
         n = 0
-        with open(outputfile, "wb") as f:
+        with open(outputfile, "wt") as f:
             f.write('#' + str(time.strftime("%d/%m/%Y")) + "\n")
             model = ''
             prjid = ''
@@ -59,7 +62,7 @@
                         if child2.tag == 'EXPERIMENT_REF':
                             expid = child2.get('accession')
                             url2 = "http://www.ebi.ac.uk/ena/data/view/" + expid + "&display=xml"
-                            content = urllib2.urlopen(url2)
+                            content = urllib.request.urlopen(url2)
                             xml = content.read()
                             tree2 = ET.fromstring(xml)
                             try:
@@ -87,7 +90,8 @@
                                 libraryType = 'not found'
                     f.write(str(runid) + "\t" + model + "\t" + prjid + "\t" + omics + "\t" + libraryType + "\n")
                     if print_True:
-                        line = "run acession %s sequenced on %s from project %s for %s %s end data" % (runid, model, prjid, omics, libraryType)
+                        line = "run acession %s sequenced on %s from project %s for %s %s end" \
+                               " data" % (runid, model, prjid, omics, libraryType)
                         if length_line < len(line):
                             length_line = len(line)
                         sys.stderr.write("\r" + line + str(' ' * (length_line - len(line))))
@@ -100,20 +104,27 @@
                             length_line = len(line)
                         sys.stderr.write("\r" + line + str(' ' * (length_line - len(line))))
                         sys.stderr.flush()
-        print "\n"
-        print "\nfound %s run id's" % n
+        print("\n")
+        print("\n"
+              "found %s run id's" % n)
 
     else:
-        print "taxon name does not exist"
+        print("taxon name does not exist")
 
 
 def main():
-    parser = argparse.ArgumentParser(description="This program gets a list of sequencing runs and machine were the sequencing was performed, given a taxon name accepted by the European nucleotide Archive")
+    parser = argparse.ArgumentParser(description="This program gets a list of sequencing runs and machine were the"
+                                                 " sequencing was performed, given a taxon name accepted by the"
+                                                 " European nucleotide Archive")
     parser.add_argument('-i', nargs=1, type=str, help='taxon name', metavar='"Streptococcus agalactiae"', required=True)
     parser.add_argument('-o', nargs=1, type=str, help='output file name', required=True)
-    parser.add_argument('-g', help='True to include sequencing machine in the output', action='store_true', required=False)
-    parser.add_argument('--getOmicsDataType', help='Informs the programme to include OMICS data type (examples: GENOMIC / TRANSCRIPTOMIC / SYNTHETIC) in the output', action='store_true')
-    parser.add_argument('--getLibraryType', help='Informs the programme to include library type (examples: PAIRED / SINGLE) in the output', action='store_true')
+    parser.add_argument('-g', help='True to include sequencing machine in the output', action='store_true',
+                        required=False)
+    parser.add_argument('--getOmicsDataType', help='Informs the programme to include OMICS data type'
+                                                   ' (examples: GENOMIC / TRANSCRIPTOMIC / SYNTHETIC) in the output',
+                        action='store_true')
+    parser.add_argument('--getLibraryType', help='Informs the programme to include library type'
+                                                 ' (examples: PAIRED / SINGLE) in the output', action='store_true')
 
     args = parser.parse_args()
 
@@ -128,7 +139,7 @@
     getOmicsDataType = args.getOmicsDataType
     getLibraryType = args.getLibraryType
 
-    runSeqFromWebTaxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, True)
+    run_seq_from_web_taxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, True)
 
 
 if __name__ == "__main__":