test_eurl_vtec_wgs_pt: scripts/ReMatCh/modules/seqFromWebTaxon.py comparison

comparison scripts/ReMatCh/modules/seqFromWebTaxon.py @ 3:0cbed1c0a762 draft default tip

planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8

author	cstrittmatter
date	Tue, 28 Jan 2020 10:42:31 -0500
parents	965517909457
children

comparison

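Legend: equal (lines prefixed with a space), deleted (prefixed with "-"), inserted (prefixed with "+"), replaced (a "-" line followed by its "+" replacement)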

-:6837f733b4aa
+:0cbed1c0a762
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 '''
 Adapted from:
 https://github.com/mickaelsilva/pythonscripts/blob/master/SeqOfWeb/SeqFromWebTaxon.py
 mickaelsilva
 '''
-import urllib2
 import sys
-import urllib
+import urllib.request
+import urllib.parse
 import xml.etree.ElementTree as ET
 import time
 import argparse
 import os
-def runSeqFromWebTaxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, print_True):
+def run_seq_from_web_taxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, print_True):
-print '\n' + 'Searching RunIDs for ' + taxonname
+print('\n' + 'Searching RunIDs for ' + taxonname)
-taxonname = urllib.quote(taxonname)
+taxonname = urllib.parse.quote(taxonname)
 url = "http://www.ebi.ac.uk/ena/data/view/Taxon%3A" + taxonname + "&display=xml"
 try:
-content = urllib2.urlopen(url)
+content = urllib.request.urlopen(url)
 xml = content.read()
 tree = ET.fromstring(xml)
 taxonid = ''
 except:
-print "Ooops!There might be a problem with the ena service, try later or check if the xml is well formated at " + url
+print("Ooops!There might be a problem with the ena service, try later or check if the xml is well formated"
+" at " + url)
 raise
 for child in tree:
 taxonid = child.get('taxId')
 if (taxonid):
-print "\n" + "Taxon ID found: " + taxonid
+print("\n" + "Taxon ID found: " + taxonid)
-url = "http://www.ebi.ac.uk/ena/data/warehouse/search?query=%22tax_tree%28" + taxonid + "%29%22&result=read_run&display=xml"
+url = "http://www.ebi.ac.uk/ena/data/warehouse/search?query=%22tax_tree%28" + \
+taxonid + \
+"%29%22&result=read_run&display=xml"
-content = urllib2.urlopen(url)
+content = urllib.request.urlopen(url)
 xml = content.read()
 tree = ET.fromstring(xml)
 runid = ''
 n = 0
-with open(outputfile, "wb") as f:
+with open(outputfile, "wt") as f:
 f.write('#' + str(time.strftime("%d/%m/%Y")) + "\n")
 model = ''
 prjid = ''
 length_line = 0
 omics = ''
 if getmachine is True or getOmicsDataType is True or getLibraryType is True:
 for child2 in child:
 if child2.tag == 'EXPERIMENT_REF':
 expid = child2.get('accession')
 url2 = "http://www.ebi.ac.uk/ena/data/view/" + expid + "&display=xml"
-content = urllib2.urlopen(url2)
+content = urllib.request.urlopen(url2)
 xml = content.read()
 tree2 = ET.fromstring(xml)
 try:
 for child3 in tree2:
 for child4 in child3:
 model = 'not found'
 omics = 'not found'
 libraryType = 'not found'
 f.write(str(runid) + "\t" + model + "\t" + prjid + "\t" + omics + "\t" + libraryType + "\n")
 if print_True:
-line = "run acession %s sequenced on %s from project %s for %s %s end data" % (runid, model, prjid, omics, libraryType)
+line = "run acession %s sequenced on %s from project %s for %s %s end" \
+" data" % (runid, model, prjid, omics, libraryType)
 if length_line < len(line):
 length_line = len(line)
 sys.stderr.write("\r" + line + str(' ' * (length_line - len(line))))
 sys.stderr.flush()
 else:
 line = "run acession %s" % (runid, prjid)
 if length_line < len(line):
 length_line = len(line)
 sys.stderr.write("\r" + line + str(' ' * (length_line - len(line))))
 sys.stderr.flush()
-print "\n"
+print("\n")
-print "\nfound %s run id's" % n
+print("\n"
+"found %s run id's" % n)
 else:
-print "taxon name does not exist"
+print("taxon name does not exist")
 def main():
-parser = argparse.ArgumentParser(description="This program gets a list of sequencing runs and machine were the sequencing was performed, given a taxon name accepted by the European nucleotide Archive")
+parser = argparse.ArgumentParser(description="This program gets a list of sequencing runs and machine were the"
+" sequencing was performed, given a taxon name accepted by the"
+" European nucleotide Archive")
 parser.add_argument('-i', nargs=1, type=str, help='taxon name', metavar='"Streptococcus agalactiae"', required=True)
 parser.add_argument('-o', nargs=1, type=str, help='output file name', required=True)
-parser.add_argument('-g', help='True to include sequencing machine in the output', action='store_true', required=False)
+parser.add_argument('-g', help='True to include sequencing machine in the output', action='store_true',
-parser.add_argument('--getOmicsDataType', help='Informs the programme to include OMICS data type (examples: GENOMIC / TRANSCRIPTOMIC / SYNTHETIC) in the output', action='store_true')
+required=False)
-parser.add_argument('--getLibraryType', help='Informs the programme to include library type (examples: PAIRED / SINGLE) in the output', action='store_true')
+parser.add_argument('--getOmicsDataType', help='Informs the programme to include OMICS data type'
+' (examples: GENOMIC / TRANSCRIPTOMIC / SYNTHETIC) in the output',
+action='store_true')
+parser.add_argument('--getLibraryType', help='Informs the programme to include library type'
+' (examples: PAIRED / SINGLE) in the output', action='store_true')
 args = parser.parse_args()
 getmachine = args.g
 taxonname = args.i[0]
 outputfile = os.path.abspath(args.o[0])
 getOmicsDataType = args.getOmicsDataType
 getLibraryType = args.getLibraryType
-runSeqFromWebTaxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, True)
+run_seq_from_web_taxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, True)
 if __name__ == "__main__":
 main()

Mercurial > repos > cstrittmatter > test_eurl_vtec_wgs_pt

comparison scripts/ReMatCh/modules/seqFromWebTaxon.py @ 3:0cbed1c0a762 draft default tip