Mercurial > repos > cstrittmatter > test_eurl_vtec_wgs_pt
diff scripts/ReMatCh/modules/seqFromWebTaxon.py @ 3:0cbed1c0a762 draft default tip
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author | cstrittmatter |
---|---|
date | Tue, 28 Jan 2020 10:42:31 -0500 |
parents | 965517909457 |
children |
line wrap: on
line diff
--- a/scripts/ReMatCh/modules/seqFromWebTaxon.py Wed Jan 22 09:10:12 2020 -0500 +++ b/scripts/ReMatCh/modules/seqFromWebTaxon.py Tue Jan 28 10:42:31 2020 -0500 @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- @@ -8,41 +8,44 @@ mickaelsilva ''' -import urllib2 import sys -import urllib +import urllib.request +import urllib.parse import xml.etree.ElementTree as ET import time import argparse import os -def runSeqFromWebTaxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, print_True): - print '\n' + 'Searching RunIDs for ' + taxonname +def run_seq_from_web_taxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, print_True): + print('\n' + 'Searching RunIDs for ' + taxonname) - taxonname = urllib.quote(taxonname) + taxonname = urllib.parse.quote(taxonname) url = "http://www.ebi.ac.uk/ena/data/view/Taxon%3A" + taxonname + "&display=xml" try: - content = urllib2.urlopen(url) + content = urllib.request.urlopen(url) xml = content.read() tree = ET.fromstring(xml) taxonid = '' except: - print "Ooops!There might be a problem with the ena service, try later or check if the xml is well formated at " + url + print("Ooops!There might be a problem with the ena service, try later or check if the xml is well formated" + " at " + url) raise for child in tree: taxonid = child.get('taxId') if (taxonid): - print "\n" + "Taxon ID found: " + taxonid - url = "http://www.ebi.ac.uk/ena/data/warehouse/search?query=%22tax_tree%28" + taxonid + "%29%22&result=read_run&display=xml" + print("\n" + "Taxon ID found: " + taxonid) + url = "http://www.ebi.ac.uk/ena/data/warehouse/search?query=%22tax_tree%28" + \ + taxonid + \ + "%29%22&result=read_run&display=xml" - content = urllib2.urlopen(url) + content = urllib.request.urlopen(url) xml = content.read() tree = ET.fromstring(xml) runid = '' n = 0 - with open(outputfile, "wb") as f: + with open(outputfile, "wt") as f: f.write('#' + str(time.strftime("%d/%m/%Y")) + "\n") model = '' prjid = '' @@ -59,7 +62,7 @@ if child2.tag == 'EXPERIMENT_REF': expid = child2.get('accession') url2 = "http://www.ebi.ac.uk/ena/data/view/" + expid + "&display=xml" - content = urllib2.urlopen(url2) + content = urllib.request.urlopen(url2) xml = content.read() tree2 = ET.fromstring(xml) try: @@ -87,7 +90,8 @@ libraryType = 'not found' f.write(str(runid) + "\t" + model + "\t" + prjid + "\t" + omics + "\t" + libraryType + "\n") if print_True: - line = "run acession %s sequenced on %s from project %s for %s %s end data" % (runid, model, prjid, omics, libraryType) + line = "run acession %s sequenced on %s from project %s for %s %s end" \ + " data" % (runid, model, prjid, omics, libraryType) if length_line < len(line): length_line = len(line) sys.stderr.write("\r" + line + str(' ' * (length_line - len(line)))) @@ -100,20 +104,27 @@ length_line = len(line) sys.stderr.write("\r" + line + str(' ' * (length_line - len(line)))) sys.stderr.flush() - print "\n" - print "\nfound %s run id's" % n + print("\n") + print("\n" + "found %s run id's" % n) else: - print "taxon name does not exist" + print("taxon name does not exist") def main(): - parser = argparse.ArgumentParser(description="This program gets a list of sequencing runs and machine were the sequencing was performed, given a taxon name accepted by the European nucleotide Archive") + parser = argparse.ArgumentParser(description="This program gets a list of sequencing runs and machine were the" + " sequencing was performed, given a taxon name accepted by the" + " European nucleotide Archive") parser.add_argument('-i', nargs=1, type=str, help='taxon name', metavar='"Streptococcus agalactiae"', required=True) parser.add_argument('-o', nargs=1, type=str, help='output file name', required=True) - parser.add_argument('-g', help='True to include sequencing machine in the output', action='store_true', required=False) - parser.add_argument('--getOmicsDataType', help='Informs the programme to include OMICS data type (examples: GENOMIC / TRANSCRIPTOMIC / SYNTHETIC) in the output', action='store_true') - parser.add_argument('--getLibraryType', help='Informs the programme to include library type (examples: PAIRED / SINGLE) in the output', action='store_true') + parser.add_argument('-g', help='True to include sequencing machine in the output', action='store_true', + required=False) + parser.add_argument('--getOmicsDataType', help='Informs the programme to include OMICS data type' + ' (examples: GENOMIC / TRANSCRIPTOMIC / SYNTHETIC) in the output', + action='store_true') + parser.add_argument('--getLibraryType', help='Informs the programme to include library type' + ' (examples: PAIRED / SINGLE) in the output', action='store_true') args = parser.parse_args() @@ -128,7 +139,7 @@ getOmicsDataType = args.getOmicsDataType getLibraryType = args.getLibraryType - runSeqFromWebTaxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, True) + run_seq_from_web_taxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, True) if __name__ == "__main__":