Mercurial > repos > cstrittmatter > test_eurl_vtec_wgs_pt
comparison scripts/ReMatCh/modules/seqFromWebTaxon.py @ 3:0cbed1c0a762 draft default tip
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
| author | cstrittmatter |
|---|---|
| date | Tue, 28 Jan 2020 10:42:31 -0500 |
| parents | 965517909457 |
| children | |
comparison
equal
deleted
inserted
replaced
| 2:6837f733b4aa | 3:0cbed1c0a762 |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python3 |
| 2 | 2 |
| 3 # -*- coding: utf-8 -*- | 3 # -*- coding: utf-8 -*- |
| 4 | 4 |
| 5 ''' | 5 ''' |
| 6 Adapted from: | 6 Adapted from: |
| 7 https://github.com/mickaelsilva/pythonscripts/blob/master/SeqOfWeb/SeqFromWebTaxon.py | 7 https://github.com/mickaelsilva/pythonscripts/blob/master/SeqOfWeb/SeqFromWebTaxon.py |
| 8 mickaelsilva | 8 mickaelsilva |
| 9 ''' | 9 ''' |
| 10 | 10 |
| 11 import urllib2 | |
| 12 import sys | 11 import sys |
| 13 import urllib | 12 import urllib.request |
| | 13 import urllib.parse |
| 14 import xml.etree.ElementTree as ET | 14 import xml.etree.ElementTree as ET |
| 15 import time | 15 import time |
| 16 import argparse | 16 import argparse |
| 17 import os | 17 import os |
| 18 | 18 |
| 19 | 19 |
| 20 def runSeqFromWebTaxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, print_True): | 20 def run_seq_from_web_taxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, print_True): |
| 21 print '\n' + 'Searching RunIDs for ' + taxonname | 21 print('\n' + 'Searching RunIDs for ' + taxonname) |
| 22 | 22 |
| 23 taxonname = urllib.quote(taxonname) | 23 taxonname = urllib.parse.quote(taxonname) |
| 24 url = "http://www.ebi.ac.uk/ena/data/view/Taxon%3A" + taxonname + "&display=xml" | 24 url = "http://www.ebi.ac.uk/ena/data/view/Taxon%3A" + taxonname + "&display=xml" |
| 25 try: | 25 try: |
| 26 content = urllib2.urlopen(url) | 26 content = urllib.request.urlopen(url) |
| 27 xml = content.read() | 27 xml = content.read() |
| 28 tree = ET.fromstring(xml) | 28 tree = ET.fromstring(xml) |
| 29 taxonid = '' | 29 taxonid = '' |
| 30 except: | 30 except: |
| 31 print "Ooops!There might be a problem with the ena service, try later or check if the xml is well formated at " + url | 31 print("Ooops!There might be a problem with the ena service, try later or check if the xml is well formated" |
| | 32 " at " + url) |
| 32 raise | 33 raise |
| 33 for child in tree: | 34 for child in tree: |
| 34 taxonid = child.get('taxId') | 35 taxonid = child.get('taxId') |
| 35 if (taxonid): | 36 if (taxonid): |
| 36 print "\n" + "Taxon ID found: " + taxonid | 37 print("\n" + "Taxon ID found: " + taxonid) |
| 37 url = "http://www.ebi.ac.uk/ena/data/warehouse/search?query=%22tax_tree%28" + taxonid + "%29%22&result=read_run&display=xml" | 38 url = "http://www.ebi.ac.uk/ena/data/warehouse/search?query=%22tax_tree%28" + \ |
| | 39 taxonid + \ |
| | 40 "%29%22&result=read_run&display=xml" |
| 38 | 41 |
| 39 content = urllib2.urlopen(url) | 42 content = urllib.request.urlopen(url) |
| 40 xml = content.read() | 43 xml = content.read() |
| 41 tree = ET.fromstring(xml) | 44 tree = ET.fromstring(xml) |
| 42 | 45 |
| 43 runid = '' | 46 runid = '' |
| 44 n = 0 | 47 n = 0 |
| 45 with open(outputfile, "wb") as f: | 48 with open(outputfile, "wt") as f: |
| 46 f.write('#' + str(time.strftime("%d/%m/%Y")) + "\n") | 49 f.write('#' + str(time.strftime("%d/%m/%Y")) + "\n") |
| 47 model = '' | 50 model = '' |
| 48 prjid = '' | 51 prjid = '' |
| 49 length_line = 0 | 52 length_line = 0 |
| 50 omics = '' | 53 omics = '' |
| 57 if getmachine is True or getOmicsDataType is True or getLibraryType is True: | 60 if getmachine is True or getOmicsDataType is True or getLibraryType is True: |
| 58 for child2 in child: | 61 for child2 in child: |
| 59 if child2.tag == 'EXPERIMENT_REF': | 62 if child2.tag == 'EXPERIMENT_REF': |
| 60 expid = child2.get('accession') | 63 expid = child2.get('accession') |
| 61 url2 = "http://www.ebi.ac.uk/ena/data/view/" + expid + "&display=xml" | 64 url2 = "http://www.ebi.ac.uk/ena/data/view/" + expid + "&display=xml" |
| 62 content = urllib2.urlopen(url2) | 65 content = urllib.request.urlopen(url2) |
| 63 xml = content.read() | 66 xml = content.read() |
| 64 tree2 = ET.fromstring(xml) | 67 tree2 = ET.fromstring(xml) |
| 65 try: | 68 try: |
| 66 for child3 in tree2: | 69 for child3 in tree2: |
| 67 for child4 in child3: | 70 for child4 in child3: |
| 85 model = 'not found' | 88 model = 'not found' |
| 86 omics = 'not found' | 89 omics = 'not found' |
| 87 libraryType = 'not found' | 90 libraryType = 'not found' |
| 88 f.write(str(runid) + "\t" + model + "\t" + prjid + "\t" + omics + "\t" + libraryType + "\n") | 91 f.write(str(runid) + "\t" + model + "\t" + prjid + "\t" + omics + "\t" + libraryType + "\n") |
| 89 if print_True: | 92 if print_True: |
| 90 line = "run acession %s sequenced on %s from project %s for %s %s end data" % (runid, model, prjid, omics, libraryType) | 93 line = "run acession %s sequenced on %s from project %s for %s %s end" \ |
| | 94 " data" % (runid, model, prjid, omics, libraryType) |
| 91 if length_line < len(line): | 95 if length_line < len(line): |
| 92 length_line = len(line) | 96 length_line = len(line) |
| 93 sys.stderr.write("\r" + line + str(' ' * (length_line - len(line)))) | 97 sys.stderr.write("\r" + line + str(' ' * (length_line - len(line)))) |
| 94 sys.stderr.flush() | 98 sys.stderr.flush() |
| 95 else: | 99 else: |
| 98 line = "run acession %s" % (runid, prjid) | 102 line = "run acession %s" % (runid, prjid) |
| 99 if length_line < len(line): | 103 if length_line < len(line): |
| 100 length_line = len(line) | 104 length_line = len(line) |
| 101 sys.stderr.write("\r" + line + str(' ' * (length_line - len(line)))) | 105 sys.stderr.write("\r" + line + str(' ' * (length_line - len(line)))) |
| 102 sys.stderr.flush() | 106 sys.stderr.flush() |
| 103 print "\n" | 107 print("\n") |
| 104 print "\nfound %s run id's" % n | 108 print("\n" |
| | 109 "found %s run id's" % n) |
| 105 | 110 |
| 106 else: | 111 else: |
| 107 print "taxon name does not exist" | 112 print("taxon name does not exist") |
| 108 | 113 |
| 109 | 114 |
| 110 def main(): | 115 def main(): |
| 111 parser = argparse.ArgumentParser(description="This program gets a list of sequencing runs and machine were the sequencing was performed, given a taxon name accepted by the European nucleotide Archive") | 116 parser = argparse.ArgumentParser(description="This program gets a list of sequencing runs and machine were the" |
| | 117 " sequencing was performed, given a taxon name accepted by the" |
| | 118 " European nucleotide Archive") |
| 112 parser.add_argument('-i', nargs=1, type=str, help='taxon name', metavar='"Streptococcus agalactiae"', required=True) | 119 parser.add_argument('-i', nargs=1, type=str, help='taxon name', metavar='"Streptococcus agalactiae"', required=True) |
| 113 parser.add_argument('-o', nargs=1, type=str, help='output file name', required=True) | 120 parser.add_argument('-o', nargs=1, type=str, help='output file name', required=True) |
| 114 parser.add_argument('-g', help='True to include sequencing machine in the output', action='store_true', required=False) | 121 parser.add_argument('-g', help='True to include sequencing machine in the output', action='store_true', |
| 115 parser.add_argument('--getOmicsDataType', help='Informs the programme to include OMICS data type (examples: GENOMIC / TRANSCRIPTOMIC / SYNTHETIC) in the output', action='store_true') | 122 required=False) |
| 116 parser.add_argument('--getLibraryType', help='Informs the programme to include library type (examples: PAIRED / SINGLE) in the output', action='store_true') | 123 parser.add_argument('--getOmicsDataType', help='Informs the programme to include OMICS data type' |
| | 124 ' (examples: GENOMIC / TRANSCRIPTOMIC / SYNTHETIC) in the output', |
| | 125 action='store_true') |
| | 126 parser.add_argument('--getLibraryType', help='Informs the programme to include library type' |
| | 127 ' (examples: PAIRED / SINGLE) in the output', action='store_true') |
| 117 | 128 |
| 118 args = parser.parse_args() | 129 args = parser.parse_args() |
| 119 | 130 |
| 120 getmachine = args.g | 131 getmachine = args.g |
| 121 taxonname = args.i[0] | 132 taxonname = args.i[0] |
| 126 outputfile = os.path.abspath(args.o[0]) | 137 outputfile = os.path.abspath(args.o[0]) |
| 127 | 138 |
| 128 getOmicsDataType = args.getOmicsDataType | 139 getOmicsDataType = args.getOmicsDataType |
| 129 getLibraryType = args.getLibraryType | 140 getLibraryType = args.getLibraryType |
| 130 | 141 |
| 131 runSeqFromWebTaxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, True) | 142 run_seq_from_web_taxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, True) |
| 132 | 143 |
| 133 | 144 |
| 134 if __name__ == "__main__": | 145 if __name__ == "__main__": |
| 135 main() | 146 main() |
