Mercurial > repos > cstrittmatter > test_eurl_vtec_wgs_pt
comparison scripts/ReMatCh/modules/seqFromWebTaxon.py @ 3:0cbed1c0a762 draft default tip
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author | cstrittmatter |
---|---|
date | Tue, 28 Jan 2020 10:42:31 -0500 |
parents | 965517909457 |
children |
comparison legend: equal | deleted | inserted | replaced
2:6837f733b4aa | 3:0cbed1c0a762 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python3 |
2 | 2 |
3 # -*- coding: utf-8 -*- | 3 # -*- coding: utf-8 -*- |
4 | 4 |
5 ''' | 5 ''' |
6 Adapted from: | 6 Adapted from: |
7 https://github.com/mickaelsilva/pythonscripts/blob/master/SeqOfWeb/SeqFromWebTaxon.py | 7 https://github.com/mickaelsilva/pythonscripts/blob/master/SeqOfWeb/SeqFromWebTaxon.py |
8 mickaelsilva | 8 mickaelsilva |
9 ''' | 9 ''' |
10 | 10 |
11 import urllib2 | |
12 import sys | 11 import sys |
13 import urllib | 12 import urllib.request |
13 import urllib.parse | |
14 import xml.etree.ElementTree as ET | 14 import xml.etree.ElementTree as ET |
15 import time | 15 import time |
16 import argparse | 16 import argparse |
17 import os | 17 import os |
18 | 18 |
19 | 19 |
20 def runSeqFromWebTaxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, print_True): | 20 def run_seq_from_web_taxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, print_True): |
21 print '\n' + 'Searching RunIDs for ' + taxonname | 21 print('\n' + 'Searching RunIDs for ' + taxonname) |
22 | 22 |
23 taxonname = urllib.quote(taxonname) | 23 taxonname = urllib.parse.quote(taxonname) |
24 url = "http://www.ebi.ac.uk/ena/data/view/Taxon%3A" + taxonname + "&display=xml" | 24 url = "http://www.ebi.ac.uk/ena/data/view/Taxon%3A" + taxonname + "&display=xml" |
25 try: | 25 try: |
26 content = urllib2.urlopen(url) | 26 content = urllib.request.urlopen(url) |
27 xml = content.read() | 27 xml = content.read() |
28 tree = ET.fromstring(xml) | 28 tree = ET.fromstring(xml) |
29 taxonid = '' | 29 taxonid = '' |
30 except: | 30 except: |
31 print "Ooops!There might be a problem with the ena service, try later or check if the xml is well formated at " + url | 31 print("Ooops!There might be a problem with the ena service, try later or check if the xml is well formated" |
32 " at " + url) | |
32 raise | 33 raise |
33 for child in tree: | 34 for child in tree: |
34 taxonid = child.get('taxId') | 35 taxonid = child.get('taxId') |
35 if (taxonid): | 36 if (taxonid): |
36 print "\n" + "Taxon ID found: " + taxonid | 37 print("\n" + "Taxon ID found: " + taxonid) |
37 url = "http://www.ebi.ac.uk/ena/data/warehouse/search?query=%22tax_tree%28" + taxonid + "%29%22&result=read_run&display=xml" | 38 url = "http://www.ebi.ac.uk/ena/data/warehouse/search?query=%22tax_tree%28" + \ |
39 taxonid + \ | |
40 "%29%22&result=read_run&display=xml" | |
38 | 41 |
39 content = urllib2.urlopen(url) | 42 content = urllib.request.urlopen(url) |
40 xml = content.read() | 43 xml = content.read() |
41 tree = ET.fromstring(xml) | 44 tree = ET.fromstring(xml) |
42 | 45 |
43 runid = '' | 46 runid = '' |
44 n = 0 | 47 n = 0 |
45 with open(outputfile, "wb") as f: | 48 with open(outputfile, "wt") as f: |
46 f.write('#' + str(time.strftime("%d/%m/%Y")) + "\n") | 49 f.write('#' + str(time.strftime("%d/%m/%Y")) + "\n") |
47 model = '' | 50 model = '' |
48 prjid = '' | 51 prjid = '' |
49 length_line = 0 | 52 length_line = 0 |
50 omics = '' | 53 omics = '' |
57 if getmachine is True or getOmicsDataType is True or getLibraryType is True: | 60 if getmachine is True or getOmicsDataType is True or getLibraryType is True: |
58 for child2 in child: | 61 for child2 in child: |
59 if child2.tag == 'EXPERIMENT_REF': | 62 if child2.tag == 'EXPERIMENT_REF': |
60 expid = child2.get('accession') | 63 expid = child2.get('accession') |
61 url2 = "http://www.ebi.ac.uk/ena/data/view/" + expid + "&display=xml" | 64 url2 = "http://www.ebi.ac.uk/ena/data/view/" + expid + "&display=xml" |
62 content = urllib2.urlopen(url2) | 65 content = urllib.request.urlopen(url2) |
63 xml = content.read() | 66 xml = content.read() |
64 tree2 = ET.fromstring(xml) | 67 tree2 = ET.fromstring(xml) |
65 try: | 68 try: |
66 for child3 in tree2: | 69 for child3 in tree2: |
67 for child4 in child3: | 70 for child4 in child3: |
85 model = 'not found' | 88 model = 'not found' |
86 omics = 'not found' | 89 omics = 'not found' |
87 libraryType = 'not found' | 90 libraryType = 'not found' |
88 f.write(str(runid) + "\t" + model + "\t" + prjid + "\t" + omics + "\t" + libraryType + "\n") | 91 f.write(str(runid) + "\t" + model + "\t" + prjid + "\t" + omics + "\t" + libraryType + "\n") |
89 if print_True: | 92 if print_True: |
90 line = "run acession %s sequenced on %s from project %s for %s %s end data" % (runid, model, prjid, omics, libraryType) | 93 line = "run acession %s sequenced on %s from project %s for %s %s end" \ |
94 " data" % (runid, model, prjid, omics, libraryType) | |
91 if length_line < len(line): | 95 if length_line < len(line): |
92 length_line = len(line) | 96 length_line = len(line) |
93 sys.stderr.write("\r" + line + str(' ' * (length_line - len(line)))) | 97 sys.stderr.write("\r" + line + str(' ' * (length_line - len(line)))) |
94 sys.stderr.flush() | 98 sys.stderr.flush() |
95 else: | 99 else: |
98 line = "run acession %s" % (runid, prjid) | 102 line = "run acession %s" % (runid, prjid) |
99 if length_line < len(line): | 103 if length_line < len(line): |
100 length_line = len(line) | 104 length_line = len(line) |
101 sys.stderr.write("\r" + line + str(' ' * (length_line - len(line)))) | 105 sys.stderr.write("\r" + line + str(' ' * (length_line - len(line)))) |
102 sys.stderr.flush() | 106 sys.stderr.flush() |
103 print "\n" | 107 print("\n") |
104 print "\nfound %s run id's" % n | 108 print("\n" |
109 "found %s run id's" % n) | |
105 | 110 |
106 else: | 111 else: |
107 print "taxon name does not exist" | 112 print("taxon name does not exist") |
108 | 113 |
109 | 114 |
110 def main(): | 115 def main(): |
111 parser = argparse.ArgumentParser(description="This program gets a list of sequencing runs and machine were the sequencing was performed, given a taxon name accepted by the European nucleotide Archive") | 116 parser = argparse.ArgumentParser(description="This program gets a list of sequencing runs and machine were the" |
117 " sequencing was performed, given a taxon name accepted by the" | |
118 " European nucleotide Archive") | |
112 parser.add_argument('-i', nargs=1, type=str, help='taxon name', metavar='"Streptococcus agalactiae"', required=True) | 119 parser.add_argument('-i', nargs=1, type=str, help='taxon name', metavar='"Streptococcus agalactiae"', required=True) |
113 parser.add_argument('-o', nargs=1, type=str, help='output file name', required=True) | 120 parser.add_argument('-o', nargs=1, type=str, help='output file name', required=True) |
114 parser.add_argument('-g', help='True to include sequencing machine in the output', action='store_true', required=False) | 121 parser.add_argument('-g', help='True to include sequencing machine in the output', action='store_true', |
115 parser.add_argument('--getOmicsDataType', help='Informs the programme to include OMICS data type (examples: GENOMIC / TRANSCRIPTOMIC / SYNTHETIC) in the output', action='store_true') | 122 required=False) |
116 parser.add_argument('--getLibraryType', help='Informs the programme to include library type (examples: PAIRED / SINGLE) in the output', action='store_true') | 123 parser.add_argument('--getOmicsDataType', help='Informs the programme to include OMICS data type' |
124 ' (examples: GENOMIC / TRANSCRIPTOMIC / SYNTHETIC) in the output', | |
125 action='store_true') | |
126 parser.add_argument('--getLibraryType', help='Informs the programme to include library type' | |
127 ' (examples: PAIRED / SINGLE) in the output', action='store_true') | |
117 | 128 |
118 args = parser.parse_args() | 129 args = parser.parse_args() |
119 | 130 |
120 getmachine = args.g | 131 getmachine = args.g |
121 taxonname = args.i[0] | 132 taxonname = args.i[0] |
126 outputfile = os.path.abspath(args.o[0]) | 137 outputfile = os.path.abspath(args.o[0]) |
127 | 138 |
128 getOmicsDataType = args.getOmicsDataType | 139 getOmicsDataType = args.getOmicsDataType |
129 getLibraryType = args.getLibraryType | 140 getLibraryType = args.getLibraryType |
130 | 141 |
131 runSeqFromWebTaxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, True) | 142 run_seq_from_web_taxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, True) |
132 | 143 |
133 | 144 |
134 if __name__ == "__main__": | 145 if __name__ == "__main__": |
135 main() | 146 main() |