comparison scripts/ReMatCh/modules/seqFromWebTaxon.py @ 3:0cbed1c0a762 draft default tip

planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author cstrittmatter
date Tue, 28 Jan 2020 10:42:31 -0500
parents 965517909457
children
comparison
equal deleted inserted replaced
2:6837f733b4aa 3:0cbed1c0a762
1 #!/usr/bin/env python 1 #!/usr/bin/env python3
2 2
3 # -*- coding: utf-8 -*- 3 # -*- coding: utf-8 -*-
4 4
5 ''' 5 '''
6 Adapted from: 6 Adapted from:
7 https://github.com/mickaelsilva/pythonscripts/blob/master/SeqOfWeb/SeqFromWebTaxon.py 7 https://github.com/mickaelsilva/pythonscripts/blob/master/SeqOfWeb/SeqFromWebTaxon.py
8 mickaelsilva 8 mickaelsilva
9 ''' 9 '''
10 10
11 import urllib2
12 import sys 11 import sys
13 import urllib 12 import urllib.request
13 import urllib.parse
14 import xml.etree.ElementTree as ET 14 import xml.etree.ElementTree as ET
15 import time 15 import time
16 import argparse 16 import argparse
17 import os 17 import os
18 18
19 19
20 def runSeqFromWebTaxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, print_True): 20 def run_seq_from_web_taxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, print_True):
21 print '\n' + 'Searching RunIDs for ' + taxonname 21 print('\n' + 'Searching RunIDs for ' + taxonname)
22 22
23 taxonname = urllib.quote(taxonname) 23 taxonname = urllib.parse.quote(taxonname)
24 url = "http://www.ebi.ac.uk/ena/data/view/Taxon%3A" + taxonname + "&display=xml" 24 url = "http://www.ebi.ac.uk/ena/data/view/Taxon%3A" + taxonname + "&display=xml"
25 try: 25 try:
26 content = urllib2.urlopen(url) 26 content = urllib.request.urlopen(url)
27 xml = content.read() 27 xml = content.read()
28 tree = ET.fromstring(xml) 28 tree = ET.fromstring(xml)
29 taxonid = '' 29 taxonid = ''
30 except: 30 except:
31 print "Ooops!There might be a problem with the ena service, try later or check if the xml is well formated at " + url 31 print("Ooops!There might be a problem with the ena service, try later or check if the xml is well formated"
32 " at " + url)
32 raise 33 raise
33 for child in tree: 34 for child in tree:
34 taxonid = child.get('taxId') 35 taxonid = child.get('taxId')
35 if (taxonid): 36 if (taxonid):
36 print "\n" + "Taxon ID found: " + taxonid 37 print("\n" + "Taxon ID found: " + taxonid)
37 url = "http://www.ebi.ac.uk/ena/data/warehouse/search?query=%22tax_tree%28" + taxonid + "%29%22&result=read_run&display=xml" 38 url = "http://www.ebi.ac.uk/ena/data/warehouse/search?query=%22tax_tree%28" + \
39 taxonid + \
40 "%29%22&result=read_run&display=xml"
38 41
39 content = urllib2.urlopen(url) 42 content = urllib.request.urlopen(url)
40 xml = content.read() 43 xml = content.read()
41 tree = ET.fromstring(xml) 44 tree = ET.fromstring(xml)
42 45
43 runid = '' 46 runid = ''
44 n = 0 47 n = 0
45 with open(outputfile, "wb") as f: 48 with open(outputfile, "wt") as f:
46 f.write('#' + str(time.strftime("%d/%m/%Y")) + "\n") 49 f.write('#' + str(time.strftime("%d/%m/%Y")) + "\n")
47 model = '' 50 model = ''
48 prjid = '' 51 prjid = ''
49 length_line = 0 52 length_line = 0
50 omics = '' 53 omics = ''
57 if getmachine is True or getOmicsDataType is True or getLibraryType is True: 60 if getmachine is True or getOmicsDataType is True or getLibraryType is True:
58 for child2 in child: 61 for child2 in child:
59 if child2.tag == 'EXPERIMENT_REF': 62 if child2.tag == 'EXPERIMENT_REF':
60 expid = child2.get('accession') 63 expid = child2.get('accession')
61 url2 = "http://www.ebi.ac.uk/ena/data/view/" + expid + "&display=xml" 64 url2 = "http://www.ebi.ac.uk/ena/data/view/" + expid + "&display=xml"
62 content = urllib2.urlopen(url2) 65 content = urllib.request.urlopen(url2)
63 xml = content.read() 66 xml = content.read()
64 tree2 = ET.fromstring(xml) 67 tree2 = ET.fromstring(xml)
65 try: 68 try:
66 for child3 in tree2: 69 for child3 in tree2:
67 for child4 in child3: 70 for child4 in child3:
85 model = 'not found' 88 model = 'not found'
86 omics = 'not found' 89 omics = 'not found'
87 libraryType = 'not found' 90 libraryType = 'not found'
88 f.write(str(runid) + "\t" + model + "\t" + prjid + "\t" + omics + "\t" + libraryType + "\n") 91 f.write(str(runid) + "\t" + model + "\t" + prjid + "\t" + omics + "\t" + libraryType + "\n")
89 if print_True: 92 if print_True:
90 line = "run acession %s sequenced on %s from project %s for %s %s end data" % (runid, model, prjid, omics, libraryType) 93 line = "run acession %s sequenced on %s from project %s for %s %s end" \
94 " data" % (runid, model, prjid, omics, libraryType)
91 if length_line < len(line): 95 if length_line < len(line):
92 length_line = len(line) 96 length_line = len(line)
93 sys.stderr.write("\r" + line + str(' ' * (length_line - len(line)))) 97 sys.stderr.write("\r" + line + str(' ' * (length_line - len(line))))
94 sys.stderr.flush() 98 sys.stderr.flush()
95 else: 99 else:
98 line = "run acession %s" % (runid, prjid) 102 line = "run acession %s" % (runid, prjid)
99 if length_line < len(line): 103 if length_line < len(line):
100 length_line = len(line) 104 length_line = len(line)
101 sys.stderr.write("\r" + line + str(' ' * (length_line - len(line)))) 105 sys.stderr.write("\r" + line + str(' ' * (length_line - len(line))))
102 sys.stderr.flush() 106 sys.stderr.flush()
103 print "\n" 107 print("\n")
104 print "\nfound %s run id's" % n 108 print("\n"
109 "found %s run id's" % n)
105 110
106 else: 111 else:
107 print "taxon name does not exist" 112 print("taxon name does not exist")
108 113
109 114
110 def main(): 115 def main():
111 parser = argparse.ArgumentParser(description="This program gets a list of sequencing runs and machine were the sequencing was performed, given a taxon name accepted by the European nucleotide Archive") 116 parser = argparse.ArgumentParser(description="This program gets a list of sequencing runs and machine were the"
117 " sequencing was performed, given a taxon name accepted by the"
118 " European nucleotide Archive")
112 parser.add_argument('-i', nargs=1, type=str, help='taxon name', metavar='"Streptococcus agalactiae"', required=True) 119 parser.add_argument('-i', nargs=1, type=str, help='taxon name', metavar='"Streptococcus agalactiae"', required=True)
113 parser.add_argument('-o', nargs=1, type=str, help='output file name', required=True) 120 parser.add_argument('-o', nargs=1, type=str, help='output file name', required=True)
114 parser.add_argument('-g', help='True to include sequencing machine in the output', action='store_true', required=False) 121 parser.add_argument('-g', help='True to include sequencing machine in the output', action='store_true',
115 parser.add_argument('--getOmicsDataType', help='Informs the programme to include OMICS data type (examples: GENOMIC / TRANSCRIPTOMIC / SYNTHETIC) in the output', action='store_true') 122 required=False)
116 parser.add_argument('--getLibraryType', help='Informs the programme to include library type (examples: PAIRED / SINGLE) in the output', action='store_true') 123 parser.add_argument('--getOmicsDataType', help='Informs the programme to include OMICS data type'
124 ' (examples: GENOMIC / TRANSCRIPTOMIC / SYNTHETIC) in the output',
125 action='store_true')
126 parser.add_argument('--getLibraryType', help='Informs the programme to include library type'
127 ' (examples: PAIRED / SINGLE) in the output', action='store_true')
117 128
118 args = parser.parse_args() 129 args = parser.parse_args()
119 130
120 getmachine = args.g 131 getmachine = args.g
121 taxonname = args.i[0] 132 taxonname = args.i[0]
126 outputfile = os.path.abspath(args.o[0]) 137 outputfile = os.path.abspath(args.o[0])
127 138
128 getOmicsDataType = args.getOmicsDataType 139 getOmicsDataType = args.getOmicsDataType
129 getLibraryType = args.getLibraryType 140 getLibraryType = args.getLibraryType
130 141
131 runSeqFromWebTaxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, True) 142 run_seq_from_web_taxon(taxonname, outputfile, getmachine, getOmicsDataType, getLibraryType, True)
132 143
133 144
134 if __name__ == "__main__": 145 if __name__ == "__main__":
135 main() 146 main()