Mercurial > repos > crs4 > edge_pro
comparison get_edge_data.py @ 0:7af33315bc5e draft
Uploaded
author | crs4 |
---|---|
date | Mon, 09 Sep 2013 06:11:47 -0400 |
parents | |
children | f77ce4f92b46 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7af33315bc5e |
---|---|
1 # -*- coding: utf-8 -*- | |
2 | |
3 from ftplib import FTP | |
4 import optparse | |
5 import sys | |
6 | |
class GetData:
    """Download the NCBI genome files that belong to one RefSeq accession.

    The accession is searched for in the strain directories found under
    ``self.folder`` on the FTP host ``self.ftpurl``.  Files in the matching
    directory whose names contain the accession and one of the extensions
    ``.fna``, ``.ptt``, ``.rnt`` or ``.gff`` are saved to the output paths
    supplied at construction time.
    """

    def __init__(self, gbkid, fnafile, pttfile, rntfile, gfffile):
        """Store the accession ID and the four output paths.

        gbkid   -- RefSeq genomic accession ID matched against remote names
        fnafile -- local path for the remote ``.fna`` file
        pttfile -- local path for the remote ``.ptt`` file
        rntfile -- local path for the remote ``.rnt`` file
        gfffile -- local path for the remote ``.gff`` file

        An output path left as None means that file type is not downloaded.
        """
        self.gbkid = gbkid
        self.fnafile = fnafile
        self.pttfile = pttfile
        self.rntfile = rntfile
        self.gfffile = gfffile
        self.ftpurl = 'ftp.ncbi.nlm.nih.gov'
        self.folder = '/genomes/Bacteria/'

    def getData(self):
        """Locate the strain directory for the accession and fetch its files.

        Exits through ``sys.exit`` with an error message when no strain
        directory contains a file matching the accession ID.  A failure
        while fetching one file is reported and the remaining files are
        still attempted.
        """
        # Imported here so the method does not depend on the file-level
        # import list, which only brings in ``FTP``.
        from ftplib import all_errors

        strainName = self._getStrainName()
        if not strainName:
            sys.exit("Unrecognized RefSeq Genomic Accession ID")
        # Single-argument call form works as both a Python 2 print
        # statement and a Python 3 function call.
        print(strainName)

        ftp = FTP(self.ftpurl)
        try:
            ftp.login()
            ftp.cwd(self.folder + strainName)

            directoryFiles = []
            ftp.retrlines('NLST', directoryFiles.append)
            for fileName in directoryFiles:
                destination = self._outputFor(fileName)
                if destination is None:
                    continue
                try:
                    # retrbinary hands over raw bytes, so the file must be
                    # opened in binary mode.
                    with open(destination, 'wb') as outFile:
                        ftp.retrbinary("RETR " + fileName, outFile.write)
                except all_errors as err:
                    # Best effort: report the failure on stdout next to the
                    # strain name and keep going with the other files.
                    print("Could not download %s: %s" % (fileName, err))
        finally:
            # Release the control connection even when a step above fails.
            ftp.close()

    def _outputFor(self, fileName):
        """Return the local path a remote file is saved to, or None to skip.

        A file is selected when its name contains the accession ID and one
        of the known extensions; extensions are tried in the order
        ``.fna``, ``.ptt``, ``.rnt``, ``.gff`` and the first hit wins.
        None is returned for unrelated files and for a file type whose
        output path was not provided.
        """
        if not self.gbkid or self.gbkid not in fileName:
            return None
        candidates = (
            ('.fna', self.fnafile),
            ('.ptt', self.pttfile),
            ('.rnt', self.rntfile),
            ('.gff', self.gfffile),
        )
        for extension, destination in candidates:
            if extension in fileName:
                return destination
        return None

    def _getStrainName(self):
        """Return the strain directory holding the accession, or None.

        Each entry listed under ``self.folder`` is entered in turn and its
        file listing is searched for a name containing the accession ID.
        Entries that cannot be entered or listed are skipped.  No
        connection is made when the accession ID is missing or empty.
        """
        from ftplib import all_errors

        # An empty ID would match every file name, and None cannot be used
        # in a substring test at all, so reject both up front.
        if not self.gbkid:
            return None

        ftp = FTP(self.ftpurl)
        try:
            ftp.login()
            ftp.cwd(self.folder)

            straindirectories = []
            ftp.retrlines("NLST ", straindirectories.append)
            for strainName in straindirectories:
                strainFiles = []
                try:
                    ftp.cwd(self.folder + strainName)
                    ftp.retrlines('NLST', strainFiles.append)
                except all_errors:
                    # Not every listed entry can be entered or listed;
                    # move on to the next candidate.
                    continue
                if any(self.gbkid in element for element in strainFiles):
                    return strainName
            return None
        finally:
            ftp.close()
77 | |
78 | |
def __main__():
    """Parse the command line and download the files for one accession.

    Positional arguments are rejected; every value is passed as an option.
    """
    cli = optparse.OptionParser()
    # (flag, destination attribute, help text), registered in this order.
    option_table = (
        ('-i', 'gbkid', 'RefSeq Genomic Accession ID'),
        ('--fna', 'fnafile', 'Output FASTA file name'),
        ('--ptt', 'pttfile', 'Output PTT file name'),
        ('--rnt', 'rntfile', 'Output RNT file name'),
        ('--gff', 'gfffile', 'Output GFF file name'),
    )
    for flag, destination, description in option_table:
        cli.add_option(flag, dest=destination, help=description)

    opts, leftovers = cli.parse_args()
    if leftovers:
        cli.error('Wrong number of arguments')

    downloader = GetData(
        opts.gbkid,
        opts.fnafile,
        opts.pttfile,
        opts.rntfile,
        opts.gfffile,
    )
    downloader.getData()
93 | |
94 | |
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    __main__()