0
|
1 # -*- coding: utf-8 -*-
|
|
2
|
|
3 from ftplib import FTP
|
|
4 import optparse
|
|
5 import sys
|
|
6
|
|
class GetData:
    """Download the sequence/annotation files of one genome from an FTP site.

    The instance is configured with an accession ID and four output paths.
    ``getData`` looks for the sub-directory of ``self.folder`` on
    ``self.ftpurl`` that contains a file whose name includes the accession
    ID, then saves the matching ``.fna``, ``.ptt``, ``.rnt`` and ``.gff``
    files to the configured output paths.
    """

    def __init__(self, gbkid, fnafile, pttfile, rntfile, gfffile):
        """Store the accession ID, the output paths and the FTP location.

        Args:
            gbkid: RefSeq Genomic Accession ID; matched as a substring of
                the remote file names.
            fnafile: Local path that receives the remote ``.fna`` file.
            pttfile: Local path that receives the remote ``.ptt`` file.
            rntfile: Local path that receives the remote ``.rnt`` file.
            gfffile: Local path that receives the remote ``.gff`` file.

        Any output path may be ``None``; that file type is then skipped.
        """
        self.gbkid = gbkid
        self.fnafile = fnafile
        self.pttfile = pttfile
        self.rntfile = rntfile
        self.gfffile = gfffile
        self.ftpurl = 'ftp.ncbi.nlm.nih.gov'
        self.folder = '/genomes/Bacteria/'

    def getData(self):
        """Download every matching remote file to its configured local path.

        The resolved strain directory name is printed to stdout. Downloads
        are best-effort: a failure on one file is reported on stderr and the
        remaining files are still attempted.

        Raises:
            SystemExit: if no strain directory contains the accession ID.
        """
        # Local import: the module header only imports ``FTP``.
        from ftplib import all_errors

        strainName = self._getStrainName()
        print(strainName)
        if not strainName:
            sys.exit("Unrecognized RefSeq Genomic Accession ID")

        ftp = FTP(self.ftpurl)
        try:
            ftp.login()
            ftp.cwd(self.folder + strainName)

            directoryFiles = []
            ftp.retrlines('NLST', directoryFiles.append)
            for fileName in directoryFiles:
                target = self._outputFor(fileName)
                if target is None:
                    continue
                try:
                    # retrbinary hands over raw bytes, so the file must be
                    # opened in binary mode.
                    with open(target, 'wb') as outFile:
                        ftp.retrbinary("RETR " + fileName, outFile.write)
                except all_errors as err:
                    # Keep going with the other files, but do not hide the
                    # failure.
                    sys.stderr.write("Warning: could not download %s: %s\n"
                                     % (fileName, err))
        finally:
            self._disconnect(ftp)

    def _outputFor(self, fileName):
        """Return the local output path for a remote file name, or None.

        A remote file is selected when its name contains both the accession
        ID and one of the handled extensions. The extensions are tested in
        the order ``.fna``, ``.ptt``, ``.rnt``, ``.gff`` and the first hit
        wins. ``None`` is returned when nothing matches, when no accession
        ID is set, or when the matching output path was not configured.
        """
        if not self.gbkid or self.gbkid not in fileName:
            return None
        outputs = (
            ('.fna', self.fnafile),
            ('.ptt', self.pttfile),
            ('.rnt', self.rntfile),
            ('.gff', self.gfffile),
        )
        for extension, target in outputs:
            if extension in fileName:
                return target
        return None

    @staticmethod
    def _disconnect(ftp):
        """Close an FTP session, falling back to a hard close on error."""
        from ftplib import all_errors

        try:
            ftp.quit()
        except all_errors:
            ftp.close()

    def _getStrainName(self):
        """Find the strain directory that holds a file for the accession ID.

        Every entry listed under ``self.folder`` is entered in turn and its
        file listing is searched for a name containing ``self.gbkid``.
        Entries that cannot be entered or listed are skipped.

        Returns:
            The name of the first matching directory, or ``None`` when the
            accession ID is unset/empty or no directory matches.
        """
        from ftplib import all_errors

        if not self.gbkid:
            # Nothing to search for; avoids scanning the whole tree.
            return None

        ftp = FTP(self.ftpurl)
        try:
            ftp.login()
            ftp.cwd(self.folder)

            straindirectories = []
            ftp.retrlines("NLST ", straindirectories.append)
            for strainName in straindirectories:
                strainFiles = []
                try:
                    ftp.cwd(self.folder + strainName)
                    ftp.retrlines('NLST', strainFiles.append)
                except all_errors:
                    # Entry could not be entered or listed: try the next one.
                    continue
                if any(self.gbkid in element for element in strainFiles):
                    return strainName
            return None
        finally:
            self._disconnect(ftp)
|
|
77
|
|
78
|
|
def __main__():
    """Parse the command line and download the requested genome files.

    Options:
        -i     RefSeq Genomic Accession ID (required).
        --fna  Output FASTA file name.
        --ptt  Output PTT file name.
        --rnt  Output RNT file name.
        --gff  Output GFF file name.

    Positional arguments are not accepted. Invalid usage is reported
    through ``parser.error``, which prints the message and exits.
    """
    parser = optparse.OptionParser()
    parser.add_option('-i', dest='gbkid', help='RefSeq Genomic Accession ID')
    parser.add_option('--fna', dest='fnafile', help='Output FASTA file name')
    parser.add_option('--ptt', dest='pttfile', help='Output PTT file name')
    parser.add_option('--rnt', dest='rntfile', help='Output RNT file name')
    parser.add_option('--gff', dest='gfffile', help='Output GFF file name')
    (options, args) = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments')
    if not options.gbkid:
        # Without an ID there is nothing to look up; fail before any
        # network access instead of reporting an unrecognized ID later.
        parser.error('Missing RefSeq Genomic Accession ID (-i)')

    S = GetData(options.gbkid, options.fnafile, options.pttfile,
                options.rntfile, options.gfffile)
    S.getData()
|
|
93
|
|
94
|
|
if __name__ == "__main__":
    # Executed as a script rather than imported: run the command-line
    # entry point.
    __main__()
|