comparison get_edge_data.py @ 0:7af33315bc5e draft

Uploaded
author crs4
date Mon, 09 Sep 2013 06:11:47 -0400
parents
children f77ce4f92b46
comparison
equal deleted inserted replaced
-1:000000000000 0:7af33315bc5e
1 # -*- coding: utf-8 -*-
2
3 from ftplib import FTP
4 import optparse
5 import sys
6
class GetData:
    """Download the genome files that belong to one RefSeq accession ID.

    The directories listed under ``folder`` on the FTP host ``ftpurl`` are
    scanned for one containing a file whose name includes ``gbkid``.  The
    ``.fna``, ``.ptt``, ``.rnt`` and ``.gff`` files of that directory whose
    names include ``gbkid`` are then saved to the given output paths.
    """

    def __init__(self, gbkid, fnafile, pttfile, rntfile, gfffile):
        """Store the accession ID, the output paths and the FTP location.

        gbkid   -- RefSeq genomic accession ID looked for in remote file names
        fnafile -- local path that receives the matching ``.fna`` file
        pttfile -- local path that receives the matching ``.ptt`` file
        rntfile -- local path that receives the matching ``.rnt`` file
        gfffile -- local path that receives the matching ``.gff`` file

        An output path left as None means that file type is not downloaded.
        """
        self.gbkid = gbkid
        self.fnafile = fnafile
        self.pttfile = pttfile
        self.rntfile = rntfile
        self.gfffile = gfffile
        self.ftpurl = 'ftp.ncbi.nlm.nih.gov'
        self.folder = '/genomes/Bacteria/'

    def getData(self):
        """Download every requested file of the accession to its output path.

        Prints the name of the strain directory, then retrieves each remote
        file that ``_outputPathFor`` maps to an output path.  A transfer that
        fails is reported on stderr and the remaining files are still tried.

        Exits the process through ``sys.exit`` with an error message when no
        strain directory contains the accession ID.
        """
        # Function-scope import: the file only imports ``FTP`` from ftplib.
        from ftplib import all_errors

        strainName = self._getStrainName()
        # Single-argument call form prints the same text on Python 2 and 3.
        print(strainName)
        if not strainName:
            sys.exit("Unrecognized RefSeq Genomic Accession ID")

        ftp = FTP(self.ftpurl)
        try:
            ftp.login()
            ftp.cwd(self.folder + strainName)

            directoryFiles = []
            ftp.retrlines('NLST', directoryFiles.append)
            for fileName in directoryFiles:
                outPath = self._outputPathFor(fileName)
                if not outPath:
                    # Not a handled file, or its output was not requested.
                    continue
                try:
                    # retrbinary hands raw bytes to the callback, so the
                    # destination has to be opened in binary mode.
                    with open(outPath, 'wb') as outFile:
                        ftp.retrbinary("RETR " + fileName, outFile.write)
                except all_errors as err:
                    # Best effort: keep going, but do not fail silently.
                    sys.stderr.write(
                        "Could not download %s: %s\n" % (fileName, err))
        finally:
            self._disconnect(ftp)

    def _getStrainName(self):
        """Return the name of the strain directory that holds the accession.

        Each entry listed under ``self.folder`` is entered and listed in
        turn; the first one containing a file whose name includes
        ``self.gbkid`` is returned.  Entries that cannot be entered or
        listed (for example plain files) are skipped.

        Returns None when nothing matches or when no accession ID was given.
        """
        if not self.gbkid:
            # Without an ID nothing can match; avoid scanning the server.
            return None

        # Function-scope import: the file only imports ``FTP`` from ftplib.
        from ftplib import all_errors

        ftp = FTP(self.ftpurl)
        try:
            ftp.login()
            ftp.cwd(self.folder)

            straindirectories = []
            ftp.retrlines("NLST ", straindirectories.append)
            for strainName in straindirectories:
                strainFiles = []
                try:
                    ftp.cwd(self.folder + strainName)
                    ftp.retrlines('NLST', strainFiles.append)
                except all_errors:
                    continue
                if any(self.gbkid in element for element in strainFiles):
                    return strainName
            return None
        finally:
            self._disconnect(ftp)

    def _outputPathFor(self, fileName):
        """Return the output path for a remote file name, or None to skip it.

        A file is handled when its name contains ``self.gbkid`` and one of
        the known extensions; the extensions are tried in the fixed order
        ``.fna``, ``.ptt``, ``.rnt``, ``.gff`` and the first hit wins.
        """
        if self.gbkid not in fileName:
            return None
        targets = (
            ('.fna', self.fnafile),
            ('.ptt', self.pttfile),
            ('.rnt', self.rntfile),
            ('.gff', self.gfffile),
        )
        for extension, outPath in targets:
            if extension in fileName:
                return outPath
        return None

    def _disconnect(self, ftp):
        """End an FTP session, forcing the socket shut if QUIT fails."""
        try:
            ftp.quit()
        except Exception:
            # Cleanup must never mask the error that is already propagating.
            ftp.close()
77
78
def __main__():
    """Parse the command line and download the requested genome files.

    Options:
      -i     RefSeq genomic accession ID (required)
      --fna  output FASTA file name
      --ptt  output PTT file name
      --rnt  output RNT file name
      --gff  output GFF file name

    Exits through ``parser.error`` when positional arguments are given or
    when the accession ID is missing.
    """
    parser = optparse.OptionParser()
    parser.add_option('-i', dest='gbkid', help='RefSeq Genomic Accession ID')
    parser.add_option('--fna', dest='fnafile', help='Output FASTA file name')
    parser.add_option('--ptt', dest='pttfile', help='Output PTT file name')
    parser.add_option('--rnt', dest='rntfile', help='Output RNT file name')
    parser.add_option('--gff', dest='gfffile', help='Output GFF file name')
    (options, args) = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments')
    if not options.gbkid:
        # Fail fast: without an ID the remote scan could never find a match.
        parser.error('Missing RefSeq Genomic Accession ID (-i)')

    S = GetData(options.gbkid, options.fnafile, options.pttfile,
                options.rntfile, options.gfffile)
    S.getData()
93
94
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    __main__()