annotate get_edge_data.py @ 0:7af33315bc5e draft

Uploaded
author crs4
date Mon, 09 Sep 2013 06:11:47 -0400
parents
children f77ce4f92b46
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7af33315bc5e Uploaded
crs4
parents:
diff changeset
1 # -*- coding: utf-8 -*-
7af33315bc5e Uploaded
crs4
parents:
diff changeset
2
7af33315bc5e Uploaded
crs4
parents:
diff changeset
3 from ftplib import FTP
7af33315bc5e Uploaded
crs4
parents:
diff changeset
4 import optparse
7af33315bc5e Uploaded
crs4
parents:
diff changeset
5 import sys
7af33315bc5e Uploaded
crs4
parents:
diff changeset
6
7af33315bc5e Uploaded
crs4
parents:
diff changeset
class GetData:
    """Download the genome files of one RefSeq accession from the NCBI FTP site.

    The instance stores the accession ID together with the local paths that
    receive the ``.fna``, ``.ptt``, ``.rnt`` and ``.gff`` files found under
    ``/genomes/Bacteria/`` on ``ftp.ncbi.nlm.nih.gov``.
    """

    # Remote-name fragment -> attribute holding the local output path.
    # The order matters: the first fragment found in a remote file name wins.
    _OUTPUTS = (
        ('.fna', 'fnafile'),
        ('.ptt', 'pttfile'),
        ('.rnt', 'rntfile'),
        ('.gff', 'gfffile'),
    )

    def __init__(self, gbkid, fnafile, pttfile, rntfile, gfffile):
        """Store the accession ID and the output paths.

        gbkid   -- accession ID searched for inside remote file names
        fnafile -- local path written with the remote ``.fna`` file
        pttfile -- local path written with the remote ``.ptt`` file
        rntfile -- local path written with the remote ``.rnt`` file
        gfffile -- local path written with the remote ``.gff`` file

        An output path that is ``None`` (or empty) means "do not download
        that file".
        """
        self.gbkid = gbkid
        self.fnafile = fnafile
        self.pttfile = pttfile
        self.rntfile = rntfile
        self.gfffile = gfffile
        self.ftpurl = 'ftp.ncbi.nlm.nih.gov'
        self.folder = '/genomes/Bacteria/'

    def getData(self):
        """Download every matching file of the accession to its output path.

        The strain directory is located with ``_getStrainName`` and its name
        is printed on standard output.  The process exits through
        ``sys.exit`` with an error message when no directory contains the
        accession ID.  A failure while fetching one file is reported on
        standard error and does not prevent the remaining downloads.
        """
        # Local import: the file itself only imports ``FTP`` from ftplib.
        from ftplib import all_errors

        strainName = self._getStrainName()
        print(strainName)
        if not strainName:
            sys.exit("Unrecognized RefSeq Genomic Accession ID")

        ftp = FTP(self.ftpurl)
        try:
            ftp.login()
            ftp.cwd(self.folder + strainName)

            directoryFiles = []
            ftp.retrlines('NLST', directoryFiles.append)
            for fileName in directoryFiles:
                target = self._outputPathFor(fileName)
                if not target:
                    continue
                try:
                    # retrbinary() hands over raw bytes, so the file must be
                    # opened in binary mode.
                    with open(target, 'wb') as outFile:
                        ftp.retrbinary("RETR " + fileName, outFile.write)
                except all_errors as error:
                    # Best effort: keep going with the other files, but do
                    # not hide the failure.
                    sys.stderr.write(
                        "Warning: could not download %s: %s\n"
                        % (fileName, error))
        finally:
            # close() never talks to the server, so it cannot mask an error
            # that is already propagating.
            ftp.close()

    def _outputPathFor(self, fileName):
        """Return the local path that should receive *fileName*, or None.

        None is returned when the name does not contain the accession ID,
        when it contains none of the handled extensions, or when the
        matching output path was not configured.
        """
        if self.gbkid not in fileName:
            return None
        for extension, attribute in self._OUTPUTS:
            if extension in fileName:
                return getattr(self, attribute)
        return None

    def _getStrainName(self):
        """Return the name of the strain directory holding the accession.

        Every entry listed under ``self.folder`` is visited in turn; the
        first one whose listing has a name containing ``self.gbkid`` is
        returned.  None is returned when nothing matches or when no
        accession ID was supplied.
        """
        # Local import: the file itself only imports ``FTP`` from ftplib.
        from ftplib import all_errors

        # Without an ID nothing can match; avoid scanning the whole server
        # (an empty string would otherwise "match" every file name).
        if not self.gbkid:
            return None

        ftp = FTP(self.ftpurl)
        try:
            ftp.login()
            ftp.cwd(self.folder)

            straindirectories = []
            ftp.retrlines("NLST ", straindirectories.append)
            for strainName in straindirectories:
                strainFiles = []
                try:
                    ftp.cwd(self.folder + strainName)
                    ftp.retrlines('NLST', strainFiles.append)
                except all_errors:
                    # Entries that cannot be entered or listed are skipped.
                    continue
                if any(self.gbkid in element for element in strainFiles):
                    return strainName
            return None
        finally:
            ftp.close()
7af33315bc5e Uploaded
crs4
parents:
diff changeset
77
7af33315bc5e Uploaded
crs4
parents:
diff changeset
78
7af33315bc5e Uploaded
crs4
parents:
diff changeset
def __main__():
    """Parse the command line and download the files of one accession.

    Options:
      -i     RefSeq Genomic Accession ID (required)
      --fna  Output FASTA file name
      --ptt  Output PTT file name
      --rnt  Output RNT file name
      --gff  Output GFF file name

    The program exits through ``parser.error`` when positional arguments
    are given or when the accession ID is missing.
    """
    parser = optparse.OptionParser()
    parser.add_option('-i', dest='gbkid', help='RefSeq Genomic Accession ID')
    parser.add_option('--fna', dest='fnafile', help='Output FASTA file name')
    parser.add_option('--ptt', dest='pttfile', help='Output PTT file name')
    parser.add_option('--rnt', dest='rntfile', help='Output RNT file name')
    parser.add_option('--gff', dest='gfffile', help='Output GFF file name')
    (options, args) = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments')
    # Fail fast: without an ID the download step has nothing to look for.
    if not options.gbkid:
        parser.error('Missing RefSeq Genomic Accession ID (-i)')

    S = GetData(options.gbkid, options.fnafile, options.pttfile,
                options.rntfile, options.gfffile)
    S.getData()


if __name__ == "__main__":
    __main__()