Mercurial > repos > crs4 > edge_pro
view get_edge_data.py @ 0:7af33315bc5e draft
Uploaded
author | crs4 |
---|---|
date | Mon, 09 Sep 2013 06:11:47 -0400 |
parents | |
children | f77ce4f92b46 |
line wrap: on
line source
# -*- coding: utf-8 -*-
"""Download the sequence and annotation files of a genome from the NCBI FTP site.

Given a RefSeq genomic accession ID, the script looks for the strain
directory that contains files named after that ID and saves its FASTA,
PTT, RNT and GFF files to the paths given on the command line.
"""
from ftplib import FTP, all_errors
import optparse
import sys


class GetData(object):
    """Locate and download the files belonging to one RefSeq accession ID."""

    def __init__(self, gbkid, fnafile, pttfile, rntfile, gfffile):
        """Store the accession ID and the four output paths.

        gbkid   -- RefSeq genomic accession ID searched for in remote file names
        fnafile -- local path receiving the remote '.fna' file
        pttfile -- local path receiving the remote '.ptt' file
        rntfile -- local path receiving the remote '.rnt' file
        gfffile -- local path receiving the remote '.gff' file
        """
        self.gbkid = gbkid
        self.fnafile = fnafile
        self.pttfile = pttfile
        self.rntfile = rntfile
        self.gfffile = gfffile
        self.ftpurl = 'ftp.ncbi.nlm.nih.gov'
        self.folder = '/genomes/Bacteria/'

    def getData(self):
        """Download every matching file of the strain that owns ``self.gbkid``.

        Prints the strain directory name once it has been found.  Exits the
        interpreter through ``sys.exit`` with an error message when no strain
        directory contains the accession ID.  Errors while connecting or
        listing the strain directory propagate; a failure on a single file
        is skipped so the remaining files are still fetched.
        """
        strainName = self._getStrainName()
        if not strainName:
            sys.exit("Unrecognized RefSeq Genomic Accession ID")
        print(strainName)

        ftp = self._connect()
        try:
            ftp.cwd(self.folder + strainName)
            directoryFiles = []
            ftp.retrlines('NLST', directoryFiles.append)
            for fileName in directoryFiles:
                target = self._outputFor(fileName)
                if not target:
                    continue
                try:
                    # retrbinary hands raw bytes to the callback, so the
                    # file has to be opened in binary mode.
                    with open(target, 'wb') as outFile:
                        ftp.retrbinary("RETR " + fileName, outFile.write)
                except all_errors:
                    # Best effort: all_errors covers FTP, socket and file
                    # I/O failures; keep going with the other files.
                    pass
        finally:
            self._disconnect(ftp)

    def _getStrainName(self):
        """Return the strain directory holding a file named after the ID.

        Walks every entry listed under ``self.folder`` and returns the first
        one whose listing contains ``self.gbkid`` as a substring of a file
        name.  Returns None when nothing matches or when no ID was given.
        """
        if not self.gbkid:
            return None

        ftp = self._connect()
        try:
            ftp.cwd(self.folder)
            straindirectories = []
            ftp.retrlines('NLST', straindirectories.append)
            for strainName in straindirectories:
                strainFiles = []
                try:
                    ftp.cwd(self.folder + strainName)
                    ftp.retrlines('NLST', strainFiles.append)
                except all_errors:
                    # The entry could not be entered or listed; skip it.
                    continue
                if any(self.gbkid in element for element in strainFiles):
                    return strainName
            return None
        finally:
            self._disconnect(ftp)

    def _outputFor(self, fileName):
        """Return the local path a remote file must be saved to, or None.

        A remote file is wanted when its name contains the accession ID and
        one of the handled extensions; extensions are tried in the order
        '.fna', '.ptt', '.rnt', '.gff' and the first hit wins.
        """
        if not self.gbkid or self.gbkid not in fileName:
            return None
        targets = (
            ('.fna', self.fnafile),
            ('.ptt', self.pttfile),
            ('.rnt', self.rntfile),
            ('.gff', self.gfffile),
        )
        for extension, target in targets:
            if extension in fileName:
                return target
        return None

    def _connect(self):
        """Open an anonymous FTP session on ``self.ftpurl`` and return it."""
        ftp = FTP(self.ftpurl)
        try:
            ftp.login()
        except all_errors:
            ftp.close()
            raise
        return ftp

    @staticmethod
    def _disconnect(ftp):
        """End the FTP session, dropping the socket if QUIT cannot be sent."""
        try:
            ftp.quit()
        except all_errors:
            ftp.close()


def __main__():
    """Parse the command line and download the requested genome files."""
    parser = optparse.OptionParser()
    parser.add_option('-i', dest='gbkid', help='RefSeq Genomic Accession ID')
    parser.add_option('--fna', dest='fnafile', help='Output FASTA file name')
    parser.add_option('--ptt', dest='pttfile', help='Output PTT file name')
    parser.add_option('--rnt', dest='rntfile', help='Output RNT file name')
    parser.add_option('--gff', dest='gfffile', help='Output GFF file name')
    (options, args) = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments')
    if not options.gbkid:
        # Without an ID nothing can match; fail before touching the network.
        parser.error('Missing RefSeq Genomic Accession ID (-i)')

    S = GetData(options.gbkid, options.fnafile, options.pttfile,
                options.rntfile, options.gfffile)
    S.getData()


if __name__ == "__main__":
    __main__()