Mercurial > repos > crs4 > edge_pro
diff get_edge_data.py @ 0:7af33315bc5e draft
Uploaded
author | crs4 |
---|---|
date | Mon, 09 Sep 2013 06:11:47 -0400 |
parents | |
children | f77ce4f92b46 |
line wrap: on
line diff
# -*- coding: utf-8 -*-
"""Download the annotation files of one RefSeq genome from the NCBI FTP site.

The strain directories below ``/genomes/Bacteria/`` on ``ftp.ncbi.nlm.nih.gov``
are scanned for one that contains a file whose name includes the requested
accession ID; the matching ``.fna``, ``.ptt``, ``.rnt`` and ``.gff`` files are
then saved to the output paths given on the command line.
"""

from ftplib import FTP
from ftplib import all_errors
import optparse
import sys


class GetData:
    """Locate and download the files belonging to one accession ID."""

    def __init__(self, gbkid, fnafile, pttfile, rntfile, gfffile):
        """Store the accession ID, the output paths and the FTP location.

        gbkid   -- RefSeq genomic accession ID to look for in file names
        fnafile -- output path for the ``.fna`` file
        pttfile -- output path for the ``.ptt`` file
        rntfile -- output path for the ``.rnt`` file
        gfffile -- output path for the ``.gff`` file
        """
        self.gbkid = gbkid
        self.fnafile = fnafile
        self.pttfile = pttfile
        self.rntfile = rntfile
        self.gfffile = gfffile
        self.ftpurl = 'ftp.ncbi.nlm.nih.gov'
        self.folder = '/genomes/Bacteria/'

    def getData(self):
        """Download every requested file type for ``self.gbkid``.

        Exits the process (``SystemExit``) when the accession ID is missing
        or when no strain directory contains a file named after it.  A
        failure on a single file is reported on stderr and the remaining
        files are still attempted.
        """
        # Fail fast: an empty ID would match every file name ('' in name),
        # and None cannot be searched for at all.
        if not self.gbkid:
            sys.exit("Missing RefSeq Genomic Accession ID")
        strainName = self._getStrainName()
        if not strainName:
            sys.exit("Unrecognized RefSeq Genomic Accession ID")
        print(strainName)

        ftp = self._connect()
        try:
            ftp.cwd(self.folder + strainName)
            for fileName in ftp.nlst():
                outPath = self._outputPathFor(fileName)
                if not outPath:
                    continue
                try:
                    # retrbinary() delivers raw bytes, so the output file
                    # must be opened in binary mode.
                    with open(outPath, 'wb') as outFile:
                        ftp.retrbinary("RETR " + fileName, outFile.write)
                except all_errors as err:
                    # Best effort: keep going with the other files.
                    sys.stderr.write(
                        "Could not download %s: %s\n" % (fileName, err))
        finally:
            self._disconnect(ftp)

    def _getStrainName(self):
        """Return the strain directory holding a file named after the ID.

        Returns None when the ID is empty or no directory matches.
        """
        if not self.gbkid:
            return None
        ftp = self._connect()
        try:
            ftp.cwd(self.folder)
            for strainName in ftp.nlst():
                try:
                    ftp.cwd(self.folder + strainName)
                    strainFiles = ftp.nlst()
                except all_errors:
                    # Entry could not be entered or listed; try the next one.
                    continue
                if any(self.gbkid in element for element in strainFiles):
                    return strainName
        finally:
            self._disconnect(ftp)
        return None

    def _outputPathFor(self, fileName):
        """Return the output path for a remote file name, or None to skip it.

        A file is wanted when its name contains both the accession ID and
        one of the handled extensions; the first matching extension wins.
        """
        if not self.gbkid or self.gbkid not in fileName:
            return None
        targets = (
            ('.fna', self.fnafile),
            ('.ptt', self.pttfile),
            ('.rnt', self.rntfile),
            ('.gff', self.gfffile),
        )
        for extension, outPath in targets:
            if extension in fileName:
                return outPath
        return None

    def _connect(self):
        """Open an anonymous FTP session on ``self.ftpurl``."""
        ftp = FTP(self.ftpurl)
        ftp.login()
        return ftp

    def _disconnect(self, ftp):
        """Close an FTP session politely, falling back to a hard close."""
        try:
            ftp.quit()
        except all_errors:
            ftp.close()


def __main__():
    """Parse the command line and download the requested files."""
    parser = optparse.OptionParser()
    parser.add_option('-i', dest='gbkid', help='RefSeq Genomic Accession ID')
    parser.add_option('--fna', dest='fnafile', help='Output FASTA file name')
    parser.add_option('--ptt', dest='pttfile', help='Output PTT file name')
    parser.add_option('--rnt', dest='rntfile', help='Output RNT file name')
    parser.add_option('--gff', dest='gfffile', help='Output GFF file name')
    (options, args) = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments')
    if not options.gbkid:
        parser.error('Missing RefSeq Genomic Accession ID (-i)')

    S = GetData(options.gbkid, options.fnafile, options.pttfile,
                options.rntfile, options.gfffile)
    S.getData()


if __name__ == "__main__":
    __main__()