Mercurial > repos > crs4 > edge_pro
annotate get_edge_data.py @ 5:407b894abb08 draft
Uploaded
| author | crs4 |
|---|---|
| date | Thu, 03 Nov 2016 10:56:44 -0400 |
| parents | f77ce4f92b46 |
| children |
| rev | line source |
|---|---|
| 0 | 1 # -*- coding: utf-8 -*- |
| 2 | |
| 3 from ftplib import FTP | |
| 4 import optparse | |
| 5 import sys | |
| 6 | |
|
1
f77ce4f92b46
Use $GALAXY_SLOTS instead of $EDGE_PRO_SITE_OPTIONS. Add dependency on bowtie2. Add readme.rst .
crs4
parents:
0
diff
changeset
|
class GetData(object):
    """Download the FASTA/PTT/RNT/GFF files of one RefSeq genome from NCBI.

    Instantiating the class performs the whole job: it locates the strain
    directory on the FTP server whose listing contains ``gbkid`` and then
    copies every matching ``.fna``, ``.ptt``, ``.rnt`` and ``.gff`` file to
    the corresponding local output path.
    """

    def __init__(self, gbkid, fnafile, pttfile, rntfile, gfffile):
        """Locate the strain directory for ``gbkid`` and download its files.

        Parameters:
            gbkid: RefSeq Genomic Accession ID; matched as a substring of the
                remote file names.
            fnafile, pttfile, rntfile, gfffile: local output paths for the
                ``.fna``, ``.ptt``, ``.rnt`` and ``.gff`` files. A path left
                as ``None`` means that file type is not downloaded.

        Exits through ``sys.exit`` when no strain directory contains the
        accession ID. Errors while logging in or entering the strain
        directory propagate; a failure on an individual file is reported on
        stderr and the remaining files are still attempted.
        """
        # Imported here because the module only imports FTP from ftplib.
        from ftplib import all_errors

        self.gbkid = gbkid
        self.fnafile = fnafile
        self.pttfile = pttfile
        self.rntfile = rntfile
        self.gfffile = gfffile
        self.ftpurl = 'ftp.ncbi.nlm.nih.gov'
        # Previously '/genomes/Bacteria/'.
        self.folder = '/genomes/archive/old_refseq/Bacteria/'
        strainName = self._getStrainName()
        # Single-argument call form: valid on both Python 2 and Python 3.
        print(strainName)
        if not strainName:
            sys.exit("Unrecognized RefSeq Genomic Accession ID")
        ftp = FTP(self.ftpurl)
        try:
            ftp.login()
            ftp.cwd(self.folder + strainName)
            directoryFiles = []
            ftp.retrlines('NLST', directoryFiles.append)
            for fileName in directoryFiles:
                destination = self._outputFor(fileName)
                if destination is None:
                    continue
                try:
                    # retrbinary hands raw bytes to the callback, so the
                    # output file must be opened in binary mode.
                    with open(destination, 'wb') as outFile:
                        ftp.retrbinary("RETR " + fileName, outFile.write)
                except all_errors as err:
                    # Best effort: report the failure and keep going.
                    sys.stderr.write(
                        "Failed to download %s: %s\n" % (fileName, err))
        finally:
            # Always release the control connection, even on error.
            ftp.close()

    def _outputFor(self, fileName):
        """Return the local output path for a remote file name.

        Returns ``None`` when the file does not belong to the requested
        accession ID, has none of the handled extensions, or its output path
        was not provided. Extensions are tested in the order ``.fna``,
        ``.ptt``, ``.rnt``, ``.gff`` and the first match wins.
        """
        if not self.gbkid or self.gbkid not in fileName:
            return None
        destinations = (
            ('.fna', self.fnafile),
            ('.ptt', self.pttfile),
            ('.rnt', self.rntfile),
            ('.gff', self.gfffile),
        )
        for extension, destination in destinations:
            if extension in fileName:
                return destination
        return None

    def _getStrainName(self):
        """Return the strain directory whose listing contains ``self.gbkid``.

        Every entry under ``self.folder`` is entered and listed in turn; the
        first entry with a file name containing the accession ID is
        returned. Returns ``None`` when nothing matches or when no accession
        ID was given (checked before any connection is opened).
        """
        # Imported here because the module only imports FTP from ftplib.
        from ftplib import all_errors

        if not self.gbkid:
            return None
        ftp = FTP(self.ftpurl)
        try:
            ftp.login()
            ftp.cwd(self.folder)
            straindirectories = []
            ftp.retrlines("NLST ", straindirectories.append)
            for strainName in straindirectories:
                strainFiles = []
                try:
                    ftp.cwd(self.folder + strainName)
                    ftp.retrlines('NLST', strainFiles.append)
                except all_errors:
                    # Entry cannot be entered or listed: skip it.
                    continue
                if any(self.gbkid in element for element in strainFiles):
                    return strainName
            return None
        finally:
            # Always release the control connection, even on error.
            ftp.close()
| 75 | |
| 76 | |
def __main__():
    """Parse the command line and download the requested genome files.

    Options:
        -i     RefSeq Genomic Accession ID (required)
        --fna  output FASTA file name
        --ptt  output PTT file name
        --rnt  output RNT file name
        --gff  output GFF file name

    Positional arguments are rejected. The download itself is performed by
    constructing ``GetData`` with the parsed option values.
    """
    parser = optparse.OptionParser()
    parser.add_option('-i', dest='gbkid', help='RefSeq Genomic Accession ID')
    parser.add_option('--fna', dest='fnafile', help='Output FASTA file name')
    parser.add_option('--ptt', dest='pttfile', help='Output PTT file name')
    parser.add_option('--rnt', dest='rntfile', help='Output RNT file name')
    parser.add_option('--gff', dest='gfffile', help='Output GFF file name')
    (options, args) = parser.parse_args()
    if len(args) > 0:
        parser.error('Wrong number of arguments')
    # Fail fast with a usage message rather than passing an empty accession
    # ID on to the FTP lookup.
    if not options.gbkid:
        parser.error('Missing RefSeq Genomic Accession ID (-i)')

    GetData(options.gbkid, options.fnafile, options.pttfile, options.rntfile, options.gfffile)


if __name__ == "__main__":
    __main__()
