Mercurial > repos > crs4 > edge_pro
annotate get_edge_data.py @ 4:d5464c9e1723 draft
Add support for paired collection of FASTQ (thanks to Inge Alexander Raknes).
| author | crs4 |
|---|---|
| date | Tue, 17 Mar 2015 10:44:33 -0400 |
| parents | f77ce4f92b46 |
| children | 407b894abb08 |
| rev | line source |
|---|---|
| 0 | 1 # -*- coding: utf-8 -*- |
| 2 | |
| 3 from ftplib import FTP | |
| 4 import optparse | |
| 5 import sys | |
| 6 | |
|
1
f77ce4f92b46
Use $GALAXY_SLOTS instead of $EDGE_PRO_SITE_OPTIONS. Add dependency on bowtie2. Add readme.rst .
crs4
parents:
0
diff
changeset
|
class GetData(object):
    """Download the NCBI RefSeq files of one bacterial genome over FTP.

    Instantiating the class performs the whole job: it looks under
    ``/genomes/Bacteria/`` on ``ftp.ncbi.nlm.nih.gov`` for the strain
    directory holding a file whose name contains ``gbkid``, then saves the
    matching ``.fna``, ``.ptt``, ``.rnt`` and ``.gff`` files of that
    directory to the paths given to the constructor.
    """

    def __init__(self, gbkid, fnafile, pttfile, rntfile, gfffile):
        """Locate the strain directory for ``gbkid`` and fetch its files.

        Args:
            gbkid: RefSeq Genomic Accession ID; matched as a substring of
                the remote file names.
            fnafile: Local path for the ``.fna`` file.
            pttfile: Local path for the ``.ptt`` file.
            rntfile: Local path for the ``.rnt`` file.
            gfffile: Local path for the ``.gff`` file.

        A path left as ``None`` means that file type is not downloaded.

        Raises:
            SystemExit: If ``gbkid`` is empty or no strain directory
                contains a file named after it.
        """
        # An empty ID is a substring of every file name and None cannot be
        # matched at all, so reject both before touching the network.
        if not gbkid:
            sys.exit("Unrecognized RefSeq Genomic Accession ID")
        self.gbkid = gbkid
        self.fnafile = fnafile
        self.pttfile = pttfile
        self.rntfile = rntfile
        self.gfffile = gfffile
        self.ftpurl = 'ftp.ncbi.nlm.nih.gov'
        self.folder = '/genomes/Bacteria/'

        strainName = self._getStrainName()
        if not strainName:
            sys.exit("Unrecognized RefSeq Genomic Accession ID")
        # Single-argument print() behaves the same on Python 2 and 3.
        print(strainName)

        ftp = FTP(self.ftpurl)
        try:
            ftp.login()
            ftp.cwd(self.folder + strainName)
            directoryFiles = []
            ftp.retrlines('NLST', directoryFiles.append)
            for fileName in directoryFiles:
                outPath = self._outputPathFor(fileName)
                if outPath is not None:
                    self._download(ftp, fileName, outPath)
        finally:
            # Always release the control connection, even on failure.
            ftp.close()

    def _outputPathFor(self, fileName):
        """Return the local path a remote file should be saved to.

        Returns ``None`` when ``fileName`` does not contain the accession
        ID, has none of the wanted extensions, or when no output path was
        supplied for its extension.
        """
        if self.gbkid not in fileName:
            return None
        # Order matters: the first extension found in the name wins.
        wanted = (
            ('.fna', self.fnafile),
            ('.ptt', self.pttfile),
            ('.rnt', self.rntfile),
            ('.gff', self.gfffile),
        )
        for extension, outPath in wanted:
            if extension in fileName:
                return outPath
        return None

    def _download(self, ftp, fileName, outPath):
        """Save remote ``fileName`` to ``outPath``; report failures.

        Downloads are best-effort: a failure is reported on stdout (the
        channel this script already uses for its diagnostics) and the
        remaining files are still attempted.
        """
        from ftplib import all_errors
        try:
            # retrbinary() hands the callback raw bytes, so the file must
            # be opened in binary mode.
            with open(outPath, 'wb') as outFile:
                ftp.retrbinary("RETR " + fileName, outFile.write)
        except all_errors as exc:
            print("Could not download %s: %s" % (fileName, exc))

    def _getStrainName(self):
        """Return the strain directory holding a file named after the ID.

        Every entry listed under ``self.folder`` is entered in turn and its
        listing searched for a name containing ``self.gbkid``.

        Returns:
            The name of the first matching directory, or ``None`` when no
            directory matches.
        """
        from ftplib import all_errors
        ftp = FTP(self.ftpurl)
        try:
            ftp.login()
            ftp.cwd(self.folder)
            straindirectories = []
            ftp.retrlines("NLST ", straindirectories.append)
            for strainName in straindirectories:
                try:
                    ftp.cwd(self.folder + strainName)
                    strainFiles = []
                    ftp.retrlines('NLST', strainFiles.append)
                except all_errors:
                    # Entries that cannot be entered or listed are simply
                    # not candidates; keep scanning the others.
                    continue
                if any(self.gbkid in element for element in strainFiles):
                    return strainName
            return None
        finally:
            # The scan uses its own connection; do not leak it.
            ftp.close()
| 74 | |
| 75 | |
def __main__():
    """Parse the command line and download the requested genome files.

    Options:
        -i     RefSeq Genomic Accession ID (required).
        --fna  Output FASTA file name.
        --ptt  Output PTT file name.
        --rnt  Output RNT file name.
        --gff  Output GFF file name.

    Exits through ``parser.error`` when positional arguments are given or
    when the accession ID is missing.
    """
    parser = optparse.OptionParser()
    parser.add_option('-i', dest='gbkid', help='RefSeq Genomic Accession ID')
    parser.add_option('--fna', dest='fnafile', help='Output FASTA file name')
    parser.add_option('--ptt', dest='pttfile', help='Output PTT file name')
    parser.add_option('--rnt', dest='rntfile', help='Output RNT file name')
    parser.add_option('--gff', dest='gfffile', help='Output GFF file name')
    (options, args) = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments')
    # Without an ID nothing can match; fail here with a usage message
    # instead of after scanning the remote server.
    if not options.gbkid:
        parser.error('Missing RefSeq Genomic Accession ID (-i)')

    GetData(options.gbkid, options.fnafile, options.pttfile, options.rntfile, options.gfffile)


# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()
