Mercurial > repos > crs4 > edge_pro
annotate get_edge_data.py @ 1:f77ce4f92b46 draft
Use $GALAXY_SLOTS instead of $EDGE_PRO_SITE_OPTIONS. Add dependency on bowtie2. Add readme.rst .
author | crs4 |
---|---|
date | Fri, 31 Jan 2014 05:44:03 -0500 |
parents | 7af33315bc5e |
children | 407b894abb08 |
rev | line source |
---|---|
0 | 1 # -*- coding: utf-8 -*- |
2 | |
3 from ftplib import FTP | |
4 import optparse | |
5 import sys | |
6 | |
1
f77ce4f92b46
Use $GALAXY_SLOTS instead of $EDGE_PRO_SITE_OPTIONS. Add dependency on bowtie2. Add readme.rst .
crs4
parents:
0
diff
changeset
|
class GetData(object):
    """Download the NCBI annotation files for one RefSeq genomic accession.

    Constructing an instance performs the whole job: the strain directory
    that contains a file whose name includes ``gbkid`` is located below
    ``self.folder`` on ``self.ftpurl``, and every file in that directory
    whose name contains both ``gbkid`` and one of the markers ``.fna``,
    ``.ptt``, ``.rnt`` or ``.gff`` is written to the matching output path.

    Parameters:
        gbkid:   RefSeq genomic accession ID to look for in remote names.
        fnafile: local path that receives the ``.fna`` file.
        pttfile: local path that receives the ``.ptt`` file.
        rntfile: local path that receives the ``.rnt`` file.
        gfffile: local path that receives the ``.gff`` file.

    An output path that is empty or ``None`` is skipped.

    Raises:
        SystemExit: when no strain directory contains a file for ``gbkid``.
    """

    # (marker searched in the remote file name, attribute with the local
    # path).  Order matters: the first marker found in a name wins.
    _OUTPUTS = (
        ('.fna', 'fnafile'),
        ('.ptt', 'pttfile'),
        ('.rnt', 'rntfile'),
        ('.gff', 'gfffile'),
    )

    def __init__(self, gbkid, fnafile, pttfile, rntfile, gfffile):
        self.gbkid = gbkid
        self.fnafile = fnafile
        self.pttfile = pttfile
        self.rntfile = rntfile
        self.gfffile = gfffile
        self.ftpurl = 'ftp.ncbi.nlm.nih.gov'
        self.folder = '/genomes/Bacteria/'

        strainName = self._getStrainName()
        if not strainName:
            sys.exit("Unrecognized RefSeq Genomic Accession ID")
        # Single-argument call form: prints the same text on Python 2 and 3.
        print(strainName)

        ftp = self._connect()
        try:
            ftp.cwd(self.folder + strainName)
            for fileName in ftp.nlst():
                self._download(ftp, fileName)
        finally:
            # Always release the control connection, even on failure.
            self._disconnect(ftp)

    def _connect(self):
        """Open an anonymous FTP session to ``self.ftpurl`` and return it."""
        ftp = FTP(self.ftpurl)
        try:
            ftp.login()
        except Exception:
            # Do not leak the socket when the login is refused.
            ftp.close()
            raise
        return ftp

    @staticmethod
    def _disconnect(ftp):
        """Close *ftp* politely, falling back to a hard close on error."""
        from ftplib import all_errors
        try:
            ftp.quit()
        except all_errors:
            ftp.close()

    def _download(self, ftp, fileName):
        """Fetch *fileName* from the current remote directory if requested.

        The file is retrieved only when its name contains ``self.gbkid`` and
        one of the known markers, and the corresponding output path is set.
        A failed transfer is reported on stderr and does not abort the
        remaining downloads.
        """
        from ftplib import all_errors
        if self.gbkid not in fileName:
            return
        for marker, attribute in self._OUTPUTS:
            if marker in fileName:
                break
        else:
            # Not one of the file kinds this tool collects (e.g. ``.gbk``).
            return
        target = getattr(self, attribute)
        if not target:
            return
        try:
            # retrbinary() hands over bytes, so the file must be binary.
            with open(target, 'wb') as outFile:
                ftp.retrbinary("RETR " + fileName, outFile.write)
        except all_errors as error:
            # all_errors also covers IOError/OSError raised by open().
            sys.stderr.write(
                "Could not download %s: %s\n" % (fileName, error))

    def _getStrainName(self):
        """Return the strain directory holding a file for ``self.gbkid``.

        Every entry listed in ``self.folder`` is entered in turn and its
        listing searched for a name containing ``self.gbkid``.  Entries that
        cannot be entered or listed are skipped.  Returns ``None`` when no
        entry matches or when ``self.gbkid`` is empty.
        """
        from ftplib import all_errors
        if not self.gbkid:
            # An empty ID is a substring of every name and would match the
            # first directory; treat it as "not found" instead.
            return None
        ftp = self._connect()
        try:
            ftp.cwd(self.folder)
            for strainName in ftp.nlst():
                try:
                    ftp.cwd(self.folder + strainName)
                    strainFiles = ftp.nlst()
                except all_errors:
                    # Plain files and unreadable directories end up here.
                    continue
                if any(self.gbkid in element for element in strainFiles):
                    return strainName
        finally:
            self._disconnect(ftp)
        return None
74 | |
75 | |
def __main__():
    """Parse the command line and download the files for one accession.

    Options:
        -i      RefSeq Genomic Accession ID (required)
        --fna   output FASTA file name
        --ptt   output PTT file name
        --rnt   output RNT file name
        --gff   output GFF file name

    Positional arguments are not accepted.  Usage problems are reported
    through ``parser.error``, which prints the message and exits.
    """
    parser = optparse.OptionParser()
    parser.add_option('-i', dest='gbkid', help='RefSeq Genomic Accession ID')
    parser.add_option('--fna', dest='fnafile', help='Output FASTA file name')
    parser.add_option('--ptt', dest='pttfile', help='Output PTT file name')
    parser.add_option('--rnt', dest='rntfile', help='Output RNT file name')
    parser.add_option('--gff', dest='gfffile', help='Output GFF file name')
    (options, args) = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments')
    if not options.gbkid:
        # Fail fast instead of scanning the whole FTP tree for nothing.
        parser.error('Missing RefSeq Genomic Accession ID (-i)')

    GetData(options.gbkid, options.fnafile, options.pttfile,
            options.rntfile, options.gfffile)
0 | 88 |
89 | |
# Script entry point: run the command-line interface only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    __main__()