Mercurial > repos > crs4 > edge_pro
annotate get_edge_data.py @ 5:407b894abb08 draft
Uploaded
author | crs4 |
---|---|
date | Thu, 03 Nov 2016 10:56:44 -0400 |
parents | f77ce4f92b46 |
children |
rev | line source |
---|---|
0 | 1 # -*- coding: utf-8 -*- |
2 | |
3 from ftplib import FTP | |
4 import optparse | |
5 import sys | |
6 | |
1
f77ce4f92b46
Use $GALAXY_SLOTS instead of $EDGE_PRO_SITE_OPTIONS. Add dependency on bowtie2. Add readme.rst .
crs4
parents:
0
diff
changeset
|
class GetData(object):
    """Download the RefSeq files of one genome from the NCBI FTP server.

    Instantiating the class performs the whole job: the strain directory
    that contains ``gbkid`` is located under ``self.folder``, and every file
    in that directory whose name contains ``gbkid`` and one of the known
    extensions is saved to the corresponding output path.

    Parameters:
        gbkid:   RefSeq Genomic Accession ID to look for in remote file names.
        fnafile: local path receiving the ``.fna`` file.
        pttfile: local path receiving the ``.ptt`` file.
        rntfile: local path receiving the ``.rnt`` file.
        gfffile: local path receiving the ``.gff`` file.

    An output whose path is empty or ``None`` is skipped.

    Exits the interpreter (``sys.exit``) with the message
    "Unrecognized RefSeq Genomic Accession ID" when no strain directory
    contains a file matching ``gbkid``.
    """

    # (substring searched in the remote file name, attribute holding the
    # local output path).  Checked in this order; the first match wins.
    _OUTPUTS = (
        ('.fna', 'fnafile'),
        ('.ptt', 'pttfile'),
        ('.rnt', 'rntfile'),
        ('.gff', 'gfffile'),
    )

    def __init__(self, gbkid, fnafile, pttfile, rntfile, gfffile):
        from ftplib import all_errors

        self.gbkid = gbkid
        self.fnafile = fnafile
        self.pttfile = pttfile
        self.rntfile = rntfile
        self.gfffile = gfffile
        self.ftpurl = 'ftp.ncbi.nlm.nih.gov'
        # Previously '/genomes/Bacteria/'.
        self.folder = '/genomes/archive/old_refseq/Bacteria/'

        strainName = self._getStrainName()
        # Single-argument print() behaves the same on Python 2 and 3.
        print(strainName)
        if not strainName:
            sys.exit("Unrecognized RefSeq Genomic Accession ID")

        ftp = FTP(self.ftpurl)
        try:
            ftp.login()
            ftp.cwd(self.folder + strainName)
            directoryFiles = []
            ftp.retrlines('NLST', directoryFiles.append)
            for fileName in directoryFiles:
                if self.gbkid not in fileName:
                    continue
                outPath = self._outputPathFor(fileName)
                if not outPath:
                    # Unknown extension, or no output path supplied for it.
                    continue
                try:
                    # retrbinary delivers bytes, so the file must be binary.
                    with open(outPath, 'wb') as outFile:
                        ftp.retrbinary("RETR " + fileName, outFile.write)
                except all_errors as exc:
                    # Best effort: report the failure and keep fetching the
                    # remaining files instead of aborting the whole run.
                    sys.stderr.write(
                        "Could not download %s: %s\n" % (fileName, exc))
        finally:
            self._disconnect(ftp)

    def _outputPathFor(self, fileName):
        """Return the local output path for ``fileName``, or None if its
        name contains none of the known extensions."""
        for marker, attribute in self._OUTPUTS:
            if marker in fileName:
                return getattr(self, attribute)
        return None

    @staticmethod
    def _disconnect(ftp):
        """Close ``ftp`` politely, falling back to a hard close."""
        from ftplib import all_errors

        try:
            ftp.quit()
        except all_errors:
            # The server may already have dropped the connection.
            ftp.close()

    def _getStrainName(self):
        """Return the name of the first entry under ``self.folder`` whose
        listing contains a file name including ``self.gbkid``.

        Returns None when no entry matches or when ``self.gbkid`` is empty
        or None (checked before any connection is opened).
        """
        if not self.gbkid:
            return None

        from ftplib import all_errors

        ftp = FTP(self.ftpurl)
        try:
            ftp.login()
            ftp.cwd(self.folder)
            straindirectories = []
            ftp.retrlines('NLST', straindirectories.append)
            for strainName in straindirectories:
                strainFiles = []
                try:
                    ftp.cwd(self.folder + strainName)
                    ftp.retrlines('NLST', strainFiles.append)
                except all_errors:
                    # Entry cannot be entered or listed: try the next one.
                    continue
                if any(self.gbkid in element for element in strainFiles):
                    return strainName
            return None
        finally:
            self._disconnect(ftp)
75 | |
76 | |
def __main__(argv=None):
    """Parse the command line and download the requested genome files.

    Parameters:
        argv: optional list of command-line arguments, without the program
              name.  When None, optparse reads ``sys.argv[1:]``.

    Exits with a usage error (optparse's ``parser.error``, exit status 2)
    when positional arguments are given or when ``-i`` is missing.
    """
    parser = optparse.OptionParser()
    parser.add_option('-i', dest='gbkid', help='RefSeq Genomic Accession ID')
    parser.add_option('--fna', dest='fnafile', help='Output FASTA file name')
    parser.add_option('--ptt', dest='pttfile', help='Output PTT file name')
    parser.add_option('--rnt', dest='rntfile', help='Output RNT file name')
    parser.add_option('--gff', dest='gfffile', help='Output GFF file name')
    (options, args) = parser.parse_args(argv)
    if args:
        parser.error('Wrong number of arguments')
    if not options.gbkid:
        # Fail early with a usage message rather than deep in the FTP code.
        parser.error('Missing required option: -i')

    GetData(options.gbkid, options.fnafile, options.pttfile,
            options.rntfile, options.gfffile)
0 | 89 |
90 | |
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    __main__()