comparison get_edge_data.py @ 1:f77ce4f92b46 draft

Use $GALAXY_SLOTS instead of $EDGE_PRO_SITE_OPTIONS. Add dependency on bowtie2. Add readme.rst.
author crs4
date Fri, 31 Jan 2014 05:44:03 -0500
parents 7af33315bc5e
children 407b894abb08
comparison
equal deleted inserted replaced
0:7af33315bc5e 1:f77ce4f92b46
2 2
3 from ftplib import FTP 3 from ftplib import FTP
4 import optparse 4 import optparse
5 import sys 5 import sys
6 6
7 class GetData: 7 class GetData(object):
8 8
9 def __init__(self, gbkid, fnafile, pttfile, rntfile, gfffile): 9 def __init__(self, gbkid, fnafile, pttfile, rntfile, gfffile):
10 self.gbkid = gbkid 10 self.gbkid = gbkid
11 self.fnafile = fnafile 11 self.fnafile = fnafile
12 self.pttfile = pttfile 12 self.pttfile = pttfile
13 self.rntfile = rntfile 13 self.rntfile = rntfile
14 self.gfffile = gfffile 14 self.gfffile = gfffile
15 self.ftpurl = 'ftp.ncbi.nlm.nih.gov' 15 self.ftpurl = 'ftp.ncbi.nlm.nih.gov'
16 self.folder = '/genomes/Bacteria/' 16 self.folder = '/genomes/Bacteria/'
17
18 def getData(self):
19 """ """
20 strainName = self._getStrainName() 17 strainName = self._getStrainName()
21 print strainName 18 print strainName
22 if not strainName: 19 if not strainName:
23 sys.exit("Unrecognized RefSeq Genomic Accession ID") 20 sys.exit("Unrecognized RefSeq Genomic Accession ID")
24 ftp = FTP(self.ftpurl) 21 ftp = FTP(self.ftpurl)
25 ftp.login() 22 ftp.login()
26 newDir = self.folder + strainName 23 newDir = self.folder + strainName
27 ftp.cwd(newDir) 24 ftp.cwd(newDir)
28 25
29 directoryFiles = [] 26 directoryFiles = []
30 ftp.retrlines('NLST', directoryFiles.append) 27 ftp.retrlines('NLST', directoryFiles.append)
31 for fileName in directoryFiles: 28 for fileName in directoryFiles:
32 try: 29 try:
33 if '.fna' in fileName and self.gbkid in fileName: 30 if '.fna' in fileName and self.gbkid in fileName:
56 def _getStrainName(self): 53 def _getStrainName(self):
57 """ """ 54 """ """
58 ftp = FTP(self.ftpurl) 55 ftp = FTP(self.ftpurl)
59 ftp.login() 56 ftp.login()
60 ftp.cwd(self.folder) 57 ftp.cwd(self.folder)
61 58
62 straindirectories = [] 59 straindirectories = []
63 ftp.retrlines("NLST " , straindirectories.append) 60 ftp.retrlines("NLST " , straindirectories.append)
64 #print "scanning directories..." 61 #print "scanning directories..."
65 for strainName in straindirectories: 62 for strainName in straindirectories:
66 try: 63 try:
75 pass 72 pass
76 return None 73 return None
77 74
78 75
79 def __main__(): 76 def __main__():
80 """ main function """
81 parser = optparse.OptionParser() 77 parser = optparse.OptionParser()
82 parser.add_option('-i', dest='gbkid', help='RefSeq Genomic Accession ID') 78 parser.add_option('-i', dest='gbkid', help='RefSeq Genomic Accession ID')
83 parser.add_option('--fna', dest='fnafile', help='Output FASTA file name') 79 parser.add_option('--fna', dest='fnafile', help='Output FASTA file name')
84 parser.add_option('--ptt', dest='pttfile', help='Output PTT file name') 80 parser.add_option('--ptt', dest='pttfile', help='Output PTT file name')
85 parser.add_option('--rnt', dest='rntfile', help='Output RNT file name') 81 parser.add_option('--rnt', dest='rntfile', help='Output RNT file name')
86 parser.add_option('--gff', dest='gfffile', help='Output GFF file name') 82 parser.add_option('--gff', dest='gfffile', help='Output GFF file name')
87 (options, args) = parser.parse_args() 83 (options, args) = parser.parse_args()
88 if len(args) > 0: 84 if len(args) > 0:
89 parser.error('Wrong number of arguments') 85 parser.error('Wrong number of arguments')
90 86
91 S = GetData(options.gbkid, options.fnafile, options.pttfile, options.rntfile, options.gfffile) 87 GetData(options.gbkid, options.fnafile, options.pttfile, options.rntfile, options.gfffile)
92 S.getData()
93 88
94 89
95 if __name__ == "__main__": 90 if __name__ == "__main__":
96 __main__() 91 __main__()