annotate get_edge_data.py @ 6:e6e6f5f9b9c4 draft default tip

planemo upload for repository https://bitbucket.org/crs4/orione-tools/src/bb-orione-tools/custom/edge_pro/ commit 02541df6373bc61be0cdc0617c8549cf8b49b832
author crs4
date Thu, 31 Aug 2017 12:54:08 -0400
parents 407b894abb08
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7af33315bc5e Uploaded
crs4
parents:
diff changeset
1 # -*- coding: utf-8 -*-
7af33315bc5e Uploaded
crs4
parents:
diff changeset
2
7af33315bc5e Uploaded
crs4
parents:
diff changeset
3 from ftplib import FTP
7af33315bc5e Uploaded
crs4
parents:
diff changeset
4 import optparse
7af33315bc5e Uploaded
crs4
parents:
diff changeset
5 import sys
7af33315bc5e Uploaded
crs4
parents:
diff changeset
6
1
f77ce4f92b46 Use $GALAXY_SLOTS instead of $EDGE_PRO_SITE_OPTIONS. Add dependency on bowtie2. Add readme.rst .
crs4
parents: 0
diff changeset
class GetData(object):
    """Download the genome files of one RefSeq accession from the NCBI FTP site.

    Instantiating the class performs the whole job: it looks up the strain
    directory that contains files named after ``gbkid`` and downloads the
    matching ``.fna``, ``.ptt``, ``.rnt`` and ``.gff`` files to the given
    output paths.
    """

    def __init__(self, gbkid, fnafile, pttfile, rntfile, gfffile):
        """Locate the strain directory for ``gbkid`` and fetch its files.

        :param gbkid: RefSeq Genomic Accession ID; matched as a substring of
            the remote file names.
        :param fnafile: local path that receives the remote ``.fna`` file.
        :param pttfile: local path that receives the remote ``.ptt`` file.
        :param rntfile: local path that receives the remote ``.rnt`` file.
        :param gfffile: local path that receives the remote ``.gff`` file.
        :raises SystemExit: when ``gbkid`` is empty or no strain directory
            contains a file whose name includes ``gbkid``.
        """
        # Imported locally so the block does not depend on a new
        # module-level import.
        from ftplib import all_errors

        self.gbkid = gbkid
        self.fnafile = fnafile
        self.pttfile = pttfile
        self.rntfile = rntfile
        self.gfffile = gfffile
        self.ftpurl = 'ftp.ncbi.nlm.nih.gov'
        # An earlier revision used '/genomes/Bacteria/' here.
        self.folder = '/genomes/archive/old_refseq/Bacteria/'

        # An empty accession would match every remote file name and a None
        # accession cannot match any, so reject both before going online.
        if not gbkid:
            sys.exit("Unrecognized RefSeq Genomic Accession ID")

        strainName = self._getStrainName()
        # Single-argument form prints identically on Python 2 and Python 3.
        print(strainName)
        if not strainName:
            sys.exit("Unrecognized RefSeq Genomic Accession ID")

        ftp = FTP(self.ftpurl)
        try:
            ftp.login()
            ftp.cwd(self.folder + strainName)

            directoryFiles = []
            ftp.retrlines('NLST', directoryFiles.append)
            for fileName in directoryFiles:
                localPath = self._outputPathFor(fileName)
                if localPath is None:
                    continue
                try:
                    self._download(ftp, fileName, localPath)
                except all_errors as err:
                    # Downloads stay best-effort (one failure does not stop
                    # the others), but the failure is no longer silent.
                    sys.stderr.write(
                        "Failed to download %s: %s\n" % (fileName, err))
        finally:
            self._disconnect(ftp)

    def _outputPathFor(self, fileName):
        """Return the local output path for ``fileName``, or None to skip it.

        A remote file is wanted when its name contains ``self.gbkid`` and one
        of the handled extensions; extensions are tried in the order
        ``.fna``, ``.ptt``, ``.rnt``, ``.gff`` and the first match wins.
        """
        if self.gbkid not in fileName:
            return None
        outputs = (
            ('.fna', self.fnafile),
            ('.ptt', self.pttfile),
            ('.rnt', self.rntfile),
            ('.gff', self.gfffile),
        )
        for extension, localPath in outputs:
            if extension in fileName:
                return localPath
        return None

    def _download(self, ftp, remoteName, localPath):
        """Copy ``remoteName`` from the open ``ftp`` session to ``localPath``."""
        # retrbinary() delivers raw bytes, so the file must be opened in
        # binary mode; text mode breaks on Python 3 and alters newlines on
        # platforms that translate them.
        with open(localPath, 'wb') as outFile:
            ftp.retrbinary("RETR " + remoteName, outFile.write)

    @staticmethod
    def _disconnect(ftp):
        """Close ``ftp`` politely, falling back to a hard close on error."""
        from ftplib import all_errors

        try:
            ftp.quit()
        except all_errors:
            ftp.close()

    def _getStrainName(self):
        """Return the strain directory that holds files for ``self.gbkid``.

        Every entry listed under ``self.folder`` is entered and listed in
        turn; the first one whose listing has a name containing
        ``self.gbkid`` is returned. Returns None when no entry matches.
        """
        from ftplib import all_errors

        ftp = FTP(self.ftpurl)
        try:
            ftp.login()
            ftp.cwd(self.folder)

            straindirectories = []
            ftp.retrlines("NLST ", straindirectories.append)
            for strainName in straindirectories:
                strainFiles = []
                try:
                    ftp.cwd(self.folder + strainName)
                    ftp.retrlines('NLST', strainFiles.append)
                except all_errors:
                    # Entries that cannot be entered or listed are skipped so
                    # the scan can carry on with the remaining ones.
                    continue
                if any(self.gbkid in element for element in strainFiles):
                    return strainName
            return None
        finally:
            self._disconnect(ftp)
7af33315bc5e Uploaded
crs4
parents:
diff changeset
75
7af33315bc5e Uploaded
crs4
parents:
diff changeset
76
7af33315bc5e Uploaded
crs4
parents:
diff changeset
def __main__():
    """Parse the command line and download the files for one accession.

    Options: ``-i`` (RefSeq Genomic Accession ID, required) and ``--fna``,
    ``--ptt``, ``--rnt``, ``--gff`` (output file names). Positional
    arguments are rejected. Invalid command lines end through
    ``parser.error``, which prints a usage message and exits.
    """
    parser = optparse.OptionParser()
    parser.add_option('-i', dest='gbkid', help='RefSeq Genomic Accession ID')
    parser.add_option('--fna', dest='fnafile', help='Output FASTA file name')
    parser.add_option('--ptt', dest='pttfile', help='Output PTT file name')
    parser.add_option('--rnt', dest='rntfile', help='Output RNT file name')
    parser.add_option('--gff', dest='gfffile', help='Output GFF file name')
    (options, args) = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments')
    # Without an accession there is nothing to look up; fail before any
    # network access instead of passing None down to GetData.
    if not options.gbkid:
        parser.error('Missing RefSeq Genomic Accession ID (-i)')

    GetData(options.gbkid, options.fnafile, options.pttfile,
            options.rntfile, options.gfffile)


# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    __main__()