comparison GEMBASSY-1.0.3/src/genret.c @ 0:8300eb051bea draft

Initial upload
author ktnyt
date Fri, 26 Jun 2015 05:19:29 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:8300eb051bea
1 /******************************************************************************
2 ** @source genret
3 **
4 ** Retrieves various gene related information from genome flatfile
5 **
6 ** @author Copyright (C) 2012 Hidetoshi Itaya
7 ** @version 1.0.3
8 ** @modified 2012/1/20 Hidetoshi Itaya Created!
9 ** @modified 2013/6/16 Revision 1
10 ** @modified 2015/2/7 Refactor
11 ** @@
12 **
13 ** This program is free software; you can redistribute it and/or
14 ** modify it under the terms of the GNU General Public License
15 ** as published by the Free Software Foundation; either version 2
16 ** of the License, or (at your option) any later version.
17 **
18 ** This program is distributed in the hope that it will be useful,
19 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ** GNU General Public License for more details.
22 **
23 ** You should have received a copy of the GNU General Public License
24 ** along with this program; if not, write to the Free Software
25 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
26 ******************************************************************************/
27
28 #include "emboss.h"
29 #include "glibs.h"
30
31
32
33
34 /* @prog genret ***************************************************************
35 **
36 ** Retrieves various gene related infomration from genome flatfile
37 **
38 ******************************************************************************/
39
40 int main(int argc, char *argv[])
41 {
42 embInitPV("genret", argc, argv, "GEMBASSY", "1.0.3");
43
44 AjPSeqall seqall;
45 AjPSeq seq = NULL;
46 AjPStr inseq = NULL;
47 AjPStr gene = NULL;
48 AjPStr access = NULL;
49 AjBool accid = ajTrue;
50 AjPStr argument = NULL;
51 AjPFile outfile = NULL;
52
53 AjPStr seqid = NULL;
54 AjPStr restid = NULL;
55
56 AjBool valid = ajFalse;
57 AjBool isseq = ajFalse;
58 AjBool isgbk = ajFalse;
59
60 AjPFilebuff buff = NULL;
61 AjPFile tmpfile = NULL;
62 AjPStr tmpname = NULL;
63
64 AjPStr regexstr = NULL;
65 AjPStrTok token = NULL;
66 AjPRegexp regex = NULL;
67
68 AjPStr url = NULL;
69 AjPStr base = NULL;
70 AjPStr head = NULL;
71 AjPStr line = NULL;
72
73 seqall = ajAcdGetSeqall("sequence");
74 access = ajAcdGetString("access");
75 gene = ajAcdGetString("gene");
76 argument = ajAcdGetString("argument");
77 accid = ajAcdGetBoolean("accid");
78 outfile = ajAcdGetOutfile("outfile");
79
80 if(
81 ajStrMatchC(access, "translation") ||
82 ajStrMatchC(access, "get_exon") ||
83 ajStrMatchC(access, "get_exons") ||
84 ajStrMatchC(access, "get_cdsseq") ||
85 ajStrMatchC(access, "get_gbkseq") ||
86 ajStrMatchC(access, "get_geneseq") ||
87 ajStrMatchC(access, "get_intron") ||
88 ajStrMatchC(access, "getseq") ||
89 ajStrMatchC(access, "seq") ||
90 ajStrMatchC(access, "around_startcodon") ||
91 ajStrMatchC(access, "around_stopcodon") ||
92 ajStrMatchC(access, "before_startcodon") ||
93 ajStrMatchC(access, "before_stopcodon") ||
94 ajStrMatchC(access, "after_startcodon") ||
95 ajStrMatchC(access, "after_stopcodon")
96 )
97 {
98 isseq = ajTrue;
99 }
100 else if(ajStrMatchC(access, "annotate") ||
101 ajStrMatchC(access, "output"))
102 {
103 isgbk = ajTrue;
104 }
105 else
106 {
107 ajFmtPrintF(outfile, "gene,%S\n", access);
108 }
109
110 base = ajStrNewC("rest.g-language.org");
111
112 ajStrExchangeCC(&argument, " ", "/");
113 ajStrExchangeCC(&argument, ",", "/");
114 ajStrExchangeCC(&argument, "\t", "/");
115 ajStrExchangeCC(&argument, "\r", "/");
116 ajStrExchangeCC(&argument, "\n", "/");
117
118 if(ajStrMatchC(gene, "*"))
119 {
120 ajStrInsertK(&gene, 0, '.');
121 }
122
123 if(ajStrPrefixC(gene, "@") || ajStrPrefixC(gene, "list::"))
124 {
125 ajStrExchangeCC(&gene, "@", "");
126 ajStrExchangeCC(&gene, "list::", "");
127 ajStrAssignS(&tmpname, gene);
128
129 tmpfile = ajFileNewInNameS(tmpname);
130
131 if(!tmpfile)
132 {
133 ajDie("List file (%S) open error\n", tmpname);
134 }
135
136 gene = ajStrNew();
137
138 while(ajReadline(tmpfile, &line))
139 {
140 ajStrAppendS(&gene, line);
141 }
142
143 ajFileClose(&tmpfile);
144 ajStrDel(&tmpname);
145 ajStrDel(&line);
146 }
147
148 tmpname = ajStrNew();
149 gAssignUniqueName(&tmpname);
150
151 while(ajSeqallNext(seqall, &seq))
152 {
153 inseq = ajStrNew();
154
155 if(!accid)
156 {
157 if(gFormatGenbank(seq, &inseq))
158 {
159 tmpfile = ajFileNewOutNameS(tmpname);
160
161 if(!tmpfile)
162 {
163 ajDie("Output file (%S) open error\n", tmpname);
164 }
165
166 ajFmtPrintF(tmpfile, "%S", inseq);
167
168 ajFileClose(&tmpfile);
169
170 ajFmtPrintS(&url, "http://%S/upload/upl.pl", base);
171
172 gFilePostSS(url, tmpname, &restid);
173
174 ajStrDel(&url);
175
176 ajSysFileUnlinkS(tmpname);
177 }
178 else
179 {
180 ajWarn("Sequence does not have features\n"
181 "Proceeding with sequence accession ID\n");
182 accid = ajTrue;
183 }
184 }
185
186
187 ajStrAssignS(&seqid, ajSeqGetAccS(seq));
188
189 if(ajStrGetLen(seqid) == 0)
190 {
191 ajStrAssignS(&seqid, ajSeqGetNameS(seq));
192 }
193
194 if(ajStrGetLen(seqid) == 0)
195 {
196 ajWarn("No valid header information\n");
197 }
198
199 if(accid)
200 {
201 ajStrAssignS(&restid, seqid);
202 if(ajStrGetLen(seqid) == 0)
203 {
204 ajDie("Cannot proceed without header with -accid\n");
205 }
206
207 if(!gValID(seqid))
208 {
209 ajDie("Invalid accession ID:%S, exiting\n", seqid);
210 }
211 }
212
213 url = ajStrNew();
214
215 if(isgbk)
216 {
217 ajFmtPrintS(&url, "http://%S/%S/%S", base, restid, access);
218 }
219 else
220 {
221 ajFmtPrintS(&url, "http://%S/%S/*/%S/%S", base, restid, access, argument);
222 }
223
224 if(!gFilebuffURLS(url, &buff))
225 {
226 ajDie("GET error from %S\n", url);
227 }
228
229 while(ajBuffreadLine(buff, &line))
230 {
231 if(isgbk){
232 ajFmtPrintF(outfile, "%S", line);
233 continue;
234 }
235
236 ajStrRemoveLastNewline(&line);
237
238 regex = ajRegCompC("^>");
239
240 if(ajRegExec(regex, line))
241 {
242 head = ajStrNew();
243
244 ajStrAssignS(&head, line);
245 ajStrTrimStartC(&head, ">");
246
247 valid = ajFalse;
248
249 token = ajStrTokenNewC(ajStrNewS(gene), " ,\t\r\n");
250
251 while(ajStrTokenNextParse(token, &regexstr))
252 {
253 if(ajStrGetLen(regexstr))
254 {
255 regex = ajRegComp(regexstr);
256
257 if(ajRegExec(regex, line))
258 {
259 valid = ajTrue;
260 if(ajStrIsAlnum(regexstr))
261 {
262 ajStrExchangeSC(&gene, regexstr, "");
263 }
264 }
265
266 ajRegFree(&regex);
267 }
268 }
269 }
270 else
271 {
272 if(valid)
273 {
274 if(isseq)
275 {
276 ajStrFmtWrap(&line, 60);
277 ajFmtPrintF(outfile, ">%S\n%S\n", head, line);
278 }
279 else
280 {
281 ajFmtPrintF(outfile, "%S,%S\n", head, line);
282 }
283
284 valid = ajFalse;
285 }
286 }
287 }
288
289 ajFileClose(&outfile);
290
291 ajStrDel(&restid);
292 ajStrDel(&seqid);
293 ajStrDel(&inseq);
294 }
295
296 ajSeqallDel(&seqall);
297 ajSeqDel(&seq);
298 ajStrDel(&access);
299 ajStrDel(&gene);
300
301 embExit();
302 }