0
|
1 /* The MIT License
|
|
2
|
|
3 Copyright (c) 2008 by Genome Research Ltd (GRL).
|
|
4 2010 by Attractive Chaos <attractor@live.co.uk>
|
|
5
|
|
6 Permission is hereby granted, free of charge, to any person obtaining
|
|
7 a copy of this software and associated documentation files (the
|
|
8 "Software"), to deal in the Software without restriction, including
|
|
9 without limitation the rights to use, copy, modify, merge, publish,
|
|
10 distribute, sublicense, and/or sell copies of the Software, and to
|
|
11 permit persons to whom the Software is furnished to do so, subject to
|
|
12 the following conditions:
|
|
13
|
|
14 The above copyright notice and this permission notice shall be
|
|
15 included in all copies or substantial portions of the Software.
|
|
16
|
|
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
18 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
19 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
20 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
21 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
22 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
23 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
24 SOFTWARE.
|
|
25 */
|
|
26
|
|
27 /* Probably I will not do socket programming in the next few years and
|
|
28 therefore I decide to heavily annotate this file, for Linux and
|
|
29 Windows as well. -ac */
|
|
30
|
|
31 #include <time.h>
|
|
32 #include <stdio.h>
|
|
33 #include <ctype.h>
|
|
34 #include <stdlib.h>
|
|
35 #include <string.h>
|
|
36 #include <errno.h>
|
|
37 #include <unistd.h>
|
|
38 #include <sys/types.h>
|
|
39
|
|
40 #ifndef _WIN32
|
|
41 #include <netdb.h>
|
|
42 #include <arpa/inet.h>
|
|
43 #include <sys/socket.h>
|
|
44 #endif
|
|
45
|
|
46 #include "htslib/knetfile.h"
|
|
47
|
|
48 /* In winsock.h, the type of a socket is SOCKET, which is: "typedef
|
|
49 * u_int SOCKET". An invalid SOCKET is: "(SOCKET)(~0)", or signed
|
|
50 * integer -1. In knetfile.c, I use "int" for socket type
|
|
51 * throughout. This should be improved to avoid confusion.
|
|
52 *
|
|
53 * In Linux/Mac, recv() and read() do almost the same thing. You can see
|
|
54 * in the header file that netread() is simply an alias of read(). In
|
|
55 * Windows, however, they are different and using recv() is mandatory.
|
|
56 */
|
|
57
|
|
/* Wait until the descriptor `fd` is ready for reading (is_read != 0) or
 * for writing (is_read == 0), giving up after five seconds.
 *
 * Returns select()'s result unchanged.  On non-Windows builds a result of
 * -1 is reported with perror(); on Windows both a time-out (0) and
 * SOCKET_ERROR are reported on stderr. */
static int socket_wait(int fd, int is_read)
{
    fd_set watched;
    struct timeval timeout;
    int status;

    timeout.tv_sec = 5;     /* five-second time-out */
    timeout.tv_usec = 0;
    FD_ZERO(&watched);
    FD_SET(fd, &watched);
    /* The same set is handed to select() as either the read set or the
     * write set, never both. */
    status = is_read ? select(fd + 1, &watched, 0, 0, &timeout)
                     : select(fd + 1, 0, &watched, 0, &timeout);
#ifndef _WIN32
    if (status == -1) perror("select");
#else
    if (status == SOCKET_ERROR)
        fprintf(stderr, "select: %d\n", WSAGetLastError());
    else if (status == 0)
        fprintf(stderr, "select time-out\n");
#endif
    return status;
}
|
|
81
|
|
82 #ifndef _WIN32
|
|
/* Resolve host:port with getaddrinfo() and return a connected TCP socket.
 * This variant is only compiled on non-Windows builds.  It is adapted
 * from an example in "Beej's Guide to Network Programming"
 * (http://beej.us/guide/bgnet/).
 *
 * Every address returned by the resolver is tried in order until one
 * connects.  Returns the connected descriptor, or -1 on failure after
 * printing a diagnostic to stderr.  A socket that fails to be configured
 * or connected is closed before moving on, so no descriptor is leaked. */
static int socket_connect(const char *host, const char *port)
{
    int ai_err, on = 1, fd = -1, saved_errno = 0;
    struct linger lng = { 0, 0 };
    struct addrinfo hints, *res = 0, *ai;
    const char *failed = "socket";   /* name of the call that failed last */

    memset(&hints, 0, sizeof(struct addrinfo));
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;
    /* In Unix/Mac, getaddrinfo() is the most convenient way to get
     * server information. */
    if ((ai_err = getaddrinfo(host, port, &hints, &res)) != 0) {
        fprintf(stderr, "can't resolve %s:%s: %s\n", host, port, gai_strerror(ai_err));
        return -1;
    }
    for (ai = res; ai != 0; ai = ai->ai_next) {
        fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
        if (fd == -1) {
            failed = "socket";
            saved_errno = errno;
            continue;
        }
        /* The following two setsockopt() are used by ftplib
         * (http://nbpfaus.net/~pfau/ftplib/). I am not sure if they
         * are necessary. */
        if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1)
            failed = "setsockopt";
        else if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1)
            failed = "setsockopt";
        else if (connect(fd, ai->ai_addr, ai->ai_addrlen) != 0)
            failed = "connect";
        else
            break;                   /* connected */
        /* Save errno before close() can overwrite it. */
        saved_errno = errno;
        close(fd);
        fd = -1;
    }
    freeaddrinfo(res);
    if (fd == -1) {
        errno = saved_errno;
        perror(failed);
    }
    return fd;
}
|
|
109 #else
|
|
/* Write the decimal representation of x into buf and return buf.
 * Hand-rolled because MinGW's printf has problems with "%lld".
 *
 * buf must have room for at least 21 bytes: up to 19 digits, an optional
 * leading '-' and the terminating NUL.  Negative values (including the
 * most negative int64_t) are formatted with a leading minus sign. */
char *int64tostr(char *buf, int64_t x)
{
    /* Work on the magnitude as an unsigned value so that negating the
     * most negative input cannot overflow. */
    uint64_t mag = x < 0 ? (uint64_t)0 - (uint64_t)x : (uint64_t)x;
    int len = 0, lo, hi;

    /* Emit digits least-significant first, then reverse in place. */
    do {
        buf[len++] = (char)('0' + (int)(mag % 10));
        mag /= 10;
    } while (mag);
    if (x < 0) buf[len++] = '-';
    buf[len] = 0;
    for (lo = 0, hi = len - 1; lo < hi; ++lo, --hi) {
        char c = buf[lo];
        buf[lo] = buf[hi];
        buf[hi] = c;
    }
    return buf;
}
|
|
125
|
|
/* Convert the run of decimal digits at the start of buf to an int64_t.
 * Parsing stops at the first character that is not '0'..'9', so trailing
 * text such as "\r\n" does not corrupt the value.  Returns 0 when buf
 * does not start with a digit.  No sign handling and no overflow
 * detection is performed. */
int64_t strtoint64(const char *buf)
{
    int64_t x = 0;
    for (; *buf >= '0' && *buf <= '9'; ++buf)
        x = x * 10 + (*buf - '0');
    return x;
}
|
|
133 /* In windows, the first thing is to establish the TCP connection. */
|
|
134 int knet_win32_init()
|
|
135 {
|
|
136 WSADATA wsaData;
|
|
137 return WSAStartup(MAKEWORD(2, 2), &wsaData);
|
|
138 }
|
|
/* Windows only: counterpart of knet_win32_init(); calls WSACleanup()
 * and ignores its result. */
void knet_win32_destroy()
{
    (void)WSACleanup();
}
|
|
143 /* A slightly modfied version of the following function also works on
|
|
144 * Mac (and presummably Linux). However, this function is not stable on
|
|
145 * my Mac. It sometimes works fine but sometimes does not. Therefore for
|
|
146 * non-Windows OS, I do not use this one. */
|
|
147 static SOCKET socket_connect(const char *host, const char *port)
|
|
148 {
|
|
149 #define __err_connect(func) \
|
|
150 do { \
|
|
151 fprintf(stderr, "%s: %d\n", func, WSAGetLastError()); \
|
|
152 return -1; \
|
|
153 } while (0)
|
|
154
|
|
155 int on = 1;
|
|
156 SOCKET fd;
|
|
157 struct linger lng = { 0, 0 };
|
|
158 struct sockaddr_in server;
|
|
159 struct hostent *hp = 0;
|
|
160 // open socket
|
|
161 if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == INVALID_SOCKET) __err_connect("socket");
|
|
162 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char*)&on, sizeof(on)) == -1) __err_connect("setsockopt");
|
|
163 if (setsockopt(fd, SOL_SOCKET, SO_LINGER, (char*)&lng, sizeof(lng)) == -1) __err_connect("setsockopt");
|
|
164 // get host info
|
|
165 if (isalpha(host[0])) hp = gethostbyname(host);
|
|
166 else {
|
|
167 struct in_addr addr;
|
|
168 addr.s_addr = inet_addr(host);
|
|
169 hp = gethostbyaddr((char*)&addr, 4, AF_INET);
|
|
170 }
|
|
171 if (hp == 0) __err_connect("gethost");
|
|
172 // connect
|
|
173 server.sin_addr.s_addr = *((unsigned long*)hp->h_addr);
|
|
174 server.sin_family= AF_INET;
|
|
175 server.sin_port = htons(atoi(port));
|
|
176 if (connect(fd, (struct sockaddr*)&server, sizeof(server)) != 0) __err_connect("connect");
|
|
177 // freehostent(hp); // strangely in MSDN, hp is NOT freed (memory leak?!)
|
|
178 return fd;
|
|
179 }
|
|
180 #endif
|
|
181
|
|
/* Read up to `len` bytes from socket `fd` into `buf`.
 *
 * recv() and read() may not deliver the requested length in one call, so
 * they are called repeatedly.  The loop ends when `len` bytes have been
 * read, when socket_wait() reports that the socket is not ready, or when
 * netread() returns 0 (end of file) or a negative value (error).
 * Returns the number of bytes actually stored in `buf`, which may be
 * less than `len`. */
static off_t my_netread(int fd, void *buf, off_t len)
{
    off_t rest = len, curr, l = 0;
    while (rest > 0) {
        if (socket_wait(fd, 1) <= 0) break; // socket is not ready for reading
        curr = netread(fd, (void*)((char*)buf + l), rest);
        /* According to the glibc manual, section 13.2, a zero returned
         * value indicates end-of-file (EOF).  A negative value is an
         * error; it must not be added to the running counters. */
        if (curr <= 0) break;
        l += curr; rest -= curr;
    }
    return l;
}
|
|
199
|
|
200 /*************************
|
|
201 * FTP specific routines *
|
|
202 *************************/
|
|
203
|
|
204 static int kftp_get_response(knetFile *ftp)
|
|
205 {
|
|
206 #ifndef _WIN32
|
|
207 unsigned char c;
|
|
208 #else
|
|
209 char c;
|
|
210 #endif
|
|
211 int n = 0;
|
|
212 char *p;
|
|
213 if (socket_wait(ftp->ctrl_fd, 1) <= 0) return 0;
|
|
214 while (netread(ftp->ctrl_fd, &c, 1)) { // FIXME: this is *VERY BAD* for unbuffered I/O
|
|
215 //fputc(c, stderr);
|
|
216 if (n >= ftp->max_response) {
|
|
217 ftp->max_response = ftp->max_response? ftp->max_response<<1 : 256;
|
|
218 ftp->response = (char*)realloc(ftp->response, ftp->max_response);
|
|
219 }
|
|
220 ftp->response[n++] = c;
|
|
221 if (c == '\n') {
|
|
222 if (n >= 4 && isdigit(ftp->response[0]) && isdigit(ftp->response[1]) && isdigit(ftp->response[2])
|
|
223 && ftp->response[3] != '-') break;
|
|
224 n = 0;
|
|
225 continue;
|
|
226 }
|
|
227 }
|
|
228 if (n < 2) return -1;
|
|
229 ftp->response[n-2] = 0;
|
|
230 return strtol(ftp->response, &p, 0);
|
|
231 }
|
|
232
|
|
233 static int kftp_send_cmd(knetFile *ftp, const char *cmd, int is_get)
|
|
234 {
|
|
235 if (socket_wait(ftp->ctrl_fd, 0) <= 0) return -1; // socket is not ready for writing
|
|
236 int len = strlen(cmd);
|
|
237 if ( netwrite(ftp->ctrl_fd, cmd, len) != len ) return -1;
|
|
238 return is_get? kftp_get_response(ftp) : 0;
|
|
239 }
|
|
240
|
|
241 static int kftp_pasv_prep(knetFile *ftp)
|
|
242 {
|
|
243 char *p;
|
|
244 int v[6];
|
|
245 kftp_send_cmd(ftp, "PASV\r\n", 1);
|
|
246 for (p = ftp->response; *p && *p != '('; ++p);
|
|
247 if (*p != '(') return -1;
|
|
248 ++p;
|
|
249 sscanf(p, "%d,%d,%d,%d,%d,%d", &v[0], &v[1], &v[2], &v[3], &v[4], &v[5]);
|
|
250 memcpy(ftp->pasv_ip, v, 4 * sizeof(int));
|
|
251 ftp->pasv_port = (v[4]<<8&0xff00) + v[5];
|
|
252 return 0;
|
|
253 }
|
|
254
|
|
255
|
|
256 static int kftp_pasv_connect(knetFile *ftp)
|
|
257 {
|
|
258 char host[80], port[10];
|
|
259 if (ftp->pasv_port == 0) {
|
|
260 fprintf(stderr, "[kftp_pasv_connect] kftp_pasv_prep() is not called before hand.\n");
|
|
261 return -1;
|
|
262 }
|
|
263 sprintf(host, "%d.%d.%d.%d", ftp->pasv_ip[0], ftp->pasv_ip[1], ftp->pasv_ip[2], ftp->pasv_ip[3]);
|
|
264 sprintf(port, "%d", ftp->pasv_port);
|
|
265 ftp->fd = socket_connect(host, port);
|
|
266 if (ftp->fd == -1) return -1;
|
|
267 return 0;
|
|
268 }
|
|
269
|
|
270 int kftp_connect(knetFile *ftp)
|
|
271 {
|
|
272 ftp->ctrl_fd = socket_connect(ftp->host, ftp->port);
|
|
273 if (ftp->ctrl_fd == -1) return -1;
|
|
274 kftp_get_response(ftp);
|
|
275 kftp_send_cmd(ftp, "USER anonymous\r\n", 1);
|
|
276 kftp_send_cmd(ftp, "PASS kftp@\r\n", 1);
|
|
277 kftp_send_cmd(ftp, "TYPE I\r\n", 1);
|
|
278 return 0;
|
|
279 }
|
|
280
|
|
281 int kftp_reconnect(knetFile *ftp)
|
|
282 {
|
|
283 if (ftp->ctrl_fd != -1) {
|
|
284 netclose(ftp->ctrl_fd);
|
|
285 ftp->ctrl_fd = -1;
|
|
286 }
|
|
287 netclose(ftp->fd);
|
|
288 ftp->fd = -1;
|
|
289 return kftp_connect(ftp);
|
|
290 }
|
|
291
|
|
292 // initialize ->type, ->host, ->retr and ->size
|
|
293 knetFile *kftp_parse_url(const char *fn, const char *mode)
|
|
294 {
|
|
295 knetFile *fp;
|
|
296 char *p;
|
|
297 int l;
|
|
298 if (strstr(fn, "ftp://") != fn) return 0;
|
|
299 for (p = (char*)fn + 6; *p && *p != '/'; ++p);
|
|
300 if (*p != '/') return 0;
|
|
301 l = p - fn - 6;
|
|
302 fp = (knetFile*)calloc(1, sizeof(knetFile));
|
|
303 fp->type = KNF_TYPE_FTP;
|
|
304 fp->fd = -1;
|
|
305 /* the Linux/Mac version of socket_connect() also recognizes a port
|
|
306 * like "ftp", but the Windows version does not. */
|
|
307 fp->port = strdup("21");
|
|
308 fp->host = (char*)calloc(l + 1, 1);
|
|
309 if (strchr(mode, 'c')) fp->no_reconnect = 1;
|
|
310 strncpy(fp->host, fn + 6, l);
|
|
311 fp->retr = (char*)calloc(strlen(p) + 8, 1);
|
|
312 sprintf(fp->retr, "RETR %s\r\n", p);
|
|
313 fp->size_cmd = (char*)calloc(strlen(p) + 8, 1);
|
|
314 sprintf(fp->size_cmd, "SIZE %s\r\n", p);
|
|
315 fp->seek_offset = 0;
|
|
316 return fp;
|
|
317 }
|
|
318 // place ->fd at offset off
|
|
319 int kftp_connect_file(knetFile *fp)
|
|
320 {
|
|
321 int ret;
|
|
322 long long file_size;
|
|
323 if (fp->fd != -1) {
|
|
324 netclose(fp->fd);
|
|
325 if (fp->no_reconnect) kftp_get_response(fp);
|
|
326 }
|
|
327 kftp_pasv_prep(fp);
|
|
328 kftp_send_cmd(fp, fp->size_cmd, 1);
|
|
329 #ifndef _WIN32
|
|
330 // If the file does not exist, the response will be "550 Could not get file
|
|
331 // size". Be silent on failure, hts_idx_load can be trying the existence of .csi or .tbi.
|
|
332 if ( sscanf(fp->response,"%*d %lld", &file_size) != 1 ) return -1;
|
|
333 #else
|
|
334 const char *p = fp->response;
|
|
335 while (*p != ' ') ++p;
|
|
336 while (*p < '0' || *p > '9') ++p;
|
|
337 file_size = strtoint64(p);
|
|
338 #endif
|
|
339 fp->file_size = file_size;
|
|
340 if (fp->offset>=0) {
|
|
341 char tmp[32];
|
|
342 #ifndef _WIN32
|
|
343 sprintf(tmp, "REST %lld\r\n", (long long)fp->offset);
|
|
344 #else
|
|
345 strcpy(tmp, "REST ");
|
|
346 int64tostr(tmp + 5, fp->offset);
|
|
347 strcat(tmp, "\r\n");
|
|
348 #endif
|
|
349 kftp_send_cmd(fp, tmp, 1);
|
|
350 }
|
|
351 kftp_send_cmd(fp, fp->retr, 0);
|
|
352 kftp_pasv_connect(fp);
|
|
353 ret = kftp_get_response(fp);
|
|
354 if (ret != 150) {
|
|
355 fprintf(stderr, "[kftp_connect_file] %s\n", fp->response);
|
|
356 netclose(fp->fd);
|
|
357 fp->fd = -1;
|
|
358 return -1;
|
|
359 }
|
|
360 fp->is_ready = 1;
|
|
361 return 0;
|
|
362 }
|
|
363
|
|
364
|
|
365 /**************************
|
|
366 * HTTP specific routines *
|
|
367 **************************/
|
|
368
|
|
369 knetFile *khttp_parse_url(const char *fn, const char *mode)
|
|
370 {
|
|
371 knetFile *fp;
|
|
372 char *p, *proxy, *q;
|
|
373 int l;
|
|
374 if (strstr(fn, "http://") != fn) return 0;
|
|
375 // set ->http_host
|
|
376 for (p = (char*)fn + 7; *p && *p != '/'; ++p);
|
|
377 l = p - fn - 7;
|
|
378 fp = (knetFile*)calloc(1, sizeof(knetFile));
|
|
379 fp->http_host = (char*)calloc(l + 1, 1);
|
|
380 strncpy(fp->http_host, fn + 7, l);
|
|
381 fp->http_host[l] = 0;
|
|
382 for (q = fp->http_host; *q && *q != ':'; ++q);
|
|
383 if (*q == ':') *q++ = 0;
|
|
384 // get http_proxy
|
|
385 proxy = getenv("http_proxy");
|
|
386 // set ->host, ->port and ->path
|
|
387 if (proxy == 0) {
|
|
388 fp->host = strdup(fp->http_host); // when there is no proxy, server name is identical to http_host name.
|
|
389 fp->port = strdup(*q? q : "80");
|
|
390 fp->path = strdup(*p? p : "/");
|
|
391 } else {
|
|
392 fp->host = (strstr(proxy, "http://") == proxy)? strdup(proxy + 7) : strdup(proxy);
|
|
393 for (q = fp->host; *q && *q != ':'; ++q);
|
|
394 if (*q == ':') *q++ = 0;
|
|
395 fp->port = strdup(*q? q : "80");
|
|
396 fp->path = strdup(fn);
|
|
397 }
|
|
398 fp->type = KNF_TYPE_HTTP;
|
|
399 fp->ctrl_fd = fp->fd = -1;
|
|
400 fp->seek_offset = 0;
|
|
401 return fp;
|
|
402 }
|
|
403
|
|
404 int khttp_connect_file(knetFile *fp)
|
|
405 {
|
|
406 int ret, l = 0;
|
|
407 char *buf, *p;
|
|
408 if (fp->fd != -1) netclose(fp->fd);
|
|
409 fp->fd = socket_connect(fp->host, fp->port);
|
|
410 buf = (char*)calloc(0x10000, 1); // FIXME: I am lazy... But in principle, 64KB should be large enough.
|
|
411 l += sprintf(buf + l, "GET %s HTTP/1.0\r\nHost: %s\r\n", fp->path, fp->http_host);
|
|
412 l += sprintf(buf + l, "Range: bytes=%lld-\r\n", (long long)fp->offset);
|
|
413 l += sprintf(buf + l, "\r\n");
|
|
414 if ( netwrite(fp->fd, buf, l) != l ) { free(buf); return -1; }
|
|
415 l = 0;
|
|
416 while (netread(fp->fd, buf + l, 1)) { // read HTTP header; FIXME: bad efficiency
|
|
417 if (buf[l] == '\n' && l >= 3)
|
|
418 if (strncmp(buf + l - 3, "\r\n\r\n", 4) == 0) break;
|
|
419 ++l;
|
|
420 }
|
|
421 buf[l] = 0;
|
|
422 if (l < 14) { // prematured header
|
|
423 free(buf);
|
|
424 netclose(fp->fd);
|
|
425 fp->fd = -1;
|
|
426 return -1;
|
|
427 }
|
|
428 ret = strtol(buf + 8, &p, 0); // HTTP return code
|
|
429 if (ret == 200 && fp->offset>0) { // 200 (complete result); then skip beginning of the file
|
|
430 off_t rest = fp->offset;
|
|
431 while (rest) {
|
|
432 off_t l = rest < 0x10000? rest : 0x10000;
|
|
433 rest -= my_netread(fp->fd, buf, l);
|
|
434 }
|
|
435 } else if (ret != 206 && ret != 200) {
|
|
436 // failed to open file
|
|
437 free(buf);
|
|
438 netclose(fp->fd);
|
|
439 switch (ret) {
|
|
440 case 401: errno = EPERM; break;
|
|
441 case 403: errno = EACCES; break;
|
|
442 case 404: errno = ENOENT; break;
|
|
443 case 407: errno = EPERM; break;
|
|
444 case 408: errno = ETIMEDOUT; break;
|
|
445 case 410: errno = ENOENT; break;
|
|
446 case 503: errno = EAGAIN; break;
|
|
447 case 504: errno = ETIMEDOUT; break;
|
|
448 default: errno = (ret >= 400 && ret < 500)? EINVAL : EIO; break;
|
|
449 }
|
|
450 fp->fd = -1;
|
|
451 return -1;
|
|
452 }
|
|
453 free(buf);
|
|
454 fp->is_ready = 1;
|
|
455 return 0;
|
|
456 }
|
|
457
|
|
458 /********************
|
|
459 * Generic routines *
|
|
460 ********************/
|
|
461
|
|
462 knetFile *knet_open(const char *fn, const char *mode)
|
|
463 {
|
|
464 knetFile *fp = 0;
|
|
465 if (mode[0] != 'r') {
|
|
466 fprintf(stderr, "[kftp_open] only mode \"r\" is supported.\n");
|
|
467 return 0;
|
|
468 }
|
|
469 if (strstr(fn, "ftp://") == fn) {
|
|
470 fp = kftp_parse_url(fn, mode);
|
|
471 if (fp == 0) return 0;
|
|
472 if (kftp_connect(fp) == -1) {
|
|
473 knet_close(fp);
|
|
474 return 0;
|
|
475 }
|
|
476 kftp_connect_file(fp);
|
|
477 } else if (strstr(fn, "http://") == fn) {
|
|
478 fp = khttp_parse_url(fn, mode);
|
|
479 if (fp == 0) return 0;
|
|
480 khttp_connect_file(fp);
|
|
481 } else { // local file
|
|
482 #ifdef _WIN32
|
|
483 /* In windows, O_BINARY is necessary. In Linux/Mac, O_BINARY may
|
|
484 * be undefined on some systems, although it is defined on my
|
|
485 * Mac and the Linux I have tested on. */
|
|
486 int fd = open(fn, O_RDONLY | O_BINARY);
|
|
487 #else
|
|
488 int fd = open(fn, O_RDONLY);
|
|
489 #endif
|
|
490 if (fd == -1) {
|
|
491 perror("open");
|
|
492 return 0;
|
|
493 }
|
|
494 fp = (knetFile*)calloc(1, sizeof(knetFile));
|
|
495 fp->type = KNF_TYPE_LOCAL;
|
|
496 fp->fd = fd;
|
|
497 fp->ctrl_fd = -1;
|
|
498 }
|
|
499 if (fp && fp->fd == -1) {
|
|
500 knet_close(fp);
|
|
501 return 0;
|
|
502 }
|
|
503 return fp;
|
|
504 }
|
|
505
|
|
506 knetFile *knet_dopen(int fd, const char *mode)
|
|
507 {
|
|
508 knetFile *fp = (knetFile*)calloc(1, sizeof(knetFile));
|
|
509 fp->type = KNF_TYPE_LOCAL;
|
|
510 fp->fd = fd;
|
|
511 return fp;
|
|
512 }
|
|
513
|
|
514 ssize_t knet_read(knetFile *fp, void *buf, size_t len)
|
|
515 {
|
|
516 off_t l = 0;
|
|
517 if (fp->fd == -1) return 0;
|
|
518 if (fp->type == KNF_TYPE_FTP) {
|
|
519 if (fp->is_ready == 0) {
|
|
520 if (!fp->no_reconnect) kftp_reconnect(fp);
|
|
521 kftp_connect_file(fp);
|
|
522 }
|
|
523 } else if (fp->type == KNF_TYPE_HTTP) {
|
|
524 if (fp->is_ready == 0)
|
|
525 khttp_connect_file(fp);
|
|
526 }
|
|
527 if (fp->type == KNF_TYPE_LOCAL) { // on Windows, the following block is necessary; not on UNIX
|
|
528 size_t rest = len;
|
|
529 ssize_t curr;
|
|
530 while (rest) {
|
|
531 do {
|
|
532 curr = read(fp->fd, (void*)((char*)buf + l), rest);
|
|
533 } while (curr < 0 && EINTR == errno);
|
|
534 if (curr < 0) return -1;
|
|
535 if (curr == 0) break;
|
|
536 l += curr; rest -= curr;
|
|
537 }
|
|
538 } else l = my_netread(fp->fd, buf, len);
|
|
539 fp->offset += l;
|
|
540 return l;
|
|
541 }
|
|
542
|
|
543 off_t knet_seek(knetFile *fp, off_t off, int whence)
|
|
544 {
|
|
545 if (whence == SEEK_SET && off == fp->offset) return 0;
|
|
546 if (fp->type == KNF_TYPE_LOCAL) {
|
|
547 /* Be aware that lseek() returns the offset after seeking, while fseek() returns zero on success. */
|
|
548 off_t offset = lseek(fp->fd, off, whence);
|
|
549 if (offset == -1) return -1;
|
|
550 fp->offset = offset;
|
|
551 return fp->offset;
|
|
552 } else if (fp->type == KNF_TYPE_FTP) {
|
|
553 if (whence == SEEK_CUR) fp->offset += off;
|
|
554 else if (whence == SEEK_SET) fp->offset = off;
|
|
555 else if (whence == SEEK_END) fp->offset = fp->file_size + off;
|
|
556 else return -1;
|
|
557 fp->is_ready = 0;
|
|
558 return fp->offset;
|
|
559 } else if (fp->type == KNF_TYPE_HTTP) {
|
|
560 if (whence == SEEK_END) { // FIXME: can we allow SEEK_END in future?
|
|
561 fprintf(stderr, "[knet_seek] SEEK_END is not supported for HTTP. Offset is unchanged.\n");
|
|
562 errno = ESPIPE;
|
|
563 return -1;
|
|
564 }
|
|
565 if (whence == SEEK_CUR) fp->offset += off;
|
|
566 else if (whence == SEEK_SET) fp->offset = off;
|
|
567 else return -1;
|
|
568 fp->is_ready = 0;
|
|
569 return fp->offset;
|
|
570 }
|
|
571 errno = EINVAL;
|
|
572 fprintf(stderr,"[knet_seek] %s\n", strerror(errno));
|
|
573 return -1;
|
|
574 }
|
|
575
|
|
576 int knet_close(knetFile *fp)
|
|
577 {
|
|
578 if (fp == 0) return 0;
|
|
579 if (fp->ctrl_fd != -1) netclose(fp->ctrl_fd); // FTP specific
|
|
580 if (fp->fd != -1) {
|
|
581 /* On Linux/Mac, netclose() is an alias of close(), but on
|
|
582 * Windows, it is an alias of closesocket(). */
|
|
583 if (fp->type == KNF_TYPE_LOCAL) close(fp->fd);
|
|
584 else netclose(fp->fd);
|
|
585 }
|
|
586 free(fp->host); free(fp->port);
|
|
587 free(fp->response); free(fp->retr); // FTP specific
|
|
588 free(fp->path); free(fp->http_host); // HTTP specific
|
|
589 free(fp);
|
|
590 return 0;
|
|
591 }
|
|
592
|
|
593 #ifdef KNETFILE_MAIN
|
|
594 int main(void)
|
|
595 {
|
|
596 char *buf;
|
|
597 knetFile *fp;
|
|
598 int type = 4, l;
|
|
599 #ifdef _WIN32
|
|
600 knet_win32_init();
|
|
601 #endif
|
|
602 buf = calloc(0x100000, 1);
|
|
603 if (type == 0) {
|
|
604 fp = knet_open("knetfile.c", "r");
|
|
605 knet_seek(fp, 1000, SEEK_SET);
|
|
606 } else if (type == 1) { // NCBI FTP, large file
|
|
607 fp = knet_open("ftp://ftp.ncbi.nih.gov/1000genomes/ftp/data/NA12878/alignment/NA12878.chrom6.SLX.SRP000032.2009_06.bam", "r");
|
|
608 knet_seek(fp, 2500000000ll, SEEK_SET);
|
|
609 l = knet_read(fp, buf, 255);
|
|
610 } else if (type == 2) {
|
|
611 fp = knet_open("ftp://ftp.sanger.ac.uk/pub4/treefam/tmp/index.shtml", "r");
|
|
612 knet_seek(fp, 1000, SEEK_SET);
|
|
613 } else if (type == 3) {
|
|
614 fp = knet_open("http://www.sanger.ac.uk/Users/lh3/index.shtml", "r");
|
|
615 knet_seek(fp, 1000, SEEK_SET);
|
|
616 } else if (type == 4) {
|
|
617 fp = knet_open("http://www.sanger.ac.uk/Users/lh3/ex1.bam", "r");
|
|
618 knet_read(fp, buf, 10000);
|
|
619 knet_seek(fp, 20000, SEEK_SET);
|
|
620 knet_seek(fp, 10000, SEEK_SET);
|
|
621 l = knet_read(fp, buf+10000, 10000000) + 10000;
|
|
622 }
|
|
623 if (type != 4 && type != 1) {
|
|
624 knet_read(fp, buf, 255);
|
|
625 buf[255] = 0;
|
|
626 printf("%s\n", buf);
|
|
627 } else write(fileno(stdout), buf, l);
|
|
628 knet_close(fp);
|
|
629 free(buf);
|
|
630 return 0;
|
|
631 }
|
|
632 #endif
|