Mercurial > repos > vipints > rdiff
comparison rDiff/src/octave/importdata.m @ 0:0f80a5141704
version 0.3 uploaded
author | vipints |
---|---|
date | Thu, 14 Feb 2013 23:38:36 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0f80a5141704 |
---|---|
1 ## Copyright (C) 2012 Erik Kjellson | |
2 ## | |
3 ## This file is part of Octave. | |
4 ## | |
5 ## Octave is free software; you can redistribute it and/or modify it | |
6 ## under the terms of the GNU General Public License as published by | |
7 ## the Free Software Foundation; either version 3 of the License, or (at | |
8 ## your option) any later version. | |
9 ## | |
10 ## Octave is distributed in the hope that it will be useful, but | |
11 ## WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 ## General Public License for more details. | |
14 ## | |
15 ## You should have received a copy of the GNU General Public License | |
16 ## along with Octave; see the file COPYING. If not, see | |
17 ## <http://www.gnu.org/licenses/>. | |
18 | |
19 ## -*- texinfo -*- | |
20 ## @deftypefn {Function File} {@var{A} =} importdata (@var{fname}) | |
21 ## @deftypefnx {Function File} {@var{A} =} importdata (@var{fname}, @var{delimiter}) | |
22 ## @deftypefnx {Function File} {@var{A} =} importdata (@var{fname}, @var{delimiter}, @var{header_rows}) | |
23 ## @deftypefnx {Function File} {[@var{A}, @var{delimiter}] =} importdata (...) | |
24 ## @deftypefnx {Function File} {[@var{A}, @var{delimiter}, @var{header_rows}] =} importdata (...) | |
25 ## Import data from a file. | |
26 ## | |
27 ## Import the contents of file @var{fname} into the workspace. | |
28 ## | |
29 ## Input parameters: | |
30 ## @itemize | |
31 ## @item @var{fname} | |
32 ## The file name for the file to import. | |
33 ## | |
34 ## @item @var{delimiter} | |
35 ## The character separating columns of data. Use @code{\t} for tab. | |
36 ## (Only valid for ascii files) | |
37 ## | |
38 ## @item @var{header_rows} | |
39 ## Number of header rows before the data begins. (Only valid for ascii files) | |
40 ## @end itemize | |
41 ## | |
42 ## Different file types are supported: | |
43 ## @itemize | |
44 ## @item Ascii table | |
45 ## | |
46 ## Importing ascii table using the specified number of header rows and | |
47 ## the specified delimiter. | |
48 ## | |
49 ## @item Image file | |
50 ## | |
51 ## @item @sc{Matlab} file | |
52 ## | |
53 ## @item Spreadsheet files (depending on external software) | |
54 ## | |
55 ## @item Wav file | |
56 ## | |
57 ## @end itemize | |
58 ## | |
59 ## @seealso{textscan, dlmread, csvread, load} | |
60 ## @end deftypefn | |
61 | |
62 ## Author: Erik Kjellson <erikiiofph7@users.sourceforge.net> | |
63 | |
## Import data from the file FNAME, dispatching on the file extension.
##
## Inputs (positional, via varargin):
##   1. fname       - file name (string); "-pastespecial" is rejected.
##   2. delimiter   - optional single character, or the two-character
##                    string '\t' meaning a tab.
##   3. header_rows - optional non-negative integer scalar.
##
## Outputs:
##   output      - imported data; a struct, or the single non-empty field
##                 of that struct when only one field is left.
##   delimiter   - delimiter reported for the file (NaN for image, .mat
##                 and wav files).
##   header_rows - number of header rows reported for the file.
function [output, delimiter, header_rows] = importdata (varargin)

  ## Default values
  fname = "";
  delimiter = "";
  header_rows = -1;

  ##########

  ## Check input arguments

  if (nargin < 1)
    print_usage ();
  endif

  fname = varargin{1};
  ## Check that the file name really is a string
  if (! ischar (fname))
    error ("importdata: file name needs to be a string");
  endif
  if (strcmpi (fname, "-pastespecial"))
    error ("importdata: option -pastespecial not implemented");
  endif

  if (nargin > 1)
    delimiter = varargin{2};
    ## Check that the delimiter really is a string
    if (! ischar (delimiter))
      error ("importdata: delimiter needs to be a character");
    endif
    ## The only multi-character delimiter accepted is backslash + "t".
    if (length (delimiter) > 1 && ! strcmpi (delimiter, "\\t"))
      error ("importdata: delimiter cannot be longer than 1 character");
    endif
    ## A lone backslash is doubled so it is a valid pattern later on.
    if (strcmpi (delimiter, "\\"))
      delimiter = "\\\\";
    endif
  endif

  if (nargin > 2)
    header_rows = varargin{3};
    ## The message promises "an integer number >= 0", so enforce all of
    ## it: a real, finite, integer-valued scalar.  NaN, Inf, fractions
    ## and vectors would otherwise slip through and break the row
    ## indexing done by the ASCII reader.
    valid_count = (isnumeric (header_rows) && isscalar (header_rows) ...
                   && isreal (header_rows));
    if (valid_count)
      valid_count = (isfinite (header_rows) && header_rows >= 0 ...
                     && header_rows == fix (header_rows));
    endif
    if (! valid_count)
      error ("importdata: number of header rows needs to be an integer number >= 0");
    endif
  endif

  if (nargin > 3)
    error ("importdata: too many input arguments");
  endif

  ##########

  ## Check file format
  ## Get the extension from the file name.  Only three outputs are
  ## requested; the directory and base name are not used.
  [unused_dir, unused_name, fileExt] = fileparts (fname);
  ## Make sure file extension is in lower case.
  fileExt = lower (fileExt);

  switch (fileExt)
    case {".au", ".snd"}
      error ("importdata: not implemented for file format %s", fileExt);
    case ".avi"
      error ("importdata: not implemented for file format %s", fileExt);
    case {".bmp", ".cur", ".gif", ".hdf", ".ico", ".jpe", ".jpeg", ".jpg", ...
          ".pbm", ".pcx", ".pgm", ".png", ".pnm", ".ppm", ".ras", ...
          ".tif", ".tiff", ".xwd"}
      delimiter = NaN;
      header_rows = 0;
      [output.cdata, output.colormap, output.alpha] = imread (fname);
    case ".mat"
      delimiter = NaN;
      header_rows = 0;
      output = load (fname);
    case {".wk1", ".xls", ".xlsx", ".dbf", ".pxl"}
      ## If there's no Excel file support simply fall back to unimplemented.m
      output = xlsread (fname);
    case {".ods", ".sxc", ".fods", ".uos", ".xml"}
      ## unimplemented.m only knows ML functions; odsread isn't one but is in OF
      try
        output = odsread (fname);
      catch
        ## Fall back to unimplemented.m.
        output = xlsread (fname);
      end_try_catch
    case {".wav", ".wave"}
      delimiter = NaN;
      header_rows = 0;
      [output.data, output.fs] = wavread (fname);
    otherwise
      ## Assume the file is in ascii format.
      [output, delimiter, header_rows] = ...
          importdata_ascii (fname, delimiter, header_rows);
  endswitch

  ## If there are any empty fields in the output structure, then remove them
  if (isstruct (output) && length (output) == 1)
    fields = fieldnames (output);
    for i = 1:length (fields)
      if (isempty (getfield (output, fields{i})))
        output = rmfield (output, fields{i});
      endif
    endfor

    ## If only one field is left, replace the structure with the field,
    ## i.e. output = output.onlyFieldLeft

    ## Update the list of fields
    fields = fieldnames (output);
    if (length (fields) == 1)
      output = getfield (output, fields{1});
    endif
  endif
endfunction
179 | |
180 | |
181 ######################################## | |
182 | |
## Read a delimited text file into a struct.
##
## Inputs:
##   fname       - name of the file to read.
##   delimiter   - column separator; must not be empty.  The two-character
##                 string '\t' is accepted and returned as a real tab.
##   header_rows - number of leading header rows, or a negative value to
##                 count them as the leading rows without the delimiter.
##
## Outputs:
##   output      - struct with fields data, textdata, rowheaders and
##                 colheaders (in that order).  Columns of data that never
##                 received a number are dropped.
##   delimiter   - the delimiter, with '\t' converted to a tab character.
##   header_rows - the number of header rows that was used.
function [output, delimiter, header_rows] = ...
    importdata_ascii (fname, delimiter, header_rows)

  ## Define the fields in the output structure so that the order will be
  ## correct.
  output.data = [];
  output.textdata = [];
  output.rowheaders = [];
  output.colheaders = [];

  ## Read the file line by line.  Fail with a clear message when the file
  ## cannot be opened instead of passing an invalid id on to fgetl.
  fid = fopen (fname);
  if (fid < 0)
    error ("importdata: unable to open file '%s'", fname);
  endif
  file_content_rows = {};
  currline = fgetl (fid);
  ## fgetl returns the number -1 at end of file, so any non-string result
  ## ends the loop.
  while (ischar (currline))
    ## Drop a trailing carriage return so that files with CRLF line
    ## breaks do not leak "\r" into header or text cells.
    if (! isempty (currline) && currline(end) == "\r")
      currline(end) = [];
    endif
    file_content_rows{end+1} = currline;
    currline = fgetl (fid);
  endwhile
  fclose (fid);

  ## FIXME: guess delimiter, if it isn't defined
  if (isempty (delimiter))
    error ("importdata: Guessing delimiter is not implemented yet, you have to specify it.");
  endif

  ## FIXME: A more intelligent way to count number of header rows.  This
  ## is needed e.g. when delimiter=' ' and the header contains spaces...

  ## If number of header rows is undefined, then count the number of
  ## header rows by stepping through row by row and looking for the
  ## delimiter.  Assume that the header can't contain any delimiter.
  if (header_rows < 0)
    header_rows = 0;
    for i = 1:length (file_content_rows)
      if (isempty (regexp (file_content_rows{i}, delimiter, "once")))
        header_rows += 1;
      else
        ## Data part has begun and therefore no more header rows can be
        ## found
        break;
      endif
    endfor
  endif

  ## Put the header rows in output.textdata.
  if (header_rows > 0)
    output.textdata = file_content_rows (1:header_rows)';
  endif

  ## If space is the delimiter, then remove spaces in the beginning of
  ## each data row.
  if (strcmpi (delimiter, " "))
    for i = (header_rows+1):length (file_content_rows)
      ## strtrim does not only remove the leading spaces but also the
      ## trailing spaces, but that doesn't really matter.
      file_content_rows{i} = strtrim (file_content_rows{i});
    endfor
  endif

  ## Remove empty data rows.  Go through them backwards so that deleting
  ## a row does not shift the rows still to be visited.
  for i = length (file_content_rows):-1:(header_rows + 1)
    if (length (file_content_rows{i}) < 1)
      file_content_rows(i) = [];
    endif
  endfor

  ## Count the number of data columns.  If there are different number of
  ## columns, use the greatest value.
  data_columns = 0;
  delimiter_pattern = delimiter;
  ## If space is the delimiter, then multiple spaces should count as ONE
  ## delimiter.  Also ignore leading spaces.
  if (strcmpi (delimiter, " "))
    delimiter_pattern = ' +';
  endif
  for i = (header_rows+1):length (file_content_rows)
    n_fields = length (regexp (file_content_rows{i}, ...
                               delimiter_pattern, "split"));
    data_columns = max (data_columns, n_fields);
  endfor

  ## Go through the data and put it in either output.data or
  ## output.textdata depending on if it is numeric or not.
  output.data = NaN (length (file_content_rows) - header_rows, data_columns);

  ## column_has_number(j) becomes 1 once column j has received a number.
  column_has_number = zeros (1, data_columns);
  for i = (header_rows+1):length (file_content_rows)
    ## Only use the row if it contains anything other than white-space
    ## characters.
    if (any (file_content_rows{i} != " "))
      row_data = regexp (file_content_rows{i}, delimiter_pattern, "split");

      for j = 1:length (row_data)
        ## Try to convert the column to a number, if it works put it in
        ## output.data, otherwise in output.textdata
        if (! isempty (row_data{j}))
          data_numeric = str2double (row_data{j});
          if (! isempty (data_numeric) && ! isnan (data_numeric))
            output.data(i-header_rows, j) = data_numeric;
            column_has_number(j) = 1;
          else
            output.textdata{i,j} = row_data{j};
          endif
        endif
      endfor

    endif
  endfor
  ## Keep only the columns that hold at least one number.
  output.data = output.data(:, column_has_number > 0);

  ## Check whether rowheaders or colheaders should be used
  if ((header_rows == data_columns) && (size (output.textdata, 2) == 1))
    output.rowheaders = output.textdata;
  elseif (size (output.textdata, 2) == data_columns)
    output.colheaders = output.textdata(end,:);
  endif

  ## When delimiter = "\\t" convert it to a tab, done for Matlab compatibility.
  if (strcmp (delimiter, '\t'))
    delimiter = "\t";
  endif

endfunction
322 | |
323 | |
## File-local stand-in for regexp, covering only the calls made by
## importdata_ascii.
##
##   ARG1 - the string to search or split.
##   ARG2 - the delimiter pattern.  Recognised forms: the two-character
##          string '\t' (tab), ' +' (a run of one or more spaces), a
##          doubled backslash (one literal backslash); anything else is
##          taken as a literal string.
##   ARG3 - "once": return the index of the first delimiter, or [] when
##          ARG1 contains none.  Any other value, or no value at all:
##          return a cell row with the pieces of ARG1 between delimiters
##          (empty pieces are kept).
function [RET] = regexp (ARG1, ARG2, ARG3)

  ## Translate the pattern into a literal separator.
  collapse_runs = false;
  if (strcmp (ARG2, '\t'))
    sep = "\t";
  elseif (strcmp (ARG2, ' +'))
    sep = " ";
    ## Several consecutive spaces count as ONE separator.
    collapse_runs = true;
  elseif (strcmp (ARG2, '\\'))
    sep = '\';
  else
    sep = ARG2;
  endif

  ## Start index of every separator occurrence in ARG1.
  if (isempty (sep) || isempty (ARG1))
    hits = [];
  else
    hits = strfind (ARG1, sep);
  endif

  if (nargin > 2 && strcmp (ARG3, "once"))
    if (isempty (hits))
      RET = [];
    else
      RET = hits(1);
    endif
    return;
  endif

  ## Work out where each separator (or run of separators) starts and ends.
  if (isempty (hits))
    run_first = [];
    run_last = [];
  elseif (collapse_runs)
    gap = (diff (hits) > 1);
    run_first = hits([true, gap]);
    run_last = hits([gap, true]);
  else
    run_first = hits;
    run_last = hits + length (sep) - 1;
  endif

  ## The pieces are whatever lies between consecutive separators.
  piece_first = [1, run_last + 1];
  piece_last = [run_first - 1, length(ARG1)];
  RET = cell (1, numel (piece_first));
  for k = 1:numel (piece_first)
    RET{k} = ARG1(piece_first(k):piece_last(k));
  endfor

endfunction
331 | |
332 ######################################## | |
333 | |
334 %!test | |
335 %! # Comma separated values | |
336 %! A = [3.1 -7.2 0; 0.012 6.5 128]; | |
337 %! fn = tmpnam (); | |
338 %! fid = fopen (fn, "w"); | |
339 %! fputs (fid, "3.1,-7.2,0\n0.012,6.5,128"); | |
340 %! fclose (fid); | |
341 %! [a,d,h] = importdata (fn, ","); | |
342 %! unlink (fn); | |
343 %! assert (a, A); | |
344 %! assert (d, ","); | |
345 %! assert (h, 0); | |
346 | |
347 %!test | |
348 %! # Tab separated values | |
349 %! A = [3.1 -7.2 0; 0.012 6.5 128]; | |
350 %! fn = tmpnam (); | |
351 %! fid = fopen (fn, "w"); | |
352 %! fputs (fid, "3.1\t-7.2\t0\n0.012\t6.5\t128"); | |
353 %! fclose (fid); | |
354 %! [a,d,h] = importdata (fn, "\\t"); | |
355 %! unlink (fn); | |
356 %! assert (a, A); | |
357 %! assert (d, "\t"); | |
358 %! assert (h, 0); | |
359 | |
360 %!test | |
361 %! # Space separated values, using multiple spaces to align in columns. | |
362 %! A = [3.1 -7.2 0; 0.012 6.5 128]; | |
363 %! fn = tmpnam (); | |
364 %! fid = fopen (fn, "w"); | |
365 %! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(1,:)); | |
366 %! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(2,:)); | |
367 %! fclose (fid); | |
368 %! [a,d,h] = importdata (fn, " "); | |
369 %! unlink (fn); | |
370 %! assert (a, A); | |
371 %! assert (d, " "); | |
372 %! assert (h, 0); | |
373 | |
374 %!test | |
375 %! # Header | |
376 %! A.data = [3.1 -7.2 0; 0.012 6.5 128]; | |
377 %! A.textdata = {"This is a header row."; \ | |
378 %! "this row does not contain any data, but the next one does."}; | |
379 %! fn = tmpnam (); | |
380 %! fid = fopen (fn, "w"); | |
381 %! fputs (fid, [A.textdata{1} "\n"]); | |
382 %! fputs (fid, [A.textdata{2} "\n"]); | |
383 %! fputs (fid, "3.1\t-7.2\t0\n0.012\t6.5\t128"); | |
384 %! fclose (fid); | |
385 %! [a,d,h] = importdata (fn, "\\t"); | |
386 %! unlink (fn); | |
387 %! assert (a, A); | |
388 %! assert (d, "\t"); | |
389 %! assert (h, 2); | |
390 | |
391 %!test | |
392 %! # Ignore empty rows containing only spaces | |
393 %! A = [3.1 -7.2 0; 0.012 6.5 128]; | |
394 %! fn = tmpnam (); | |
395 %! fid = fopen (fn, "w"); | |
396 %! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(1,:)); | |
397 %! fputs (fid, " "); | |
398 %! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(2,:)); | |
399 %! fclose (fid); | |
400 %! [a,d,h] = importdata (fn, " "); | |
401 %! unlink (fn); | |
402 %! assert (a, A); | |
403 %! assert (d, " "); | |
404 %! assert (h, 0); | |
405 | |
406 %!test | |
407 %! # Exponentials | |
408 %! A = [3.1 -7.2 0; 0.012 6.5 128]; | |
409 %! fn = tmpnam (); | |
410 %! fid = fopen (fn, "w"); | |
411 %! fputs (fid, "+3.1e0\t-72E-1\t0\n12e-3\t6.5\t128"); | |
412 %! fclose (fid); | |
413 %! [a,d,h] = importdata (fn, "\\t"); | |
414 %! unlink (fn); | |
415 %! assert (a, A); | |
416 %! assert (d, "\t"); | |
417 %! assert (h, 0); | |
418 | |
419 %!test | |
420 %! # Missing values | |
421 %! A = [3.1 NaN 0; 0.012 6.5 128]; | |
422 %! fn = tmpnam (); | |
423 %! fid = fopen (fn, "w"); | |
424 %! fputs (fid, "3.1\t\t0\n0.012\t6.5\t128"); | |
425 %! fclose (fid); | |
426 %! [a,d,h] = importdata (fn, "\\t"); | |
427 %! unlink (fn); | |
428 %! assert (a, A); | |
429 %! assert (d, "\t"); | |
430 %! assert (h, 0); | |
431 | |
432 %!test | |
433 %! # CRLF for line breaks | |
434 %! A = [3.1 -7.2 0; 0.012 6.5 128]; | |
435 %! fn = tmpnam (); | |
436 %! fid = fopen (fn, "w"); | |
437 %! fputs (fid, "3.1\t-7.2\t0\r\n0.012\t6.5\t128"); | |
438 %! fclose (fid); | |
439 %! [a,d,h] = importdata (fn, "\\t"); | |
440 %! unlink (fn); | |
441 %! assert (a, A); | |
442 %! assert (d, "\t"); | |
443 %! assert (h, 0); | |
444 |