Mercurial > repos > vipints > rdiff
diff rDiff/src/octave/importdata.m @ 0:0f80a5141704
version 0.3 uploaded
author | vipints |
---|---|
date | Thu, 14 Feb 2013 23:38:36 -0500 |
parents | |
children |
line wrap: on
line diff
## Copyright (C) 2012 Erik Kjellson
##
## This file is part of Octave.
##
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {@var{A} =} importdata (@var{fname})
## @deftypefnx {Function File} {@var{A} =} importdata (@var{fname}, @var{delimiter})
## @deftypefnx {Function File} {@var{A} =} importdata (@var{fname}, @var{delimiter}, @var{header_rows})
## @deftypefnx {Function File} {[@var{A}, @var{delimiter}] =} importdata (...)
## @deftypefnx {Function File} {[@var{A}, @var{delimiter}, @var{header_rows}] =} importdata (...)
## Import data from a file.
##
## Import the contents of file @var{fname} into the workspace.
##
## Input parameters:
## @itemize
## @item @var{fname}
## The file name for the file to import.
##
## @item @var{delimiter}
## The character separating columns of data.  Use @code{\t} for tab.
## (Only valid for ascii files)
##
## @item @var{header_rows}
## Number of header rows before the data begins.  (Only valid for ascii files)
## @end itemize
##
## Different file types are supported:
## @itemize
## @item Ascii table
##
## Importing ascii table using the specified number of header rows and
## the specified delimiter.
##
## @item Image file
##
## @item @sc{Matlab} file
##
## @item Spreadsheet files (depending on external software)
##
## @item Wav file
##
## @end itemize
##
## @seealso{textscan, dlmread, csvread, load}
## @end deftypefn

## Author: Erik Kjellson <erikiiofph7@users.sourceforge.net>

function [output, delimiter, header_rows] = importdata (varargin)

  ## Default values.  An empty delimiter / negative header count mean
  ## "not specified by the caller".
  delimiter = "";
  header_rows = -1;

  ##########

  ## Check input arguments

  if (nargin < 1)
    print_usage ();
  endif

  fname = varargin{1};
  ## Check that the file name really is a string
  if (! ischar (fname))
    error ("importdata: file name needs to be a string");
  endif
  if (strcmpi (fname, "-pastespecial"))
    error ("importdata: option -pastespecial not implemented");
  endif

  if (nargin > 1)
    delimiter = varargin{2};
    ## Check that the delimiter really is a string
    if (! ischar (delimiter))
      error ("importdata: delimiter needs to be a character");
    endif
    ## The only accepted multi-character delimiter is the two-character
    ## text backslash + "t", which stands for a tab.
    if (length (delimiter) > 1 && ! strcmpi (delimiter, "\\t"))
      error ("importdata: delimiter cannot be longer than 1 character");
    endif
    ## A single backslash is doubled because the delimiter is later used
    ## as a regular expression.
    if (strcmpi (delimiter, "\\"))
      delimiter = "\\\\";
    endif
  endif

  if (nargin > 2)
    header_rows = varargin{3};
    ## header_rows is used as an index bound further down, so it has to
    ## be a single non-negative whole number.
    if (! isnumeric (header_rows) || ! isscalar (header_rows) ...
        || header_rows < 0 || header_rows != fix (header_rows))
      error ("importdata: number of header rows needs to be an integer number >= 0");
    endif
  endif

  if (nargin > 3)
    error ("importdata: too many input arguments");
  endif

  ##########

  ## Check file format
  ## Get the extension from the file name.  Only three outputs are
  ## requested: that is the documented signature of fileparts.
  [unused_dir, unused_name, fileExt] = fileparts (fname);
  ## Make sure file extension is in lower case.
  fileExt = lower (fileExt);

  switch (fileExt)
    case {".au", ".snd", ".avi"}
      ## Pass the extension as an argument instead of pre-formatting with
      ## sprintf, so that the text is never re-interpreted as a template.
      error ("importdata: not implemented for file format %s", fileExt);
    case {".bmp", ".cur", ".gif", ".hdf", ".ico", ".jpe", ".jpeg", ".jpg", ...
          ".pbm", ".pcx", ".pgm", ".png", ".pnm", ".ppm", ".ras", ...
          ".tif", ".tiff", ".xwd"}
      delimiter = NaN;
      header_rows = 0;
      [output.cdata, output.colormap, output.alpha] = imread (fname);
    case ".mat"
      delimiter = NaN;
      header_rows = 0;
      output = load (fname);
    case {".wk1", ".xls", ".xlsx", ".dbf", ".pxl"}
      ## If there's no Excel file support simply fall back to unimplemented.m
      output = xlsread (fname);
    case {".ods", ".sxc", ".fods", ".uos", ".xml"}
      ## unimplemented.m only knows ML functions; odsread isn't one but is in OF
      try
        output = odsread (fname);
      catch
        ## Fall back to unimplemented.m.
        output = xlsread (fname);
      end_try_catch
    case {".wav", ".wave"}
      delimiter = NaN;
      header_rows = 0;
      [output.data, output.fs] = wavread (fname);
    otherwise
      ## Assume the file is in ascii format.
      [output, delimiter, header_rows] = ...
        importdata_ascii (fname, delimiter, header_rows);
  endswitch

  ## If there are any empty fields in the output structure, then remove them
  if (isstruct (output) && length (output) == 1)
    fields = fieldnames (output);
    for i = 1:length (fields)
      if (isempty (output.(fields{i})))
        output = rmfield (output, fields{i});
      endif
    endfor

    ## If only one field is left, replace the structure with the field,
    ## i.e. output = output.onlyFieldLeft

    ## Update the list of fields
    fields = fieldnames (output);
    if (length (fields) == 1)
      output = output.(fields{1});
    endif
  endif
endfunction


########################################

## Import a delimited text table.
##
## Returns a structure with the fields data, textdata, rowheaders and
## colheaders (in that order), the delimiter (with the two-character text
## backslash + "t" converted to a real tab) and the number of header rows
## that was used.
function [output, delimiter, header_rows] = ...
      importdata_ascii (fname, delimiter, header_rows)

  ## Define the fields in the output structure so that the order will be
  ## correct.

  output.data = [];
  output.textdata = [];
  output.rowheaders = [];
  output.colheaders = [];

  ## Read the file as a 1xN cell array with one string per line.
  file_content_rows = importdata_read_lines (fname);

  ## FIXME: guess delimiter, if it isn't defined
  if (isempty (delimiter))
    error ("importdata: Guessing delimiter is not implemented yet, you have to specify it.");
  endif

  ## Regular expression that matches one column separator.
  delimiter_pattern = importdata_delimiter_pattern (delimiter);

  ## FIXME: A more intelligent way to count number of header rows.  This
  ## is needed e.g. when delimiter=' ' and the header contains spaces...

  ## If number of header rows is undefined, then count the number of
  ## header rows by step through row by row and look for the delimiter.
  ## Assume that the header can't contain any delimiter.
  if (header_rows < 0)
    header_rows = 0;
    for i = 1:length (file_content_rows)
      if (isempty (regexp (file_content_rows{i}, delimiter_pattern, "once")))
        header_rows = header_rows + 1;
      else
        ## Data part has begun and therefore no more header rows can be
        ## found
        break;
      endif
    endfor
  endif

  ## Put the header rows in output.textdata.
  if (header_rows > 0)
    output.textdata = file_content_rows (1:header_rows)';
  endif

  ## If space is the delimiter, then remove spaces in the beginning of
  ## each data row.
  if (strcmp (delimiter, " "))
    for i = (header_rows+1):length (file_content_rows)
      ## strtrim does not only remove the leading spaces but also the
      ## trailing spaces, but that doesn't really matter.
      file_content_rows{i} = strtrim (file_content_rows{i});
    endfor
  endif

  ## Remove empty data rows.  Go through them backwards so that you won't
  ## get out of bounds.
  for i = length (file_content_rows):-1:(header_rows + 1)
    if (length (file_content_rows{i}) < 1)
      file_content_rows(i) = [];
    endif
  endfor

  ## Count the number of data columns.  If there are different number of
  ## columns, use the greatest value.
  data_columns = 0;
  for i = (header_rows+1):length (file_content_rows)
    data_columns = max (data_columns, ...
                        length (importdata_split (file_content_rows{i}, ...
                                                  delimiter_pattern)));
  endfor

  ## Go through the data and put it in either output.data or
  ## output.textdata depending on if it is numeric or not.
  output.data = NaN (length (file_content_rows) - header_rows, data_columns);

  ## keep_column(j) becomes true as soon as column j holds one number.
  keep_column = false (1, data_columns);
  for i = (header_rows+1):length (file_content_rows)
    ## Only use the row if it contains anything other than white-space
    ## characters.
    if (any (file_content_rows{i} != " "))
      row_data = importdata_split (file_content_rows{i}, delimiter_pattern);

      for j = 1:length (row_data)
        ## Try to convert the column to a number, if it works put it in
        ## output.data, otherwise in output.textdata
        if (! isempty (row_data{j}))
          data_numeric = str2double (row_data{j});
          if (! isempty (data_numeric) && ! isnan (data_numeric))
            output.data(i-header_rows, j) = data_numeric;
            keep_column(j) = true;
          else
            output.textdata{i,j} = row_data{j};
          endif
        endif
      endfor

    endif
  endfor
  ## Drop the columns in which no numeric value was found at all.
  output.data = output.data(:, keep_column);

  ## Check whether rowheaders or colheaders should be used
  if ((header_rows == data_columns) && (size (output.textdata, 2) == 1))
    output.rowheaders = output.textdata;
  elseif (size (output.textdata, 2) == data_columns)
    output.colheaders = output.textdata(end,:);
  endif

  ## When delimiter = "\\t" convert it to a tab, done for Matlab compatibility.
  if (strcmp (delimiter, '\t'))
    delimiter = "\t";
  endif

endfunction


########################################

## Read FNAME line by line and return the lines as a 1xN cell array of
## strings, without line terminators.
function rows = importdata_read_lines (fname)

  fid = fopen (fname);
  if (fid < 0)
    error ("importdata: unable to open file '%s'", fname);
  endif

  rows = {};
  ## Make sure that the file is closed even if reading fails.
  unwind_protect
    while (true)
      currline = fgetl (fid);
      ## fgetl returns the number -1 (not a string) at end of file.
      if (! ischar (currline))
        break;
      endif
      ## fgetl only strips the newline; drop the carriage return that is
      ## left over from CRLF line breaks.
      if (! isempty (currline) && currline(end) == "\r")
        currline(end) = [];
      endif
      rows{end+1} = currline;
    endwhile
  unwind_protect_cleanup
    fclose (fid);
  end_unwind_protect

endfunction


## Build the regular expression that matches one column separator.
function pattern = importdata_delimiter_pattern (delimiter)

  if (strcmp (delimiter, " "))
    ## Multiple spaces count as ONE delimiter.
    pattern = ' +';
  elseif (length (delimiter) == 1 && any (delimiter == '.^$*+?()[]{}|'))
    ## Escape characters that have a special meaning in a regular
    ## expression so that they are matched literally.
    pattern = ["\\", delimiter];
  else
    ## Ordinary characters, the two-character text backslash + "t" (tab)
    ## and the already doubled backslash are valid patterns as they are.
    pattern = delimiter;
  endif

endfunction


## Split STR at every match of the regular expression PATTERN and return
## the pieces as a 1xN cell array.  Empty pieces are preserved so that
## missing values keep their column position.
function parts = importdata_split (str, pattern)

  ## Only the start/end index outputs of the builtin regexp are used.
  [first, last] = regexp (str, pattern);

  parts = cell (1, numel (first) + 1);
  pos = 1;
  for k = 1:numel (first)
    parts{k} = str(pos:first(k)-1);
    pos = last(k) + 1;
  endfor
  parts{end} = str(pos:end);

endfunction


########################################

%!test
%! # Comma separated values
%! A = [3.1 -7.2 0; 0.012 6.5 128];
%! fn = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, "3.1,-7.2,0\n0.012,6.5,128");
%! fclose (fid);
%! [a,d,h] = importdata (fn, ",");
%! unlink (fn);
%! assert (a, A);
%! assert (d, ",");
%! assert (h, 0);

%!test
%! # Tab separated values
%! A = [3.1 -7.2 0; 0.012 6.5 128];
%! fn = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, "3.1\t-7.2\t0\n0.012\t6.5\t128");
%! fclose (fid);
%! [a,d,h] = importdata (fn, "\\t");
%! unlink (fn);
%! assert (a, A);
%! assert (d, "\t");
%! assert (h, 0);

%!test
%! # Space separated values, using multiple spaces to align in columns.
%! A = [3.1 -7.2 0; 0.012 6.5 128];
%! fn = tempname ();
%! fid = fopen (fn, "w");
%! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(1,:));
%! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(2,:));
%! fclose (fid);
%! [a,d,h] = importdata (fn, " ");
%! unlink (fn);
%! assert (a, A);
%! assert (d, " ");
%! assert (h, 0);

%!test
%! # Header
%! A.data = [3.1 -7.2 0; 0.012 6.5 128];
%! A.textdata = {"This is a header row."; ...
%!               "this row does not contain any data, but the next one does."};
%! fn = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, [A.textdata{1} "\n"]);
%! fputs (fid, [A.textdata{2} "\n"]);
%! fputs (fid, "3.1\t-7.2\t0\n0.012\t6.5\t128");
%! fclose (fid);
%! [a,d,h] = importdata (fn, "\\t");
%! unlink (fn);
%! assert (a, A);
%! assert (d, "\t");
%! assert (h, 2);

%!test
%! # Ignore empty rows containing only spaces
%! A = [3.1 -7.2 0; 0.012 6.5 128];
%! fn = tempname ();
%! fid = fopen (fn, "w");
%! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(1,:));
%! fputs (fid, " ");
%! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(2,:));
%! fclose (fid);
%! [a,d,h] = importdata (fn, " ");
%! unlink (fn);
%! assert (a, A);
%! assert (d, " ");
%! assert (h, 0);

%!test
%! # Exponentials
%! A = [3.1 -7.2 0; 0.012 6.5 128];
%! fn = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, "+3.1e0\t-72E-1\t0\n12e-3\t6.5\t128");
%! fclose (fid);
%! [a,d,h] = importdata (fn, "\\t");
%! unlink (fn);
%! assert (a, A);
%! assert (d, "\t");
%! assert (h, 0);

%!test
%! # Missing values
%! A = [3.1 NaN 0; 0.012 6.5 128];
%! fn = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, "3.1\t\t0\n0.012\t6.5\t128");
%! fclose (fid);
%! [a,d,h] = importdata (fn, "\\t");
%! unlink (fn);
%! assert (a, A);
%! assert (d, "\t");
%! assert (h, 0);

%!test
%! # CRLF for line breaks
%! A = [3.1 -7.2 0; 0.012 6.5 128];
%! fn = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, "3.1\t-7.2\t0\r\n0.012\t6.5\t128");
%! fclose (fid);
%! [a,d,h] = importdata (fn, "\\t");
%! unlink (fn);
%! assert (a, A);
%! assert (d, "\t");
%! assert (h, 0);

%!test
%! # Delimiter that is a regular expression metacharacter
%! A = [1 2 3; 4 5 6];
%! fn = tempname ();
%! fid = fopen (fn, "w");
%! fputs (fid, "1|2|3\n4|5|6");
%! fclose (fid);
%! [a,d,h] = importdata (fn, "|");
%! unlink (fn);
%! assert (a, A);
%! assert (d, "|");
%! assert (h, 0);