diff rDiff/src/octave/importdata.m @ 0:0f80a5141704

version 0.3 uploaded
author vipints
date Thu, 14 Feb 2013 23:38:36 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rDiff/src/octave/importdata.m	Thu Feb 14 23:38:36 2013 -0500
@@ -0,0 +1,444 @@
+## Copyright (C) 2012 Erik Kjellson
+##
+## This file is part of Octave.
+##
+## Octave is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or (at
+## your option) any later version.
+##
+## Octave is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn  {Function File} {@var{A} =} importdata (@var{fname})
+## @deftypefnx {Function File} {@var{A} =} importdata (@var{fname}, @var{delimiter})
+## @deftypefnx {Function File} {@var{A} =} importdata (@var{fname}, @var{delimiter},  @var{header_rows})
+## @deftypefnx {Function File} {[@var{A}, @var{delimiter}] =} importdata (...)
+## @deftypefnx {Function File} {[@var{A}, @var{delimiter}, @var{header_rows}] =} importdata (...)
+## Importing data from file.
+##
+## Importing the contents of file @var{fname} into workspace.
+##
+## Input parameters:
+## @itemize
+## @item @var{fname}
+## The file name for the file to import.
+## 
+## @item @var{delimiter}
+## The character separating columns of data. Use @code{\t} for tab.
+## (Only valid for ascii files)
+##
+## @item @var{header_rows}
+## Number of header rows before the data begins. (Only valid for ascii files)
+## @end itemize
+##
+## Different file types are supported:
+## @itemize
+## @item Ascii table
+##
+## Importing ascii table using the specified number of header rows and
+## the specified delimiter.
+##
+## @item Image file
+##
+## @item @sc{Matlab} file
+##
+## @item Spreadsheet files (depending on external software)
+##
+## @item Wav file
+##
+## @end itemize
+##
+## @seealso{textscan, dlmread, csvread, load}
+## @end deftypefn
+
+## Author: Erik Kjellson <erikiiofph7@users.sourceforge.net>
+
+function [output, delimiter, header_rows] = importdata (varargin)
+
+    
+  ## Default values
+  fname   = "";
+  delimiter  = "";
+  header_rows = -1;
+
+  ##########
+
+  ## Check input arguments
+
+  if (nargin < 1)
+    print_usage ();
+  endif
+
+  fname = varargin{1};
+  ## Check that the file name really is a string
+  if (! ischar (fname))
+    error ("importdata: file name needs to be a string");
+  endif
+  if ( strcmpi (fname, "-pastespecial"))
+    error ("importdata: option -pastespecial not implemented");
+  endif
+
+  if (nargin > 1)
+    delimiter = varargin{2};
+    ## Check that the delimiter really is a string
+    if (!ischar (delimiter))
+      error("importdata: delimiter needs to be a character");
+    endif
+    if (length (delimiter) > 1 && !strcmpi (delimiter, "\\t"))
+      error("importdata: delimiter cannot be longer than 1 character");
+    endif
+    if (strcmpi (delimiter, "\\"))
+      delimiter = "\\\\";
+    endif
+  endif
+
+  if (nargin > 2)
+    header_rows = varargin{3};
+    if (!isnumeric (header_rows) || header_rows < 0)
+      error ("importdata: number of header rows needs to be an integer number >= 0");
+    endif
+  endif
+
+  if (nargin > 3)
+    error ("importdata: too many input arguments");
+  endif
+
+  ##########
+
+  ## Check file format
+  ## Get the extension from the file name.
+  [d n fileExt v] = fileparts (fname);
+  ## Make sure file extension is in lower case.
+  fileExt = lower (fileExt);
+
+  switch fileExt
+    case {".au", ".snd"}
+      error (sprintf ("importdata: not implemented for file format %s", 
+                      fileExt));
+    case ".avi"
+      error (sprintf ("importdata: not implemented for file format %s", 
+                      fileExt));
+    case {".bmp", ".cur", ".gif", ".hdf", ".ico", ".jpe", ".jpeg", ".jpg", \
+          ".pbm", ".pcx", ".pgm", ".png", ".pnm", ".ppm", ".ras", \
+          ".tif", ".tiff", ".xwd"}
+      delimiter  = NaN;
+      header_rows = 0;
+      [output.cdata, output.colormap, output.alpha] = imread (fname);
+    case ".mat"
+      delimiter  = NaN;
+      header_rows = 0;
+      output = load (fname);
+    case {".wk1", ".xls", ".xlsx", ".dbf", ".pxl"}
+      ## If there's no Excel file support simply fall back to unimplemented.m
+      output = xlsread (fname);
+    case {".ods", ".sxc", ".fods", ".uos", ".xml"}
+      ## unimplemented.m only knows ML functions; odsread isn't one but is in OF
+      try
+        output = odsread (fname);
+      catch
+        ## Fall back to unimplemented.m.
+        output = xlsread (fname);
+      end_try_catch
+    case {".wav", ".wave"}
+      delimiter  = NaN;
+      header_rows = 0;
+      [output.data, output.fs] = wavread (fname);
+    otherwise
+      ## Assume the file is in ascii format.
+      [output, delimiter, header_rows]  = \
+          importdata_ascii (fname, delimiter, header_rows);
+  endswitch
+
+  ## If there are any empty fields in the output structure, then remove them
+  if (isstruct (output) && length (output) == 1)
+    fields = fieldnames (output);
+    for i=1:length (fields)
+      if (isempty (getfield (output, fields{i})))
+        output = rmfield (output, fields{i});
+      endif
+    endfor
+
+    ## If only one field is left, replace the structure with the field,
+    ## i.e. output = output.onlyFieldLeft
+
+    ## Update the list of fields
+    fields = fieldnames (output);
+    if (length (fields) == 1)
+      output = getfield (output, fields{1});
+    endif
+  endif
+endfunction
+
+
+########################################
+
+function [output, delimiter, header_rows] = \
+      importdata_ascii (fname, delimiter, header_rows)
+
+  ## Define the fields in the output structure so that the order will be
+  ## correct.
+
+  output.data       = [];
+  output.textdata   = [];
+  output.rowheaders = [];
+  output.colheaders = [];
+
+  ## Read file into string and count the number of header rows
+  #file_content = fileread (fname);
+  file_content_rows={};
+  fid=fopen(fname);
+  currline=0;
+  firstline=1;
+  while 1==1
+      if not(isempty(currline))
+          if currline==-1
+              break
+          end
+      end     
+      currline=fgetl(fid);
+      file_content_rows{end+1}=currline;
+  end
+  fclose(fid);
+  file_content_rows={file_content_rows{1:(end-1)}};
+  
+  ## Split the file into rows (using \r\n or \n as delimiters between rows).
+  #file_content_rows = regexp (file_content, "\r?\n", "split");
+
+  ## FIXME: guess delimiter, if it isn't defined
+  if (isempty (delimiter))
+    error ("importdata: Guessing delimiter is not implemented yet, you have to specify it.");
+  endif
+
+  ## FIXME: A more intelligent way to count number of header rows. This
+  ## is needed e.g. when delimiter=' ' and the header contains spaces...
+
+  ## If number of header rows is undefined, then count the number of
+  ## header rows by step through row by row and look for the delimiter.
+  ## Assume that the header can't contain any delimiter.
+  if (header_rows < 0)
+    header_rows = 0;
+    for i=1:length (file_content_rows)
+      if (isempty (regexp(file_content_rows{i}, delimiter, "once")))
+        header_rows++;
+      else
+        ## Data part has begun and therefore no more header rows can be
+        ## found
+        break;
+      endif
+    endfor
+  endif
+
+  ## Put the header rows in output.textdata.
+  if (header_rows > 0)
+    output.textdata   = file_content_rows (1:header_rows)';
+  endif
+
+  ## If space is the delimiter, then remove spaces in the beginning of
+  ## each data row.
+  if (strcmpi (delimiter, " "))
+    for i=(header_rows+1):length (file_content_rows)
+      ## strtrim does not only remove the leading spaces but also the
+      ## tailing spaces, but that doesn't really matter.
+      file_content_rows{i} = strtrim (file_content_rows{i});
+    endfor
+  endif
+
+  ## Remove empty data rows. Go through them backwards so that you wont
+  ## get out of bounds.
+  for i=length (file_content_rows):-1:(header_rows + 1)
+    if (length (file_content_rows{i}) < 1)
+      file_content_rows = [file_content_rows(1:i-1), \
+                           file_content_rows(i+1:length(file_content_rows))];
+    endif
+  endfor
+
+  ## Count the number of data columns. If there are different number of
+  ## columns, use the greatest value.
+  data_columns = 0;
+  delimiter_pattern = delimiter;
+  ## If space is the delimiter, then multiple spaces should count as ONE
+  ## delimiter. Also ignore leading spaces.
+  if (strcmpi (delimiter, " "))
+    delimiter_pattern = ' +';
+  endif
+  for i=(header_rows+1):length(file_content_rows)
+    data_columns = max (data_columns,
+                        length (regexp (file_content_rows{i},
+                                        delimiter_pattern, "split")));
+  endfor
+
+  ## Go through the data and put it in either output.data or
+  ## output.textdata depending on if it is numeric or not.
+  output.data = NaN (length (file_content_rows) - header_rows, data_columns);
+
+  cut_rows=zeros(1,data_columns);
+  for i=(header_rows+1):length(file_content_rows)
+    ## Only use the row if it contains anything other than white-space
+    ## characters.
+    if (any (file_content_rows{i} != " "))
+      row_data = regexp (file_content_rows{i}, delimiter_pattern, "split");
+
+      for j=1:length(row_data)
+        ## Try to convert the column to a number, if it works put it in
+        ## output.data, otherwise in output.textdata
+        if (!isempty (row_data{j}))
+          data_numeric = str2double (row_data{j});
+          if and(!isempty (data_numeric),not(isnan(data_numeric)))
+            output.data(i-header_rows, j) = data_numeric;
+            if not(isnan(data_numeric))
+                cut_rows(j)=1;
+            end
+          else
+            output.textdata{i,j} = row_data{j};
+          endif
+        endif
+      endfor
+
+    endif
+  endfor
+  output.data=output.data(:,cut_rows>0);
+  
+  ## Check wether rowheaders or colheaders should be used
+  if ((header_rows == data_columns) && (size (output.textdata, 2) == 1))
+    output.rowheaders = output.textdata;
+  elseif (size (output.textdata, 2) == data_columns)
+    output.colheaders = output.textdata(end,:);
+  endif
+
+  ## When delimiter = "\\t" convert it to a tab, done for Matlab compatibility.
+  if (strcmp (delimiter, '\t'))
+    delimiter = "\t";
+  endif
+
+endfunction
+
+
+function [RET]=regexp(ARG1,ARG2,ARG3)
+%ARG3 is always 'split' in this context
+  if (strcmp (ARG2, '\t'))
+    ARG2 = "\t";
+  endif
+  RET = strsplit(ARG1,"\t",fixed=true);
+endfunction
+
+########################################
+
+%!test
+%! # Comma separated values
+%! A = [3.1 -7.2 0; 0.012 6.5 128];
+%! fn  = tmpnam ();
+%! fid = fopen (fn, "w");
+%! fputs (fid, "3.1,-7.2,0\n0.012,6.5,128");
+%! fclose (fid);
+%! [a,d,h] = importdata (fn, ",");
+%! unlink (fn);
+%! assert (a, A);
+%! assert (d, ",");
+%! assert (h, 0);
+
+%!test
+%! # Tab separated values
+%! A = [3.1 -7.2 0; 0.012 6.5 128];
+%! fn  = tmpnam ();
+%! fid = fopen (fn, "w");
+%! fputs (fid, "3.1\t-7.2\t0\n0.012\t6.5\t128");
+%! fclose (fid);
+%! [a,d,h] = importdata (fn, "\\t");
+%! unlink (fn);
+%! assert (a, A);
+%! assert (d, "\t");
+%! assert (h, 0);
+
+%!test
+%! # Space separated values, using multiple spaces to align in columns.
+%! A = [3.1 -7.2 0; 0.012 6.5 128];
+%! fn  = tmpnam ();
+%! fid = fopen (fn, "w");
+%! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(1,:));
+%! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(2,:));
+%! fclose (fid);
+%! [a,d,h] = importdata (fn, " ");
+%! unlink (fn);
+%! assert (a, A);
+%! assert (d, " ");
+%! assert (h, 0);
+
+%!test
+%! # Header
+%! A.data = [3.1 -7.2 0; 0.012 6.5 128];
+%! A.textdata = {"This is a header row."; \
+%!               "this row does not contain any data, but the next one does."};
+%! fn  = tmpnam ();
+%! fid = fopen (fn, "w");
+%! fputs (fid, [A.textdata{1} "\n"]);
+%! fputs (fid, [A.textdata{2} "\n"]);
+%! fputs (fid, "3.1\t-7.2\t0\n0.012\t6.5\t128");
+%! fclose (fid);
+%! [a,d,h] = importdata (fn, "\\t");
+%! unlink (fn);
+%! assert (a, A);
+%! assert (d, "\t");
+%! assert (h, 2);
+
+%!test
+%! # Ignore empty rows containing only spaces
+%! A = [3.1 -7.2 0; 0.012 6.5 128];
+%! fn  = tmpnam ();
+%! fid = fopen (fn, "w");
+%! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(1,:));
+%! fputs (fid, "      ");
+%! fprintf (fid, "%10.3f %10.3f %10.3f\n", A(2,:));
+%! fclose (fid);
+%! [a,d,h] = importdata (fn, " ");
+%! unlink (fn);
+%! assert (a, A);
+%! assert (d, " ");
+%! assert (h, 0);
+
+%!test
+%! # Exponentials
+%! A = [3.1 -7.2 0; 0.012 6.5 128];
+%! fn  = tmpnam ();
+%! fid = fopen (fn, "w");
+%! fputs (fid, "+3.1e0\t-72E-1\t0\n12e-3\t6.5\t128");
+%! fclose (fid);
+%! [a,d,h] = importdata (fn, "\\t");
+%! unlink (fn);
+%! assert (a, A);
+%! assert (d, "\t");
+%! assert (h, 0);
+
+%!test
+%! # Missing values
+%! A = [3.1 NaN 0; 0.012 6.5 128];
+%! fn  = tmpnam ();
+%! fid = fopen (fn, "w");
+%! fputs (fid, "3.1\t\t0\n0.012\t6.5\t128");
+%! fclose (fid);
+%! [a,d,h] = importdata (fn, "\\t");
+%! unlink (fn);
+%! assert (a, A);
+%! assert (d, "\t");
+%! assert (h, 0);
+
+%!test
+%! # CRLF for line breaks
+%! A = [3.1 -7.2 0; 0.012 6.5 128];
+%! fn  = tmpnam ();
+%! fid = fopen (fn, "w");
+%! fputs (fid, "3.1\t-7.2\t0\r\n0.012\t6.5\t128");
+%! fclose (fid);
+%! [a,d,h] = importdata (fn, "\\t");
+%! unlink (fn);
+%! assert (a, A);
+%! assert (d, "\t");
+%! assert (h, 0);
+