Mercurial > repos > clustalomega > clustalomega
diff clustalomega/clustal-omega-0.2.0/src/clustal/symmatrix.c @ 0:ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
author | clustalomega |
---|---|
date | Tue, 07 Jun 2011 17:04:25 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/clustalomega/clustal-omega-0.2.0/src/clustal/symmatrix.c Tue Jun 07 17:04:25 2011 -0400 @@ -0,0 +1,496 @@ +/* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ + +/********************************************************************* + * Clustal Omega - Multiple sequence alignment + * + * Copyright (C) 2010 University College Dublin + * + * Clustal-Omega is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This file is part of Clustal-Omega. + * + ********************************************************************/ + +/* + * RCS $Id: symmatrix.c 230 2011-04-09 15:37:50Z andreas $ + * + * + * Functions for symmetric (square) matrices including diagonal. + * supports the notion of non-square sub-matrices of a symmetric + * matrix, i.e. where |rows|<|cols|. + * + * FIXME Allocating one big chunk of memory is probably + * much faster and also easier to maintain. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include <assert.h> +#include "symmatrix.h" + + +#if 0 +#define DEBUG +#endif + +#define MAX_BUF_SIZE 65536 + +/** + * @brief Allocates symmat and its members and initialises them. Data + * will be calloced, i.e. initialised with zeros. + * + * @param[out] symmat + * newly allocated and initialised symmatrix instance + * @param[in] nrows + * number of rows + * @param[in] + * ncols number of columns + * + * @return: non-zero on error + * + * @see FreeSymMatrix() + * + * @note: symmat data will be of fake shape nrows x ncols + * + */ +int +NewSymMatrix(symmatrix_t **symmat, int nrows, int ncols) +{ + int i; /* aux */ + + assert(nrows>0 && ncols>0 && ncols>=nrows); + assert(ncols>0 && ncols>=nrows); + + (*symmat) = (symmatrix_t *) malloc(1*sizeof(symmatrix_t)); + if (NULL == (*symmat)) { + fprintf(stderr, "Couldn't allocate memory (%s|%s)\n", + __FILE__, __FUNCTION__); + return -1; + } + + (*symmat)->data = (double **) malloc(nrows * sizeof(double *)); + if (NULL == (*symmat)->data) { + fprintf(stderr, "Couldn't allocate memory (%s|%s)\n", + __FILE__, __FUNCTION__); + free(*symmat); + *symmat = NULL; + return -1; + } + for (i=0; i<nrows; i++) { + (*symmat)->data[i] = (double *) calloc((ncols-i), sizeof(double)); + if (NULL == (*symmat)->data[i]) { + fprintf(stderr, "Couldn't allocate memory (%s|%s)\n", + __FILE__, __FUNCTION__); + while (0!=--i) { + free((*symmat)->data[i]); + } + free((*symmat)->data); + free(*symmat); + *symmat = NULL; + return -1; + } +#ifdef TRACE + fprintf(stderr, "DEBUG(%s|%s():%d) initialising symmat with the number of the beast\n", + __FILE__, __FUNCTION__, __LINE__); + { + int j; + for (j=0; j<ncols-i; j++) { + (*symmat)->data[i][j] = -666.0; + } + + } +#endif + } + + (*symmat)->nrows = nrows; + (*symmat)->ncols = ncols; + + return 0; +} +/*** end: new_symmatrix ***/ + + +/** + * @brief Sets symmat data of given index to given value + * + * @param[in] symmat + * symmatrix_t whose data is to be set + * @param[in] i + * first index + * @param[in] j + * second index + * @param[in] value + * value used to set data point + * + * @see SymMatrixGetValue() + * + * @note This is a convenience function that checks index order. + * + */ +void +SymMatrixSetValue(symmatrix_t *symmat, const int i, const int j, const double value) +{ + assert(NULL != symmat); + + if (i<=j) { + assert(i < symmat->nrows && j < symmat->ncols); + symmat->data[i][j-i] = value; + } else { + assert(j < symmat->nrows && i < symmat->ncols); + symmat->data[j][i-j] = value; + } +} +/*** end: symmatrix_setvalue ***/ + + + +/** + * @brief Returns element of symmat corresponding to given indices + * + * @param[in] symmat + * symmatrix_t of question + * @param[in] i + * index i + * @param[in] j + * index j + * + * @return requested value + * + * @see SymMatrixSetValue() + * + * @note This is a convenience function that checks index order. + */ +double +SymMatrixGetValue(symmatrix_t *symmat, const int i, const int j) +{ + assert(NULL != symmat); + + if (i<=j) { + assert(i < symmat->nrows && j < symmat->ncols); + return symmat->data[i][j-i]; + } else { + assert(j < symmat->nrows && i < symmat->ncols); + return symmat->data[j][i-j]; + } +} +/*** end: symmatrix_getvalue ***/ + + + + + +/** + * @brief Returns a pointer to an element of symmat corresponding to + * given indices + * + * @param[out] val + * Value to be set + * @param[in] symmat + * symmatrix_t of question + * @param[in] i + * index i + * @param[in] j + * index j + * + * @return pointer to value + * + * @see SymMatrixGetValue() + * + * @note This is a convenience function that checks index order. + * + */ +void +SymMatrixGetValueP(double **val, symmatrix_t *symmat, + const int i, const int j) +{ + assert(NULL != symmat); + + if (i<=j) { + assert(i < symmat->nrows && j < symmat->ncols); + *val = &(symmat->data[i][j-i]); + } else { + assert(j < symmat->nrows && i < symmat->ncols); + *val = &(symmat->data[j][i-j]); + } +} +/*** end: symmatrix_getvaluep ***/ + + +/** + * @brief Frees memory allocated by data members of symmat and symmat + * itself. + * + * @param[in] symmat + * symmatrix_t to be freed + * + * @note Use in conjunction with NewSymMatrix() + * + * @see NewSymMatrix() + * + */ +void +FreeSymMatrix(symmatrix_t **symmat) +{ + int i; + if (NULL != (*symmat)) { + if (NULL != (*symmat)->data) { + for (i=0; i<(*symmat)->nrows; i++) { + free((*symmat)->data[i]); + } + free((*symmat)->data); + } + } + free(*symmat); + *symmat = NULL; +} +/*** end: free_symmatrix ***/ + + + +/** + * @brief Print out a symmat in phylip style. Distances are printed on + * one line per sequence/object. Since we also support matrices with + * rows\<cols, the first line can also be nrows by ncolumns instead + * of just nrows + * + * @param[in] symmat + * the symmatrix_t to print + * @param[in] labels + * sequence/objects labels/names. must be at least of length symmat nrows + * @param[in] path + * filename or NULL. If NULL stdout will be used. + * + */ +void +SymMatrixPrint(symmatrix_t *symmat, char **labels, const char *path) +{ + FILE *fp = NULL; + int i, j; /* aux */ + int max_label_len = 0; + + if (NULL==symmat || NULL==labels) { + fprintf(stderr, + "One of the provided arguments is empty or NULL (print_matrix)\n"); + return; + } + if (NULL==path) { + fp = stdout; + } else { + if (NULL==(fp=fopen(path, "w"))) { + fprintf(stderr, "Couldn't open %s for writing.", path); + return; + } + } + + /* find maximum label length for pretty printing + */ + for (i=0; i<symmat->nrows; i++) { + int this_len; + assert(NULL != labels[i]); + this_len = strlen(labels[i]); + if (this_len>max_label_len) + max_label_len = this_len; + } + + if (symmat->ncols==symmat->nrows) { + fprintf(fp, "%u\n", symmat->ncols); + } else { + /* this is not standard phylip, but necessary to indicate + seed matrices */ + fprintf(fp, "%u x %u\n", symmat->nrows, symmat->ncols); + } + for (i=0; i<symmat->nrows; i++) { + /* phylip restriction is 10 characters. we don't care here + */ + fprintf(fp, "%-*s", max_label_len, labels[i]); + /* we are lazy and do it like fastdist: write all distances + * for one seq to one line + */ + for (j=0; j<symmat->ncols; j++) { + fprintf(fp, " %f", SymMatrixGetValue(symmat, i, j)); + } + fprintf(fp, "%s", "\n"); + } + if (NULL != path) { + (void) fclose(fp); + } else { + (void) fflush(fp); + } +} +/*** end: SymMatrixPrint ***/ + + + +/** + * + * @brief Read a distance matrix in phylip format + * + * @param[in] pcFileIn + * distance matrix filename + * @param[out] prSymMat_p + * the symmatrix_t. will be allocated here. + * @return: + * non-zero on error + * + * @note: + * FIXME untested code + */ +int +SymMatrixRead(char *pcFileIn, symmatrix_t **prSymMat_p) +{ + FILE *prFilePointer; + char *buf; + /* number of parsed sequences */ + int iNParsedSeq = 0; + /* number of parsed distances per sequence */ + int iNParsedDists = 0; + /* total number of sequences/objects */ + int iTotalNSeq = 0; + int iRetCode = 0; + + assert(NULL!=pcFileIn); + + /* FIXME: test correctness and implement label checking */ + fprintf(stderr, "WARNING: Reading of distance matrix from file not thoroughly tested!\n"); + fprintf(stderr, "WARNING: Assuming same order of sequences in sequence file and distance matrix file (matching of labels not implemented)\n"); + + if (NULL == (buf = (char *) malloc(MAX_BUF_SIZE * sizeof(char)))) { + fprintf(stderr, "ERROR: couldn't allocate memory at %s:%s:%d\n", + __FILE__, __FUNCTION__, __LINE__); + return -1; + } + + if (NULL == (prFilePointer = fopen(pcFileIn, "r"))) { + fprintf(stderr, "ERROR: Couldn't open %s for reading\n", pcFileIn); + free(buf); + return -1; + } + + /* get number of sequences from first line and allocate memory for + * distance matrix + * + */ + if (NULL == fgets(buf, MAX_BUF_SIZE, prFilePointer) ) { + fprintf(stderr, "Couldn't read first line from %s\n", pcFileIn); + iRetCode = -1; + goto closefile_and_freebuf; + } + if (strlen(buf)==MAX_BUF_SIZE-1) { + fprintf(stderr, "%s\n", "Looks like I couldn't read complete line. Wrong format (or too small MAX_BUF_SIZE)"); + iRetCode = -1; + goto closefile_and_freebuf; + } + if (sscanf(buf, "%d", &iTotalNSeq)!=1) { + fprintf(stderr, "ERROR: couldn't parse number of sequences from first line of %s\n", pcFileIn); + iRetCode = -1; + goto closefile_and_freebuf; + } + +#if TRACE + Log(&rLog, LOG_FORCED_DEBUG, "iTotalNSeq parsed from %s is %d\n", pcFileIn, iTotalNSeq); +#endif + + if (NewSymMatrix(prSymMat_p, iTotalNSeq, iTotalNSeq)) { + fprintf(stderr, "FATAL %s", "Memory allocation for distance matrix failed"); + iRetCode = -1; + goto closefile_and_freebuf; + } + + + /* parse file line by line + * + */ + while (NULL != fgets(buf, MAX_BUF_SIZE, prFilePointer)) { + char *szToken; + int is_newseq; + + if (MAX_BUF_SIZE-1 == strlen(buf)) { + fprintf(stderr, "%s\n", "Looks like I couldn't read complete line. Wrong format (or too small MAX_BUF_SIZE)"); + iRetCode = -1; + goto closefile_and_freebuf; + } + +#ifdef DEBUG + Log(&rLog, LOG_FORCED_DEBUG, "Got line: %s\n", buf); +#endif + + /* new sequence label at beginning of line? + */ + if (isblank(buf[0])) { + is_newseq = 0; + } else { + is_newseq = 1; + } + + /* tokenise line and treat new sequence specially + */ + szToken = strtok(buf, " \t"); + if (is_newseq==1) { + iNParsedSeq++; + iNParsedDists=0; + + /* if format is lower dimensional matrix then first + * sequence has no distances but might have newline + * character at it's end. + */ + while (isspace(szToken[strlen(szToken)-1])) { + szToken[strlen(szToken)-1]='\0'; + } + /* FIXME save label? */ + szToken = strtok(NULL, " \t"); + } + /* from here on it's only parsing of distances */ + while (szToken != NULL) { + double dist; + iNParsedDists++; + + /* only parse what's needed */ + if (iNParsedDists!=iNParsedSeq) { + /* parse and store distance + */ + if (sscanf(szToken, "%lf", &dist)!=1) { + fprintf(stderr, "Couldn't parse float from entry '%s'\n", szToken); + iRetCode = -1; + goto closefile_and_freebuf; + } +#if TRACE + Log(&rLog, LOG_FORCED_DEBUG, "Parsed distance %d for seq %d = %f\n", iNParsedDists-1, iNParsedSeq-1, dist); +#endif + SymMatrixSetValue(*prSymMat_p, iNParsedSeq-1, iNParsedDists-1, dist); + SymMatrixSetValue(*prSymMat_p, iNParsedDists-1, iNParsedSeq-1, dist); + } + szToken = strtok(NULL, " \t"); + } + } + + if (iTotalNSeq!=iNParsedSeq) { + fprintf(stderr, "expected %d seqs, but only parsed %d\n", iTotalNSeq, iNParsedSeq); + iRetCode = -1; + goto closefile_and_freebuf; + } +#if TRACE + for (i=0; i<iNParsedSeq; i++) { + int j; + for (j=0; j<iNParsedSeq; j++) { + Log(&rLog, LOG_FORCED_DEBUG, "prSymMat_p[%d][%d]=%f\n", i, j, (*prSymMat_p)[i][j]); + } + } +#endif + +closefile_and_freebuf: + fclose(prFilePointer); + free(buf); + + return iRetCode; +} +/*** end: SymMatrixRead ***/ +