Mercurial > repos > calkan > mrfast
diff mrfast-2.1.0.4/HashTable.c @ 1:d4054b05b015 default tip
Version update to 2.1.0.5
author | calkan |
---|---|
date | Fri, 09 Mar 2012 07:35:51 -0500 |
parents | 7b3dc85dc7fd |
children |
line wrap: on
line diff
--- a/mrfast-2.1.0.4/HashTable.c Tue Feb 21 10:29:47 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,472 +0,0 @@ -/* - * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, - * are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this list - * of conditions and the following disclaimer. - * - Redistributions in binary form must reproduce the above copyright notice, this - * list of conditions and the following disclaimer in the documentation and/or other - * materials provided with the distribution. - * - Neither the names of the University of Washington, Simon Fraser University, - * nor the names of its contributors may be - * used to endorse or promote products derived from this software without specific - * prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - Authors: - Farhad Hormozdiari - Faraz Hach - Can Alkan - Emails: - farhadh AT uw DOT edu - fhach AT cs DOT sfu DOT ca - calkan AT uw DOT edu -*/ - - - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <math.h> -#include "Common.h" -#include "RefGenome.h" -#include "HashTable.h" -/**********************************************/ -FILE *_ih_fp = NULL; -IHashTable *_ih_hashTable = NULL; -int _ih_maxHashTableSize = 0; -char *_ih_refGen = NULL; -char *_ih_refGenName = NULL; -long long _ih_memUsage = 0; -int _ih_refGenOff = 0; -/**********************************************/ - -int hashVal(char *seq) -{ - int i=0; - int val=0, numericVal=0; - - while(i<WINDOW_SIZE) - { - switch (seq[i]) - { - case 'A': - numericVal = 0; - break; - case 'C': - numericVal = 1; - break; - case 'G' : - numericVal = 2; - break; - case 'T': - numericVal = 3; - break; - default: - return -1; - break; - } - val = (val << 2)|numericVal; - i++; - } - return val; -} -/**********************************************/ -void freeIHashTableContent(IHashTable *hashTable, unsigned int maxSize) -{ - int i=0; - for (i=0; i<maxSize; i++) - { - if (hashTable[i].locs != NULL) - { - freeMem(hashTable[i].locs, (hashTable[i].locs[0]+1)*(sizeof(unsigned int))); - hashTable[i].locs = NULL; - } - } -} -/**********************************************/ -void initSavingIHashTable(char *fileName) -{ - int tmp; - _ih_fp = fileOpen(fileName, "w"); - unsigned char bIndex = 0; // Bisulfite Index - - // First Two bytes are indicating the type of the index & window size - tmp = fwrite(&bIndex, sizeof(bIndex), 1, _ih_fp); - tmp = fwrite(&WINDOW_SIZE, sizeof(WINDOW_SIZE), 1, _ih_fp); - - if (tmp == 0) - fprintf(stderr, "Write error while initializing hash table.\n"); - -} -/**********************************************/ -void finalizeSavingIHashTable() -{ - fclose(_ih_fp); -} -/**********************************************/ -void saveIHashTable(IHashTable *hashTable, unsigned int size, unsigned int maxSize, char *refGen, char *refGenName, int refGenOffset) -{ - int tmp; - - // Every Chunk starts with a byte indicating whether it has extra info; - unsigned char extraInfo = 0; - tmp = fwrite (&extraInfo, sizeof(extraInfo), 1, _ih_fp); - - short len = strlen(refGenName); - tmp = fwrite(&len, sizeof(len), 1, _ih_fp); - tmp = fwrite(refGenName, sizeof(char), len, _ih_fp); - - tmp = fwrite(&refGenOffset, sizeof(refGenOffset), 1, _ih_fp); - - unsigned int refGenLength = strlen(refGen); - tmp = fwrite(&refGenLength, sizeof(refGenLength), 1, _ih_fp); - - tmp = fwrite(refGen, sizeof(char), refGenLength, _ih_fp); - tmp = fwrite(&size, sizeof(size), 1, _ih_fp); - - int i=0,j=0; - unsigned char cnt=0; - for (i=0; i<maxSize; i++) - { - if (hashTable[i].locs != NULL) - { - tmp = fwrite(&i, sizeof(i), 1, _ih_fp); - - if (hashTable[i].locs[0] < 250) - { - cnt = hashTable[i].locs[0]; - tmp = fwrite(&cnt, sizeof(cnt), 1, _ih_fp); - } - else - { - cnt =0; - tmp = fwrite (&cnt, sizeof(cnt), 1, _ih_fp); - tmp = fwrite (&(hashTable[i].locs[0]), sizeof(hashTable[i].locs[0]), 1, _ih_fp); - } - - for (j=1; j<=hashTable[i].locs[0]; j++) - tmp = fwrite(&(hashTable[i].locs[j]), sizeof(hashTable[i].locs[j]), 1, _ih_fp); - } - } - - if (tmp == 0) - fprintf(stderr, "Write error while saving hash table.\n"); -} -/**********************************************/ -unsigned int addIHashTableLocation(IHashTable *hashTable, int hv, int location) -{ - unsigned int sizeInc = 0; - - if (hashTable[hv].locs == NULL) - { - sizeInc = 1; - hashTable[hv].locs = getMem (sizeof(unsigned int)*2); - hashTable[hv].locs[0]=1; - hashTable[hv].locs[1]=location; - } - else - { - int size = hashTable[hv].locs[0]; - int i; - unsigned int *tmp = getMem( (size + 2) * sizeof(unsigned int) ); - - for (i = 0; i <= size; i++) - { - tmp[i] = hashTable[hv].locs[i]; - } - size++; - tmp[0] = size; - tmp[size] = location; - - freeMem(hashTable[hv].locs, (hashTable[hv].locs[0]*(sizeof(unsigned int)))); - hashTable[hv].locs = tmp; - } - return sizeInc; -} -/**********************************************/ -void generateIHashTable(char *fileName, char *indexName) -{ - double startTime = getTime(); - unsigned int hashTableSize = 0; - unsigned int hashTableMaxSize = pow(4, WINDOW_SIZE); - IHashTable *hashTable = getMem(sizeof(IHashTable)*hashTableMaxSize); - char *refGenName; - char *refGen; - int refGenOff = 0; - int i, hv, l, flag; - - - for ( i = 0; i < hashTableMaxSize; i++) - { - hashTable[i].locs = NULL; - } - - //Loading Fasta File - if (!initLoadingRefGenome(fileName)) - return; - initSavingIHashTable(indexName); - - fprintf(stdout, "Generating Index from %s", fileName); - fflush(stdout); - - char *prev = getMem (CONTIG_NAME_SIZE); - prev[0]='\0'; - - do - { - flag = loadRefGenome (&refGen, &refGenName, &refGenOff); - - if ( strcmp(prev, refGenName) != 0) - { - fprintf(stdout, "\n - %s ", refGenName); - fflush(stdout); - sprintf(prev, "%s", refGenName); - } - else - { - fprintf(stdout, "."); - fflush(stdout); - } - - l = strlen(refGen) - WINDOW_SIZE; - - for (i=0; i < l; i++) - { - hv = hashVal(refGen + i); - if (hv != -1) - { - hashTableSize += addIHashTableLocation (hashTable, hv, i+1); - } - } - - saveIHashTable(hashTable, hashTableSize, hashTableMaxSize, refGen, refGenName, refGenOff); - freeIHashTableContent(hashTable, hashTableMaxSize); - hashTableSize = 0; - } while (flag); - - freeMem(prev, CONTIG_NAME_SIZE); - freeMem(hashTable, sizeof(IHashTable)*hashTableMaxSize); - - finalizeLoadingRefGenome(); - finalizeSavingIHashTable(); - - fprintf(stdout, "\nDONE in %0.2fs!\n", (getTime()-startTime)); -} - -/**********************************************/ -void finalizeLoadingIHashTable() -{ - freeIHashTableContent(_ih_hashTable, _ih_maxHashTableSize); - freeMem(_ih_hashTable, sizeof(IHashTable)* _ih_maxHashTableSize); - freeMem(_ih_refGen, strlen(_ih_refGen)+1) ; - freeMem(_ih_refGenName, strlen(_ih_refGenName)+1); - fclose(_ih_fp); -} - -/**********************************************/ -int loadIHashTable(double *loadTime, int errThreshould) -{ - double startTime = getTime(); - unsigned char extraInfo = 0; - short len; - unsigned int refGenLength; - unsigned int hashTableSize; - unsigned int tmpSize; - int tmp; - int i=0,j=0; - - if ( fread(&extraInfo, sizeof(extraInfo), 1, _ih_fp) != sizeof(extraInfo) ) - return 0; - - freeIHashTableContent(_ih_hashTable, _ih_maxHashTableSize); - freeMem(_ih_refGen, strlen(_ih_refGen)+1) ; - freeMem(_ih_refGenName, strlen(_ih_refGenName)+1); - - // Reading Chr Name - tmp = fread(&len, sizeof(len), 1, _ih_fp); - if (tmp == 0){ - fprintf(stderr, "Read error while loading hash table.\n"); - exit(0); - } - - _ih_refGenName = getMem(sizeof(char)* (len+1)); - tmp = fread(_ih_refGenName, sizeof(char), len, _ih_fp); - _ih_refGenName [len] ='\0'; - - tmp = fread(&_ih_refGenOff, sizeof (_ih_refGenOff), 1, _ih_fp); - - // Reading Size and Content of Ref Genome - tmp = fread(&refGenLength, sizeof(refGenLength), 1, _ih_fp); - _ih_refGen = getMem(sizeof(char)*(refGenLength+1)); - tmp = fread(_ih_refGen, sizeof(char), refGenLength, _ih_fp); - _ih_refGen[refGenLength]='\0'; - - //Reading Hashtable Size and Content - tmp = fread(&hashTableSize, sizeof(hashTableSize), 1, _ih_fp); - - unsigned int hv; - unsigned char cnt=0; - unsigned int index = 1; - unsigned int tmpValue = 0; - for (i=0; i<hashTableSize; i++) - { - index = 1; - tmp = fread(&hv, sizeof(hv), 1, _ih_fp); - tmp = fread(&cnt, sizeof(cnt), 1, _ih_fp); - - if (cnt>0) - { - tmpSize = cnt; - } - else - { - tmp = fread(&tmpSize, sizeof(tmpSize), 1, _ih_fp); - } - - _ih_hashTable[hv].locs = getMem( sizeof(unsigned int)* (tmpSize+1) ); - _ih_hashTable[hv].locs[0] = tmpSize; - -/* for (j=1; j<=tmpSize; j++) - tmp = fread(&(_ih_hashTable[hv].locs[j]), sizeof(_ih_hashTable[hv].locs[j]), 1, _ih_fp); -*/ - - if(tmpSize != 0) - { - tmp = fread(&(_ih_hashTable[hv].locs[index]), sizeof(_ih_hashTable[hv].locs[index]), 1, _ih_fp); - index++; - } - - for (j=2; j<=tmpSize; j++) - { - tmp = fread(&tmpValue, sizeof(_ih_hashTable[hv].locs[index-1]), 1, _ih_fp); - if(abs(tmpValue-_ih_hashTable[hv].locs[index-1]) > errThreshould) - { - _ih_hashTable[hv].locs[index] = tmpValue; - index++; - } - } - _ih_hashTable[hv].locs[0] = index-1; - } - *loadTime = getTime()-startTime; - return 1; -} -/**********************************************/ -unsigned int *getIHashTableCandidates(int hv) -{ - if ( hv != -1 ) - return _ih_hashTable[hv].locs; - else - return NULL; -} -/**********************************************/ -/**********************************************/ -/**********************************************/ -void configHashTable() -{ - if (WINDOW_SIZE <= 15) - { - generateHashTable = &generateIHashTable; - loadHashTable = &loadIHashTable; - finalizeLoadingHashTable = &finalizeLoadingIHashTable; - getCandidates = &getIHashTableCandidates; - } - else - { - - - } -} -/**********************************************/ -int initLoadingHashTable(char *fileName) -{ - int i; - unsigned char bsIndex; - int tmp; - - _ih_fp = fileOpen(fileName, "r"); - - if (_ih_fp == NULL) - return 0; - - tmp = fread(&bsIndex, sizeof(bsIndex), 1, _ih_fp); - - if (tmp == 0){ - fprintf(stderr, "Read error while inilializing hash table loading.\n"); - exit(0); - } - - if (bsIndex) - { - fprintf(stdout, "Error: Wrong Type of Index indicated"); - return 0; - } - - tmp = fread(&WINDOW_SIZE, sizeof(WINDOW_SIZE), 1, _ih_fp); - - configHashTable(); - - if (_ih_maxHashTableSize != pow(4, WINDOW_SIZE)) - { - - if (_ih_hashTable != NULL) - { - freeIHashTableContent(_ih_hashTable, _ih_maxHashTableSize); - freeMem(_ih_hashTable, sizeof(IHashTable)* _ih_maxHashTableSize); - freeMem(_ih_refGen, strlen(_ih_refGen)+1) ; - freeMem(_ih_refGenName, strlen(_ih_refGenName)+1); - } - - _ih_maxHashTableSize = pow(4, WINDOW_SIZE); - - _ih_hashTable = getMem (sizeof(IHashTable) * _ih_maxHashTableSize); - for (i=0; i<_ih_maxHashTableSize; i++) - _ih_hashTable[i].locs = NULL; - _ih_refGen = getMem(1); - _ih_refGen[0]='\0'; - _ih_refGenName = getMem(1); - _ih_refGenName[0] = '\0'; - } - - return 1; -} -/**********************************************/ -char *getRefGenome() -{ - return _ih_refGen; - -} -/**********************************************/ -char *getRefGenomeName() -{ - return _ih_refGenName; - -} -/**********************************************/ -int getRefGenomeOffset() -{ - return _ih_refGenOff; - -} -/**********************************************/ -HashTable *getHashTable() -{ - return NULL; -}