Mercurial > repos > fgiacomoni > hr2
diff lib/HR2_v1.04.cpp @ 0:86296c048e46 draft
Init repository for [hr2]
| author | fgiacomoni |
|---|---|
| date | Wed, 05 Jun 2019 09:40:20 -0400 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/HR2_v1.04.cpp Wed Jun 05 09:40:20 2019 -0400 @@ -0,0 +1,1018 @@ +/* + + HR2.C + V1.04 + (Changes: meb) + + A program to calculate elemental compositions for a given mass. + See the file README for details. + +-------------------------------------------------------------------- + Copyright (c) 2001...2005 Joerg Hau <joerg.hau(at)dplanet.ch>. + + mail: joerg.hau@dplanet.ch + www: http://www.mysunrise.ch/users/joerg.hau/ + + *changed version by Tobias Kind (TK), 2006 , Fiehnlab, + *added extended valencies, added implementation of + seven golden rules of molecular formula filtering + + + This program is free software; you can redistribute it and/or + modify it under the terms of version 2 of the GNU General Public + License as published by the Free Software Foundation. See the + file LICENSE for details. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. +-------------------------------------------------------------------- + + Creation: somewhere in 1992 by JHa. + Revision: 2001-04-18, GPL'd, first public release (JHa) + 2001-04-21, improved help text (JHa) + 2002-06-27, added sodium (JHa) + 2002-10-09, added 15N (JHa) + 2005-02-25, added -v option; license now GPL *v2* (JHa) + 2005-02-27, optimised code in calc loop (JHa) + 2005-02-28, verified and updated atomic masses (JHa) + 2005-06-17, added GPL text when "-h" is used (JHa) + 2006-01-01, extended version for BMC Bioinformatics publication - HR2 (TK) + 2006-03-03, added element ratio checks, extended valencies, only even electrons - HR2 (TK) + 2006-09-09, 1000x-10000x speedup hand optimized hehe. - HR2 (TK) + -->special version for CHNSOP-F-Cl-Br-Si + 2009-05-28, David Enot introduced the concept of 'Adducts' (nadd) for MZedDB and corrected + some inaccuracies in April 2008. Manfred Beckmann has now corrected the use of + 'nadd' and 'charge' by calculating 'nadd' from 'charge' using abscharge = abs(charge) + (did it manually because I couldn't find 'abs') to correct 'measured_mass' and limits, + but keeping nadd = 0 for rdb calculation of neutral MW (MB) + 2014-08-29, Marion Landi corrected the mass of atoms. + + This is ANSI C and should compile with any C compiler; use + something along the lines of "gcc -Wall -O3 -o hr hr.c". + "g++ -O2 -o myhr HR2.cpp" on a Mac OS X G5 proc + Optimize for speed, you may gain factor 3! + NOW compiled under Visual C++ Express (faster than GCC) in C++ mode for boolean type. + + + --------------------------------------------------------------------- + Example arguments: + 1) -m 1 -t 100000 -C 1-100 -H 1-220 -N 0-10 -O 0-10 -P 0-10 -S 0-10 -L 0-10 -B 0-10 + 2) -m 500 -t 1 -C 50-100 -H 10-220 -N 0-10 -O 0-10 -P 0-10 -S 0-10 -L 0-10 -B 0-10 + 3) hr2-all-res -c "Hexaflumuron_459.982882Da_3ppm" -m 459.982882 -t 1.37995 -C 0-39 -H 0-98 -N 0-34 -O 0-30 -P 0-12 -S 0-12 -F 0-12 -L 0-14 -B 0-7 -I 0-0 + 945 formulas found in 253 seconds. (now 4 seconds, before eternal) + 4) hr2 -m 459.982882 -t 1.37995 -C 10-39 -H 28-98 -N 4-34 -O 0-30 -P 1-12 -S 1-12 -F 0-12 -L 1-14 -B 2-6 -I 0-0 + 1 formula in 0 seconds (former eternal time) + */ + +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <time.h> + +/* Uncomment this for windows +#include <windows.h> +#include <process.h> +*/ +#include <math.h> + + +#define VERSION "20050617" /* String ! */ +#define TRUE 1 +#define FALSE 0 +#define MAXLEN 181 /* max. length of input string */ + +#define _CRT_SECURE_NO_DEPRECATE 1 + +typedef struct { + const char *sym; /* symbol */ + const char *symi; /* non iso symbol */ + const double mass; /* accurate mass */ + const float val; /* to calculate unsaturations */ + const int key; /* used for decoding cmd line */ + int min, /* atom count min */ + max, /* atom count max */ + cnt, /* atom count actual */ + save; /* atom count old - for loop exiting*/ + } Element; + + +/* --- atomic masses as published by IUPAC, 2002-10-02 ---------- */ + +Element el[]= +/* array of the elements used here: + Symbol, exact mass, dbe, keycode, number, default-min, default-max + dle: isoptopes are given by iC for 13C, iK for 41K etc.... +*/ +{ +{ "C", "C", 12.000000000, +2.0, 'C', 0, 100, 0,0 }, +{ "iC", "C", 13.0033548378, +2.0, '1', 0, 0, 0 ,0 }, +{ "H", "H", 1.0078250321, -1.0, 'H', 0, 200, 0 ,0}, +{ "iH", "H", 2.0141017780, -1.0, 'D', 0, 0, 0 ,0}, +{ "N", "N", 14.0030740052, +1.0, 'N', 0, 40, 0,0 }, //org +1 = valence = 3: now +3 for valence = 5 +{ "iN", "N", 15.0001088984, +1.0, 'M', 0, 0, 0,0 }, +{ "O", "O", 15.9949146221, 0.0, 'O', 0, 70, 0 ,0}, +{ "F", "F", 18.99840322, -1.0, 'F', 0, 0, 0 ,0}, +{ "Na", "Na", 22.98976928, -1.0, 'A', 0, 0, 0 ,0}, +{ "Si", "Si", 27.9769265327, +2.0, 'I', 0, 0, 0 ,0}, +{ "P", "P", 30.97376163, +3.0, 'P', 0, 10, 0 ,0}, //org +1 valence = 3: now +3 for valence = 5 +{ "S", "S", 31.97207069, +4.0, 'S', 0, 0, 0 ,0}, //org 0 = valence = 2; now +4 for valence = 6 +{ "Cl", "Cl", 34.96885268, -1.0, 'L', 0, 0, 0 ,0}, +{ "iCl", "Cl", 36.96590259, -1.0, 'E', 0, 0, 0 ,0}, // added dle +{ "Br", "Br", 78.9183371, -1.0, 'B', 0, 0, 0 ,0}, +{ "iBr", "Br", 80.9162906, -1.0, 'G', 0, 0, 0 ,0}, // added dle +{ "K", "K", 38.96370668, -1.0, 'K', 0, 0, 0 ,0}, // added dle +{ "iK", "K", 40.96182576, -1.0, 'J', 0, 0, 0 ,0}, // added dle +}; + +const double electron = 0.0005486; + + +/* --- global variables --- */ + +double charge, /* charge on the molecule */ + tol, /* mass tolerance in mmu */ + abscharge, /* abs(charge): to calculate 'measured_mass' and limits - meb */ + nadd; /* nadd now calculated as abs(charge) - meb */ +char comment[MAXLEN]=""; /* some text ;-) */ +int single; /* flag to indicate if we calculate only once and exit */ +int nr_el; /* number of elements in array (above) */ +bool verbose; +bool applygr; + +/* --- some variables needed for reading the cmd line --- */ + +char *optarg; /* global: pointer to argument of current option */ +static int optind = 1; /* global: index of which argument is next. Is used + as a global variable for collection of further + arguments (= not options) via argv pointers. */ + + +/* --- function prototypes ------------------- */ + +int input(char *text, double *zahl); +int readfile(char *whatfile); +double calc_mass(void); +float calc_rdb(void); +long do_calculations(double mass, double tolerance); +int clean (char *buf); +int getopt(int argc, char *argv[], char *optionS); +/* you have to compile with C++ or define yourself this bool type (C99 compiler definition) */ +bool calc_element_ratios(bool element_probability); + +/* --- threading ------------------- */ +/* mass and RDB calculation could be in several other threads +however the loop is very fast, context switching takes longer and +slows down the whole process. Single-Thread multiprocessor (cluster) implementation +seems superior here. */ +/* Uncomment for windows +HANDLE hThread2,hThread3; +unsigned threadID2,threadID3; +*/ + + +/* --- main --- */ + +int main (int argc, char **argv) +{ +double mz; /* mass */ +char buf[MAXLEN]; +int i, tmp; + +static char *id = +"hr version %s. Copyright (C) by Joerg Hau 2001...2005 & Tobias Kind 2006 :-).\n"; + +static char *disclaimer = +"\nThis program is free software; you can redistribute it and/or modify it under\n" +"the terms of version 2 of the GNU General Public License as published by the\n" +"Free Software Foundation.\n\n" +"This program is distributed in the hope that it will be useful, but WITHOUT ANY\n" +"WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A\n" +"PARTICULAR PURPOSE. See the GNU General Public License for details.\n"; + +static char *msg = +"Calculates possible elemental compositions for a given mass.\n\n" +"usage: hr [options] file\n\nValid command line options are:\n" +"-h This Help screen.\n" +"-v Display version information.\n" +"-t tol Set tolerance to 'tol' mmu (default 5).\n" +"-m mz Set mass to 'mz'.\n" +"-s Print only the results (dle)" +"-c Number of charges i.e -c 1 is equivalent to -p (dle).\n" +"-p Positive ions; electron mass is removed from the formula.\n" +"-n Negative ions; electron mass is added to the formula.\n" +"-g Does not apply 4-7 golden rules (dle).\n" +"-a Maximum num. of adducts. (dle)\n" +"-X a-b For element X, use atom range a to b. List of valid atoms:\n\n" +" X key mass (6 decimals shown)\n" +" -------------------------------------\n"; + +/* initialise variables */ + +nadd = 0.0; /* added dle: number of adducts*/ +single = FALSE; /* run continuously */ +charge = 0.0; /* default charge is neutral */ +tol = 1.0; /* default tolerance in mmu */ +nr_el = sizeof(el)/sizeof(el[0]); /* calculate array size */ +verbose = TRUE; /* added dle: should everything printed out?*/ +applygr = TRUE; /* added dle: should rules 4-7 applied?*/ + + +/* decode and read the command line */ + +while ((tmp = getopt(argc, argv, "hvpnsgt:m:c:a:C:H:N:M:O:D:1:S:F:L:B:P:I:A:K:E:G:J:")) != EOF) + switch (tmp) + { + case 'h': /* help me */ + printf (id, VERSION); + printf (msg); + for (i=0; i < nr_el; i++) + printf (" %4s -%c %15.6lf\n", + el[i].sym, el[i].key, el[i].mass); + printf(disclaimer, "\n"); + return 0; + case 'v': /* version */ + printf (id, VERSION); + return 0; + case 'p': /* positive charge */ + charge = +1.0; + continue; + case 's': /* simple output dle*/ + verbose = FALSE; + continue; + case 'n': /* negative carge */ + charge = -1.0; + continue; + case 'g': /* apply 7GR dle */ + applygr = false; + continue; + case 't': /* tolerance */ + strcpy(buf, optarg); + sscanf(buf, "%lf", &tol); + continue; + case 'a': /* num of adducts dle */ + strcpy(buf, optarg); + sscanf(buf, "%lf", &nadd); + continue; + case 'm': /* single mass */ + strcpy(buf, optarg); + sscanf(buf, "%lf", &mz); + single = TRUE; + continue; + case 'c': /* comment for single mass */ + strcpy(buf, optarg); + sscanf(buf, "%lf", &charge); + continue; + continue; + case 'C': /* C12 */ + case 'H': /* 1H */ + case 'N': /* 14N */ + case 'M': /* 15N */ + case 'O': /* 16O */ + case 'D': /* 2H */ + case '1': /* 13C */ + case 'A': /* Na ('N' is taken for Nitrogen!) */ + case 'S': /* 32S */ + case 'F': /* 19F */ + case 'L': /* 35Cl ('C' is taken!) */ + case 'E': /* 37Cl (chloridE!) */ + case 'B': /* 79Br */ + case 'K': /* 39K */ + case 'J': /* 41K */ + case 'G': /* 81Br */ + case 'P': /* 31P */ + case 'I': /* 28Si ('S' is taken!) */ + i = 0; + /* compare keys until found */ + while ((i < nr_el) && (el[i].key != tmp)) + i++; + strcpy(buf, optarg); + sscanf(buf, "%d-%d", &el[i].min, &el[i].max); /* copy over */ + if (el[i].min > el[i].max) /* swap them */ + { + tmp = el[i].min; + el[i].min = el[i].max; + el[i].max = tmp; + } + + /* printf ("\n %c = %c ... %s (%d-%d)", \ + tmp, el[i].key, el[i].sym, el[i].min, el[i].max); */ + continue; + case '~': /* invalid arg */ + default: + printf ("'%s -h' for help.\n", argv[0]); + return 1; + } + +if (argv[optind] != NULL) /* remaining parameter on cmd line? */ + /* must be a file -- treat it line by line */ + return (readfile (argv[optind])); + +if (single == TRUE) /* only one calculation requested? */ + do_calculations(mz, tol); /* do it, then exit ... */ +else + { /* otherwise run a loop */ + while (input(comment, &mz)) + { + tmp = do_calculations(mz, tol); + printf("\n"); + } + } + +return 0; +} + + +/*************************************************************************** +* INPUT: reads a dataset in "dialog mode". * +* Input: Pointer to comment text, pointer to mass. * +* Returns: Result of sscanf(), 0 if prog is to be finished. * +* Note: This is a fairly primitive way to do it, but it works ;-) * +****************************************************************************/ +int input(char *txt, double *zahl) +{ +char buf[MAXLEN]; /* input line */ + +*zahl = 0.0; /* reset */ + +printf("Comment : "); /* display prompt */ +fgets(buf, MAXLEN-1, stdin); /* read line */ +buf[MAXLEN] = 0x0; /* terminate string */ +clean (buf); /* remove linefeed */ +strcpy(txt, buf); /* copy text over */ + +printf("Mass (ENTER to quit): "); /* display prompt */ +fgets(buf, MAXLEN-1, stdin); /* read line */ +buf[MAXLEN] = 0x0; /* terminate string */ +if (!clean (buf)) /* only a CR ? --> quit */ + return 0; +sscanf(buf,"%lf", zahl); /* scan string */ + +return 1; +} + + +/*************************************************************************** +* READFILE: reads dataset from file. * +* Input: Pointer to comment text, pointer to mass. * +* Returns: 0 if OK, 1 if error. * +****************************************************************************/ +int readfile(char *whatfile) +{ +double mz; /* measured mass */ +char buf[MAXLEN]; /* input line */ +FILE *infile; + +infile = fopen(whatfile, "r"); +if (NULL == infile) + { + fprintf (stderr, "Error: Cannot open %s.", whatfile); + return 1; + } + +while (fgets(buf, MAXLEN-1, infile)) + { + buf[MAXLEN] = 0x0; /* terminate string */ + if (*buf == ';') /* comment line */ + continue; + if (!clean (buf)) /* only a CR ? --> quit */ + return 0; + sscanf(buf,"%s %lf", comment, &mz); /* scan string */ + do_calculations(mz, tol); + mz = 0.0; /* reset */ + } +return 0; +} + + +/************************************************************************ +* CALC_MASS: Calculates mass of an ion from its composition. * +* Input: nothing (uses global variables) * +* Returns. mass of the ion. * +* Note: Takes care of charge and electron mass! * +* (Positive charge means removal of electrons). * +*************************************************************************/ +double calc_mass(void) +{ +int i; +double sum = 0.0; + +for (i=0; i < nr_el; i++) + sum += el[i].mass * el[i].cnt; + +return (sum - (charge * electron)); +} + + +/************************************************************************ +* CALC_RDB: Calculates rings & double bond equivalents. * +* Input: nothing (uses global variables) * +* Returns. RDB. * +*************************************************************************/ +float calc_rdb(void) +{ +int i; +float sum = 2.0; + +for (i=0; i < nr_el; i++) + sum += el[i].val * el[i].cnt; + +return (sum/2.0); +} +/************************************************************************ +* Calculates element ratios , CH2 (more than 8 electrons needed is not handled) +* Calculations element probabilities if element_probability = true +* Input: nothing (uses global variables) +* Returns. true/false. +*************************************************************************/ +bool calc_element_ratios(bool element_probability) +{ +bool CHNOPS_ok; +float HC_ratio; +float NC_ratio; +float OC_ratio; +float PC_ratio; +float SC_ratio; + +/* added the number of isotopes to the calculation - dle*/ +float C_count = (float)el[0].cnt+(float)el[1].cnt; // C_count=12C+13C +float H_count = (float)el[2].cnt+(float)el[3].cnt; +float N_count = (float)el[4].cnt+(float)el[5].cnt; +/* modif end here */ +float O_count = (float)el[6].cnt; +float P_count = (float)el[10].cnt; +float S_count = (float)el[11].cnt; + + + /* ELEMENT RATIOS allowed + MIN MAX (99.99%) + H/C 0.07 6.00 + N/C 0.00 4.00 + O/C 0.00 3.00 + P/C 0.00 2.00 + S/C 0.00 6.00 + */ + + // set CHNOPS_ok = true and assume all ratios are ok + CHNOPS_ok = true; + /*element_probability = false; */ + + + if (C_count && H_count >0) // C and H must have one count anyway (remove for non-organics// + { + HC_ratio = H_count/C_count; + if (element_probability) + { + if ((HC_ratio < 0.2) || (HC_ratio > 3.0)) // this is the H/C probability check ; + CHNOPS_ok = false; + } + else if (HC_ratio > 6.0) // this is the normal H/C ratio check - type cast from int to float is important + CHNOPS_ok = false; + } + + if (N_count >0) // if positive number of nitrogens then thes N/C ratio else just calc normal + { + NC_ratio = N_count/C_count; + if (element_probability) + { + if (NC_ratio > 2.0) // this is the N/C probability check ; + CHNOPS_ok = false; + } + else if (NC_ratio > 4.0) + CHNOPS_ok = false; + } + + if (O_count >0) // if positive number of O then thes O/C ratio else just calc normal + { + OC_ratio = O_count/C_count; + if (element_probability) + { + if (OC_ratio > 1.2) // this is the O/C probability check ; + CHNOPS_ok = false; + } + else if (OC_ratio > 3.0) + CHNOPS_ok = false; + } + + if (P_count >0) // if positive number of P then thes P/C ratio else just calc normal + { + PC_ratio = P_count/C_count; + if (element_probability) + { + if (PC_ratio > 0.32) // this is the P/C probability check ; + CHNOPS_ok = false; + + } + else if (PC_ratio > 6.0) + CHNOPS_ok = false; + } + + if (S_count >0) // if positive number of S then thes S/C ratio else just calc normal + { + SC_ratio = S_count/C_count; + if (element_probability) + { + if (SC_ratio > 0.65) // this is the S/C probability check ; + CHNOPS_ok = false; + } + else if (SC_ratio > 2.0) + CHNOPS_ok = false; + } + +//----------------------------------------------------------------------------- + + // check for multiple element ratios together with probability check + //if N<10, O<20, P<4, S<3 then true + if (element_probability && (N_count > 10) && (O_count > 20) && (P_count > 4) && (S_count > 1)) + CHNOPS_ok = false; + + // NOP check for multiple element ratios together with probability check + // NOP all > 3 and (N<11, O <22, P<6 then true) + if (element_probability && (N_count > 3) && (O_count > 3) && (P_count > 3)) + { + if (element_probability && (N_count > 11) && (O_count > 22) && (P_count > 6)) + CHNOPS_ok = false; + } + + // OPS check for multiple element ratios together with probability check + // O<14, P<3, S<3 then true + if (element_probability && (O_count > 14) && (P_count > 3) && (S_count > 3)) + CHNOPS_ok = false; + + // PSN check for multiple element ratios together with probability check + // P<3, S<3, N<4 then true + if (element_probability && (P_count > 3) && (S_count > 3) && (N_count >4)) + CHNOPS_ok = false; + + + // NOS check for multiple element ratios together with probability check + // NOS all > 6 and (N<19 O<14 S<8 then true) + if (element_probability && (N_count >6) && (O_count >6) && (S_count >6)) + { + if (element_probability && (N_count >19) && (O_count >14) && (S_count >8)) + CHNOPS_ok = false; + } + + // function return value; + if (CHNOPS_ok == true) + return true; + else + return false; +} + +/************************************************************************ +* DO_CALCULATIONS: Does the actual calculation loop. * +* Input: measured mass (in amu), tolerance (in mmu) * +* Returns. number of hits. * +*************************************************************************/ +long do_calculations (double measured_mass, double tolerance) +{ +time_t start, finish; +double elapsed_time; + +double mass; /* calc'd mass */ +double limit_lo, limit_hi; /* mass limits */ +float rdb, lewis, rdbori; /* Rings & double bonds */ +long i,j; +long long hit; /* counts the hits, with long declaration, overflow after 25h with all formulas < 2000 Da + long = FFFFFFFFh = 4,294,967,295d*/ +int niso; +long long counter; +bool elementcheck; +bool set_break; + + +time( &start ); // start time + +/* calculate limits */ +/* correct m/z value 'measured_mass' for z>1 using 'abscharge'; +keep charge = 0 for neutral mass searches (z=0); +additionally, keep the idea of 'adducts' for 'rdb', but use charge state instead -meb */ +if (charge<0) { + abscharge = -charge; + nadd = abscharge; +} else if (charge>0) { + abscharge = charge; + nadd = abscharge; +} else if (charge==0) { + abscharge = 1; /* for limits */ + nadd = 0; /* for rdb */ +} + +limit_lo = (measured_mass * abscharge) - (tolerance / 1000.0); +limit_hi = (measured_mass * abscharge) + (tolerance / 1000.0); + +if(verbose) /* extended output as in original code - dle */ +{ + if (strlen(comment)) /* print only if there is some text to print */ + printf ("Text \t%s\n", comment); + + printf("\n"); /* linefeed */ + printf ("Composition = \t"); + for (i=0; i < nr_el; i++) + if (el[i].max > 0) + printf("%s:%d-%d ", el[i].sym, el[i].min, el[i].max); + printf ("\n"); + + printf ("Mass Tolerance [mmu]: \t%.1f\n",tolerance); + printf ("Measured Mass: \t%.6lf\n", measured_mass); + printf ("Charge: \t%+.0lf\n", charge); /* dle */ + printf ("Num. of Adducts/Losses: \t%.0lf\n", nadd); /* dle */ + printf ("Apply 7GR: \t%d\n\n", applygr); /* dle */ +} + +hit = 0; /* Reset counter */ +counter = 0; +set_break = false; /* set breaker for element counts to false */ + +/* Now let's run the big big loop ... I'd like to do that + recursively but did not yet figure out how ;-) + TK Adds: the loop is just fine. +*/ + +/* now comes the "COOL trick" for calculating all formulae: +sorting the high mass elements to the outer loops, the small weights (H) +to the inner loops; + +This will reduce the computational time by factor ~10-60-1000 +OLD HR: Cangrelor at 1ppm 4465 formulas found in 5866 seconds. +NEW HR2: Cangrelor at 1ppm 4465 formulas found in 96 seconds. +NEW2 HR2: Cangrelor at 1ppm 4465 formulas found in 60 seconds. +NEW3 HR2: Cangrelor at 1ppm 4465 formulas found in 59 seconds. +HR2 Fast: Cangrelor at 1ppm 4465 formulas found in 41 seconds by evaluating 2,003,436,894 formulae. +hr2 -c "Cangrelor" -m 774.948 -t 0.77 -C 1-64 -H 1-112 -N 0-30 -O 0-80 -P 0-12 -S 0-9 -F 0-10 -L 0-10 + +Another additional trick is to end the 2nd.. 3rd.. 4th.. xth innermost loop +to prevent loops which are just higher and higher in mass. +*/ + + /*dle: process new elements */ +el[17].cnt = el[17].min - 1; el[17].save = el[17].cnt; +while (el[17].cnt++ < el[17].max) /*"iK"*/{ + +el[16].cnt = el[16].min - 1; el[16].save = el[16].cnt; +while (el[16].cnt++ < el[16].max) /*"K"*/{ + +el[15].cnt = el[15].min - 1; el[15].save = el[15].cnt; +while (el[15].cnt++ < el[15].max) /* "iBr"*/ { + +el[14].cnt = el[14].min - 1; el[14].save = el[14].cnt; +while (el[14].cnt++ < el[14].max) /* "Br"*/ { + +el[13].cnt = el[13].min - 1; el[13].save = el[13].cnt; +while (el[13].cnt++ < el[13].max) /*"iCl"*/ { + +el[12].cnt = el[12].min - 1; el[12].save = el[12].cnt; +while (el[12].cnt++ < el[12].max) /*"Cl"*/ { + +el[11].cnt = el[11].min - 1; el[11].save = el[11].cnt; +while (el[11].cnt++ < el[11].max) /*"S"*/ { + +el[10].cnt = el[10].min - 1; el[10].save = el[10].cnt; +while (el[10].cnt++ < el[10].max) /*"P"*/ { + +el[9].cnt = el[9].min - 1; el[9].save = el[9].cnt; +while (el[9].cnt++ < el[9].max) /*"Si"*/ { + +el[8].cnt = el[8].min - 1; el[8].save = el[8].cnt; +while (el[8].cnt++ < el[8].max) /*"Na"*/{ + +el[7].cnt = el[7].min - 1; el[7].save = el[7].cnt; +while (el[7].cnt++ < el[7].max) /*"F"*/ { + +el[6].cnt = el[6].min - 1; el[6].save = el[6].cnt; +while (el[6].cnt++ < el[6].max) /*"O"*/ { + +el[5].cnt = el[5].min - 1; el[5].save = el[5].cnt; +while (el[5].cnt++ < el[5].max) /*"15N"*/{ + +el[4].cnt = el[4].min - 1; el[4].save = el[4].cnt; +while (el[4].cnt++ < el[4].max) /*"N"*/{ + +el[1].cnt = el[1].min - 1; el[1].save = el[1].cnt; +while (el[1].cnt++ < el[1].max) /*"13C"*/ { + +el[0].cnt = el[0].min - 1; el[0].save = el[0].cnt; +while (el[0].cnt++ < el[0].max) /* "C"*/ { + +el[3].cnt = el[3].min - 1; el[3].save = el[3].cnt; +while (el[3].cnt++ < el[3].max) /*"D"*/{ + +el[2].cnt = el[2].min - 1; el[2].save = el[2].cnt; +while (el[2].cnt++ < el[2].max) /*"H"*/{ + + mass = calc_mass(); + counter++; + + //just for debug purposes + //if (mass > limit_hi) + //printf("mass: %f\tC: %d H: %d N: %d O: %d P: %d S: %d Cl: %d Br: %d\n",mass,el[0].cnt,el[2].cnt,el[4].cnt,el[6].cnt,el[10].cnt,el[11].cnt,el[12].cnt,el[13].cnt); + + /* if we exceed the upper limit, we can stop the calculation + for this particular element (JHa 20050227). <-- comment TK that will only bust the innermost while loop, which is "H"*/ + + // break H loop + if (mass > limit_hi) break; + + //************************************************************************************************************/ + //Calculus loop with print out + //************************************************************************************************************/ + + if ((mass >= limit_lo) && (mass <= limit_hi)) /* within limits? */ + { + // element check will be performed always, if variable bool element_probability is true also probabilities will be calculated + // not an elegant implementation, but fast. + + elementcheck = calc_element_ratios(applygr); /* pass applygr boolean by dle */ + + if (elementcheck) + { + rdbori = calc_rdb(); /* get RDB */ + + rdb = rdbori + 0.5*nadd; /* dle: if nadd addcuts */ + lewis = (float)(fmod(rdb, 1)); /*calc reminder*/ + if ((rdb >= 0) && (lewis != 0.5) && (lewis !=-0.5))/* less than -0.5 RDB does not make sense */ + { /* NO(!) CH3F10NS2 exists , RDB = -4.0 M= 282.9547*/ + hit ++; + for (i = 0; i < nr_el; i++) /* print composition */ + if (el[i].cnt > 0) /* but only if useful */ + printf("%s%d.", el[i].sym, el[i].cnt); // print formula + + printf("\t"); + /* dle: print out a more explicit molecular formula for further processing and + variable niso counts number of isotope elements in the solution */ + niso=0; + for (i = 0; i < nr_el; i++) + { /* print composition */ + if (el[i].cnt > 0) + { /* but only if useful */ + printf("%s%d", el[i].symi, el[i].cnt); // print formula + if (el[i].symi != el[i].sym) + niso=niso+el[i].cnt; + } + + } + /* dle: end of molecular print out */ + + /* dle: additionnaly print nadd and niso */ + printf("\t%.2f\t%.7lf\t%.0lf\t%d\t%+.2lf\n", rdb, mass, nadd,niso, 1000.0 * ((measured_mass * abscharge) - mass)); + + } /* end of 'rdb' loop */ + + } // end of elementcheck loop + + } /* end of 'limit' loop */ + //************************************************************************************************************/ + + + /* + TK: if the current mass is larger than the limit the loop can be exited. + Each element must point to the element which is in use and before. + This is a static implementation which can be enhanced with a pointer chain to the lower element. + Actually now its only allowed for CHNSOP-Fl-Cl-Br-Si !!! Brute-force <> elegance :-) + */ + } /*"H"*/ + + if ((mass >= limit_lo) && (el[2].save == el[2].cnt-1)) break; + } /*"D"*/ + + if ((mass >= limit_lo) && (el[3].save == el[3].cnt-1)) break; /* dle addons */ + } /* "C"*/ + + if ((mass >= limit_lo) && (el[0].save == el[0].cnt-1)) break; + } /*"13C"*/ + + if ((mass >= limit_lo) && (el[1].save == el[1].cnt-1)) break; /* dle addons */ + } /*"N"*/ + + if ((mass >= limit_lo) && (el[4].save == el[4].cnt-1)) break; + } /*"15N"*/ + + if ((mass >= limit_lo) && (el[5].save == el[5].cnt-1)) break; /* dle addons */ + } /*"O"*/ + + if ((mass >= limit_lo) && (el[6].save == el[6].cnt-1)) break; + } /*"F"*/ + + if ((mass >= limit_lo) && (el[7].save == el[7].cnt-1)) break; + } /*"Na"*/ + + if ((mass >= limit_lo) && (el[8].save == el[8].cnt-1)) break; + } /*"Si"*/ + + if ((mass >= limit_lo) && (el[9].save == el[9].cnt-1)) break; + } /*"P"*/ + + if ((mass >= limit_lo) && (el[10].save == el[10].cnt-1)) break; + } /*"S"*/ + + if ((mass >= limit_lo) && (el[11].save == el[11].cnt-1)) break; + } /*"Cl"*/ + + if ((mass >= limit_lo) && (el[12].save == el[12].cnt-1)) break; + } /*"iCl"*/ + + if ((mass >= limit_lo) && (el[13].save == el[13].cnt-1)) break; /* dle addons */ + } /*"Br"*/ + + if ((mass >= limit_lo) && (el[14].save == el[14].cnt-1)) break; + } /*"iBr"*/ + + if ((mass >= limit_lo) && (el[15].save == el[15].cnt-1)) break; /* dle addons */ + } /*"K"*/ + + if ((mass >= limit_lo) && (el[16].save == el[16].cnt-1)) break; /* dle addons */ + } /*"iK"*/ + +/* close that giant loop thing started above */ + + + + +time(&finish); // stop timer +elapsed_time = difftime(finish , start); // calulate time difference + +if(verbose){ + if (!hit) + printf("No matching combination found in %6.0f seconds.\n", elapsed_time ); + else{ + printf("\n%llu formulas found in %6.0f seconds by evaluating %llu formulae.\n",hit,elapsed_time,counter); + printf("RDBs are overloaded to maximum valence values (N=3,P=5,S=6).\n"); + } +} +return hit; +} + + +/************************************************************************ +* CLEAN: "cleans" a buffer obtained by fgets() * +* Input: Pointer to text buffer * +* Returns: strlen of buffer. * +*************************************************************************/ +int clean (char *buf) +{ +int i; + +for(i = 0; i < strlen(buf); i++) /* search for CR/LF */ + { + if(buf[i] == '\n' || buf[i] == '\r') + { + buf[i] = 0; /* stop at CR or LF */ + break; + } + } +return (strlen(buf)); +} + + + +/*************************************************************************** +* GETOPT: Command line parser, system V style. +* +* This routine is widely (and wildly) adapted from code that was +* made available by Borland International Inc. +* +* Standard option syntax is: +* +* option ::= SW [optLetter]* [argLetter space* argument] +* +* where +* - SW is '-' +* - there is no space before any optLetter or argLetter. +* - opt/arg letters are alphabetic, not punctuation characters. +* - optLetters, if present, must be matched in optionS. +* - argLetters, if present, are found in optionS followed by ':'. +* - argument is any white-space delimited string. Note that it +* can include the SW character. +* - upper and lower case letters are distinct. +* +* There may be multiple option clusters on a command line, each +* beginning with a SW, but all must appear before any non-option +* arguments (arguments not introduced by SW). Opt/arg letters may +* be repeated: it is up to the caller to decide if that is an error. +* +* The character SW appearing alone as the last argument is an error. +* The lead-in sequence SWSW ("--") causes itself and all the rest +* of the line to be ignored (allowing non-options which begin +* with the switch char). +* +* The string *optionS allows valid opt/arg letters to be recognized. +* argLetters are followed with ':'. Getopt () returns the value of +* the option character found, or EOF if no more options are in the +* command line. If option is an argLetter then the global optarg is +* set to point to the argument string (having skipped any white-space). +* +* The global optind is initially 1 and is always left as the index +* of the next argument of argv[] which getopt has not taken. Note +* that if "--" or "//" are used then optind is stepped to the next +* argument before getopt() returns EOF. +* +* If an error occurs, that is an SW char precedes an unknown letter, +* then getopt() will return a '~' character and normally prints an +* error message via perror(). If the global variable opterr is set +* to false (zero) before calling getopt() then the error message is +* not printed. +* +* For example, if +* +* *optionS == "A:F:PuU:wXZ:" +* +* then 'P', 'u', 'w', and 'X' are option letters and 'A', 'F', +* 'U', 'Z' are followed by arguments. A valid command line may be: +* +* aCommand -uPFPi -X -A L someFile +* +* where: +* - 'u' and 'P' will be returned as isolated option letters. +* - 'F' will return with "Pi" as its argument string. +* - 'X' is an isolated option. +* - 'A' will return with "L" as its argument. +* - "someFile" is not an option, and terminates getOpt. The +* caller may collect remaining arguments using argv pointers. +***************************************************************************/ +int getopt(int argc, char *argv[], char *optionS) +{ +static char *letP = NULL; /* remember next option char's location */ +static char SW = '-'; /* switch character */ + +int opterr = 1; /* allow error message */ +unsigned char ch; +char *optP; + +if (argc > optind) + { + if (letP == NULL) + { + if ((letP = argv[optind]) == NULL || *(letP++) != SW) + goto gopEOF; + + if (*letP == SW) + { + optind++; + goto gopEOF; + } + } + if (0 == (ch = *(letP++))) + { + optind++; + goto gopEOF; + } + if (':' == ch || (optP = strchr(optionS, ch)) == NULL) + goto gopError; + if (':' == *(++optP)) + { + optind++; + if (0 == *letP) + { + if (argc <= optind) + goto gopError; + letP = argv[optind++]; + } + optarg = letP; + letP = NULL; + } + else + { + if (0 == *letP) + { + optind++; + letP = NULL; + } + optarg = NULL; + } + return ch; +} + +gopEOF: + optarg = letP = NULL; + return EOF; + +gopError: + optarg = NULL; + errno = EINVAL; + if (opterr) + perror ("Command line option"); + return ('~'); +} + +/* +List of elements sorted according to mass +_______________________ +INo# El Mass +2 H 1.007825032 +3 D 2.014101778 +0 C 12 +1 13C 13.00335484 +4 N 14.00307401 +5 15N 15.0001089 +6 O 15.99491462 +7 F 18.9984032 +8 Na 22.98976967 +9 Si 27.97692653 +10 P 30.97376151 +11 S 31.97207069 +12 Cl 34.96885271 +13 Br 78.9183376 +------------------------ +*/
