comparison lib/HR2_v1.04.cpp @ 0:86296c048e46 draft

Init repository for [hr2]
author fgiacomoni
date Wed, 05 Jun 2019 09:40:20 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:86296c048e46
1 /*
2
3 HR2.C
4 V1.04
5 (Changes: meb)
6
7 A program to calculate elemental compositions for a given mass.
8 See the file README for details.
9
10 --------------------------------------------------------------------
11 Copyright (c) 2001...2005 Joerg Hau <joerg.hau(at)dplanet.ch>.
12
13 mail: joerg.hau@dplanet.ch
14 www: http://www.mysunrise.ch/users/joerg.hau/
15
16 *changed version by Tobias Kind (TK), 2006 , Fiehnlab,
17 *added extended valencies, added implementation of
18 seven golden rules of molecular formula filtering
19
20
21 This program is free software; you can redistribute it and/or
22 modify it under the terms of version 2 of the GNU General Public
23 License as published by the Free Software Foundation. See the
24 file LICENSE for details.
25
26 This program is distributed in the hope that it will be useful,
27 but WITHOUT ANY WARRANTY; without even the implied warranty of
28 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 GNU General Public License for more details.
30 --------------------------------------------------------------------
31
32 Creation: somewhere in 1992 by JHa.
33 Revision: 2001-04-18, GPL'd, first public release (JHa)
34 2001-04-21, improved help text (JHa)
35 2002-06-27, added sodium (JHa)
36 2002-10-09, added 15N (JHa)
37 2005-02-25, added -v option; license now GPL *v2* (JHa)
38 2005-02-27, optimised code in calc loop (JHa)
39 2005-02-28, verified and updated atomic masses (JHa)
40 2005-06-17, added GPL text when "-h" is used (JHa)
41 2006-01-01, extended version for BMC Bioinformatics publication - HR2 (TK)
42 2006-03-03, added element ratio checks, extended valencies, only even electrons - HR2 (TK)
43 2006-09-09, 1000x-10000x speedup hand optimized hehe. - HR2 (TK)
44 -->special version for CHNSOP-F-Cl-Br-Si
45 2009-05-28, David Enot introduced the concept of 'Adducts' (nadd) for MZedDB and corrected
46 some inaccuracies in April 2008. Manfred Beckmann has now corrected the use of
47 'nadd' and 'charge' by calculating 'nadd' from 'charge' using abscharge = abs(charge)
48 (did it manually because I couldn't find 'abs') to correct 'measured_mass' and limits,
49 but keeping nadd = 0 for rdb calculation of neutral MW (MB)
50 2014-08-29, Marion Landi corrected the mass of atoms.
51
52 This is ANSI C and should compile with any C compiler; use
53 something along the lines of "gcc -Wall -O3 -o hr hr.c".
54 "g++ -O2 -o myhr HR2.cpp" on a Mac OS X G5 proc
55 Optimize for speed, you may gain factor 3!
56 NOW compiled under Visual C++ Express (faster than GCC) in C++ mode for boolean type.
57
58
59 ---------------------------------------------------------------------
60 Example arguments:
61 1) -m 1 -t 100000 -C 1-100 -H 1-220 -N 0-10 -O 0-10 -P 0-10 -S 0-10 -L 0-10 -B 0-10
62 2) -m 500 -t 1 -C 50-100 -H 10-220 -N 0-10 -O 0-10 -P 0-10 -S 0-10 -L 0-10 -B 0-10
63 3) hr2-all-res -c "Hexaflumuron_459.982882Da_3ppm" -m 459.982882 -t 1.37995 -C 0-39 -H 0-98 -N 0-34 -O 0-30 -P 0-12 -S 0-12 -F 0-12 -L 0-14 -B 0-7 -I 0-0
64 945 formulas found in 253 seconds. (now 4 seconds, before eternal)
65 4) hr2 -m 459.982882 -t 1.37995 -C 10-39 -H 28-98 -N 4-34 -O 0-30 -P 1-12 -S 1-12 -F 0-12 -L 1-14 -B 2-6 -I 0-0
66 1 formula in 0 seconds (former eternal time)
67 */
68
69 #include <stdio.h>
70 #include <string.h>
71 #include <errno.h>
72 #include <time.h>
73
74 /* Uncomment this for windows
75 #include <windows.h>
76 #include <process.h>
77 */
78 #include <math.h>
79
80
81 #define VERSION "20050617" /* String ! */
82 #define TRUE 1
83 #define FALSE 0
84 #define MAXLEN 181 /* max. length of input string */
85
86 #define _CRT_SECURE_NO_DEPRECATE 1
87
88 typedef struct {
89 const char *sym; /* symbol */
90 const char *symi; /* non iso symbol */
91 const double mass; /* accurate mass */
92 const float val; /* to calculate unsaturations */
93 const int key; /* used for decoding cmd line */
94 int min, /* atom count min */
95 max, /* atom count max */
96 cnt, /* atom count actual */
97 save; /* atom count old - for loop exiting*/
98 } Element;
99
100
101 /* --- atomic masses as published by IUPAC, 2002-10-02 ---------- */
102
103 Element el[]=
104 /* array of the elements used here:
105 Symbol, exact mass, dbe, keycode, number, default-min, default-max
106 dle: isoptopes are given by iC for 13C, iK for 41K etc....
107 */
108 {
109 { "C", "C", 12.000000000, +2.0, 'C', 0, 100, 0,0 },
110 { "iC", "C", 13.0033548378, +2.0, '1', 0, 0, 0 ,0 },
111 { "H", "H", 1.0078250321, -1.0, 'H', 0, 200, 0 ,0},
112 { "iH", "H", 2.0141017780, -1.0, 'D', 0, 0, 0 ,0},
113 { "N", "N", 14.0030740052, +1.0, 'N', 0, 40, 0,0 }, //org +1 = valence = 3: now +3 for valence = 5
114 { "iN", "N", 15.0001088984, +1.0, 'M', 0, 0, 0,0 },
115 { "O", "O", 15.9949146221, 0.0, 'O', 0, 70, 0 ,0},
116 { "F", "F", 18.99840322, -1.0, 'F', 0, 0, 0 ,0},
117 { "Na", "Na", 22.98976928, -1.0, 'A', 0, 0, 0 ,0},
118 { "Si", "Si", 27.9769265327, +2.0, 'I', 0, 0, 0 ,0},
119 { "P", "P", 30.97376163, +3.0, 'P', 0, 10, 0 ,0}, //org +1 valence = 3: now +3 for valence = 5
120 { "S", "S", 31.97207069, +4.0, 'S', 0, 0, 0 ,0}, //org 0 = valence = 2; now +4 for valence = 6
121 { "Cl", "Cl", 34.96885268, -1.0, 'L', 0, 0, 0 ,0},
122 { "iCl", "Cl", 36.96590259, -1.0, 'E', 0, 0, 0 ,0}, // added dle
123 { "Br", "Br", 78.9183371, -1.0, 'B', 0, 0, 0 ,0},
124 { "iBr", "Br", 80.9162906, -1.0, 'G', 0, 0, 0 ,0}, // added dle
125 { "K", "K", 38.96370668, -1.0, 'K', 0, 0, 0 ,0}, // added dle
126 { "iK", "K", 40.96182576, -1.0, 'J', 0, 0, 0 ,0}, // added dle
127 };
128
129 const double electron = 0.0005486;
130
131
132 /* --- global variables --- */
133
134 double charge, /* charge on the molecule */
135 tol, /* mass tolerance in mmu */
136 abscharge, /* abs(charge): to calculate 'measured_mass' and limits - meb */
137 nadd; /* nadd now calculated as abs(charge) - meb */
138 char comment[MAXLEN]=""; /* some text ;-) */
139 int single; /* flag to indicate if we calculate only once and exit */
140 int nr_el; /* number of elements in array (above) */
141 bool verbose;
142 bool applygr;
143
144 /* --- some variables needed for reading the cmd line --- */
145
146 char *optarg; /* global: pointer to argument of current option */
147 static int optind = 1; /* global: index of which argument is next. Is used
148 as a global variable for collection of further
149 arguments (= not options) via argv pointers. */
150
151
152 /* --- function prototypes ------------------- */
153
154 int input(char *text, double *zahl);
155 int readfile(char *whatfile);
156 double calc_mass(void);
157 float calc_rdb(void);
158 long do_calculations(double mass, double tolerance);
159 int clean (char *buf);
160 int getopt(int argc, char *argv[], char *optionS);
161 /* you have to compile with C++ or define yourself this bool type (C99 compiler definition) */
162 bool calc_element_ratios(bool element_probability);
163
164 /* --- threading ------------------- */
165 /* mass and RDB calculation could be in several other threads
166 however the loop is very fast, context switching takes longer and
167 slows down the whole process. Single-Thread multiprocessor (cluster) implementation
168 seems superior here. */
169 /* Uncomment for windows
170 HANDLE hThread2,hThread3;
171 unsigned threadID2,threadID3;
172 */
173
174
175 /* --- main --- */
176
177 int main (int argc, char **argv)
178 {
179 double mz; /* mass */
180 char buf[MAXLEN];
181 int i, tmp;
182
183 static char *id =
184 "hr version %s. Copyright (C) by Joerg Hau 2001...2005 & Tobias Kind 2006 :-).\n";
185
186 static char *disclaimer =
187 "\nThis program is free software; you can redistribute it and/or modify it under\n"
188 "the terms of version 2 of the GNU General Public License as published by the\n"
189 "Free Software Foundation.\n\n"
190 "This program is distributed in the hope that it will be useful, but WITHOUT ANY\n"
191 "WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A\n"
192 "PARTICULAR PURPOSE. See the GNU General Public License for details.\n";
193
194 static char *msg =
195 "Calculates possible elemental compositions for a given mass.\n\n"
196 "usage: hr [options] file\n\nValid command line options are:\n"
197 "-h This Help screen.\n"
198 "-v Display version information.\n"
199 "-t tol Set tolerance to 'tol' mmu (default 5).\n"
200 "-m mz Set mass to 'mz'.\n"
201 "-s Print only the results (dle)"
202 "-c Number of charges i.e -c 1 is equivalent to -p (dle).\n"
203 "-p Positive ions; electron mass is removed from the formula.\n"
204 "-n Negative ions; electron mass is added to the formula.\n"
205 "-g Does not apply 4-7 golden rules (dle).\n"
206 "-a Maximum num. of adducts. (dle)\n"
207 "-X a-b For element X, use atom range a to b. List of valid atoms:\n\n"
208 " X key mass (6 decimals shown)\n"
209 " -------------------------------------\n";
210
211 /* initialise variables */
212
213 nadd = 0.0; /* added dle: number of adducts*/
214 single = FALSE; /* run continuously */
215 charge = 0.0; /* default charge is neutral */
216 tol = 1.0; /* default tolerance in mmu */
217 nr_el = sizeof(el)/sizeof(el[0]); /* calculate array size */
218 verbose = TRUE; /* added dle: should everything printed out?*/
219 applygr = TRUE; /* added dle: should rules 4-7 applied?*/
220
221
222 /* decode and read the command line */
223
224 while ((tmp = getopt(argc, argv, "hvpnsgt:m:c:a:C:H:N:M:O:D:1:S:F:L:B:P:I:A:K:E:G:J:")) != EOF)
225 switch (tmp)
226 {
227 case 'h': /* help me */
228 printf (id, VERSION);
229 printf (msg);
230 for (i=0; i < nr_el; i++)
231 printf (" %4s -%c %15.6lf\n",
232 el[i].sym, el[i].key, el[i].mass);
233 printf(disclaimer, "\n");
234 return 0;
235 case 'v': /* version */
236 printf (id, VERSION);
237 return 0;
238 case 'p': /* positive charge */
239 charge = +1.0;
240 continue;
241 case 's': /* simple output dle*/
242 verbose = FALSE;
243 continue;
244 case 'n': /* negative carge */
245 charge = -1.0;
246 continue;
247 case 'g': /* apply 7GR dle */
248 applygr = false;
249 continue;
250 case 't': /* tolerance */
251 strcpy(buf, optarg);
252 sscanf(buf, "%lf", &tol);
253 continue;
254 case 'a': /* num of adducts dle */
255 strcpy(buf, optarg);
256 sscanf(buf, "%lf", &nadd);
257 continue;
258 case 'm': /* single mass */
259 strcpy(buf, optarg);
260 sscanf(buf, "%lf", &mz);
261 single = TRUE;
262 continue;
263 case 'c': /* comment for single mass */
264 strcpy(buf, optarg);
265 sscanf(buf, "%lf", &charge);
266 continue;
267 continue;
268 case 'C': /* C12 */
269 case 'H': /* 1H */
270 case 'N': /* 14N */
271 case 'M': /* 15N */
272 case 'O': /* 16O */
273 case 'D': /* 2H */
274 case '1': /* 13C */
275 case 'A': /* Na ('N' is taken for Nitrogen!) */
276 case 'S': /* 32S */
277 case 'F': /* 19F */
278 case 'L': /* 35Cl ('C' is taken!) */
279 case 'E': /* 37Cl (chloridE!) */
280 case 'B': /* 79Br */
281 case 'K': /* 39K */
282 case 'J': /* 41K */
283 case 'G': /* 81Br */
284 case 'P': /* 31P */
285 case 'I': /* 28Si ('S' is taken!) */
286 i = 0;
287 /* compare keys until found */
288 while ((i < nr_el) && (el[i].key != tmp))
289 i++;
290 strcpy(buf, optarg);
291 sscanf(buf, "%d-%d", &el[i].min, &el[i].max); /* copy over */
292 if (el[i].min > el[i].max) /* swap them */
293 {
294 tmp = el[i].min;
295 el[i].min = el[i].max;
296 el[i].max = tmp;
297 }
298
299 /* printf ("\n %c = %c ... %s (%d-%d)", \
300 tmp, el[i].key, el[i].sym, el[i].min, el[i].max); */
301 continue;
302 case '~': /* invalid arg */
303 default:
304 printf ("'%s -h' for help.\n", argv[0]);
305 return 1;
306 }
307
308 if (argv[optind] != NULL) /* remaining parameter on cmd line? */
309 /* must be a file -- treat it line by line */
310 return (readfile (argv[optind]));
311
312 if (single == TRUE) /* only one calculation requested? */
313 do_calculations(mz, tol); /* do it, then exit ... */
314 else
315 { /* otherwise run a loop */
316 while (input(comment, &mz))
317 {
318 tmp = do_calculations(mz, tol);
319 printf("\n");
320 }
321 }
322
323 return 0;
324 }
325
326
327 /***************************************************************************
328 * INPUT: reads a dataset in "dialog mode". *
329 * Input: Pointer to comment text, pointer to mass. *
330 * Returns: Result of sscanf(), 0 if prog is to be finished. *
331 * Note: This is a fairly primitive way to do it, but it works ;-) *
332 ****************************************************************************/
333 int input(char *txt, double *zahl)
334 {
335 char buf[MAXLEN]; /* input line */
336
337 *zahl = 0.0; /* reset */
338
339 printf("Comment : "); /* display prompt */
340 fgets(buf, MAXLEN-1, stdin); /* read line */
341 buf[MAXLEN] = 0x0; /* terminate string */
342 clean (buf); /* remove linefeed */
343 strcpy(txt, buf); /* copy text over */
344
345 printf("Mass (ENTER to quit): "); /* display prompt */
346 fgets(buf, MAXLEN-1, stdin); /* read line */
347 buf[MAXLEN] = 0x0; /* terminate string */
348 if (!clean (buf)) /* only a CR ? --> quit */
349 return 0;
350 sscanf(buf,"%lf", zahl); /* scan string */
351
352 return 1;
353 }
354
355
356 /***************************************************************************
357 * READFILE: reads dataset from file. *
358 * Input: Pointer to comment text, pointer to mass. *
359 * Returns: 0 if OK, 1 if error. *
360 ****************************************************************************/
361 int readfile(char *whatfile)
362 {
363 double mz; /* measured mass */
364 char buf[MAXLEN]; /* input line */
365 FILE *infile;
366
367 infile = fopen(whatfile, "r");
368 if (NULL == infile)
369 {
370 fprintf (stderr, "Error: Cannot open %s.", whatfile);
371 return 1;
372 }
373
374 while (fgets(buf, MAXLEN-1, infile))
375 {
376 buf[MAXLEN] = 0x0; /* terminate string */
377 if (*buf == ';') /* comment line */
378 continue;
379 if (!clean (buf)) /* only a CR ? --> quit */
380 return 0;
381 sscanf(buf,"%s %lf", comment, &mz); /* scan string */
382 do_calculations(mz, tol);
383 mz = 0.0; /* reset */
384 }
385 return 0;
386 }
387
388
389 /************************************************************************
390 * CALC_MASS: Calculates mass of an ion from its composition. *
391 * Input: nothing (uses global variables) *
392 * Returns. mass of the ion. *
393 * Note: Takes care of charge and electron mass! *
394 * (Positive charge means removal of electrons). *
395 *************************************************************************/
396 double calc_mass(void)
397 {
398 int i;
399 double sum = 0.0;
400
401 for (i=0; i < nr_el; i++)
402 sum += el[i].mass * el[i].cnt;
403
404 return (sum - (charge * electron));
405 }
406
407
408 /************************************************************************
409 * CALC_RDB: Calculates rings & double bond equivalents. *
410 * Input: nothing (uses global variables) *
411 * Returns. RDB. *
412 *************************************************************************/
413 float calc_rdb(void)
414 {
415 int i;
416 float sum = 2.0;
417
418 for (i=0; i < nr_el; i++)
419 sum += el[i].val * el[i].cnt;
420
421 return (sum/2.0);
422 }
423 /************************************************************************
424 * Calculates element ratios , CH2 (more than 8 electrons needed is not handled)
425 * Calculations element probabilities if element_probability = true
426 * Input: nothing (uses global variables)
427 * Returns. true/false.
428 *************************************************************************/
429 bool calc_element_ratios(bool element_probability)
430 {
431 bool CHNOPS_ok;
432 float HC_ratio;
433 float NC_ratio;
434 float OC_ratio;
435 float PC_ratio;
436 float SC_ratio;
437
438 /* added the number of isotopes to the calculation - dle*/
439 float C_count = (float)el[0].cnt+(float)el[1].cnt; // C_count=12C+13C
440 float H_count = (float)el[2].cnt+(float)el[3].cnt;
441 float N_count = (float)el[4].cnt+(float)el[5].cnt;
442 /* modif end here */
443 float O_count = (float)el[6].cnt;
444 float P_count = (float)el[10].cnt;
445 float S_count = (float)el[11].cnt;
446
447
448 /* ELEMENT RATIOS allowed
449 MIN MAX (99.99%)
450 H/C 0.07 6.00
451 N/C 0.00 4.00
452 O/C 0.00 3.00
453 P/C 0.00 2.00
454 S/C 0.00 6.00
455 */
456
457 // set CHNOPS_ok = true and assume all ratios are ok
458 CHNOPS_ok = true;
459 /*element_probability = false; */
460
461
462 if (C_count && H_count >0) // C and H must have one count anyway (remove for non-organics//
463 {
464 HC_ratio = H_count/C_count;
465 if (element_probability)
466 {
467 if ((HC_ratio < 0.2) || (HC_ratio > 3.0)) // this is the H/C probability check ;
468 CHNOPS_ok = false;
469 }
470 else if (HC_ratio > 6.0) // this is the normal H/C ratio check - type cast from int to float is important
471 CHNOPS_ok = false;
472 }
473
474 if (N_count >0) // if positive number of nitrogens then thes N/C ratio else just calc normal
475 {
476 NC_ratio = N_count/C_count;
477 if (element_probability)
478 {
479 if (NC_ratio > 2.0) // this is the N/C probability check ;
480 CHNOPS_ok = false;
481 }
482 else if (NC_ratio > 4.0)
483 CHNOPS_ok = false;
484 }
485
486 if (O_count >0) // if positive number of O then thes O/C ratio else just calc normal
487 {
488 OC_ratio = O_count/C_count;
489 if (element_probability)
490 {
491 if (OC_ratio > 1.2) // this is the O/C probability check ;
492 CHNOPS_ok = false;
493 }
494 else if (OC_ratio > 3.0)
495 CHNOPS_ok = false;
496 }
497
498 if (P_count >0) // if positive number of P then thes P/C ratio else just calc normal
499 {
500 PC_ratio = P_count/C_count;
501 if (element_probability)
502 {
503 if (PC_ratio > 0.32) // this is the P/C probability check ;
504 CHNOPS_ok = false;
505
506 }
507 else if (PC_ratio > 6.0)
508 CHNOPS_ok = false;
509 }
510
511 if (S_count >0) // if positive number of S then thes S/C ratio else just calc normal
512 {
513 SC_ratio = S_count/C_count;
514 if (element_probability)
515 {
516 if (SC_ratio > 0.65) // this is the S/C probability check ;
517 CHNOPS_ok = false;
518 }
519 else if (SC_ratio > 2.0)
520 CHNOPS_ok = false;
521 }
522
523 //-----------------------------------------------------------------------------
524
525 // check for multiple element ratios together with probability check
526 //if N<10, O<20, P<4, S<3 then true
527 if (element_probability && (N_count > 10) && (O_count > 20) && (P_count > 4) && (S_count > 1))
528 CHNOPS_ok = false;
529
530 // NOP check for multiple element ratios together with probability check
531 // NOP all > 3 and (N<11, O <22, P<6 then true)
532 if (element_probability && (N_count > 3) && (O_count > 3) && (P_count > 3))
533 {
534 if (element_probability && (N_count > 11) && (O_count > 22) && (P_count > 6))
535 CHNOPS_ok = false;
536 }
537
538 // OPS check for multiple element ratios together with probability check
539 // O<14, P<3, S<3 then true
540 if (element_probability && (O_count > 14) && (P_count > 3) && (S_count > 3))
541 CHNOPS_ok = false;
542
543 // PSN check for multiple element ratios together with probability check
544 // P<3, S<3, N<4 then true
545 if (element_probability && (P_count > 3) && (S_count > 3) && (N_count >4))
546 CHNOPS_ok = false;
547
548
549 // NOS check for multiple element ratios together with probability check
550 // NOS all > 6 and (N<19 O<14 S<8 then true)
551 if (element_probability && (N_count >6) && (O_count >6) && (S_count >6))
552 {
553 if (element_probability && (N_count >19) && (O_count >14) && (S_count >8))
554 CHNOPS_ok = false;
555 }
556
557 // function return value;
558 if (CHNOPS_ok == true)
559 return true;
560 else
561 return false;
562 }
563
564 /************************************************************************
565 * DO_CALCULATIONS: Does the actual calculation loop. *
566 * Input: measured mass (in amu), tolerance (in mmu) *
567 * Returns. number of hits. *
568 *************************************************************************/
569 long do_calculations (double measured_mass, double tolerance)
570 {
571 time_t start, finish;
572 double elapsed_time;
573
574 double mass; /* calc'd mass */
575 double limit_lo, limit_hi; /* mass limits */
576 float rdb, lewis, rdbori; /* Rings & double bonds */
577 long i,j;
578 long long hit; /* counts the hits, with long declaration, overflow after 25h with all formulas < 2000 Da
579 long = FFFFFFFFh = 4,294,967,295d*/
580 int niso;
581 long long counter;
582 bool elementcheck;
583 bool set_break;
584
585
586 time( &start ); // start time
587
588 /* calculate limits */
589 /* correct m/z value 'measured_mass' for z>1 using 'abscharge';
590 keep charge = 0 for neutral mass searches (z=0);
591 additionally, keep the idea of 'adducts' for 'rdb', but use charge state instead -meb */
592 if (charge<0) {
593 abscharge = -charge;
594 nadd = abscharge;
595 } else if (charge>0) {
596 abscharge = charge;
597 nadd = abscharge;
598 } else if (charge==0) {
599 abscharge = 1; /* for limits */
600 nadd = 0; /* for rdb */
601 }
602
603 limit_lo = (measured_mass * abscharge) - (tolerance / 1000.0);
604 limit_hi = (measured_mass * abscharge) + (tolerance / 1000.0);
605
606 if(verbose) /* extended output as in original code - dle */
607 {
608 if (strlen(comment)) /* print only if there is some text to print */
609 printf ("Text \t%s\n", comment);
610
611 printf("\n"); /* linefeed */
612 printf ("Composition = \t");
613 for (i=0; i < nr_el; i++)
614 if (el[i].max > 0)
615 printf("%s:%d-%d ", el[i].sym, el[i].min, el[i].max);
616 printf ("\n");
617
618 printf ("Mass Tolerance [mmu]: \t%.1f\n",tolerance);
619 printf ("Measured Mass: \t%.6lf\n", measured_mass);
620 printf ("Charge: \t%+.0lf\n", charge); /* dle */
621 printf ("Num. of Adducts/Losses: \t%.0lf\n", nadd); /* dle */
622 printf ("Apply 7GR: \t%d\n\n", applygr); /* dle */
623 }
624
625 hit = 0; /* Reset counter */
626 counter = 0;
627 set_break = false; /* set breaker for element counts to false */
628
629 /* Now let's run the big big loop ... I'd like to do that
630 recursively but did not yet figure out how ;-)
631 TK Adds: the loop is just fine.
632 */
633
634 /* now comes the "COOL trick" for calculating all formulae:
635 sorting the high mass elements to the outer loops, the small weights (H)
636 to the inner loops;
637
638 This will reduce the computational time by factor ~10-60-1000
639 OLD HR: Cangrelor at 1ppm 4465 formulas found in 5866 seconds.
640 NEW HR2: Cangrelor at 1ppm 4465 formulas found in 96 seconds.
641 NEW2 HR2: Cangrelor at 1ppm 4465 formulas found in 60 seconds.
642 NEW3 HR2: Cangrelor at 1ppm 4465 formulas found in 59 seconds.
643 HR2 Fast: Cangrelor at 1ppm 4465 formulas found in 41 seconds by evaluating 2,003,436,894 formulae.
644 hr2 -c "Cangrelor" -m 774.948 -t 0.77 -C 1-64 -H 1-112 -N 0-30 -O 0-80 -P 0-12 -S 0-9 -F 0-10 -L 0-10
645
646 Another additional trick is to end the 2nd.. 3rd.. 4th.. xth innermost loop
647 to prevent loops which are just higher and higher in mass.
648 */
649
650 /*dle: process new elements */
651 el[17].cnt = el[17].min - 1; el[17].save = el[17].cnt;
652 while (el[17].cnt++ < el[17].max) /*"iK"*/{
653
654 el[16].cnt = el[16].min - 1; el[16].save = el[16].cnt;
655 while (el[16].cnt++ < el[16].max) /*"K"*/{
656
657 el[15].cnt = el[15].min - 1; el[15].save = el[15].cnt;
658 while (el[15].cnt++ < el[15].max) /* "iBr"*/ {
659
660 el[14].cnt = el[14].min - 1; el[14].save = el[14].cnt;
661 while (el[14].cnt++ < el[14].max) /* "Br"*/ {
662
663 el[13].cnt = el[13].min - 1; el[13].save = el[13].cnt;
664 while (el[13].cnt++ < el[13].max) /*"iCl"*/ {
665
666 el[12].cnt = el[12].min - 1; el[12].save = el[12].cnt;
667 while (el[12].cnt++ < el[12].max) /*"Cl"*/ {
668
669 el[11].cnt = el[11].min - 1; el[11].save = el[11].cnt;
670 while (el[11].cnt++ < el[11].max) /*"S"*/ {
671
672 el[10].cnt = el[10].min - 1; el[10].save = el[10].cnt;
673 while (el[10].cnt++ < el[10].max) /*"P"*/ {
674
675 el[9].cnt = el[9].min - 1; el[9].save = el[9].cnt;
676 while (el[9].cnt++ < el[9].max) /*"Si"*/ {
677
678 el[8].cnt = el[8].min - 1; el[8].save = el[8].cnt;
679 while (el[8].cnt++ < el[8].max) /*"Na"*/{
680
681 el[7].cnt = el[7].min - 1; el[7].save = el[7].cnt;
682 while (el[7].cnt++ < el[7].max) /*"F"*/ {
683
684 el[6].cnt = el[6].min - 1; el[6].save = el[6].cnt;
685 while (el[6].cnt++ < el[6].max) /*"O"*/ {
686
687 el[5].cnt = el[5].min - 1; el[5].save = el[5].cnt;
688 while (el[5].cnt++ < el[5].max) /*"15N"*/{
689
690 el[4].cnt = el[4].min - 1; el[4].save = el[4].cnt;
691 while (el[4].cnt++ < el[4].max) /*"N"*/{
692
693 el[1].cnt = el[1].min - 1; el[1].save = el[1].cnt;
694 while (el[1].cnt++ < el[1].max) /*"13C"*/ {
695
696 el[0].cnt = el[0].min - 1; el[0].save = el[0].cnt;
697 while (el[0].cnt++ < el[0].max) /* "C"*/ {
698
699 el[3].cnt = el[3].min - 1; el[3].save = el[3].cnt;
700 while (el[3].cnt++ < el[3].max) /*"D"*/{
701
702 el[2].cnt = el[2].min - 1; el[2].save = el[2].cnt;
703 while (el[2].cnt++ < el[2].max) /*"H"*/{
704
705 mass = calc_mass();
706 counter++;
707
708 //just for debug purposes
709 //if (mass > limit_hi)
710 //printf("mass: %f\tC: %d H: %d N: %d O: %d P: %d S: %d Cl: %d Br: %d\n",mass,el[0].cnt,el[2].cnt,el[4].cnt,el[6].cnt,el[10].cnt,el[11].cnt,el[12].cnt,el[13].cnt);
711
712 /* if we exceed the upper limit, we can stop the calculation
713 for this particular element (JHa 20050227). <-- comment TK that will only bust the innermost while loop, which is "H"*/
714
715 // break H loop
716 if (mass > limit_hi) break;
717
718 //************************************************************************************************************/
719 //Calculus loop with print out
720 //************************************************************************************************************/
721
722 if ((mass >= limit_lo) && (mass <= limit_hi)) /* within limits? */
723 {
724 // element check will be performed always, if variable bool element_probability is true also probabilities will be calculated
725 // not an elegant implementation, but fast.
726
727 elementcheck = calc_element_ratios(applygr); /* pass applygr boolean by dle */
728
729 if (elementcheck)
730 {
731 rdbori = calc_rdb(); /* get RDB */
732
733 rdb = rdbori + 0.5*nadd; /* dle: if nadd addcuts */
734 lewis = (float)(fmod(rdb, 1)); /*calc reminder*/
735 if ((rdb >= 0) && (lewis != 0.5) && (lewis !=-0.5))/* less than -0.5 RDB does not make sense */
736 { /* NO(!) CH3F10NS2 exists , RDB = -4.0 M= 282.9547*/
737 hit ++;
738 for (i = 0; i < nr_el; i++) /* print composition */
739 if (el[i].cnt > 0) /* but only if useful */
740 printf("%s%d.", el[i].sym, el[i].cnt); // print formula
741
742 printf("\t");
743 /* dle: print out a more explicit molecular formula for further processing and
744 variable niso counts number of isotope elements in the solution */
745 niso=0;
746 for (i = 0; i < nr_el; i++)
747 { /* print composition */
748 if (el[i].cnt > 0)
749 { /* but only if useful */
750 printf("%s%d", el[i].symi, el[i].cnt); // print formula
751 if (el[i].symi != el[i].sym)
752 niso=niso+el[i].cnt;
753 }
754
755 }
756 /* dle: end of molecular print out */
757
758 /* dle: additionnaly print nadd and niso */
759 printf("\t%.2f\t%.7lf\t%.0lf\t%d\t%+.2lf\n", rdb, mass, nadd,niso, 1000.0 * ((measured_mass * abscharge) - mass));
760
761 } /* end of 'rdb' loop */
762
763 } // end of elementcheck loop
764
765 } /* end of 'limit' loop */
766 //************************************************************************************************************/
767
768
769 /*
770 TK: if the current mass is larger than the limit the loop can be exited.
771 Each element must point to the element which is in use and before.
772 This is a static implementation which can be enhanced with a pointer chain to the lower element.
773 Actually now its only allowed for CHNSOP-Fl-Cl-Br-Si !!! Brute-force <> elegance :-)
774 */
775 } /*"H"*/
776
777 if ((mass >= limit_lo) && (el[2].save == el[2].cnt-1)) break;
778 } /*"D"*/
779
780 if ((mass >= limit_lo) && (el[3].save == el[3].cnt-1)) break; /* dle addons */
781 } /* "C"*/
782
783 if ((mass >= limit_lo) && (el[0].save == el[0].cnt-1)) break;
784 } /*"13C"*/
785
786 if ((mass >= limit_lo) && (el[1].save == el[1].cnt-1)) break; /* dle addons */
787 } /*"N"*/
788
789 if ((mass >= limit_lo) && (el[4].save == el[4].cnt-1)) break;
790 } /*"15N"*/
791
792 if ((mass >= limit_lo) && (el[5].save == el[5].cnt-1)) break; /* dle addons */
793 } /*"O"*/
794
795 if ((mass >= limit_lo) && (el[6].save == el[6].cnt-1)) break;
796 } /*"F"*/
797
798 if ((mass >= limit_lo) && (el[7].save == el[7].cnt-1)) break;
799 } /*"Na"*/
800
801 if ((mass >= limit_lo) && (el[8].save == el[8].cnt-1)) break;
802 } /*"Si"*/
803
804 if ((mass >= limit_lo) && (el[9].save == el[9].cnt-1)) break;
805 } /*"P"*/
806
807 if ((mass >= limit_lo) && (el[10].save == el[10].cnt-1)) break;
808 } /*"S"*/
809
810 if ((mass >= limit_lo) && (el[11].save == el[11].cnt-1)) break;
811 } /*"Cl"*/
812
813 if ((mass >= limit_lo) && (el[12].save == el[12].cnt-1)) break;
814 } /*"iCl"*/
815
816 if ((mass >= limit_lo) && (el[13].save == el[13].cnt-1)) break; /* dle addons */
817 } /*"Br"*/
818
819 if ((mass >= limit_lo) && (el[14].save == el[14].cnt-1)) break;
820 } /*"iBr"*/
821
822 if ((mass >= limit_lo) && (el[15].save == el[15].cnt-1)) break; /* dle addons */
823 } /*"K"*/
824
825 if ((mass >= limit_lo) && (el[16].save == el[16].cnt-1)) break; /* dle addons */
826 } /*"iK"*/
827
828 /* close that giant loop thing started above */
829
830
831
832
833 time(&finish); // stop timer
834 elapsed_time = difftime(finish , start); // calulate time difference
835
836 if(verbose){
837 if (!hit)
838 printf("No matching combination found in %6.0f seconds.\n", elapsed_time );
839 else{
840 printf("\n%llu formulas found in %6.0f seconds by evaluating %llu formulae.\n",hit,elapsed_time,counter);
841 printf("RDBs are overloaded to maximum valence values (N=3,P=5,S=6).\n");
842 }
843 }
844 return hit;
845 }
846
847
848 /************************************************************************
849 * CLEAN: "cleans" a buffer obtained by fgets() *
850 * Input: Pointer to text buffer *
851 * Returns: strlen of buffer. *
852 *************************************************************************/
853 int clean (char *buf)
854 {
855 int i;
856
857 for(i = 0; i < strlen(buf); i++) /* search for CR/LF */
858 {
859 if(buf[i] == '\n' || buf[i] == '\r')
860 {
861 buf[i] = 0; /* stop at CR or LF */
862 break;
863 }
864 }
865 return (strlen(buf));
866 }
867
868
869
870 /***************************************************************************
871 * GETOPT: Command line parser, system V style.
872 *
873 * This routine is widely (and wildly) adapted from code that was
874 * made available by Borland International Inc.
875 *
876 * Standard option syntax is:
877 *
878 * option ::= SW [optLetter]* [argLetter space* argument]
879 *
880 * where
881 * - SW is '-'
882 * - there is no space before any optLetter or argLetter.
883 * - opt/arg letters are alphabetic, not punctuation characters.
884 * - optLetters, if present, must be matched in optionS.
885 * - argLetters, if present, are found in optionS followed by ':'.
886 * - argument is any white-space delimited string. Note that it
887 * can include the SW character.
888 * - upper and lower case letters are distinct.
889 *
890 * There may be multiple option clusters on a command line, each
891 * beginning with a SW, but all must appear before any non-option
892 * arguments (arguments not introduced by SW). Opt/arg letters may
893 * be repeated: it is up to the caller to decide if that is an error.
894 *
895 * The character SW appearing alone as the last argument is an error.
896 * The lead-in sequence SWSW ("--") causes itself and all the rest
897 * of the line to be ignored (allowing non-options which begin
898 * with the switch char).
899 *
900 * The string *optionS allows valid opt/arg letters to be recognized.
901 * argLetters are followed with ':'. Getopt () returns the value of
902 * the option character found, or EOF if no more options are in the
903 * command line. If option is an argLetter then the global optarg is
904 * set to point to the argument string (having skipped any white-space).
905 *
906 * The global optind is initially 1 and is always left as the index
907 * of the next argument of argv[] which getopt has not taken. Note
908 * that if "--" or "//" are used then optind is stepped to the next
909 * argument before getopt() returns EOF.
910 *
911 * If an error occurs, that is an SW char precedes an unknown letter,
912 * then getopt() will return a '~' character and normally prints an
913 * error message via perror(). If the global variable opterr is set
914 * to false (zero) before calling getopt() then the error message is
915 * not printed.
916 *
917 * For example, if
918 *
919 * *optionS == "A:F:PuU:wXZ:"
920 *
921 * then 'P', 'u', 'w', and 'X' are option letters and 'A', 'F',
922 * 'U', 'Z' are followed by arguments. A valid command line may be:
923 *
924 * aCommand -uPFPi -X -A L someFile
925 *
926 * where:
927 * - 'u' and 'P' will be returned as isolated option letters.
928 * - 'F' will return with "Pi" as its argument string.
929 * - 'X' is an isolated option.
930 * - 'A' will return with "L" as its argument.
931 * - "someFile" is not an option, and terminates getOpt. The
932 * caller may collect remaining arguments using argv pointers.
933 ***************************************************************************/
934 int getopt(int argc, char *argv[], char *optionS)
935 {
936 static char *letP = NULL; /* remember next option char's location */
937 static char SW = '-'; /* switch character */
938
939 int opterr = 1; /* allow error message */
940 unsigned char ch;
941 char *optP;
942
943 if (argc > optind)
944 {
945 if (letP == NULL)
946 {
947 if ((letP = argv[optind]) == NULL || *(letP++) != SW)
948 goto gopEOF;
949
950 if (*letP == SW)
951 {
952 optind++;
953 goto gopEOF;
954 }
955 }
956 if (0 == (ch = *(letP++)))
957 {
958 optind++;
959 goto gopEOF;
960 }
961 if (':' == ch || (optP = strchr(optionS, ch)) == NULL)
962 goto gopError;
963 if (':' == *(++optP))
964 {
965 optind++;
966 if (0 == *letP)
967 {
968 if (argc <= optind)
969 goto gopError;
970 letP = argv[optind++];
971 }
972 optarg = letP;
973 letP = NULL;
974 }
975 else
976 {
977 if (0 == *letP)
978 {
979 optind++;
980 letP = NULL;
981 }
982 optarg = NULL;
983 }
984 return ch;
985 }
986
987 gopEOF:
988 optarg = letP = NULL;
989 return EOF;
990
991 gopError:
992 optarg = NULL;
993 errno = EINVAL;
994 if (opterr)
995 perror ("Command line option");
996 return ('~');
997 }
998
999 /*
1000 List of elements sorted according to mass
1001 _______________________
1002 INo# El Mass
1003 2 H 1.007825032
1004 3 D 2.014101778
1005 0 C 12
1006 1 13C 13.00335484
1007 4 N 14.00307401
1008 5 15N 15.0001089
1009 6 O 15.99491462
1010 7 F 18.9984032
1011 8 Na 22.98976967
1012 9 Si 27.97692653
1013 10 P 30.97376151
1014 11 S 31.97207069
1015 12 Cl 34.96885271
1016 13 Br 78.9183376
1017 ------------------------
1018 */