Mercurial > repos > fubar > microsatbed
view seqrequester/src/seqrequester/summarize.C @ 1:1085e094cf5f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 7ceb6658309a7ababe622b5d92e729e5470e22f0-dirty
author | fubar |
---|---|
date | Sat, 13 Jul 2024 12:39:06 +0000 |
parents | |
children |
line wrap: on
line source
/****************************************************************************** * * This file is part of seqrequester, a tool for summarizing, extracting, * generating and modifying DNA sequences. * * This software is based on: * 'Canu' v2.0 (https://github.com/marbl/canu) * which is based on: * 'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net) * the 'kmer package' r1994 (http://kmer.sourceforge.net) * * Except as indicated otherwise, this is a 'United States Government Work', * and is released in the public domain. * * File 'README.licenses' in the root directory of this distribution * contains full conditions and disclaimers. */ #include "seqrequester.H" bool summarizeParameters::parseOption(opMode &mode, int32 &arg, int32 argc, char **argv) { if (strcmp(argv[arg], "summarize") == 0) { mode = modeSummarize; } else if ((mode == modeSummarize) && (strcmp(argv[arg], "-size") == 0)) { genomeSize = strtoull(argv[++arg], nullptr, 10); } else if ((mode == modeSummarize) && (strcmp(argv[arg], "-1x") == 0)) { limitTo1x = true; } else if ((mode == modeSummarize) && (strcmp(argv[arg], "-split-n") == 0)) { breakAtN = true; } else if ((mode == modeSummarize) && (strcmp(argv[arg], "-simple") == 0)) { asSimple(); } else if ((mode == modeSummarize) && (strcmp(argv[arg], "-lengths") == 0)) { asLengths(); } else if ((mode == modeSummarize) && (strcmp(argv[arg], "-seqlen") == 0)) { asSeqLen(); } else if ((mode == modeSummarize) && (strcmp(argv[arg], "-assequences") == 0)) { asSequences = true; asBases = false; } else if ((mode == modeSummarize) && (strcmp(argv[arg], "-asbases") == 0)) { asSequences = false; asBases = true; } else { return(false); } return(true); } void summarizeParameters::showUsage(opMode mode) { if (mode != modeSummarize) return; fprintf(stderr, "OPTIONS for summarize mode:\n"); fprintf(stderr, " -size base size to use for N50 statistics\n"); fprintf(stderr, " -1x limit NG table to 1x coverage\n"); fprintf(stderr, "\n"); fprintf(stderr, " -split-n split sequences at N bases before computing length\n"); fprintf(stderr, "\n"); fprintf(stderr, " Output format:\n"); fprintf(stderr, " (default) output an N50 table, a histogram picture and mono-,\n"); fprintf(stderr, " di- and tri-nucleotide frequencies\n"); fprintf(stderr, " -simple output a simple 'length numSequences' histogram\n"); fprintf(stderr, " -lengths output a list of 'length' for each sequence\n"); fprintf(stderr, " -seqlen output a list of 'length name' for each sequence\n"); fprintf(stderr, "\n"); fprintf(stderr, " -assequences load data as complete sequences (for testing)\n"); fprintf(stderr, " -asbases load data as blocks of bases (for testing)\n"); fprintf(stderr, "\n"); } bool summarizeParameters::checkOptions(opMode mode, vector<char const *> &inputs, vector<char const *> &errors) { if (mode != modeSummarize) return(false); if (inputs.size() == 0) sprintf(errors, "ERROR: No input sequence files supplied.\n"); return(errors.size() > 0); } bool doSummarize_loadSequence(dnaSeqFile *sf, bool asSequences, char *&name, uint32 &nameMax, char *&seq, uint8 *&qlt, uint64 &seqMax, uint64 &seqLen) { uint32 error = 0; if (asSequences) return(sf->loadSequence(name, nameMax, seq, qlt, seqMax, seqLen, error)); // Otherwise, piece it together from multiple calls to get bases. // loadBases() returns true if bases were loaded, and sets endOfSeq // if the block returned is to the end of a sequence. uint64 bufferMax = 23; uint64 bufferLen = 0; char *buffer = new char [bufferMax]; bool endOfSeq = false; merylutil::resizeArray(name, 0, nameMax, (uint32)1024); merylutil::resizeArrayPair(seq, qlt, 0, seqMax, seqLen+1); name[0] = 0; seq[0] = 0; qlt[0] = 0; seqLen = 0; while (sf->loadBases(buffer, bufferMax, bufferLen, endOfSeq)) { if (seqLen + bufferLen >= seqMax) merylutil::resizeArrayPair(seq, qlt, seqLen, seqMax, 2 * (seqLen + bufferLen + 1)); assert(seqLen + bufferLen + 1 < seqMax); memcpy(seq + seqLen, buffer, sizeof(char) * bufferLen); seqLen += bufferLen; seq[seqLen] = 0; if (endOfSeq) break; } // We get here in two ways: // loadBases() immediately hit EOF, it will return false and set endOfSeq = false. // Otherwise, it found a sequence and endOfSeq (here) _must_ be true. // So, we can just return endOfSeq to mean 'there might be another sequence to load'. delete [] buffer; return(endOfSeq); } void doSummarize_lengthHistogramSimple(uint64 *shortLengths, uint32 shortLengthsLen, vector<uint64> &longLengths) { // Short lengths are super easy; they're already a histogram. for (uint64 ll=0; ll<shortLengthsLen; ll++) if (shortLengths[ll] > 0) fprintf(stdout, "%lu\t%lu\n", ll, shortLengths[ll]); // Long lengths need to be sorted then counted. sort(longLengths.begin(), longLengths.end(), less<uint64>()); for (uint64 bgn=0, end=1; bgn < longLengths.size(); bgn=end, end++) { while ((end < longLengths.size()) && (longLengths[bgn] == longLengths[end])) end++; fprintf(stdout, "%lu\t%lu\n", longLengths[bgn], end - bgn); } } void doSummarize_dumpLengths(uint64 *shortLengths, uint32 shortLengthsLen, vector<uint64> &longLengths) { // Dump the shorter lengths. for (uint64 ll=0; ll<shortLengthsLen; ll++) { if (shortLengths[ll] == 0) continue; for (uint64 cc=0; cc<shortLengths[ll]; cc++) fprintf(stdout, "%lu\n", ll); } // Dump the longer lengths. sort(longLengths.begin(), longLengths.end(), less<uint64>()); for (uint64 ii=0; ii<longLengths.size(); ii++) fprintf(stdout, "%lu\n", longLengths[ii]); } void doSummarize_lengthHistogram(uint64 *shortLengths, uint32 shortLengthsLen, vector<uint64> &longLengths, uint64 genomeSize, bool limitTo1x) { // NG table dimensions uint32 nNG = 0; // Number of lines in the NG table. uint64 lSum = 0; // Sum of the lengths we've encountered so far uint32 nStep = 10; // Step of each N report. uint32 nVal = nStep; // Index of the threshold we're next printing. uint64 nThr = genomeSize * nVal / 100; // Threshold lenth; if sum is bigger, emit and move to the next threshold // These need to be sorted at some point, so might as well do it now. // Note! Sorted high to low. sort(longLengths.begin(), longLengths.end(), greater<uint64>()); // // Find the minimum and maximum lengths, and count the number of // non-zero-length sequences. // uint64 minLength = uint64max; uint64 maxLength = uint64min; uint64 nSeqs = longLengths.size(); // Number of sequences we're summarizing. for (uint64 ll=0; ll<shortLengthsLen; ll++) { if (shortLengths[ll] > 0) { minLength = std::min(minLength, ll); maxLength = std::max(maxLength, ll); nSeqs += shortLengths[ll]; } } if (longLengths.size() > 0) { minLength = std::min(minLength, longLengths.back()); maxLength = std::max(minLength, longLengths.front()); } // // Count the number of lines we expect to get in the NG table. // auto setStep = [&]() { while (lSum >= nThr) { nNG++; if (nVal < 200) nVal += nStep; else if (nVal < 2000) nVal += nStep * 10; else if (nVal < 20000) nVal += nStep * 100; else if (nVal < 200000) nVal += nStep * 1000; else nVal += nStep * 10000; nThr = genomeSize * nVal / 100; } }; for (uint64 ll=0; ll<shortLengthsLen; ll++) { for (uint64 cc=0; cc<shortLengths[ll]; cc++) { lSum += ll; setStep(); } } for (uint64 ii=0; ii<longLengths.size(); ii++) { lSum += longLengths[ii]; setStep(); } if (nNG < 10) nNG = 10; // // Decide how many rows to make in the length histogram table, and how // many sequences each row will capture. // // Set it to max(40, nNG) -- either a comfortable table or whatever the NG // table is -- but reset to 1 sequence per row if it is too big. // uint32 nLHcols = 63; // Magic number to make the histogram the same width as the trinucleotide list uint32 nLH = 40; // Default height of the histogram; dynamically set below. uint32 bucketSize = (uint32)ceil((double)(maxLength - minLength) / std::max(nLH, nNG)); if (bucketSize == 0) { nLH = 1; bucketSize = 1; } nLH = 1 + (maxLength - minLength) / bucketSize; // With new bucketSize set, compute actual number of rows. if (lSum == 0) nLH = 0; // // Compute the length histogram. // uint32 *nSeqPerLen = new uint32 [nLH]; for (uint32 rr=0; rr<nLH; rr++) // Clear the histogram. nSeqPerLen[rr] = 0; for (uint64 ll=0; ll<shortLengthsLen; ll++) { // Count the number of sequences per size range. if (shortLengths[ll] > 0) { uint32 r = (ll - minLength) / bucketSize; assert(r < nLH); nSeqPerLen[r] += shortLengths[ll]; } } for (uint64 ii=0; ii<longLengths.size(); ii++) { uint32 r = (longLengths[ii] - minLength) / bucketSize; assert(r < nLH); nSeqPerLen[r]++; } // Find the maximum size of any bucket. uint64 maxCount = 1; for (uint32 rr=0; rr<nLH; rr++) if (maxCount < nSeqPerLen[rr]) maxCount = nSeqPerLen[rr]; // // Generate the length histogram table. // lSum = 0; // Reset for actually generating the length histogram. nStep = 10; nVal = nStep; nThr = genomeSize * nVal / 100; // For each line in the histogram table, write the size range and draw a picture. char **histPlot = new char * [nLH]; for (uint32 rr=0; rr<nLH; rr++) { uint32 nn = (uint32)ceil(nSeqPerLen[rr] * nLHcols / (double)maxCount); uint64 lo = (rr+0) * bucketSize + minLength; uint64 hi = (rr+1) * bucketSize + minLength - 1; //fprintf(stdout, "rr %u bucketsize %u minLength %lu lo %lu hi %lu\n", rr, bucketSize, minLength, lo, hi); histPlot[rr] = new char [28 + nLHcols + 1]; // 28 = 9 + 1 + 9 + 1 + 7 + 1 if (lo == hi) // Size range and number of sequences. sprintf(histPlot[rr], "%9lu %7u|", lo, nSeqPerLen[rr]); else sprintf(histPlot[rr], "%9lu-%-9lu %7u|", lo, hi, nSeqPerLen[rr]); for (uint32 cc=0; cc<nn; cc++) // ...histogram bars. histPlot[rr][28 + cc] = '-'; histPlot[rr][28 + nn] = 0; // ...terminate the line. //fprintf(stdout, "histplot'%s'\n", histPlot[rr]); } // // Output N table, with length histogram appended at the end of each line. // uint32 np = 0; uint32 hp = 0; fprintf(stdout, "\n"); fprintf(stdout, "G=%-12lu" " sum of || length num\n", genomeSize); fprintf(stdout, "NG length index lengths || range seqs\n"); fprintf(stdout, "----- ------------ --------- ------------ || ------------------- -------\n"); // Write lines if we're showing all data, or if we're below 1x coverage. auto emitLine = [&](uint64 seqlen, uint64 seqnum) { lSum += seqlen; while (lSum >= nThr) { if ((limitTo1x == false) || (nVal <= 100)) { if (hp < nLH) fprintf(stdout, "%05u %12lu %9lu %12lu || %s\n", nVal, seqlen, seqnum, lSum, histPlot[hp++]); else fprintf(stdout, "%05u %12lu %9lu %12lu ||\n", nVal, seqlen, seqnum, lSum); } if (nVal < 200) nVal += nStep; else if (nVal < 2000) nVal += nStep * 10; else if (nVal < 20000) nVal += nStep * 100; else if (nVal < 200000) nVal += nStep * 1000; else nVal += nStep * 10000; nThr = genomeSize * nVal / 100; } }; uint64 ns = 1; // Sequences start at 1, not zero! for (uint64 ii=0; ii<longLengths.size(); ii++, ns++) emitLine(longLengths[ii], ns); for (uint64 ll=shortLengthsLen; ll-- > 0; ) for (uint64 cc=0; cc<shortLengths[ll]; cc++, ns++) emitLine(ll, ns); assert(ns-1 == nSeqs); // Output up to 1x coverage regardless of how much is there. while (nVal <= 100) { if (hp < nLH) fprintf(stdout, "%05" F_U32P " %12s %9s %12s || %s\n", nVal, "-", "-", "-", histPlot[hp++]); else fprintf(stdout, "%05" F_U32P " %12s %9s %12s ||\n", nVal, "-", "-", "-"); nVal += nStep; } // If we're displaying exactly 1x, write empty lines to get to there. #if 0 if (limitTo1x == true) { while (nVal <= 100) { if (hp < nLH) fprintf(stdout, "%05" F_U32P " %12s %9s %12s || %s\n", nVal, "-", "-", "-", histPlot[hp++]); else fprintf(stdout, "%05" F_U32P " %12s %9s %12s ||\n", nVal, "-", "-", "-"); nVal += nStep; } } #endif // Now the summary line for the NG table. if (genomeSize == 0) { if (hp < nLH) fprintf(stdout, "%07.3fx %9lu %12lu || %s\n", 0.0, nSeqs, lSum, histPlot[hp++]); else fprintf(stdout, "%07.3fx %9lu %12lu ||\n", 0.0, nSeqs, lSum); } else { if (hp < nLH) fprintf(stdout, "%07.3fx %9lu %12lu || %s\n", (double)lSum / genomeSize, nSeqs, lSum, histPlot[hp++]); else fprintf(stdout, "%07.3fx %9lu %12lu ||\n", (double)lSum / genomeSize, nSeqs, lSum); } // And any remaining length table lines. while (hp < nLH) fprintf(stdout, " || %s\n", histPlot[hp++]); fprintf(stdout, "\n"); // Cleanup. for (uint32 rr=0; rr<nLH; rr++) delete [] histPlot[rr]; delete [] histPlot; delete [] nSeqPerLen; } void doSummarize(vector<char const *> &inputs, summarizeParameters &sumPar) { uint32 shortLengthsLen = 0; uint64 *shortLengths = nullptr; vector<uint64> longLengths; uint64 nBases = 0; uint32 mer = 0; uint64 mn[4] = {0}; uint64 dn[4*4] = {0}; uint64 tn[4*4*4] = {0}; double nmn = 0; double ndn = 0; double ntn = 0; uint32 nameMax = 0; char *name = nullptr; uint64 seqMax = 0; char *seq = nullptr; uint8 *qlt = nullptr; uint64 seqLen = 0; if (sumPar.isSeqLen()) { fprintf(stdout, "--------- ------------ ----------------\n"); fprintf(stdout, "-index length sequence-name\n"); fprintf(stdout, "--------- ------------ ----------------\n"); } merylutil::allocateArray(shortLengths, shortLengthsLen, 1048576); for (uint32 ff=0; ff<inputs.size(); ff++) { dnaSeqFile *sf = openSequenceFile(inputs[ff]); // If sequence, // Count mono-, di- and tri-nucleotides. // Count number of mono-, di- and tri-nucleotides. // Count number of sequences and total bases. // Save the lengths of sequences. while (doSummarize_loadSequence(sf, sumPar.asSequences, name, nameMax, seq, qlt, seqMax, seqLen) == true) { uint64 pos = 0; uint64 bgn = 0; if (pos < seqLen) { mer = ((mer << 2) | ((seq[pos++] >> 1) & 0x03)) & 0x3f; mn[mer & 0x03]++; } if (pos < seqLen) { mer = ((mer << 2) | ((seq[pos++] >> 1) & 0x03)) & 0x3f; mn[mer & 0x03]++; dn[mer & 0x0f]++; } while (pos < seqLen) { mer = ((mer << 2) | ((seq[pos++] >> 1) & 0x03)) & 0x3f; mn[mer & 0x03]++; dn[mer & 0x0f]++; tn[mer & 0x3f]++; } nmn += (seqLen-0); ndn += (seqLen < 2) ? 0 : (seqLen-1); ntn += (seqLen < 3) ? 0 : (seqLen-2); // Report seq-len if requensted. if (sumPar.isSeqLen()) fprintf(stdout, "%-9lu %12lu %s\n", sf->seqIdx()+1, seqLen, name); // If we're NOT splitting on N, add one sequence of the given length. if (sumPar.breakAtN == false) { nBases += seqLen; if (seqLen < shortLengthsLen) shortLengths[seqLen]++; else longLengths.push_back(seqLen); continue; } // But if we ARE splitting on N, add multiple sequences. pos = 0; bgn = 0; while (pos < seqLen) { while ((pos < seqLen) && ((seq[pos] == 'n') || // Skip N's. (seq[pos] == 'N'))) pos++; bgn = pos; // Remember our start position. while ((pos < seqLen) && ((seq[pos] != 'n') && // Move ahead until the end of sequence or an N. (seq[pos] != 'N'))) pos++; if (pos - bgn > 0) { // If a non-empty sequence nBases += pos - bgn; // summarize it. if (pos - bgn < shortLengthsLen) shortLengths[pos - bgn]++; else longLengths.push_back(pos - bgn); } } } // Over sequences in the file. delete sf; } // Over input files. delete [] name; delete [] seq; delete [] qlt; if (sumPar.genomeSize == 0) // If no genome size supplied, set it to the sum of lengths. sumPar.genomeSize = nBases; // If only a simple histogram of lengths is requested, dump and done. if (sumPar.isSimple() == true) { doSummarize_lengthHistogramSimple(shortLengths, shortLengthsLen, longLengths); } // If only the read lengths are requested, dump and done. else if (sumPar.isLengths() == true) { doSummarize_dumpLengths(shortLengths, shortLengthsLen, longLengths); } // Otherwise, generate a fancy histogram plot. // And finish with the mono-, di- and tri-nucleotide frequencies. else if (sumPar.isComplex() == true) { doSummarize_lengthHistogram(shortLengths, shortLengthsLen, longLengths, sumPar.genomeSize, sumPar.limitTo1x); if (nmn == 0) nmn = 1; // Avoid divide by zero. if (ndn == 0) ndn = 1; if (ntn == 0) ntn = 1; double gc = 100.0 * (mn[0x01] + mn[0x03]) / nmn; double at = 100.0 * (mn[0x00] + mn[0x02]) / nmn; #define FMT "%12lu %6.4f" #define GC "%05.02f%%" char gcat[16] = { 0 }; if (gc >= 99.995) snprintf(gcat, 16, "100.0%% 00.00%%"); // What a bother! The default 5.2 format overflows else if (at >= 99.995) snprintf(gcat, 16, "00.00%% 100.0%%"); // when the value is 100, and we're _forced_ to special else snprintf(gcat, 16, "%05.02f%% %05.02f%%", gc, at); // case those two values. fprintf(stdout, "--------------------- --------------------- ----------------------------------------------------------------------------------------------\n"); fprintf(stdout, " mononucleotide dinucleotide trinucleotide\n"); fprintf(stdout, "--------------------- --------------------- ----------------------------------------------------------------------------------------------\n"); fprintf(stdout, "" FMT " A" FMT " AA" FMT " AAA " FMT " AAC " FMT " AAG " FMT " AAT\n", mn[0x00], mn[0x00] / nmn, dn[0x00], dn[0x00] / ndn, tn[0x00], tn[0x00] / ntn, tn[0x01], tn[0x01] / ntn, tn[0x03], tn[0x03] / ntn, tn[0x02], tn[0x02] / ntn); fprintf(stdout, "" FMT " C" FMT " AC" FMT " ACA " FMT " ACC " FMT " ACG " FMT " ACT\n", mn[0x01], mn[0x01] / nmn, dn[0x01], dn[0x01] / ndn, tn[0x04], tn[0x04] / ntn, tn[0x05], tn[0x05] / ntn, tn[0x07], tn[0x07] / ntn, tn[0x06], tn[0x06] / ntn); fprintf(stdout, "" FMT " G" FMT " AG" FMT " AGA " FMT " AGC " FMT " AGG " FMT " AGT\n", mn[0x03], mn[0x03] / nmn, dn[0x03], dn[0x03] / ndn, tn[0x0c], tn[0x0c] / ntn, tn[0x0d], tn[0x0d] / ntn, tn[0x0f], tn[0x0f] / ntn, tn[0x0e], tn[0x0e] / ntn); fprintf(stdout, "" FMT " T" FMT " AT" FMT " ATA " FMT " ATC " FMT " ATG " FMT " ATT\n", mn[0x02], mn[0x02] / nmn, dn[0x02], dn[0x02] / ndn, tn[0x08], tn[0x08] / ntn, tn[0x09], tn[0x09] / ntn, tn[0x0b], tn[0x0b] / ntn, tn[0x0a], tn[0x0a] / ntn); fprintf(stdout, " " FMT " CA" FMT " CAA " FMT " CAC " FMT " CAG " FMT " CAT\n", dn[0x04], dn[0x04] / ndn, tn[0x10], tn[0x10] / ntn, tn[0x11], tn[0x11] / ntn, tn[0x13], tn[0x13] / ntn, tn[0x12], tn[0x12] / ntn); fprintf(stdout, " --GC-- --AT-- " FMT " CC" FMT " CCA " FMT " CCC " FMT " CCG " FMT " CCT\n", dn[0x05], dn[0x05] / ndn, tn[0x14], tn[0x14] / ntn, tn[0x15], tn[0x15] / ntn, tn[0x17], tn[0x17] / ntn, tn[0x16], tn[0x16] / ntn); fprintf(stdout, " " "%s" " " FMT " CG" FMT " CGA " FMT " CGC " FMT " CGG " FMT " CGT\n", gcat, dn[0x07], dn[0x07] / ndn, tn[0x1c], tn[0x1c] / ntn, tn[0x1d], tn[0x1d] / ntn, tn[0x1f], tn[0x1f] / ntn, tn[0x1e], tn[0x1e] / ntn); fprintf(stdout, " " FMT " CT" FMT " CTA " FMT " CTC " FMT " CTG " FMT " CTT\n", dn[0x06], dn[0x06] / ndn, tn[0x18], tn[0x18] / ntn, tn[0x19], tn[0x19] / ntn, tn[0x1b], tn[0x1b] / ntn, tn[0x1a], tn[0x1a] / ntn); fprintf(stdout, " " FMT " GA" FMT " GAA " FMT " GAC " FMT " GAG " FMT " GAT\n", dn[0x0c], dn[0x0c] / ndn, tn[0x30], tn[0x30] / ntn, tn[0x31], tn[0x31] / ntn, tn[0x33], tn[0x33] / ntn, tn[0x32], tn[0x32] / ntn); fprintf(stdout, " " FMT " GC" FMT " GCA " FMT " GCC " FMT " GCG " FMT " GCT\n", dn[0x0d], dn[0x0d] / ndn, tn[0x34], tn[0x34] / ntn, tn[0x35], tn[0x35] / ntn, tn[0x37], tn[0x37] / ntn, tn[0x36], tn[0x36] / ntn); fprintf(stdout, " " FMT " GG" FMT " GGA " FMT " GGC " FMT " GGG " FMT " GGT\n", dn[0x0f], dn[0x0f] / ndn, tn[0x3c], tn[0x3c] / ntn, tn[0x3d], tn[0x3d] / ntn, tn[0x3f], tn[0x3f] / ntn, tn[0x3e], tn[0x3e] / ntn); fprintf(stdout, " " FMT " GT" FMT " GTA " FMT " GTC " FMT " GTG " FMT " GTT\n", dn[0x0e], dn[0x0e] / ndn, tn[0x38], tn[0x38] / ntn, tn[0x39], tn[0x39] / ntn, tn[0x3b], tn[0x3b] / ntn, tn[0x3a], tn[0x3a] / ntn); fprintf(stdout, " " FMT " TA" FMT " TAA " FMT " TAC " FMT " TAG " FMT " TAT\n", dn[0x08], dn[0x08] / ndn, tn[0x20], tn[0x20] / ntn, tn[0x21], tn[0x21] / ntn, tn[0x23], tn[0x23] / ntn, tn[0x22], tn[0x22] / ntn); fprintf(stdout, " " FMT " TC" FMT " TCA " FMT " TCC " FMT " TCG " FMT " TCT\n", dn[0x09], dn[0x09] / ndn, tn[0x24], tn[0x24] / ntn, tn[0x25], tn[0x25] / ntn, tn[0x27], tn[0x27] / ntn, tn[0x26], tn[0x26] / ntn); fprintf(stdout, " " FMT " TG" FMT " TGA " FMT " TGC " FMT " TGG " FMT " TGT\n", dn[0x0b], dn[0x0b] / ndn, tn[0x2c], tn[0x2c] / ntn, tn[0x2d], tn[0x2d] / ntn, tn[0x2f], tn[0x2f] / ntn, tn[0x2e], tn[0x2e] / ntn); fprintf(stdout, " " FMT " TT" FMT " TTA " FMT " TTC " FMT " TTG " FMT " TTT\n", dn[0x0a], dn[0x0a] / ndn, tn[0x28], tn[0x28] / ntn, tn[0x29], tn[0x29] / ntn, tn[0x2b], tn[0x2b] / ntn, tn[0x2a], tn[0x2a] / ntn); fprintf(stdout, "\n"); } delete [] shortLengths; }