//----------------------------------------------------------------------------- // (C) 2008 Sebastian Mach // seb@greenhybrid.net // http://greenhybrid.net , http://picogen.org // -- // contribution for the language-shootout in the german // 'Linux Magazin', issue 10/2008 // -- // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // Please see http://www.gnu.org/licenses/gpl.txt for the license text or // contact the author. // // -> contact author for use not covered by GPLv3 //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- // miscellaneous //----------------------------------------------------------------------------- // The reason why we use #warning in the following is that #message // is not portable. #define TRUE_EQUALS_1 (true==1) #if TRUE_EQUALS_1 #warning Hooray, can use some optimizations based on the assumption that true==1 #else #warning Meh, can not use some optimizations based on the assumption that true==1 #endif // A safer way than #ifdef/#define combos. enum optimization_flags { of_true_is_1 = true == 1, // can we rely on that true==1? of_nums_contigous = // can we rely that '9'-'0'==10 and that numbers go low->high? '1'-'0' == 1 && '2'-'0' == 2 && '3'-'0' == 3 && '4'-'0' == 4 && '5'-'0' == 5 && '6'-'0' == 6 && '7'-'0' == 7 && '8'-'0' == 8 && '9'-'0' == 9 }; //----------------------------------------------------------------------------- // include files //----------------------------------------------------------------------------- #include #include #include #include #include #include //----------------------------------------------------------------------------- // convenience types/functions //----------------------------------------------------------------------------- // Can also be used to check whether c is in ['0'..'9'] // (e.g. "bool isNum = toInt(c) != -1;"). template int toInt (C c) { // note that the C++ standard does not mandate // that the number-chars 0-9 are continous when // interpeted as integers, thus this function // does the portable way. // --> trust in constant folding // --> check if '0'-'9' are continous when cast to integer if (of_nums_contigous) { static int ret [2] = {-1, -1}; ret [1] = c - '0'; return ret [c>('0'-1) & c<('9'+1)]; } else { switch (c) { // not much slower if the compiler emits jump tables case '0': return 0; case '1': return 1; case '2': return 2; case '3': return 3; case '4': return 4; case '5': return 5; case '6': return 6; case '7': return 7; case '8': return 8; case '9': return 9; }; return -1; } } // Compiler should do a good job with this convenience class. // Note that the case that A==B is not covered. template class tuple { const A a_; const B b_; public: tuple (A a, B b) : a_(a), b_(b) {} const A& a () const { return a_; } const B& b () const { return b_; } operator A () const { return a_; } operator B () const { return b_; } }; //----------------------------------------------------------------------------- // A custom implementation of an array class. Allocates large chunks of memory // instead of allocating each single element. // Note: This is an micro-optimization, if any. //----------------------------------------------------------------------------- template struct growing_array { // -- data/cache --------------------------------------------------- PT mem [chunksize]; mutable int cachedmajor, cachedminor; // cachedminor only used in fast_*() mutable const growing_array *cachedchunk; // -- alloc/dealloc-vars ------------------------------------------- growing_array *curr; growing_array *next; int nextfree; int size_; // -- members ------------------------------------------------------ growing_array () : cachedmajor (0), cachedchunk (this), curr (this), next (0), nextfree(0), size_ (0) {} ~growing_array () { if (0 != next) { next->~growing_array (); } } void push_back(const PT &pt) { using namespace std; if (curr->nextfree >= chunksize) { curr->next = new growing_array (); curr = curr->next; } curr->mem [curr->nextfree++] = pt; ++size_; } const int size () const { return size_; } void fast_begin () { cachedchunk = this; cachedmajor = 0; cachedminor = 0; } void fast_end () { cachedchunk = this; cachedmajor = 0; cachedminor = 0; } const PT & fast_next () const { if (cachedminor >= chunksize) { cachedminor = 0; cachedchunk = cachedchunk->next; } return cachedchunk->mem [cachedminor++]; } const PT & operator [] (int index) const { const int major = index / chunksize; // <-- gcc should recognize the div/mod- const int minor = index % chunksize; // combo and emit only 1 div const int diff = major - cachedmajor; // Optimization for the case the new - old = 0 || 1 switch (diff) { case 1: cachedchunk = cachedchunk->next; cachedmajor = major; // <-- should be faster than ++cachedmajor on modern cpu // fallthrough case 0: return cachedchunk->mem [minor]; }; // I am not sure if the C++ standard mandates that true==1, but the following code depends on that if (of_true_is_1) { const bool d = diff >= 0; growing_array const * const tmpchunk [2] = {this, cachedchunk}; const int tmpmajor [2] = {0, cachedmajor}; cachedchunk = tmpchunk [d]; for (cachedmajor=tmpmajor [d]; cachedmajor!=major; ++cachedmajor) cachedchunk = cachedchunk->next; return cachedchunk->mem [minor]; } else { if (diff < 0) { cachedchunk = this; for (cachedmajor=0; cachedmajor!=major; ++cachedmajor) cachedchunk = cachedchunk->next; return cachedchunk->mem [minor]; } else { for (; cachedmajor!=major; ++cachedmajor) cachedchunk = cachedchunk->next; return cachedchunk->mem [minor]; } } } }; //----------------------------------------------------------------------------- // typesafe template arguments //----------------------------------------------------------------------------- enum order_t { order_in_main_section, order_in_footnotes_section }; enum safemode_t { safe, unsafe }; //----------------------------------------------------------------------------- // Reorder is a RAII class which does the reordering. // // template arguments: // typename C --> char type for our strings // C L --> open bracket for references/footnotes // C R --> close bracket for references/footnotes // order_t SO=order_in_main_section --> order_in_main_section or // order_in_footnotes_section // safemode_t S=safe --> catch some errors? // (though "safe" will result in slower code) // bool use_pool=true --> are we using our custom memory pool? //----------------------------------------------------------------------------- template < typename C, C L, C R, order_t SO=order_in_main_section, safemode_t S=safe, bool use_pool=true > class Reorder { private: template struct mempool { enum { size = (4096*1024) / sizeof (PT) }; // -- data/cache -------------------------------------------------- PT mem [size]; // -- alloc/dealloc-vars ------------------------------------------ mempool *curr; mempool *next; int nextfree; // -- members ----------------------------------------------------- mempool () : curr (this), next (0), nextfree(0) {} ~mempool () { if (0 != next) { next->~mempool (); } } PT *alloc() { using namespace std; if (curr->nextfree >= size) { curr->next = new mempool (); curr = curr->next; } return & curr->mem [curr->nextfree++]; // could also work with non-pod-types: //new (& curr->mem [curr->nextfree++]) PT; } }; // that function assumes we're already inside the pair of // brackets (i.e. after '[') template tuple scanRefTarget (I &curr, I end) { using namespace std; I old_curr = curr; int len = 0; // used to check validity of reference int refID = 0; // the reference-id while (R != *curr) { // <-- until we hit an end-bracket if (S == safe) { if (end == curr) { // <-- rudimentary error checking cerr << "found end of file but expected numbers or '" << R << endl; exit(1); } } const int d = toInt (*curr); if (-1 != d) { refID = refID * 10 + d; ++len; } ++curr; } if (0 != len) { return tuple (true, refID); } curr = old_curr; // <-- was not a reference, so rollback return tuple (false, -1); } template struct Chunk { // intentionally POD I begin, end; int ref_target; Chunk *next; }; template struct Reference { // intentionally POD I begin, end; int order_in_footnotes; int order_in_text; }; // Scans the main sections. template Chunk *scanText (I &curr, I end, mempool > &chunk_pool) { Chunk *chunk; if (use_pool) chunk = chunk_pool.alloc(); else chunk = new Chunk; Chunk * const first_chunk = chunk; chunk->begin = curr; chunk->next = 0; chunk->ref_target = -1; while (true) { if (L == *curr) { // <-- we've hit a start-bracket ++curr; // <-- eat start-bracket const I old_curr = curr; const tuple refID = scanRefTarget (curr, end); if (refID) { chunk->ref_target = refID; chunk->end = old_curr-1; if (use_pool) chunk->next = chunk_pool.alloc(); else chunk->next = new Chunk; chunk = chunk->next; chunk->next = 0; chunk->ref_target = -1; chunk->begin = curr+1; } } else if (end == curr) { chunk->end = curr; break; } else if ('\n' == *curr) { // Here we used the bitor '&' intentionally instead of '&&' // , the difference being that '&' most probably produces // branchfree code. // (see http://ompf.org/forum/viewtopic.php?p=10086#p10086) I old_curr = curr; if ('@' == curr[1] & 'f' == curr[2] & 'o' == curr[3] & 'o' == curr[4] & 't' == curr[5] & 'n' == curr[6] & 'o' == curr[7] & 't' == curr[8] & 'e' == curr[9] & ':' == curr[10] ) { curr += 11; // += strlen ("@footnote:") chunk->end = old_curr; // eat forward to newline while (curr != end && *curr != '\n') ++curr; ++curr; // eatup newline break; } else { curr = old_curr; } } ++curr; }; return first_chunk; } // Scans the footnotes section. template < typename I, class int_array // <-- template template = nonsense, // since std::vector, but growing_array > void scanFootnotes ( std::map > &refs, int_array &orderedRRefs, I &curr, I end ) { using namespace std; int footnodeID = 1; while (true) { while (L != *curr && '\n' != *curr && end != curr) ++curr; if (L == *curr) { ++curr; // <-- eat start-bracket const I old_curr = curr; const tuple refID = scanRefTarget (curr, end); ++curr; if (refID) { Reference ref; // eat whitespace while ('\t' == *curr || ' ' == *curr || '\n' == *curr) ++curr; ref.begin = curr; while (*curr != '\n' && curr != end) ++curr; ref.end = curr; ref.order_in_footnotes = footnodeID++; ref.order_in_text = -1; orderedRRefs.push_back (refID); refs [refID] = ref; } } else if (end == curr) { break; } ++curr; } } public: template Reorder (I begin, I end) { using namespace std; I curr = begin; // Scan text. mempool > chunk_pool; Chunk *chunk = scanText (curr, end, chunk_pool); // Scan footnotes. map > refs; typedef growing_array int_array; int_array orderedRRefs; scanFootnotes (refs, orderedRRefs, curr, end); switch (SO) { case order_in_footnotes_section: { // Dump Text for (const Chunk *it = chunk; 0 != it; it = it->next) { // Output text. for (I a=it->begin; a != it->end; ++a) { cout << *a; } // Output reference target. if (-1 != it->ref_target) { cout << '[' << refs [it->ref_target].order_in_footnotes << ']'; } } cout << '\n'; // Dump footnotes. cout << "@footnote:\n"; orderedRRefs.fast_begin(); const unsigned int s = orderedRRefs.size(); for (int i=0; i *it = chunk; 0 != it; it = it->next) { // Output text. for (I a=it->begin; a != it->end; ++a) { cout << *a; } // Output reference target. if (-1 != it->ref_target) { int &tmp = refs [it->ref_target].order_in_text; if (tmp < 0) tmp = newOrder++; cout << '[' << tmp << ']'; } } cout << '\n'; // Dump footnotes. cout << "@footnote:\n"; orderedRRefs.fast_begin(); const unsigned int s = orderedRRefs.size(); for (int i=0; i *tmp = chunk; chunk = chunk->next; // We can save some work by not calling ~Chunk() by assuming // that Chunk is a POD if (!use_pool) delete tmp; } } }; //----------------------------------------------------------------------------- // main //----------------------------------------------------------------------------- int main (int argc, char *argv []) { using namespace std; // Parse arguments. --argc; ++argv; string filename = ""; order_t orderType = order_in_main_section; while (0 != argc) { const string arg (*argv); --argc; ++argv; if ("-f" == arg || "--filename" == arg) { if (0 == argc) { cerr << "error:no parameter given to argument '" << arg << "'\n"; } else { filename = *argv; --argc; ++argv; } } else if ("-s" == arg || "--sort" == arg) { if (0 == argc) { cerr << "error:no parameter given to argument '" << arg << "'\n"; cerr << "use either '" << arg << " main' or '" << arg << "' footnotes\n"; } else { const string tmp = *argv; if ("main" == tmp) { orderType = order_in_main_section; } else if ("footnotes" == tmp) { orderType = order_in_footnotes_section; } else { cerr << "error:unknown parameter to argument '" << arg << "': " << tmp << '\n'; cerr << "use either '" << arg << " main' or '" << arg << " footnotes'\n"; } --argc; ++argv; } } else { cerr << "error:unknown argument '" << arg << "', skipping" << endl; return 1; } } // Either open textfile (if filename given) or read from stdin. char *begin, *end; if ("" != filename) { // Read from file. ifstream f (filename.c_str()); f.seekg (0, ios::end); const int len = f.tellg(); f.seekg (0, ios::beg); begin = new char [len]; end = begin + len; f.read (begin, len); f.close(); } else { // Read from stdin. string input (""), tmp; while (!(cin >> tmp).eof()) input += tmp + '\n'; begin = new char [input.size()]; end = begin + input.size(); strcpy (begin, input.c_str()); } switch (orderType) { case order_in_main_section: Reorder < char, '[', ']', order_in_main_section, unsafe, true > (begin, end); break; case order_in_footnotes_section: Reorder < char, '[', ']', order_in_footnotes_section, unsafe, true > (begin, end); break; }; delete [] begin; return 0; }