#!%PerlProg% # # sizedb - calculate size of database needed # # usage: sizedb config [files] # # Algorithm is # # Extract all the words from all Indexed fields. # Throw away ones <2 characters or that begin with parens. # Pipe through "sort | uniq | wc -l". # This gives the number of words that will be indexed. # Multiply that by 2 or so to get a comfortable table size. # Print the next prime number greater than the calculated table size. # # Copyright (c) 1985 Corporation for Research and Educational Networking # Copyright (c) 1988 University of Illinois Board of Trustees, Steven # Dorner, and Paul Pomes # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 3. All advertising materials mentioning features or use of this software # must display the following acknowledgement: # This product includes software developed by the Corporation for # Research and Educational Networking (CREN), the University of # Illinois at Urbana, and their contributors. # 4. Neither the name of CREN, the University nor the names of their # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE TRUSTEES AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. 
# IN NO EVENT SHALL THE TRUSTEES OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# @(#)$Id: sizedb,v 1.4 1995/01/11 00:05:11 p-pomes Exp $
#

use strict;
use warnings;

#
# Find the numbers of the Indexed fields.
#
# Every config line that contains ":Indexed" is split on ':'; the first
# piece is stored as the field number and the second piece is used as the
# key (field name) in %Dex.
my $config = shift;
die "usage: sizedb config [files]\n" unless defined $config;

my %Dex;
open(my $config_fh, '<', $config) or die "$config: $!\n";
while (my $line = <$config_fh>) {
    next unless $line =~ /:Indexed/;
    my ($nnum, $nname) = split(/:/, $line);
    $Dex{$nname} = $nnum;
}
close($config_fh);

#
# Everything printed to $out is fed through one shell pipeline:
#   sort | uniq | wc -l    counts the distinct words,
#   (cat; echo '2 * p')    appends the dc program that doubles the count,
#   dc | primes | head -1  prints the first prime at or above that size.
# Running it as a single pipeline avoids the predictable /tmp scratch file
# that would otherwise be needed to hand the count to dc.
my $pipeline =
    q{sort | uniq | wc -l | (cat; echo '2 * p') | dc | primes | head -1};
open(my $out, '|-', $pipeline)
    or die "sizedb: cannot start '$pipeline': $!\n";

#
# Collect all indexed words from the remaining files (or standard input).
#
# The per-field patterns are compiled once; \Q...\E keeps the field number
# literal inside the regex.
#
# NOTE(review): the capture class [^ ] stops at the first space, so the
# space-based substitutions below can never match as written.  The class
# may originally have been a tab ([^\t]) -- confirm against the data file
# format before changing it.
my @field_patterns = map { qr/\b\Q$_\E:([^ ]+)/ } values %Dex;

while (my $record = <>) {
    chomp $record;    # chomp, not chop: a final line may lack a newline
    for my $pattern (@field_patterns) {
        next unless $record =~ $pattern;
        my $words = $1;
        $words =~ s/\([^ ]* //g;    # drop words that begin with a paren
        $words =~ s/ \w\w / /g;     # drop two-character words
        $words =~ s/ \w / /g;       # drop one-character words
        $words =~ s/ /\n/g;         # one word per line for sort/uniq
        print {$out} "$words\n";
    }
}

# Closing the pipe waits for the pipeline to finish and print its result.
close($out)
    or die "sizedb: pipeline failed: " . ($! ? $! : "exit status $?") . "\n";

exit 0;