//-------------------------------------------------------------------- // // TextAnalyst // //-------------------------------------------------------------------- #include //#include "stradt.h" //#include "listarr.C" #include "wordcount.h" #include "textanalyst.h" TextAnalyst::TextAnalyst (char* filename) { dictionary = new List< WordCount >(30000); read( filename ); } void TextAnalyst::read (char* filename) { dictionary->clear(); ifstream source( filename ); Stradt word; WordCount next; while (source >> word) { next.initialize( word ); if ( (dictionary->gotoBeginning()) && (dictionary->find( next )) ) { next = dictionary->getCursor(); next.increment(); dictionary->replace( next ); } else { next.increment(); dictionary->insert( next ); } } source.close(); } int TextAnalyst::number_of_words () { int count = 0; if ( dictionary->gotoBeginning() ) do count = count + 1; while ( dictionary->gotoNext() ); return count; } double TextAnalyst::average_word_length() { if ( !(dictionary->gotoBeginning()) ) return 0; double total_chars = 0.0; int total_words = 0; Stradt current_word; int occurrences = 0; do { current_word = dictionary->getCursor().word(); occurrences = dictionary->getCursor().count(); total_chars += occurrences * current_word.length(); total_words += occurrences; } while ( dictionary->gotoNext() ); return total_chars / total_words; } Stradt TextAnalyst::longest_word() { dictionary->gotoBeginning(); Stradt max = dictionary->getCursor().word(); Stradt current; while ( dictionary->gotoNext() ) { current = dictionary->getCursor().word(); if ( current.length() > max.length() ) max = current; } return max; } Stradt TextAnalyst::shortest_word() { dictionary->gotoBeginning(); Stradt min = dictionary->getCursor().word(); Stradt current; while ( dictionary->gotoNext() ) { current = dictionary->getCursor().word(); if ( current.length() < min.length() ) min = current; } return min; } WordCount TextAnalyst::find_most_common() { dictionary->gotoBeginning(); WordCount max = dictionary->getCursor(); WordCount current; while ( dictionary->gotoNext() ) { current = dictionary->getCursor(); if ( current.count() > max.count() ) max = current; } return max; } Stradt TextAnalyst::most_common_word() { return find_most_common().word(); } int TextAnalyst::occurrences_of_most_common () { return find_most_common().count(); } int TextAnalyst::super_long_word_occurs() { // Ouch! Is this inefficient! Unfortunately, we can only save, // on average, half of the second pass when searching for a long // word anyway... return longest_word().length() > (2 * average_word_length()); } int TextAnalyst::qxz_word_occurs() { if ( !(dictionary->gotoBeginning()) ) return 0; char first; do { first = dictionary->getCursor().word()[0]; if ( (first == 'q') || (first == 'x') || (first == 'z') ) return 1; } while ( dictionary->gotoNext() ); return 0; }