Import Upstream version 0.5.3

47508373 · Apertis CI · 17e5d61f · 47508373 · 47508373 · 47508373
Commit 47508373 authored 1 year ago by Apertis CI
--- a/configure.ac
+++ b/configure.ac
@@ -17,7 +17,7 @@
 # autoconf requirements
 AC_PREREQ([2.62])
-AC_INIT([hfstospell], [0.5.2], [hfst-bugs@helsinki.fi], [hfstospell], [http://hfst.github.io])
+AC_INIT([hfstospell], [0.5.3], [hfst-bugs@helsinki.fi], [hfstospell], [http://hfst.github.io])
 LT_PREREQ([2.2.6])
@@ -34,7 +34,7 @@ AC_CONFIG_HEADERS([config.h])
 HFSTOSPELL_NAME=hfstospell
 HFSTOSPELL_MAJOR=0
 HFSTOSPELL_MINOR=5
-HFSTOSPELL_EXTENSION=.2
+HFSTOSPELL_EXTENSION=.3
 HFSTOSPELL_VERSION=$HFSTOSPELL_MAJOR.$HFSTOSPELL_MINOR$HFSTOSPELL_EXTENSION
 AC_SUBST(HFSTOSPELL_MAJOR)
 AC_SUBST(HFSTOSPELL_MINOR)

--- a/hfst-ol.cc
+++ b/hfst-ol.cc
@@ -177,8 +177,8 @@ void TransducerHeader::skip_hfst3_header(FILE * f)
            HFSTOSPELL_THROW_MESSAGE(HeaderParsingException,
                               "Found broken HFST3 header\n");
        }
-        char * headervalue = new char[remaining_header_len];
+        std::string headervalue(remaining_header_len, '\0');
-        if (fread(headervalue, remaining_header_len, 1, f) != 1)
+        if (fread(&headervalue[0], remaining_header_len, 1, f) != 1)
        {
            HFSTOSPELL_THROW_MESSAGE(HeaderParsingException,
                               "HFST3 header ended unexpectedly\n");
@@ -187,12 +187,10 @@ void TransducerHeader::skip_hfst3_header(FILE * f)
            HFSTOSPELL_THROW_MESSAGE(HeaderParsingException,
                               "Found broken HFST3 header\n");
        }
-        std::string header_tail(headervalue, remaining_header_len);
+        auto type_field = headervalue.find("type");
-        size_t type_field = header_tail.find("type");
        if (type_field != std::string::npos) {
-            if (header_tail.find("HFST_OL") != type_field + 5 &&
+            if (headervalue.find("HFST_OL") != type_field + 5 &&
-                header_tail.find("HFST_OLW") != type_field + 5) {
+                headervalue.find("HFST_OLW") != type_field + 5) {
-                delete[] headervalue;
                HFSTOSPELL_THROW_MESSAGE(
                    TransducerTypeException,
                    "Transducer has incorrect type, should be "
@@ -809,7 +807,7 @@ void Encoder::read_input_symbol(const char * s, const int s_num)
        // If this is shadowed by an ascii symbol, unshadow
        ascii_symbols[(unsigned char)(*s)] = NO_SYMBOL;
    }
    letters.add_string(s, static_cast<SymbolNumber>(s_num));
 }

--- a/main.cc
+++ b/main.cc
 /*
  Copyright 2009 University of Helsinki
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
  http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
 */
 /*
@@ -165,7 +165,7 @@ void
 do_suggest(ZHfstOspeller& speller, const std::string& str)
  {
    hfst_ospell::CorrectionQueue corrections = speller.suggest(str);
-    if (corrections.size() > 0) 
+    if (corrections.size() > 0)
    {
        hfst_fprintf(stdout, "Corrections for \"%s\":\n", str.c_str());
        while (corrections.size() > 0)
@@ -181,7 +181,7 @@ do_suggest(ZHfstOspeller& speller, const std::string& str)
                        std::string::npos)
                      {
                        hfst_fprintf(stdout, "%s    %f    %s    "
-                                       "[DISCARDED BY ANALYSES]\n", 
+                                       "[DISCARDED BY ANALYSES]\n",
                                       corr.c_str(), corrections.top().second,
                                       anals.top().first.c_str());
                      }
@@ -203,8 +203,8 @@ do_suggest(ZHfstOspeller& speller, const std::string& str)
              }
            else
              {
-                hfst_fprintf(stdout, "%s    %f\n", 
+                hfst_fprintf(stdout, "%s    %f\n",
-                                   corr.c_str(), 
+                                   corr.c_str(),
                                   corrections.top().second);
              }
            corrections.pop();
@@ -222,7 +222,7 @@ do_suggest(ZHfstOspeller& speller, const std::string& str)
 void
 do_spell(ZHfstOspeller& speller, const std::string& str)
  {
-    if (speller.spell(str)) 
+    if (speller.spell(str))
      {
        hfst_fprintf(stdout, "\"%s\" is in the lexicon...\n",
                           str.c_str());
@@ -251,7 +251,7 @@ do_spell(ZHfstOspeller& speller, const std::string& str)
              }
            if (all_no_spell)
              {
-                hfst_fprintf(stdout, 
+                hfst_fprintf(stdout,
                             "All spellings were invalidated by analysis! "
                             ".:. Not in lexicon!\n");
              }
@@ -281,43 +281,33 @@ zhfst_spell(char* zhfst_filename)
    {
      speller.read_zhfst(zhfst_filename);
    }
-  catch (hfst_ospell::ZHfstMetaDataParsingError zhmdpe)
+  catch (hfst_ospell::ZHfstMetaDataParsingError& zhmdpe)
    {
-      hfst_fprintf(stderr, "cannot finish reading zhfst archive %s:\n%s.\n", 
+      hfst_fprintf(stderr, "cannot finish reading zhfst archive %s:\n%s.\n",
                         zhfst_filename, zhmdpe.what());
-      //std::cerr << "cannot finish reading zhfst archive " << zhfst_filename <<
-      //             ":\n" << zhmdpe.what() << "." << std::endl;
      return EXIT_FAILURE;
    }
-  catch (hfst_ospell::ZHfstZipReadingError zhzre)
+  catch (hfst_ospell::ZHfstZipReadingError& zhzre)
    {
-      //std::cerr << "cannot read zhfst archive " << zhfst_filename << ":\n" 
+      hfst_fprintf(stderr,
-      //    << zhzre.what() << "." << std::endl
-      //    << "trying to read as legacy automata directory" << std::endl;
-      hfst_fprintf(stderr, 
                         "cannot read zhfst archive %s:\n"
                         "%s.\n",
                         zhfst_filename, zhzre.what());
      return EXIT_FAILURE;
    }
-  catch (hfst_ospell::ZHfstXmlParsingError zhxpe)
+  catch (hfst_ospell::ZHfstXmlParsingError& zhxpe)
    {
-      //std::cerr << "Cannot finish reading index.xml from " 
+      hfst_fprintf(stderr,
-      //  << zhfst_filename << ":" << std::endl
-      //  << zhxpe.what() << "." << std::endl;
-      hfst_fprintf(stderr, 
                         "Cannot finish reading index.xml from %s:\n"
-                         "%s.\n", 
+                         "%s.\n",
                         zhfst_filename, zhxpe.what());
      return EXIT_FAILURE;
    }
  if (verbose)
    {
-      //std::cout << "Following metadata was read from ZHFST archive:" << std::endl
+      hfst_fprintf(stdout,
-      //          << speller.metadata_dump() << std::endl;
-      hfst_fprintf(stdout, 
                         "Following metadata was read from ZHFST archive:\n"
-                         "%s\n", 
+                         "%s\n",
                         speller.metadata_dump().c_str());
    }
  speller.set_queue_limit(suggs);
@@ -354,7 +344,7 @@ zhfst_spell(char* zhfst_filename)
        std::string linestr = wide_string_to_string(wstr);
        free(str);
        str = strdup(linestr.c_str());
-#else    
+#else
    while (!std::cin.eof()) {
        std::cin.getline(str, 2000);
 #endif
@@ -398,7 +388,7 @@ int
          hfst_fprintf(stdout, "Not printing suggestions worse than best by margin %f\n", suggs);
      }
      char * str = (char*) malloc(2000);
 #ifdef WINDOWS
    SetConsoleCP(65001);
    const HANDLE stdIn = GetStdHandle(STD_INPUT_HANDLE);
@@ -410,7 +400,7 @@ int
        std::string linestr = wide_string_to_string(wstr);
        free(str);
        str = strdup(linestr.c_str());
-#else    
+#else
    while (!std::cin.eof()) {
        std::cin.getline(str, 2000);
 #endif
@@ -435,11 +425,11 @@ int
 int main(int argc, char **argv)
 {
+#if HAVE_GETOPT_H
    int c;
    //std::locale::global(std::locale(""));
-#if HAVE_GETOPT_H
    while (true) {
        static struct option long_options[] =
            {
@@ -463,7 +453,7 @@ int main(int argc, char **argv)
 #endif
            {0,              0,                 0,  0 }
            };
        int option_index = 0;
        c = getopt_long(argc, argv, "hVvqsan:w:b:t:SXm:l:k", long_options, &option_index);
        char* endptr = 0;
@@ -476,17 +466,17 @@ int main(int argc, char **argv)
            print_usage();
            return EXIT_SUCCESS;
            break;
        case 'V':
            print_version();
            return EXIT_SUCCESS;
            break;
        case 'v':
            verbose = true;
            quiet = false;
            break;
        case 'q': // fallthrough
        case 's':
            quiet = true;
@@ -550,7 +540,7 @@ int main(int argc, char **argv)
        case 'k':
            output_to_console = true;
            break;
-#endif 
+#endif
        case 'S':
            suggest = true;
            break;

--- a/office.cc
+++ b/office.cc
@@ -21,16 +21,16 @@
 */
 /*
-	Tests up to 16 variations of each input token:
+	Tests up to 8 variations of each input token:
 	- Verbatim
 	- With leading non-alphanumerics removed
 	- With trailing non-alphanumerics removed
 	- With leading and trailing non-alphanumerics removed
-	- Lower-case of all the above
+	- First-lower of all the above
-	- First-upper of all the above
 */
 #include <iostream>
+#include <iomanip>
 #include <fstream>
 #include <vector>
 #include <string>
@@ -42,6 +42,7 @@
 #include <cmath>
 #include <cerrno>
 #include <cctype>
+#include <getopt.h>
 #define U_CHARSET_IS_UTF8 1
 #include <unicode/uclean.h>
@@ -64,13 +65,18 @@ struct word_t {
 	UnicodeString buffer;
 };
 std::vector<word_t> words(16);
-std::string buffer;
+std::string buffer, wbuf;
-std::vector<std::string> alts;
+using Alt = std::pair<double,std::string>;
+std::vector<Alt> alts;
 std::unordered_set<std::string> outputs;
 UnicodeString ubuffer, uc_buffer;
 size_t cw;
 bool verbatim = false;
+bool debug = false;
+hfst_ospell::Weight max_weight = -1.0;
+hfst_ospell::Weight beam = -1.0;
+float time_cutoff = 6.0;
 bool uc_first = false;
 bool uc_all = true;
@@ -82,14 +88,18 @@ bool find_alternatives(ZHfstOspeller& speller, size_t suggs) {
 	for (size_t k=0 ; k < cw && alts.size()<suggs ; ++k) {
 		buffer.clear();
 		words[k].buffer.toUTF8String(buffer);
-		hfst_ospell::CorrectionQueue corrections = speller.suggest(buffer);
+		auto corrections = speller.suggest(buffer);
 		if (corrections.size() == 0) {
 			continue;
 		}
-		// Because speller.set_queue_limit() doesn't actually work, hard limit it here
+		for (size_t i=0, e=corrections.size() ; i<e ; ++i) {
-		for (size_t i=0, e=corrections.size() ; i<e && alts.size()<suggs ; ++i) {
+			// Work around https://github.com/hfst/hfst-ospell/issues/54
+			if (max_weight > 0.0 && corrections.top().second > max_weight) {
+				break;
+			}
+			auto w = corrections.top().second * (1.0 + k/10.0);
 			buffer.clear();
 			if (k != 0) {
@@ -112,8 +122,18 @@ bool find_alternatives(ZHfstOspeller& speller, size_t suggs) {
 				words[0].buffer.tempSubString(words[k].start + words[k].count).toUTF8String(buffer);
 			}
+			if (debug) {
+				wbuf.resize(64);
+				wbuf.resize(sprintf(&wbuf[0], " (%.2f;%zu)", corrections.top().second, k));
+				buffer += wbuf;
+			}
 			if (outputs.count(buffer) == 0) {
-				alts.push_back(buffer);
+				alts.push_back({w, buffer});
+				std::sort(alts.begin(), alts.end());
+				while (alts.size() > suggs) {
+					alts.pop_back();
+				}
 			}
 			outputs.insert(buffer);
 			corrections.pop();
@@ -123,7 +143,7 @@ bool find_alternatives(ZHfstOspeller& speller, size_t suggs) {
 	if (!alts.empty()) {
 		std::cout << "&";
 		for (auto& alt : alts) {
-			std::cout << "\t" << alt;
+			std::cout << "\t" << alt.second;
 		}
 		std::cout << std::endl;
 		return true;
@@ -167,7 +187,7 @@ bool is_valid_word(ZHfstOspeller& speller, const std::string& word, size_t suggs
 	}
 	size_t ichStart = 0, cchUse = ubuffer.length();
-	const UChar *pwsz = ubuffer.getTerminatedBuffer();
+	auto pwsz = ubuffer.getTerminatedBuffer();
 	// Always test the full given input
 	words[0].buffer.remove();
@@ -216,7 +236,7 @@ bool is_valid_word(ZHfstOspeller& speller, const std::string& word, size_t suggs
 	for (size_t i=0, e=cw ; i<e ; ++i) {
 		// If we are looking for suggestions, don't use the cache
-		valid_words_t::iterator it = suggs ? valid_words.end() : valid_words.find(words[i].buffer);
+		auto it = suggs ? valid_words.end() : valid_words.find(words[i].buffer);
 		if (it == valid_words.end()) {
 			buffer.clear();
@@ -224,49 +244,21 @@ bool is_valid_word(ZHfstOspeller& speller, const std::string& word, size_t suggs
 			bool valid = speller.spell(buffer);
 			it = valid_words.insert(std::make_pair(words[i].buffer,valid)).first;
-			if (!valid && !verbatim) {
+			if (!valid && !verbatim && uc_first) {
-				// If the word was not valid, fold it to lower case and try again
+				// If the word was not valid, try a first-lower variant
-				buffer.clear();
-				ubuffer = words[i].buffer;
-				ubuffer.toLower();
-				ubuffer.toUTF8String(buffer);
-				// Add the lower case variant to the list so that we get suggestions using that, if need be
-				words[cw].start = words[i].start;
-				words[cw].count = words[i].count;
-				words[cw].buffer = ubuffer;
-				++cw;
-				// Don't try again if the lower cased variant has already been tried
-				valid_words_t::iterator itl = suggs ? valid_words.end() : valid_words.find(ubuffer);
-				if (itl != valid_words.end()) {
-					it->second = itl->second;
-					it = itl;
-				}
-				else {
-					valid = speller.spell(buffer);
-					it->second = valid; // Also mark the original mixed case variant as whatever the lower cased one was
-					it = valid_words.insert(std::make_pair(words[i].buffer,valid)).first;
-				}
-			}
-			if (!valid && !verbatim && (uc_all || uc_first)) {
-				// If the word was still not valid but had upper case, try a first-upper variant
 				buffer.clear();
 				ubuffer.setTo(words[i].buffer, 0, 1);
-				ubuffer.toUpper();
+				ubuffer.toLower();
-				uc_buffer.setTo(words[i].buffer, 1);
+				ubuffer.append(words[i].buffer, 1, words[i].buffer.length() - 1);
-				uc_buffer.toLower();
-				ubuffer.append(uc_buffer);
 				ubuffer.toUTF8String(buffer);
-				// Add the first-upper variant to the list so that we get suggestions using that, if need be
+				// Add the first-lower case variant to the list so that we get suggestions using that, if need be
 				words[cw].start = words[i].start;
 				words[cw].count = words[i].count;
 				words[cw].buffer = ubuffer;
 				++cw;
-				// Don't try again if the first-upper variant has already been tried
+				// Don't try again if the first-lower variant has already been tried
 				valid_words_t::iterator itl = suggs ? valid_words.end() : valid_words.find(ubuffer);
 				if (itl != valid_words.end()) {
 					it->second = itl->second;
@@ -274,7 +266,7 @@ bool is_valid_word(ZHfstOspeller& speller, const std::string& word, size_t suggs
 				}
 				else {
 					valid = speller.spell(buffer);
-					it->second = valid; // Also mark the original mixed case variant as whatever the first-upper one was
+					it->second = valid; // Also mark the original mixed case variant as whatever the first-lower one was
 					it = valid_words.insert(std::make_pair(words[i].buffer,valid)).first;
 				}
 			}
@@ -291,8 +283,13 @@ bool is_valid_word(ZHfstOspeller& speller, const std::string& word, size_t suggs
 int zhfst_spell(const char* zhfst_filename) {
 	ZHfstOspeller speller;
 	try {
+		if (debug) {
+			std::cout << "@@ Loading " << zhfst_filename << " with args max-weight=" << max_weight << ", beam=" << beam << ", time-cutoff=" << time_cutoff << std::endl;
+		}
 		speller.read_zhfst(zhfst_filename);
-		speller.set_time_cutoff(6.0);
+		speller.set_weight_limit(max_weight);
+		speller.set_beam(beam);
+		speller.set_time_cutoff(time_cutoff);
 	}
 	catch (hfst_ospell::ZHfstMetaDataParsingError zhmdpe) {
 		fprintf(stderr, "cannot finish reading zhfst archive %s:\n%s.\n", zhfst_filename, zhmdpe.what());
@@ -319,6 +316,38 @@ int zhfst_spell(const char* zhfst_filename) {
 		if (line.empty()) {
 			continue;
 		}
+		if (line.size() >= 5 && line[0] == '$' && line[1] == '$' && line[3] == ' ') {
+			if (line[2] == 'd' && isdigit(line[4]) && line.size() == 5) {
+				debug = (line[4] != '0');
+				std::cout << "@@ Option debug changed to " << debug << std::endl;
+				continue;
+			}
+			if (line[2] == 'T' && isdigit(line[4]) && line.size() == 5) {
+				verbatim = (line[4] != '0');
+				std::cout << "@@ Option verbatim changed to " << verbatim << std::endl;
+				continue;
+			}
+			if (line[2] == 'w' && isdigit(line[4])) {
+				max_weight = std::stof(&line[4]);
+				speller.set_weight_limit(max_weight);
+				std::cout << "@@ Option max-weight changed to " << max_weight << std::endl;
+				continue;
+			}
+			if (line[2] == 'b' && isdigit(line[4])) {
+				beam = std::stof(&line[4]);
+				speller.set_beam(beam);
+				std::cout << "@@ Option beam changed to " << beam << std::endl;
+				continue;
+			}
+			if (line[2] == 't' && isdigit(line[4])) {
+				time_cutoff = std::stof(&line[4]);
+				speller.set_time_cutoff(time_cutoff);
+				std::cout << "@@ Option time-cutoff changed to " << time_cutoff << std::endl;
+				continue;
+			}
+		}
 		// Just in case anyone decides to use the speller for a minor eternity
 		if (valid_words.size() > 20480) {
 			valid_words.clear();
@@ -345,6 +374,19 @@ int zhfst_spell(const char* zhfst_filename) {
    return EXIT_SUCCESS;
 }
+void print_help() {
+	std::cout
+		<< "Usage: hfst-ospell [options] zhfst-archive\n"
+		<< "\n"
+		<< " -h, --help            Shows this help\n"
+		<< " -d, --debug           Debug output with weights attached to results\n"
+		<< " -T, --verbatim        Disables case-folding and non-alphanumeric trimming\n"
+		<< " -w, --max-weight=W    Suppress corrections with weights above W\n"
+		<< " -b, --beam=W          Suppress corrections worse than best candidate by more than W\n"
+		<< " -t, --time-cutoff=T   Stop trying to find better corrections after T seconds; defaults to 6.0\n"
+		<< std::flush;
+}
 int main(int argc, char **argv) {
 	UErrorCode status = U_ZERO_ERROR;
 	u_init(&status);
@@ -356,22 +398,60 @@ int main(int argc, char **argv) {
 	ucnv_setDefaultName("UTF-8");
 	uloc_setDefault("en_US_POSIX", &status);
-	std::vector<std::string> args(argv, argv+argc);
+	struct option long_options[] =
-	for (std::vector<std::string>::iterator it=args.begin() ; it != args.end() ; ) {
+		{
-		if (*it == "--verbatim") {
+		{"help",         no_argument,       0, 'h'},
-			verbatim = true;
+		{"debug",        no_argument,       0, 'd'},
-			it = args.erase(it);
+		{"verbatim",     no_argument,       0, 'T'},
+		{"max-weight",   required_argument, 0, 'w'},
+		{"beam",         required_argument, 0, 'b'},
+		{"time-cutoff",  required_argument, 0, 't'},
+		{0,              0,                 0,  0 }
+		};
+	int c = 0;
+	while (true) {
+		int option_index = 0;
+		c = getopt_long(argc, argv, "hdTw:b:t:", long_options, &option_index);
+		if (c == -1) {
+			break;
 		}
-		else {
-			++it;
+		switch (c) {
+		case 'h':
+			print_help();
+			return EXIT_SUCCESS;
+		case 'd':
+			debug = true;
+			break;
+		case 'T':
+			verbatim = true;
+			break;
+		case 'w':
+			max_weight = std::stof(optarg);
+			break;
+		case 'b':
+			beam = std::stof(optarg);
+			break;
+		case 't':
+			time_cutoff = std::stof(optarg);
+			break;
 		}
 	}
-	if (args.size() < 2) {
+	if (optind >= argc) {
 		throw std::invalid_argument("Must pass a zhfst as argument");
 	}
-	int rv = zhfst_spell(args[1].c_str());
+	std::cerr << std::fixed << std::setprecision(2);
+	std::cout << std::fixed << std::setprecision(2);
+	int rv = zhfst_spell(argv[optind]);
 	u_cleanup();
 	return rv;

--- a/ol-exceptions.h
+++ b/ol-exceptions.h
@@ -4,6 +4,7 @@
 #include "hfstol-stdafx.h"
 #include <string>
 #include <sstream>
+#include <cstring>
 namespace hfst_ospell
 {
@@ -21,7 +22,7 @@ struct OspellException
    size_t line;      //!< line number of exception
    OspellException(void) {}
 //!
 //! construct exception with name, file and location
    OspellException(const std::string &name,const std::string &file,size_t line):
@@ -29,7 +30,7 @@ struct OspellException
    file(file),
    line(line)
    {}
    //!
    //! create string representation of exception for output
    std::string operator() (void) const
@@ -45,7 +46,7 @@ struct OspellException
      {
        std::ostringstream o;
        o << file << ":" << line << ":" << name;
-        return o.str().c_str();
+        return strdup(o.str().c_str());
      }
 };
@@ -59,7 +60,7 @@ struct OspellException
 #define HFSTOSPELL_EXCEPTION_CHILD_DECLARATION(CHILD) \
    struct CHILD : public OspellException \
    { CHILD(const std::string &name,const std::string &file,size_t line):\
-    OspellException(name,file,line) {}} 
+    OspellException(name,file,line) {}}
 #define HFST_CATCH(E)                           \
    catch (const E &e)                          \

--- a/ospell.cc
+++ b/ospell.cc
@@ -152,31 +152,31 @@ TreeNode TreeNode::update(SymbolNumber symbol,
 bool TreeNode::try_compatible_with(FlagDiacriticOperation op)
 {
    switch (op.Operation()) {
    case P: // positive set
        flag_state[op.Feature()] = op.Value();
        return true;
    case N: // negative set (literally, in this implementation)
        flag_state[op.Feature()] = -1*op.Value();
        return true;
    case R: // require
        if (op.Value() == 0) { // "plain" require, return false if unset
            return (flag_state[op.Feature()] != 0);
        }
        return (flag_state[op.Feature()] == op.Value());
    case D: // disallow
        if (op.Value() == 0) { // "plain" disallow, return true if unset
            return (flag_state[op.Feature()] == 0);
        }
        return (flag_state[op.Feature()] != op.Value());
    case C: // clear
        flag_state[op.Feature()] = 0;
        return true;
    case U: // unification
        /* if the feature is unset OR the feature is to this value already OR
           the feature is negatively set to something else than this value */
@@ -190,7 +190,7 @@ bool TreeNode::try_compatible_with(FlagDiacriticOperation op)
        }
        return false;
    }
    return false; // to make the compiler happy
 }
@@ -204,7 +204,11 @@ Speller::Speller(Transducer* mutator_ptr, Transducer* lexicon_ptr):
        alphabet_translator(SymbolVector()),
        operations(lexicon->get_operations()),
        limiting(None),
-        mode(Correct)
+        mode(Correct),
+        max_time(-1.0),
+        start_clock(0),
+        call_counter(0),
+        limit_reached(false)
            {
                if (mutator != NULL) {
                    build_alphabet_translator();
@@ -228,7 +232,7 @@ void Speller::lexicon_epsilons(void)
    }
    TransitionTableIndex next = lexicon->next(next_node.lexicon_state, 0);
    STransition i_s = lexicon->take_epsilons_and_flags(next);
    while (i_s.symbol != NO_SYMBOL) {
        if (is_under_weight_limit(next_node.weight + i_s.weight)) {
            if (lexicon->transitions.input_symbol(next) == 0) {
@@ -326,7 +330,7 @@ void Speller::mutator_epsilons(void)
    }
    TransitionTableIndex next_m = mutator->next(next_node.mutator_state, 0);
    STransition mutator_i_s = mutator->take_epsilons(next_m);
    while (mutator_i_s.symbol != NO_SYMBOL) {
        if (mutator_i_s.symbol == 0) {
            if (is_under_weight_limit(
@@ -460,12 +464,9 @@ bool Transducer::initialize_input_vector(SymbolVector & input_vector,
                                         char * line)
 {
    input_vector.clear();
-    SymbolNumber k = NO_SYMBOL;
    char ** inpointer = &line;
-    char * oldpointer;
    while (**inpointer != '\0') {
-        oldpointer = *inpointer;
+        SymbolNumber k = encoder->find_key(inpointer);
-        k = encoder->find_key(inpointer);
        if (k == NO_SYMBOL) { // no tokenization from alphabet
            // for real handling of other and identity for unseen symbols,
            // use the Speller interface analyse()!
@@ -532,18 +533,18 @@ AnalysisQueue Transducer::lookup(char * line)
                i_s = take_epsilons_and_flags(next_index);
            }
        }
        // input consumption loop
        unsigned int input_state = next_node.input_state;
        if (input_state < input.size() &&
            has_transitions(
                next_node.lexicon_state + 1, input[input_state])) {
            next_index = next(next_node.lexicon_state,
                              input[input_state]);
            STransition i_s = take_non_epsilons(next_index,
                                                input[input_state]);
            while (i_s.symbol != NO_SYMBOL) {
                queue.push_back(next_node.update(
                                    i_s.symbol,
@@ -551,18 +552,18 @@ AnalysisQueue Transducer::lookup(char * line)
                                    next_node.mutator_state,
                                    i_s.index,
                                    i_s.weight));
                ++next_index;
                i_s = take_non_epsilons(next_index, input[input_state]);
            }
        }
    }
    for (auto& it : outputs) {
        analyses.push(StringWeightPair(it.first, it.second));
    }
    return analyses;
 }
@@ -729,7 +730,7 @@ Weight Transducer::final_weight(const TransitionTableIndex i) const
 bool
 Transducer::is_flag(const SymbolNumber symbol)
 {
-    return alphabet.is_flag(symbol); 
+    return alphabet.is_flag(symbol);
 }
 bool
@@ -888,7 +889,7 @@ CorrectionQueue Speller::correct(char * line, int nbest,
    std::map<std::string, Weight> corrections;
    SymbolNumber first_input = (input.size() == 0) ? 0 : input[0];
    if (cache[first_input].empty) {
-        build_cache(first_input);
+        build_cache(first_input); // XXX: cache corrupts limit!
    }
    if (input.size() <= 1) {
        // get the cached results and we're done
@@ -908,6 +909,7 @@ CorrectionQueue Speller::correct(char * line, int nbest,
                    }
                }
            }
+        set_limiting_behaviour(nbest, maxweight, beam);
        adjust_weight_limits(nbest, beam);
        for(auto& it : *results) {
              // Then collect the results
@@ -946,6 +948,7 @@ CorrectionQueue Speller::correct(char * line, int nbest,
        */
        next_node = queue.back();
        queue.pop_back();
+        set_limiting_behaviour(nbest, maxweight, beam); // XXX: need to reset
        adjust_weight_limits(nbest, beam);
        // if we can't get an acceptable result, never mind
        if (next_node.weight > limit) {
@@ -1005,6 +1008,7 @@ CorrectionQueue Speller::correct(char * line, int nbest,
            }
        }
    }
+    //cache[first_input].clear();
    return correction_queue;
 }
@@ -1031,12 +1035,16 @@ void Speller::set_limiting_behaviour(int nbest, Weight maxweight, Weight beam)
        limiting = Nbest;
    } else if (maxweight < 0.0 && nbest == 0 && beam >= 0.0) {
        limiting = Beam;
+    } else {
+        return;
    }
 }
 void Speller::adjust_weight_limits(int nbest, Weight beam)
 {
-    if (limiting == Nbest && nbest_queue.size() >= nbest) {
+    if (limiting == MaxWeight) {
+        return;
+    } else if (limiting == Nbest && nbest_queue.size() >= nbest) {
        limit = nbest_queue.get_highest();
    } else if (limiting == MaxWeightNbest && nbest_queue.size() >= nbest) {
        limit = std::min(limit, nbest_queue.get_lowest());
@@ -1201,7 +1209,7 @@ void Speller::add_symbol_to_alphabet_translator(SymbolNumber to_sym)
 }
 } // namespace hfst_ospell
 char*
 hfst_strndup(const char* s, size_t n)
  {