Skip to content
Snippets Groups Projects
Commit c2c453aa authored by Tino Didriksen's avatar Tino Didriksen Committed by Dylan Aïssi
Browse files

Import Debian changes 0.5.4-1

parents 1b7b52b4 9a29951b
Branches debian/trixie
Tags debian/0.5.4-1
1 merge request: !3 "Update from debian/trixie for apertis/v2026dev2"
Pipeline #861569 passed
authors:
- family-names: Pirinen
given-names: Flammie A
orcid: "https://orcid.org/0000-0003-1207-5395"
- family-names: Hardwick
given-names: Sam
cff-version: 1.2.0
date-released: "2022-03-13"
keywords:
- spell-checking
- nlp
message: If you use this software, please cite it using these metadata.
repository-code: "https://github.com/hfst/hfst-ospell"
title: HFST ospell
version: 0.5.3
preferred-citation:
authors:
- family-names: Pirinen
given-names: Flammie A
- family-names: Hardwick
given-names: Sam
- family-names: Lindén
given-names: Krister
title: "Effect of language and error models on efficiency of finite-state spell-checking and correction"
type: article
license: GPL-3.0
...@@ -27,8 +27,12 @@ endif # EXTRA_DEMOS ...@@ -27,8 +27,12 @@ endif # EXTRA_DEMOS
if HFST_OSPELL_OFFICE if HFST_OSPELL_OFFICE
MAYBE_HFST_OSPELL_OFFICE=hfst-ospell-office MAYBE_HFST_OSPELL_OFFICE=hfst-ospell-office
endif # HFST_OSPELL_OFFICE endif # HFST_OSPELL_OFFICE
if HFST_OSPELL_PREDICT
MAYBE_HFST_OSPELL_PREDICT=hfst-ospell-predict
endif
bin_PROGRAMS=hfst-ospell $(MAYBE_HFST_OSPELL_OFFICE) $(CONFERENCE_DEMOS) bin_PROGRAMS=hfst-ospell $(MAYBE_HFST_OSPELL_OFFICE) $(CONFERENCE_DEMOS) \
$(MAYBE_HFST_OSPELL_PREDICT)
lib_LTLIBRARIES=libhfstospell.la lib_LTLIBRARIES=libhfstospell.la
man1_MANS=hfst-ospell.1 hfst-ospell-office.1 man1_MANS=hfst-ospell.1 hfst-ospell-office.1
...@@ -63,6 +67,13 @@ hfst_ospell_LDADD=libhfstospell.la ...@@ -63,6 +67,13 @@ hfst_ospell_LDADD=libhfstospell.la
hfst_ospell_CXXFLAGS=$(AM_CXXFLAGS) $(CXXFLAGS) \ hfst_ospell_CXXFLAGS=$(AM_CXXFLAGS) $(CXXFLAGS) \
$(PKG_CXXFLAGS) $(PKG_CXXFLAGS)
if HFST_OSPELL_PREDICT
hfst_ospell_predict_SOURCES=predict.cc
hfst_ospell_predict_LDADD=libhfstospell.la
hfst_ospell_predict_CXXFLAGS=$(AM_CXXFLAGS) $(CXXFLAGS) \
$(PKG_CXXFLAGS)
endif
if HFST_OSPELL_OFFICE if HFST_OSPELL_OFFICE
hfst_ospell_office_SOURCES=office.cc hfst_ospell_office_SOURCES=office.cc
......
File moved
...@@ -58,7 +58,7 @@ inline std::string extract_to_mem(archive* ar, archive_entry* entry) { ...@@ -58,7 +58,7 @@ inline std::string extract_to_mem(archive* ar, archive_entry* entry) {
std::string buff(buffsize, 0); std::string buff(buffsize, 0);
for (;;) { for (;;) {
ssize_t curr = archive_read_data(ar, &buff[0] + full_length, buffsize - full_length); auto curr = archive_read_data(ar, &buff[0] + full_length, buffsize - full_length);
if (0 == curr) { if (0 == curr) {
break; break;
} }
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
# autoconf requirements # autoconf requirements
AC_PREREQ([2.62]) AC_PREREQ([2.62])
AC_INIT([hfstospell], [0.5.3], [hfst-bugs@helsinki.fi], [hfstospell], [http://hfst.github.io]) AC_INIT([hfstospell],[0.5.4],[hfst-bugs@helsinki.fi],[hfstospell],[http://hfst.github.io])
LT_PREREQ([2.2.6]) LT_PREREQ([2.2.6])
...@@ -34,7 +34,7 @@ AC_CONFIG_HEADERS([config.h]) ...@@ -34,7 +34,7 @@ AC_CONFIG_HEADERS([config.h])
HFSTOSPELL_NAME=hfstospell HFSTOSPELL_NAME=hfstospell
HFSTOSPELL_MAJOR=0 HFSTOSPELL_MAJOR=0
HFSTOSPELL_MINOR=5 HFSTOSPELL_MINOR=5
HFSTOSPELL_EXTENSION=.3 HFSTOSPELL_EXTENSION=.4
HFSTOSPELL_VERSION=$HFSTOSPELL_MAJOR.$HFSTOSPELL_MINOR$HFSTOSPELL_EXTENSION HFSTOSPELL_VERSION=$HFSTOSPELL_MAJOR.$HFSTOSPELL_MINOR$HFSTOSPELL_EXTENSION
AC_SUBST(HFSTOSPELL_MAJOR) AC_SUBST(HFSTOSPELL_MAJOR)
AC_SUBST(HFSTOSPELL_MINOR) AC_SUBST(HFSTOSPELL_MINOR)
...@@ -54,8 +54,13 @@ AM_CONDITIONAL([EXTRA_DEMOS], [test x$enable_extra_demos != xno]) ...@@ -54,8 +54,13 @@ AM_CONDITIONAL([EXTRA_DEMOS], [test x$enable_extra_demos != xno])
AC_ARG_ENABLE([hfst_ospell_office], AC_ARG_ENABLE([hfst_ospell_office],
[AS_HELP_STRING([--enable-hfst-ospell-office], [AS_HELP_STRING([--enable-hfst-ospell-office],
[build hfst-ospell-office @<:@default=yes@:>@])], [build hfst-ospell-office @<:@default=yes@:>@])],
[enable_hfst_ospell_ofiice=$enableval], [enable_hfst_ospell_office=yes]) [enable_hfst_ospell_office=$enableval], [enable_hfst_ospell_office=yes])
AM_CONDITIONAL([HFST_OSPELL_OFFICE], [test x$enable_hfst_ospell_office != xno]) AM_CONDITIONAL([HFST_OSPELL_OFFICE], [test x$enable_hfst_ospell_office != xno])
AC_ARG_ENABLE([hfst_ospell_predict],
[AS_HELP_STRING([--enable-hfst-ospell-predict],
[build hfst-ospell-predict @<:@default=yes@:>@])],
[enable_hfst_ospell_predict=$enableval], [enable_hfst_ospell_predict=yes])
AM_CONDITIONAL([HFST_OSPELL_PREDICT], [test x$enable_hfst_ospell_predict != xno])
AC_ARG_ENABLE([zhfst], AC_ARG_ENABLE([zhfst],
[AS_HELP_STRING([--enable-zhfst], [AS_HELP_STRING([--enable-zhfst],
[support zipped complex automaton sets @<:@default=check@:>@])], [support zipped complex automaton sets @<:@default=check@:>@])],
...@@ -84,7 +89,7 @@ AS_IF([test "x$with_extract" = xmem], [AC_DEFINE([ZHFST_EXTRACT_TO_MEM], [1], ...@@ -84,7 +89,7 @@ AS_IF([test "x$with_extract" = xmem], [AC_DEFINE([ZHFST_EXTRACT_TO_MEM], [1],
m4_ifdef([AM_PROG_AR], [AM_PROG_AR]) m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
AC_PROG_CC AC_PROG_CC
AC_PROG_CXX AC_PROG_CXX
AC_LIBTOOL_WIN32_DLL
LT_INIT LT_INIT
AC_PROG_INSTALL AC_PROG_INSTALL
AC_PROG_LN_S AC_PROG_LN_S
...@@ -146,17 +151,17 @@ AC_CHECK_FUNCS([strndup error]) ...@@ -146,17 +151,17 @@ AC_CHECK_FUNCS([strndup error])
# Require highest supported C++ standard # Require highest supported C++ standard
AC_LANG(C++) AC_LANG(C++)
AX_CHECK_COMPILE_FLAG([-std=c++20], [CXXFLAGS="$CXXFLAGS -std=c++20"], [ AX_CHECK_COMPILE_FLAG([-std=c++23], [CXXFLAGS="$CXXFLAGS -std=c++23"], [
AX_CHECK_COMPILE_FLAG([-std=c++2a], [CXXFLAGS="$CXXFLAGS -std=c++2a"], [ AX_CHECK_COMPILE_FLAG([-std=c++2b], [CXXFLAGS="$CXXFLAGS -std=c++2b"], [
AX_CHECK_COMPILE_FLAG([-std=c++17], [CXXFLAGS="$CXXFLAGS -std=c++17"], [ AX_CHECK_COMPILE_FLAG([-std=c++20], [CXXFLAGS="$CXXFLAGS -std=c++20"], [
AX_CHECK_COMPILE_FLAG([-std=c++1z], [CXXFLAGS="$CXXFLAGS -std=c++1z"], [ AX_CHECK_COMPILE_FLAG([-std=c++2a], [CXXFLAGS="$CXXFLAGS -std=c++2a"], [
AX_CHECK_COMPILE_FLAG([-std=c++14], [CXXFLAGS="$CXXFLAGS -std=c++14"], [ AX_CHECK_COMPILE_FLAG([-std=c++17], [CXXFLAGS="$CXXFLAGS -std=c++17"], [
AX_CHECK_COMPILE_FLAG([-std=c++1y], [CXXFLAGS="$CXXFLAGS -std=c++1y"], [ AX_CHECK_COMPILE_FLAG([-std=c++1z], [CXXFLAGS="$CXXFLAGS -std=c++1z"], [
AC_MSG_ERROR([Could not enable at least C++1y (C++14) - upgrade your compiler]) AC_MSG_ERROR([Could not enable at least C++1z (C++17) - upgrade your compiler])
]) ])
]) ])
]) ])
]) ])
]) ])
]) ])
......
hfst-ospell (0.5.4-1) unstable; urgency=medium
* Update to latest upstream
+ Fix FTBFS by Michael.Karcher@fu-berlin.de (Closes: #988129)
-- Tino Didriksen <tino@didriksen.cc> Tue, 20 Feb 2024 12:46:23 +0100
hfst-ospell (0.5.3-2) unstable; urgency=medium
* Team upload
* Fix gcc-13 build with upstream patch (Closes: #1037690)
-- Bastian Germann <bage@debian.org> Tue, 08 Aug 2023 21:42:28 +0200
hfst-ospell (0.5.3-1) unstable; urgency=low hfst-ospell (0.5.3-1) unstable; urgency=low
[ Tino Didriksen ] [ Tino Didriksen ]
......
Source: hfst-ospell Source: hfst-ospell
Section: science Section: science
Priority: optional Priority: optional
Maintainer: Debian Science Team <debian-science-maintainers@alioth-lists.debian.net> Maintainer: Debian Science Maintainers <debian-science-maintainers@alioth-lists.debian.net>
Uploaders: Tino Didriksen <tino@didriksen.cc>, Uploaders: Tino Didriksen <tino@didriksen.cc>,
Kartik Mistry <kartik@debian.org> Kartik Mistry <kartik@debian.org>
Build-Depends: autoconf, Build-Depends: autoconf,
...@@ -10,7 +10,7 @@ Build-Depends: autoconf, ...@@ -10,7 +10,7 @@ Build-Depends: autoconf,
libicu-dev, libicu-dev,
pkg-config, pkg-config,
zip zip
Standards-Version: 4.6.0 Standards-Version: 4.6.2
Homepage: https://github.com/hfst/hfst-ospell Homepage: https://github.com/hfst/hfst-ospell
Vcs-Git: https://salsa.debian.org/science-team/hfst-ospell.git Vcs-Git: https://salsa.debian.org/science-team/hfst-ospell.git
Vcs-Browser: https://salsa.debian.org/science-team/hfst-ospell Vcs-Browser: https://salsa.debian.org/science-team/hfst-ospell
...@@ -31,6 +31,8 @@ Multi-Arch: same ...@@ -31,6 +31,8 @@ Multi-Arch: same
Section: libs Section: libs
Depends: ${misc:Depends}, ${shlibs:Depends} Depends: ${misc:Depends}, ${shlibs:Depends}
Provides: libhfstospell Provides: libhfstospell
Conflicts: libhfstospell, libhfstospell11
Replaces: libhfstospell, libhfstospell11
Description: HFST spell checker runtime libraries Description: HFST spell checker runtime libraries
Minimal HFST optimized lookup format based spell checker library and Minimal HFST optimized lookup format based spell checker library and
a demonstrational implementation of command line based spell checker. a demonstrational implementation of command line based spell checker.
......
AUTHORS AUTHORS
NEWS NEWS
README README.md
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
export DH_OPTIONS export DH_OPTIONS
export LC_ALL=C.UTF-8 export LC_ALL=C.UTF-8
export DEB_BUILD_MAINT_OPTIONS = hardening=+all export "DEB_BUILD_MAINT_OPTIONS=hardening=+all optimize=+lto reproducible=+fixfilepath"
DPKG_EXPORT_BUILDFLAGS = 1 DPKG_EXPORT_BUILDFLAGS = 1
include /usr/share/dpkg/buildflags.mk include /usr/share/dpkg/buildflags.mk
...@@ -23,6 +23,3 @@ ifeq ($(filter nocheck,$(DEB_BUILD_OPTIONS)),) ...@@ -23,6 +23,3 @@ ifeq ($(filter nocheck,$(DEB_BUILD_OPTIONS)),)
override_dh_auto_test: override_dh_auto_test:
dh_auto_test --no-parallel dh_auto_test --no-parallel
endif endif
override_dh_missing:
dh_missing --fail-missing
version=4 version=4
https://github.com/hfst/hfst-ospell/releases \ opts="searchmode=plain" \
.*/@PACKAGE@-(\d[\d.]*)\.tar\.bz2 debian uupdate https://api.github.com/repos/hfst/@PACKAGE@/releases \
https://github.com/hfst/@PACKAGE@/releases/download/v(?:\d[\d.]*)/@PACKAGE@@ANY_VERSION@@ARCHIVE_EXT@
...@@ -59,9 +59,9 @@ uint16_t read_uint16_flipping_endianness(FILE * f) ...@@ -59,9 +59,9 @@ uint16_t read_uint16_flipping_endianness(FILE * f)
uint16_t read_uint16_flipping_endianness(char * raw) uint16_t read_uint16_flipping_endianness(char * raw)
{ {
uint16_t result = 0; uint16_t result = 0;
result |= *(raw + 1); result |= static_cast<uint8_t>(*(raw + 1));
result <<= 8; result <<= 8;
result |= *raw; result |= static_cast<uint8_t>(*raw);
return result; return result;
} }
...@@ -85,13 +85,13 @@ uint32_t read_uint32_flipping_endianness(FILE * f) ...@@ -85,13 +85,13 @@ uint32_t read_uint32_flipping_endianness(FILE * f)
uint32_t read_uint32_flipping_endianness(char * raw) uint32_t read_uint32_flipping_endianness(char * raw)
{ {
uint32_t result = 0; uint32_t result = 0;
result |= *(raw + 3); result |= static_cast<uint8_t>(*(raw + 3));
result <<= 8; result <<= 8;
result |= *(raw + 2); result |= static_cast<uint8_t>(*(raw + 2));
result <<= 8; result <<= 8;
result |= *(raw + 1); result |= static_cast<uint8_t>(*(raw + 1));
result <<= 8; result <<= 8;
result |= *raw; result |= static_cast<uint8_t>(*raw);
return result; return result;
} }
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <climits> #include <climits>
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <cstdint>
#include <iostream> #include <iostream>
#include <cstring> #include <cstring>
#include <set> #include <set>
......
...@@ -164,6 +164,10 @@ bool print_short_help(void) ...@@ -164,6 +164,10 @@ bool print_short_help(void)
void void
do_suggest(ZHfstOspeller& speller, const std::string& str) do_suggest(ZHfstOspeller& speller, const std::string& str)
{ {
if (verbose)
{
hfst_fprintf(stdout, "Suggesting for %s:\n", str.c_str());
}
hfst_ospell::CorrectionQueue corrections = speller.suggest(str); hfst_ospell::CorrectionQueue corrections = speller.suggest(str);
if (corrections.size() > 0) if (corrections.size() > 0)
{ {
......
/*
Copyright 2022 Flammie A Pirinen
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
This is a toy commandline utility for testing spellers on standard io.
*/
#if HAVE_CONFIG_H
#include <config.h>
#else
#define PACKAGE_NAME
#define PACKAGE_BUGREPORT
#define PACKAGE_STRING
#endif
#if HAVE_GETOPT_H
#include <getopt.h>
#endif
#ifdef WINDOWS
#include <windows.h>
#endif
#include <cstdarg>
#include <errno.h>
#include <stdio.h>
#include <iostream>
#include <string>
#include "ZHfstOspeller.h"
#include "ol-exceptions.h"
#include "ospell.h"
using hfst_ospell::Transducer;
using hfst_ospell::ZHfstOspeller;
// Command-line state shared by the functions in this file. main() fills
// these in while parsing options.

// Set by -q / -s and cleared by -v.
static bool quiet = false;
// Set by -v and cleared by -q / -s; enables the extra diagnostic messages.
static bool verbose = false;
// Set by -a; makes do_predict()/do_spell() print analyses as well.
static bool analyse = false;
// Value of -n, handed to set_queue_limit(). 0 is the "not given" value.
static unsigned long suggs = 0;
// Value of -w, handed to set_weight_limit(). Negative means "not given".
static hfst_ospell::Weight max_weight = -1.0;
// Value of -b, handed to set_beam(). Negative means "not given".
static hfst_ospell::Weight beam = -1.0;
// Value of -t in seconds, handed to set_time_cutoff() by zhfst_spell().
static float time_cutoff = 0.0;
// Value of -m: path of the error model used together with -l.
static std::string error_model_filename = "";
// Value of -l: path of the lexicon used together with -m.
static std::string lexicon_filename = "";
// Value of -C: suffix that marks a prediction as an incomplete word.
static std::string continuation_marker = "";
#ifdef WINDOWS
// Set by -k; makes hfst_fprintf() write stdout/stderr via WriteConsoleW.
static bool output_to_console = false;
#endif
#ifdef WINDOWS
// Converts a UTF-16 wide string to a UTF-8 encoded std::string using
// WideCharToMultiByte: the first call measures the output, the second
// call fills it.
static std::string
wide_string_to_string(const std::wstring &wstr)
{
    const int wide_len = (int)wstr.size();
    const int utf8_len = WideCharToMultiByte(CP_UTF8, 0, &wstr[0], wide_len,
                                             NULL, 0, NULL, NULL);
    std::string utf8(utf8_len, 0);
    WideCharToMultiByte(CP_UTF8, 0, &wstr[0], wide_len, &utf8[0], utf8_len,
                        NULL, NULL);
    return utf8;
}
#endif
// Stand-in for C++20's std::string::ends_with
// (see https://stackoverflow.com/a/2072890/4109773).
// Returns true when `value` finishes with `ending`; an empty `ending`
// matches every `value`.
inline bool
ends_with(std::string const &value, std::string const &ending)
{
    const std::string::size_type tail_len = ending.size();
    const std::string::size_type full_len = value.size();
    return tail_len <= full_len
           && value.compare(full_len - tail_len, tail_len, ending) == 0;
}
/*
 * printf-style output helper used for every message this tool prints.
 *
 * stream: destination FILE (stdout or stderr in this file).
 * format: printf format string, followed by its arguments.
 *
 * Outside Windows, and on Windows when console output was not requested,
 * this is vfprintf() and returns its result; a negative result is also
 * reported through perror() on non-Windows builds.
 *
 * On Windows with output_to_console set, the text is formatted as UTF-8,
 * converted to UTF-16 and written with WriteConsoleW; the return value is
 * then WriteConsoleW's result (non-zero on success), or a negative value
 * when formatting or conversion fails.
 */
static int
hfst_fprintf(FILE *stream, const char *format, ...)
{
    va_list args;
    va_start(args, format);
#ifdef WINDOWS
    if (output_to_console && (stream == stdout || stream == stderr))
    {
        // Measure first: the text can be arbitrarily long (input words,
        // metadata dumps), so a fixed-size buffer filled by vsprintf
        // could be overrun.
        va_list measure_args;
        va_copy(measure_args, args);
        const int needed = vsnprintf(NULL, 0, format, measure_args);
        va_end(measure_args);
        if (needed < 0)
        {
            va_end(args);
            return needed;
        }
        std::string pstr(static_cast<std::string::size_type>(needed) + 1,
                         '\0');
        const int r = vsnprintf(&pstr[0], pstr.size(), format, args);
        va_end(args);
        if (r < 0)
            return r;
        HANDLE stdHandle = GetStdHandle(STD_OUTPUT_HANDLE);
        if (stream == stderr)
            stdHandle = GetStdHandle(STD_ERROR_HANDLE);
        DWORD numWritten = 0;
        // Passing -1 converts up to and including the terminating NUL, so
        // the returned count includes that terminator.
        const int wchars_num
            = MultiByteToWideChar(CP_UTF8, 0, pstr.c_str(), -1, NULL, 0);
        if (wchars_num <= 0)
            return -1; // conversion failed, nothing sensible to write
        std::wstring wstr(static_cast<std::wstring::size_type>(wchars_num),
                          L'\0');
        MultiByteToWideChar(CP_UTF8, 0, pstr.c_str(), -1, &wstr[0],
                            wchars_num);
        // Do not write the terminating NUL to the console.
        int retval = WriteConsoleW(stdHandle, wstr.c_str(), wchars_num - 1,
                                   &numWritten, NULL);
        return retval;
    }
    else
    {
        int retval = vfprintf(stream, format, args);
        va_end(args);
        return retval;
    }
#else
    errno = 0;
    int retval = vfprintf(stream, format, args);
    if (retval < 0)
    {
        perror("hfst_fprintf");
    }
    va_end(args);
    return retval;
#endif
}
// Writes the full usage/help text to std::cout.
// The -k line is only part of the text on Windows builds.
// Always returns true.
bool
print_usage(void)
{
    std::cout
        << "\n"
        << "Usage: " PACKAGE_NAME " [OPTIONS] [ZHFST-ARCHIVE]\n"
        << "Use automata in ZHFST-ARCHIVE or from OPTIONS to check and "
           "predict\n"
           "\n"
        << " -h, --help Print this help message\n"
        << " -V, --version Print version information\n"
        << " -v, --verbose Be verbose\n"
        << " -q, --quiet Don't be verbose (default)\n"
        << " -s, --silent Same as quiet\n"
        << " -a, --analyse Analyse strings and corrections\n"
        << " -n, --limit=N Show at most N predictions\n"
        << " -w, --max-weight=W Suppress corrections with weights "
           "above W\n"
        << " -b, --beam=W Suppress corrections worse than best "
           "candidate by more than W\n"
        << " -t, --time-cutoff=T Stop trying to find better "
           "corrections after T seconds (T is a float)\n"
        << " -C, --continuation=C Word-continuation character is C\n"
        << " -m, --error-model Use this error model (must also give "
           "lexicon as option)\n"
        << " -l, --lexicon Use this lexicon (must also give error "
           "model as option)\n"
        <<
#ifdef WINDOWS
        " -k, --output-to-console Print output to console "
        "(Windows-specific)"
        <<
#endif
        "\n"
        << "\n"
        << "Report bugs to " PACKAGE_BUGREPORT "\n"
        << "\n";
    return true;
}
// Writes the package identification string and the copyright line to
// std::cout. Always returns true.
bool
print_version(void)
{
    std::cout << "\n" PACKAGE_STRING;
    std::cout << std::endl;
    std::cout << "copyright (C) 2009 - 2022 University of Helsinki\n";
    return true;
}
// Short help shown after a command-line error. It is currently the same
// text as the full usage message. Always returns true.
bool
print_short_help(void)
{
    static_cast<void>(print_usage());
    return true;
}
// Asks `speller` for suggestions for `str` and prints them to stdout, one
// per line together with its weight.
//
// With the analyse flag set, every suggestion is additionally analysed and
// printed once per analysis; analyses containing "Use/SpellNoSugg" are
// tagged as discarded. Without it, a suggestion that ends with the
// continuation marker is printed with the marker cut off and "..."
// appended. Prints an "Unable to correct" message when there are no
// suggestions at all.
void
do_predict(ZHfstOspeller &speller, const std::string &str)
{
    if (verbose)
    {
        hfst_fprintf(stdout, "Suggesting for %s:\n", str.c_str());
    }
    hfst_ospell::CorrectionQueue corrections = speller.suggest(str);
    if (corrections.size() == 0)
    {
        hfst_fprintf(stdout, "Unable to correct \"%s\"!\n\n", str.c_str());
        return;
    }

    hfst_fprintf(stdout, "Corrections for \"%s\":\n", str.c_str());
    // The queue is consumed as it is printed; the pop happens after each
    // body run, so `corr` stays valid for the whole iteration.
    for (; corrections.size() > 0; corrections.pop())
    {
        const std::string &corr = corrections.top().first;

        if (!analyse)
        {
            const bool is_continuation
                = !continuation_marker.empty()
                  && ends_with(corr, continuation_marker);
            if (is_continuation)
            {
                std::string chomped = corr.substr(
                    0, corr.size() - continuation_marker.size());
                hfst_fprintf(stdout, "%s... %f (continuation %s)\n",
                             chomped.c_str(), corrections.top().second,
                             continuation_marker.c_str());
            }
            else
            {
                hfst_fprintf(stdout, "%s %f\n", corr.c_str(),
                             corrections.top().second);
            }
            continue;
        }

        hfst_ospell::AnalysisQueue anals = speller.analyse(corr, true);
        bool all_discarded = true;
        for (; anals.size() > 0; anals.pop())
        {
            const bool discarded
                = anals.top().first.find("Use/SpellNoSugg")
                  != std::string::npos;
            if (discarded)
            {
                hfst_fprintf(stdout,
                             "%s %f %s "
                             "[DISCARDED BY ANALYSES]\n",
                             corr.c_str(), corrections.top().second,
                             anals.top().first.c_str());
            }
            else
            {
                all_discarded = false;
                hfst_fprintf(stdout, "%s %f %s\n", corr.c_str(),
                             corrections.top().second,
                             anals.top().first.c_str());
            }
        }
        if (all_discarded)
        {
            hfst_fprintf(stdout, "All corrections were "
                                 "invalidated by analysis! "
                                 "No score!\n");
        }
    }
    hfst_fprintf(stdout, "\n");
}
// Checks `str` against `speller`, reports on stdout whether it is in the
// lexicon, and then always runs do_predict() on it.
//
// For an accepted word with the analyse flag set, each analysis is printed
// with its weight; analyses containing "Use/-Spell" are tagged as
// discarded, and a notice is printed when no analysis survives.
void
do_spell(ZHfstOspeller &speller, const std::string &str)
{
    if (!speller.spell(str))
    {
        hfst_fprintf(stdout, "\"%s\" is NOT in the lexicon:\n", str.c_str());
        do_predict(speller, str);
        return;
    }

    hfst_fprintf(stdout, "\"%s\" is in the lexicon...\n", str.c_str());
    if (analyse)
    {
        hfst_fprintf(stdout, "analysing:\n");
        hfst_ospell::AnalysisQueue anals = speller.analyse(str, false);
        bool all_no_spell = true;
        while (anals.size() > 0)
        {
            if (anals.top().first.find("Use/-Spell") != std::string::npos)
            {
                hfst_fprintf(stdout, "%s %f [DISCARDED AS -Spell]\n",
                             anals.top().first.c_str(), anals.top().second);
            }
            else
            {
                all_no_spell = false;
                hfst_fprintf(stdout, "%s %f\n", anals.top().first.c_str(),
                             anals.top().second);
            }
            anals.pop();
        }
        if (all_no_spell)
        {
            hfst_fprintf(stdout,
                         "All spellings were invalidated by analysis! "
                         ".:. Not in lexicon!\n");
        }
    }
    // This format has no conversion, so no argument is passed with it.
    hfst_fprintf(stdout, "(but correcting anyways)\n");
    do_predict(speller, str);
}
// Loads the speller archive `zhfst_filename`, applies the command-line
// limits to it and then runs do_spell() on every non-empty input line.
//
// Input comes from the console (ReadConsoleW) on Windows and from std::cin
// elsewhere. Returns EXIT_FAILURE when the archive cannot be read and
// EXIT_SUCCESS once the input is exhausted. On non-Windows builds a line
// ending in '\r' terminates the program with exit status 1.
int
zhfst_spell(char *zhfst_filename)
{
    ZHfstOspeller speller;
    try
    {
        speller.read_zhfst(zhfst_filename);
    }
    catch (hfst_ospell::ZHfstMetaDataParsingError &zhmdpe)
    {
        hfst_fprintf(stderr, "cannot finish reading zhfst archive %s:\n%s.\n",
                     zhfst_filename, zhmdpe.what());
        return EXIT_FAILURE;
    }
    catch (hfst_ospell::ZHfstZipReadingError &zhzre)
    {
        hfst_fprintf(stderr,
                     "cannot read zhfst archive %s:\n"
                     "%s.\n",
                     zhfst_filename, zhzre.what());
        return EXIT_FAILURE;
    }
    catch (hfst_ospell::ZHfstXmlParsingError &zhxpe)
    {
        hfst_fprintf(stderr,
                     "Cannot finish reading index.xml from %s:\n"
                     "%s.\n",
                     zhfst_filename, zhxpe.what());
        return EXIT_FAILURE;
    }
    if (verbose)
    {
        hfst_fprintf(stdout,
                     "Following metadata was read from ZHFST archive:\n"
                     "%s\n",
                     speller.metadata_dump().c_str());
    }
    speller.set_queue_limit(suggs);
    if (suggs != 0 && verbose)
    {
        hfst_fprintf(stdout, "Printing only %lu top predictions per line\n",
                     suggs);
    }
    speller.set_weight_limit(max_weight);
    if (max_weight >= 0.0 && verbose)
    {
        hfst_fprintf(stdout, "Not printing predictions worse than %f\n",
                     max_weight);
    }
    speller.set_beam(beam);
    if (beam >= 0.0 && verbose)
    {
        hfst_fprintf(stdout,
                     "Not printing predictions worse than best by margin %f\n",
                     beam);
    }
    speller.set_time_cutoff(time_cutoff);
    if (time_cutoff > 0.0 && verbose)
    {
        hfst_fprintf(
            stdout, "Not trying to find better predictions after %f seconds\n",
            time_cutoff);
    }
    if ((!continuation_marker.empty()) && verbose)
    {
        hfst_fprintf(stdout, "%s marks incomplete words\n",
                     continuation_marker.c_str());
    }
    // A std::string line buffer has no length limit. A fixed 2000-byte
    // buffer read with istream::getline sets failbit on a longer line, and
    // a loop that only tests eof() then never terminates.
    std::string line;
#ifdef WINDOWS
    SetConsoleCP(65001);
    const HANDLE stdIn = GetStdHandle(STD_INPUT_HANDLE);
    WCHAR buffer[0x1000];
    DWORD numRead = 0;
    // ReadConsoleW expects the capacity in characters, not in bytes.
    while (ReadConsoleW(stdIn, buffer, sizeof buffer / sizeof buffer[0],
                        &numRead, NULL))
    {
        if (numRead == 0)
        {
            // Nothing was read; treat it as end of input rather than
            // computing numRead - 1 on an unsigned zero.
            break;
        }
        std::wstring wstr(buffer, numRead - 1); // skip the newline
        line = wide_string_to_string(wstr);
#else
    while (std::getline(std::cin, line))
    {
#endif
        if (line.empty())
        {
            continue;
        }
        if (line[line.size() - 1] == '\r')
        {
#ifdef WINDOWS
            line.erase(line.size() - 1);
            if (line.empty())
            {
                continue;
            }
#else
            hfst_fprintf(stderr, "There is a WINDOWS linebreak in this file\n"
                                 "Please convert with dos2unix or fromdos\n");
            exit(1);
#endif
        }
        do_spell(speller, line);
    }
    return EXIT_SUCCESS;
}
// Runs the interactive loop with a speller built from a separate error
// model and lexicon: `s` is injected into a ZHfstOspeller, the
// command-line limits are applied, and do_spell() is run on every
// non-empty input line.
//
// Input comes from the console (ReadConsoleW) on Windows and from std::cin
// elsewhere. Returns EXIT_SUCCESS once the input is exhausted. On
// non-Windows builds a line ending in '\r' terminates the program with
// exit status 1.
//
// NOTE(review): unlike zhfst_spell(), this path does not call
// set_time_cutoff(), so -t has no effect here - confirm whether that is
// intended.
int
legacy_spell(hfst_ospell::Speller *s)
{
    ZHfstOspeller speller;
    speller.inject_speller(s);
    speller.set_queue_limit(suggs);
    if (suggs != 0 && verbose)
    {
        hfst_fprintf(stdout, "Printing only %lu top predictions per line\n",
                     suggs);
    }
    speller.set_weight_limit(max_weight);
    if (max_weight >= 0.0 && verbose)
    {
        // %f must be paired with the weight itself, not the integer limit.
        hfst_fprintf(stdout, "Not printing predictions worse than %f\n",
                     max_weight);
    }
    speller.set_beam(beam);
    if (beam >= 0.0 && verbose)
    {
        hfst_fprintf(stdout,
                     "Not printing predictions worse than best by margin %f\n",
                     beam);
    }
    // A std::string line buffer has no length limit. A fixed 2000-byte
    // buffer read with istream::getline sets failbit on a longer line, and
    // a loop that only tests eof() then never terminates.
    std::string line;
#ifdef WINDOWS
    SetConsoleCP(65001);
    const HANDLE stdIn = GetStdHandle(STD_INPUT_HANDLE);
    WCHAR buffer[0x1000];
    DWORD numRead = 0;
    // ReadConsoleW expects the capacity in characters, not in bytes.
    while (ReadConsoleW(stdIn, buffer, sizeof buffer / sizeof buffer[0],
                        &numRead, NULL))
    {
        if (numRead == 0)
        {
            // Nothing was read; treat it as end of input rather than
            // computing numRead - 1 on an unsigned zero.
            break;
        }
        std::wstring wstr(buffer, numRead - 1); // skip the newline
        line = wide_string_to_string(wstr);
#else
    while (std::getline(std::cin, line))
    {
#endif
        if (line.empty())
        {
            continue;
        }
        if (line[line.size() - 1] == '\r')
        {
#ifdef WINDOWS
            line.erase(line.size() - 1);
            if (line.empty())
            {
                continue;
            }
#else
            hfst_fprintf(stderr, "There is a WINDOWS linebreak in this file\n"
                                 "Please convert with dos2unix or fromdos\n");
            exit(1);
#endif
        }
        do_spell(speller, line);
    }
    return EXIT_SUCCESS;
}
// Program entry point: parses the command-line options into the
// file-scope settings and then starts one of the two interactive modes.
//
//  - exactly one positional argument: it is a ZHFST archive, handled by
//    zhfst_spell(); combining it with -m / -l is an error;
//  - no positional argument: both -m and -l must be given, and the two
//    automata are loaded for legacy_spell();
//  - more than one positional argument is an error.
//
// Returns EXIT_SUCCESS or EXIT_FAILURE; malformed numeric option values
// end the program through exit(1).
int
main(int argc, char **argv)
{
#if HAVE_GETOPT_H
    int c;
    // std::locale::global(std::locale(""));
    while (true)
    {
        // NOTE: -S, -X/--real-word and (outside Windows) -k are accepted
        // by getopt but have no case below, so they are reported as
        // invalid options by the default branch.
        static struct option long_options[]
            = { // first the hfst-mandated options
                { "help", no_argument, 0, 'h' },
                { "version", no_argument, 0, 'V' },
                { "verbose", no_argument, 0, 'v' },
                { "quiet", no_argument, 0, 'q' },
                { "silent", no_argument, 0, 's' },
                { "analyse", no_argument, 0, 'a' },
                { "limit", required_argument, 0, 'n' },
                { "max-weight", required_argument, 0, 'w' },
                { "beam", required_argument, 0, 'b' },
                { "time-cutoff", required_argument, 0, 't' },
                { "real-word", no_argument, 0, 'X' },
                { "error-model", required_argument, 0, 'm' },
                { "lexicon", required_argument, 0, 'l' },
                { "continuation", required_argument, 0, 'C' },
#ifdef WINDOWS
                { "output-to-console", no_argument, 0, 'k' },
#endif
                { 0, 0, 0, 0 }
              };
        int option_index = 0;
        c = getopt_long(argc, argv, "hVvqsan:w:b:t:SXm:l:kC:", long_options,
                        &option_index);
        char *endptr = 0;
        if (c == -1) // no more options to look at
            break;
        switch (c)
        {
        case 'h':
            print_usage();
            return EXIT_SUCCESS;
            break;
        case 'V':
            print_version();
            return EXIT_SUCCESS;
            break;
        case 'v':
            verbose = true;
            quiet = false;
            break;
        case 'q': // fallthrough
        case 's':
            quiet = true;
            verbose = false;
            break;
        case 'a':
            analyse = true;
            break;
        case 'n':
            suggs = strtoul(optarg, &endptr, 10);
            if (endptr == optarg)
            {
                fprintf(stderr, "%s not a strtoul number\n", optarg);
                exit(1);
            }
            else if (*endptr != '\0')
            {
                fprintf(stderr, "%s truncated from limit parameter\n", endptr);
            }
            break;
        case 'w':
            max_weight = strtof(optarg, &endptr);
            if (endptr == optarg)
            {
                fprintf(stderr, "%s is not a float\n", optarg);
                exit(1);
            }
            else if (*endptr != '\0')
            {
                fprintf(stderr, "%s truncated from max-weight parameter\n",
                        endptr);
            }
            break;
        case 'b':
            beam = strtof(optarg, &endptr);
            if (endptr == optarg)
            {
                fprintf(stderr, "%s is not a float\n", optarg);
                exit(1);
            }
            else if (*endptr != '\0')
            {
                fprintf(stderr, "%s truncated from beam parameter\n", endptr);
            }
            break;
        case 't':
            time_cutoff = strtof(optarg, &endptr);
            if (endptr == optarg)
            {
                fprintf(stderr, "%s is not a float\n", optarg);
                exit(1);
            }
            else if (*endptr != '\0')
            {
                fprintf(stderr, "%s truncated from time-cutoff parameter\n",
                        endptr);
            }
            break;
#ifdef WINDOWS
        case 'k':
            output_to_console = true;
            break;
#endif
        case 'm':
            error_model_filename = optarg;
            break;
        case 'l':
            lexicon_filename = optarg;
            break;
        case 'C':
            continuation_marker = optarg;
            break;
        default:
            std::cerr << "Invalid option\n\n";
            print_short_help();
            return EXIT_FAILURE;
            break;
        }
    }
#else
    int optind = 1;
#endif
    // no more options, we should now be at the input filenames
    if (optind == (argc - 1))
    {
        if (!error_model_filename.empty() || !lexicon_filename.empty())
        {
            std::cerr << "Give *either* a zhfst speller or --error-model and "
                         "--lexicon"
                      << std::endl;
            print_short_help();
            return EXIT_FAILURE;
        }
        return zhfst_spell(argv[optind]);
    }
    else if (optind < (argc - 1))
    {
        std::cerr << "Too many file parameters" << std::endl;
        print_short_help();
        return EXIT_FAILURE;
    }
    else if (optind >= argc)
    {
        if (error_model_filename.empty() || lexicon_filename.empty())
        {
            std::cerr << "Give *either* a zhfst speller or --error-model and "
                         "--lexicon"
                      << std::endl;
            print_short_help();
            return EXIT_FAILURE;
        }
        // The automata are binary data, so open them in binary mode; text
        // mode would alter the bytes on platforms that translate line
        // endings.
        FILE *err_file = fopen(error_model_filename.c_str(), "rb");
        if (err_file == NULL)
        {
            std::cerr << "Could not open error model file "
                      << error_model_filename << std::endl;
            return EXIT_FAILURE;
        }
        FILE *lex_file = fopen(lexicon_filename.c_str(), "rb");
        if (lex_file == NULL)
        {
            std::cerr << "Could not open lexicon file " << lexicon_filename
                      << std::endl;
            fclose(err_file);
            return EXIT_FAILURE;
        }
        hfst_ospell::Transducer err(err_file);
        hfst_ospell::Transducer lex(lex_file);
        // `err` and `lex` live until main returns, i.e. for the whole
        // legacy_spell() run that uses the speller built on them.
        hfst_ospell::Speller *s = new hfst_ospell::Speller(&err, &lex);
        return legacy_spell(s);
    }
    return EXIT_SUCCESS;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment