Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
H
hfst-ospell
Manage
Activity
Members
Labels
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Admin message
v2026dev1
entered Hard Code Freeze, changes will not be accepted •
schedule
Show more breadcrumbs
pkg
hfst-ospell
Commits
47508373
Commit
47508373
authored
1 year ago
by
Apertis CI
Browse files
Options
Downloads
Patches
Plain Diff
Import Upstream version 0.5.3
parent
17e5d61f
Branches
upstream/bookworm
Branches containing commit
Tags
upstream/0.5.3
Tags containing commit
1 merge request
!2
Update from debian/bookworm for apertis/v2024dev2
Pipeline
#861357
skipped
Changes
6
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
configure.ac
+2
-2
2 additions, 2 deletions
configure.ac
hfst-ol.cc
+6
-8
6 additions, 8 deletions
hfst-ol.cc
main.cc
+31
-41
31 additions, 41 deletions
main.cc
office.cc
+137
-57
137 additions, 57 deletions
office.cc
ol-exceptions.h
+5
-4
5 additions, 4 deletions
ol-exceptions.h
ospell.cc
+33
-25
33 additions, 25 deletions
ospell.cc
with
214 additions
and
137 deletions
configure.ac
+
2
−
2
View file @
47508373
...
@@ -17,7 +17,7 @@
...
@@ -17,7 +17,7 @@
# autoconf requirements
# autoconf requirements
AC_PREREQ([2.62])
AC_PREREQ([2.62])
AC_INIT([hfstospell], [0.5.
2
], [hfst-bugs@helsinki.fi], [hfstospell], [http://hfst.github.io])
AC_INIT([hfstospell], [0.5.
3
], [hfst-bugs@helsinki.fi], [hfstospell], [http://hfst.github.io])
LT_PREREQ([2.2.6])
LT_PREREQ([2.2.6])
...
@@ -34,7 +34,7 @@ AC_CONFIG_HEADERS([config.h])
...
@@ -34,7 +34,7 @@ AC_CONFIG_HEADERS([config.h])
HFSTOSPELL_NAME=hfstospell
HFSTOSPELL_NAME=hfstospell
HFSTOSPELL_MAJOR=0
HFSTOSPELL_MAJOR=0
HFSTOSPELL_MINOR=5
HFSTOSPELL_MINOR=5
HFSTOSPELL_EXTENSION=.
2
HFSTOSPELL_EXTENSION=.
3
HFSTOSPELL_VERSION=$HFSTOSPELL_MAJOR.$HFSTOSPELL_MINOR$HFSTOSPELL_EXTENSION
HFSTOSPELL_VERSION=$HFSTOSPELL_MAJOR.$HFSTOSPELL_MINOR$HFSTOSPELL_EXTENSION
AC_SUBST(HFSTOSPELL_MAJOR)
AC_SUBST(HFSTOSPELL_MAJOR)
AC_SUBST(HFSTOSPELL_MINOR)
AC_SUBST(HFSTOSPELL_MINOR)
...
...
This diff is collapsed.
Click to expand it.
hfst-ol.cc
+
6
−
8
View file @
47508373
...
@@ -177,8 +177,8 @@ void TransducerHeader::skip_hfst3_header(FILE * f)
...
@@ -177,8 +177,8 @@ void TransducerHeader::skip_hfst3_header(FILE * f)
HFSTOSPELL_THROW_MESSAGE
(
HeaderParsingException
,
HFSTOSPELL_THROW_MESSAGE
(
HeaderParsingException
,
"Found broken HFST3 header
\n
"
);
"Found broken HFST3 header
\n
"
);
}
}
char
*
headervalue
=
new
char
[
remaining_header_len
]
;
std
::
string
headervalue
(
remaining_header_len
,
'\0'
)
;
if
(
fread
(
headervalue
,
remaining_header_len
,
1
,
f
)
!=
1
)
if
(
fread
(
&
headervalue
[
0
]
,
remaining_header_len
,
1
,
f
)
!=
1
)
{
{
HFSTOSPELL_THROW_MESSAGE
(
HeaderParsingException
,
HFSTOSPELL_THROW_MESSAGE
(
HeaderParsingException
,
"HFST3 header ended unexpectedly
\n
"
);
"HFST3 header ended unexpectedly
\n
"
);
...
@@ -187,12 +187,10 @@ void TransducerHeader::skip_hfst3_header(FILE * f)
...
@@ -187,12 +187,10 @@ void TransducerHeader::skip_hfst3_header(FILE * f)
HFSTOSPELL_THROW_MESSAGE
(
HeaderParsingException
,
HFSTOSPELL_THROW_MESSAGE
(
HeaderParsingException
,
"Found broken HFST3 header
\n
"
);
"Found broken HFST3 header
\n
"
);
}
}
std
::
string
header_tail
(
headervalue
,
remaining_header_len
);
auto
type_field
=
headervalue
.
find
(
"type"
);
size_t
type_field
=
header_tail
.
find
(
"type"
);
if
(
type_field
!=
std
::
string
::
npos
)
{
if
(
type_field
!=
std
::
string
::
npos
)
{
if
(
header_tail
.
find
(
"HFST_OL"
)
!=
type_field
+
5
&&
if
(
headervalue
.
find
(
"HFST_OL"
)
!=
type_field
+
5
&&
header_tail
.
find
(
"HFST_OLW"
)
!=
type_field
+
5
)
{
headervalue
.
find
(
"HFST_OLW"
)
!=
type_field
+
5
)
{
delete
[]
headervalue
;
HFSTOSPELL_THROW_MESSAGE
(
HFSTOSPELL_THROW_MESSAGE
(
TransducerTypeException
,
TransducerTypeException
,
"Transducer has incorrect type, should be "
"Transducer has incorrect type, should be "
...
@@ -809,7 +807,7 @@ void Encoder::read_input_symbol(const char * s, const int s_num)
...
@@ -809,7 +807,7 @@ void Encoder::read_input_symbol(const char * s, const int s_num)
// If this is shadowed by an ascii symbol, unshadow
// If this is shadowed by an ascii symbol, unshadow
ascii_symbols
[(
unsigned
char
)(
*
s
)]
=
NO_SYMBOL
;
ascii_symbols
[(
unsigned
char
)(
*
s
)]
=
NO_SYMBOL
;
}
}
letters
.
add_string
(
s
,
static_cast
<
SymbolNumber
>
(
s_num
));
letters
.
add_string
(
s
,
static_cast
<
SymbolNumber
>
(
s_num
));
}
}
...
...
This diff is collapsed.
Click to expand it.
main.cc
+
31
−
41
View file @
47508373
/*
/*
Copyright 2009 University of Helsinki
Copyright 2009 University of Helsinki
Licensed under the Apache License, Version 2.0 (the "License");
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License.
limitations under the License.
*/
*/
/*
/*
...
@@ -165,7 +165,7 @@ void
...
@@ -165,7 +165,7 @@ void
do_suggest
(
ZHfstOspeller
&
speller
,
const
std
::
string
&
str
)
do_suggest
(
ZHfstOspeller
&
speller
,
const
std
::
string
&
str
)
{
{
hfst_ospell
::
CorrectionQueue
corrections
=
speller
.
suggest
(
str
);
hfst_ospell
::
CorrectionQueue
corrections
=
speller
.
suggest
(
str
);
if
(
corrections
.
size
()
>
0
)
if
(
corrections
.
size
()
>
0
)
{
{
hfst_fprintf
(
stdout
,
"Corrections for
\"
%s
\"
:
\n
"
,
str
.
c_str
());
hfst_fprintf
(
stdout
,
"Corrections for
\"
%s
\"
:
\n
"
,
str
.
c_str
());
while
(
corrections
.
size
()
>
0
)
while
(
corrections
.
size
()
>
0
)
...
@@ -181,7 +181,7 @@ do_suggest(ZHfstOspeller& speller, const std::string& str)
...
@@ -181,7 +181,7 @@ do_suggest(ZHfstOspeller& speller, const std::string& str)
std
::
string
::
npos
)
std
::
string
::
npos
)
{
{
hfst_fprintf
(
stdout
,
"%s %f %s "
hfst_fprintf
(
stdout
,
"%s %f %s "
"[DISCARDED BY ANALYSES]
\n
"
,
"[DISCARDED BY ANALYSES]
\n
"
,
corr
.
c_str
(),
corrections
.
top
().
second
,
corr
.
c_str
(),
corrections
.
top
().
second
,
anals
.
top
().
first
.
c_str
());
anals
.
top
().
first
.
c_str
());
}
}
...
@@ -203,8 +203,8 @@ do_suggest(ZHfstOspeller& speller, const std::string& str)
...
@@ -203,8 +203,8 @@ do_suggest(ZHfstOspeller& speller, const std::string& str)
}
}
else
else
{
{
hfst_fprintf
(
stdout
,
"%s %f
\n
"
,
hfst_fprintf
(
stdout
,
"%s %f
\n
"
,
corr
.
c_str
(),
corr
.
c_str
(),
corrections
.
top
().
second
);
corrections
.
top
().
second
);
}
}
corrections
.
pop
();
corrections
.
pop
();
...
@@ -222,7 +222,7 @@ do_suggest(ZHfstOspeller& speller, const std::string& str)
...
@@ -222,7 +222,7 @@ do_suggest(ZHfstOspeller& speller, const std::string& str)
void
void
do_spell
(
ZHfstOspeller
&
speller
,
const
std
::
string
&
str
)
do_spell
(
ZHfstOspeller
&
speller
,
const
std
::
string
&
str
)
{
{
if
(
speller
.
spell
(
str
))
if
(
speller
.
spell
(
str
))
{
{
hfst_fprintf
(
stdout
,
"
\"
%s
\"
is in the lexicon...
\n
"
,
hfst_fprintf
(
stdout
,
"
\"
%s
\"
is in the lexicon...
\n
"
,
str
.
c_str
());
str
.
c_str
());
...
@@ -251,7 +251,7 @@ do_spell(ZHfstOspeller& speller, const std::string& str)
...
@@ -251,7 +251,7 @@ do_spell(ZHfstOspeller& speller, const std::string& str)
}
}
if
(
all_no_spell
)
if
(
all_no_spell
)
{
{
hfst_fprintf
(
stdout
,
hfst_fprintf
(
stdout
,
"All spellings were invalidated by analysis! "
"All spellings were invalidated by analysis! "
".:. Not in lexicon!
\n
"
);
".:. Not in lexicon!
\n
"
);
}
}
...
@@ -281,43 +281,33 @@ zhfst_spell(char* zhfst_filename)
...
@@ -281,43 +281,33 @@ zhfst_spell(char* zhfst_filename)
{
{
speller
.
read_zhfst
(
zhfst_filename
);
speller
.
read_zhfst
(
zhfst_filename
);
}
}
catch
(
hfst_ospell
::
ZHfstMetaDataParsingError
zhmdpe
)
catch
(
hfst_ospell
::
ZHfstMetaDataParsingError
&
zhmdpe
)
{
{
hfst_fprintf
(
stderr
,
"cannot finish reading zhfst archive %s:
\n
%s.
\n
"
,
hfst_fprintf
(
stderr
,
"cannot finish reading zhfst archive %s:
\n
%s.
\n
"
,
zhfst_filename
,
zhmdpe
.
what
());
zhfst_filename
,
zhmdpe
.
what
());
//std::cerr << "cannot finish reading zhfst archive " << zhfst_filename <<
// ":\n" << zhmdpe.what() << "." << std::endl;
return
EXIT_FAILURE
;
return
EXIT_FAILURE
;
}
}
catch
(
hfst_ospell
::
ZHfstZipReadingError
zhzre
)
catch
(
hfst_ospell
::
ZHfstZipReadingError
&
zhzre
)
{
{
//std::cerr << "cannot read zhfst archive " << zhfst_filename << ":\n"
hfst_fprintf
(
stderr
,
// << zhzre.what() << "." << std::endl
// << "trying to read as legacy automata directory" << std::endl;
hfst_fprintf
(
stderr
,
"cannot read zhfst archive %s:
\n
"
"cannot read zhfst archive %s:
\n
"
"%s.
\n
"
,
"%s.
\n
"
,
zhfst_filename
,
zhzre
.
what
());
zhfst_filename
,
zhzre
.
what
());
return
EXIT_FAILURE
;
return
EXIT_FAILURE
;
}
}
catch
(
hfst_ospell
::
ZHfstXmlParsingError
zhxpe
)
catch
(
hfst_ospell
::
ZHfstXmlParsingError
&
zhxpe
)
{
{
//std::cerr << "Cannot finish reading index.xml from "
hfst_fprintf
(
stderr
,
// << zhfst_filename << ":" << std::endl
// << zhxpe.what() << "." << std::endl;
hfst_fprintf
(
stderr
,
"Cannot finish reading index.xml from %s:
\n
"
"Cannot finish reading index.xml from %s:
\n
"
"%s.
\n
"
,
"%s.
\n
"
,
zhfst_filename
,
zhxpe
.
what
());
zhfst_filename
,
zhxpe
.
what
());
return
EXIT_FAILURE
;
return
EXIT_FAILURE
;
}
}
if
(
verbose
)
if
(
verbose
)
{
{
//std::cout << "Following metadata was read from ZHFST archive:" << std::endl
hfst_fprintf
(
stdout
,
// << speller.metadata_dump() << std::endl;
hfst_fprintf
(
stdout
,
"Following metadata was read from ZHFST archive:
\n
"
"Following metadata was read from ZHFST archive:
\n
"
"%s
\n
"
,
"%s
\n
"
,
speller
.
metadata_dump
().
c_str
());
speller
.
metadata_dump
().
c_str
());
}
}
speller
.
set_queue_limit
(
suggs
);
speller
.
set_queue_limit
(
suggs
);
...
@@ -354,7 +344,7 @@ zhfst_spell(char* zhfst_filename)
...
@@ -354,7 +344,7 @@ zhfst_spell(char* zhfst_filename)
std
::
string
linestr
=
wide_string_to_string
(
wstr
);
std
::
string
linestr
=
wide_string_to_string
(
wstr
);
free
(
str
);
free
(
str
);
str
=
strdup
(
linestr
.
c_str
());
str
=
strdup
(
linestr
.
c_str
());
#else
#else
while
(
!
std
::
cin
.
eof
())
{
while
(
!
std
::
cin
.
eof
())
{
std
::
cin
.
getline
(
str
,
2000
);
std
::
cin
.
getline
(
str
,
2000
);
#endif
#endif
...
@@ -398,7 +388,7 @@ int
...
@@ -398,7 +388,7 @@ int
hfst_fprintf
(
stdout
,
"Not printing suggestions worse than best by margin %f
\n
"
,
suggs
);
hfst_fprintf
(
stdout
,
"Not printing suggestions worse than best by margin %f
\n
"
,
suggs
);
}
}
char
*
str
=
(
char
*
)
malloc
(
2000
);
char
*
str
=
(
char
*
)
malloc
(
2000
);
#ifdef WINDOWS
#ifdef WINDOWS
SetConsoleCP
(
65001
);
SetConsoleCP
(
65001
);
const
HANDLE
stdIn
=
GetStdHandle
(
STD_INPUT_HANDLE
);
const
HANDLE
stdIn
=
GetStdHandle
(
STD_INPUT_HANDLE
);
...
@@ -410,7 +400,7 @@ int
...
@@ -410,7 +400,7 @@ int
std
::
string
linestr
=
wide_string_to_string
(
wstr
);
std
::
string
linestr
=
wide_string_to_string
(
wstr
);
free
(
str
);
free
(
str
);
str
=
strdup
(
linestr
.
c_str
());
str
=
strdup
(
linestr
.
c_str
());
#else
#else
while
(
!
std
::
cin
.
eof
())
{
while
(
!
std
::
cin
.
eof
())
{
std
::
cin
.
getline
(
str
,
2000
);
std
::
cin
.
getline
(
str
,
2000
);
#endif
#endif
...
@@ -435,11 +425,11 @@ int
...
@@ -435,11 +425,11 @@ int
int
main
(
int
argc
,
char
**
argv
)
int
main
(
int
argc
,
char
**
argv
)
{
{
#if HAVE_GETOPT_H
int
c
;
int
c
;
//std::locale::global(std::locale(""));
//std::locale::global(std::locale(""));
#if HAVE_GETOPT_H
while
(
true
)
{
while
(
true
)
{
static
struct
option
long_options
[]
=
static
struct
option
long_options
[]
=
{
{
...
@@ -463,7 +453,7 @@ int main(int argc, char **argv)
...
@@ -463,7 +453,7 @@ int main(int argc, char **argv)
#endif
#endif
{
0
,
0
,
0
,
0
}
{
0
,
0
,
0
,
0
}
};
};
int
option_index
=
0
;
int
option_index
=
0
;
c
=
getopt_long
(
argc
,
argv
,
"hVvqsan:w:b:t:SXm:l:k"
,
long_options
,
&
option_index
);
c
=
getopt_long
(
argc
,
argv
,
"hVvqsan:w:b:t:SXm:l:k"
,
long_options
,
&
option_index
);
char
*
endptr
=
0
;
char
*
endptr
=
0
;
...
@@ -476,17 +466,17 @@ int main(int argc, char **argv)
...
@@ -476,17 +466,17 @@ int main(int argc, char **argv)
print_usage
();
print_usage
();
return
EXIT_SUCCESS
;
return
EXIT_SUCCESS
;
break
;
break
;
case
'V'
:
case
'V'
:
print_version
();
print_version
();
return
EXIT_SUCCESS
;
return
EXIT_SUCCESS
;
break
;
break
;
case
'v'
:
case
'v'
:
verbose
=
true
;
verbose
=
true
;
quiet
=
false
;
quiet
=
false
;
break
;
break
;
case
'q'
:
// fallthrough
case
'q'
:
// fallthrough
case
's'
:
case
's'
:
quiet
=
true
;
quiet
=
true
;
...
@@ -550,7 +540,7 @@ int main(int argc, char **argv)
...
@@ -550,7 +540,7 @@ int main(int argc, char **argv)
case
'k'
:
case
'k'
:
output_to_console
=
true
;
output_to_console
=
true
;
break
;
break
;
#endif
#endif
case
'S'
:
case
'S'
:
suggest
=
true
;
suggest
=
true
;
break
;
break
;
...
...
This diff is collapsed.
Click to expand it.
office.cc
+
137
−
57
View file @
47508373
...
@@ -21,16 +21,16 @@
...
@@ -21,16 +21,16 @@
*/
*/
/*
/*
Tests up to
16
variations of each input token:
Tests up to
8
variations of each input token:
- Verbatim
- Verbatim
- With leading non-alphanumerics removed
- With leading non-alphanumerics removed
- With trailing non-alphanumerics removed
- With trailing non-alphanumerics removed
- With leading and trailing non-alphanumerics removed
- With leading and trailing non-alphanumerics removed
- Lower-case of all the above
- First-lower of all the above
- First-upper of all the above
*/
*/
#include
<iostream>
#include
<iostream>
#include
<iomanip>
#include
<fstream>
#include
<fstream>
#include
<vector>
#include
<vector>
#include
<string>
#include
<string>
...
@@ -42,6 +42,7 @@
...
@@ -42,6 +42,7 @@
#include
<cmath>
#include
<cmath>
#include
<cerrno>
#include
<cerrno>
#include
<cctype>
#include
<cctype>
#include
<getopt.h>
#define U_CHARSET_IS_UTF8 1
#define U_CHARSET_IS_UTF8 1
#include
<unicode/uclean.h>
#include
<unicode/uclean.h>
...
@@ -64,13 +65,18 @@ struct word_t {
...
@@ -64,13 +65,18 @@ struct word_t {
UnicodeString
buffer
;
UnicodeString
buffer
;
};
};
std
::
vector
<
word_t
>
words
(
16
);
std
::
vector
<
word_t
>
words
(
16
);
std
::
string
buffer
;
std
::
string
buffer
,
wbuf
;
std
::
vector
<
std
::
string
>
alts
;
using
Alt
=
std
::
pair
<
double
,
std
::
string
>
;
std
::
vector
<
Alt
>
alts
;
std
::
unordered_set
<
std
::
string
>
outputs
;
std
::
unordered_set
<
std
::
string
>
outputs
;
UnicodeString
ubuffer
,
uc_buffer
;
UnicodeString
ubuffer
,
uc_buffer
;
size_t
cw
;
size_t
cw
;
bool
verbatim
=
false
;
bool
verbatim
=
false
;
bool
debug
=
false
;
hfst_ospell
::
Weight
max_weight
=
-
1.0
;
hfst_ospell
::
Weight
beam
=
-
1.0
;
float
time_cutoff
=
6.0
;
bool
uc_first
=
false
;
bool
uc_first
=
false
;
bool
uc_all
=
true
;
bool
uc_all
=
true
;
...
@@ -82,14 +88,18 @@ bool find_alternatives(ZHfstOspeller& speller, size_t suggs) {
...
@@ -82,14 +88,18 @@ bool find_alternatives(ZHfstOspeller& speller, size_t suggs) {
for
(
size_t
k
=
0
;
k
<
cw
&&
alts
.
size
()
<
suggs
;
++
k
)
{
for
(
size_t
k
=
0
;
k
<
cw
&&
alts
.
size
()
<
suggs
;
++
k
)
{
buffer
.
clear
();
buffer
.
clear
();
words
[
k
].
buffer
.
toUTF8String
(
buffer
);
words
[
k
].
buffer
.
toUTF8String
(
buffer
);
hfst_ospell
::
CorrectionQueue
corrections
=
speller
.
suggest
(
buffer
);
auto
corrections
=
speller
.
suggest
(
buffer
);
if
(
corrections
.
size
()
==
0
)
{
if
(
corrections
.
size
()
==
0
)
{
continue
;
continue
;
}
}
// Because speller.set_queue_limit() doesn't actually work, hard limit it here
for
(
size_t
i
=
0
,
e
=
corrections
.
size
()
;
i
<
e
;
++
i
)
{
for
(
size_t
i
=
0
,
e
=
corrections
.
size
()
;
i
<
e
&&
alts
.
size
()
<
suggs
;
++
i
)
{
// Work around https://github.com/hfst/hfst-ospell/issues/54
if
(
max_weight
>
0.0
&&
corrections
.
top
().
second
>
max_weight
)
{
break
;
}
auto
w
=
corrections
.
top
().
second
*
(
1.0
+
k
/
10.0
);
buffer
.
clear
();
buffer
.
clear
();
if
(
k
!=
0
)
{
if
(
k
!=
0
)
{
...
@@ -112,8 +122,18 @@ bool find_alternatives(ZHfstOspeller& speller, size_t suggs) {
...
@@ -112,8 +122,18 @@ bool find_alternatives(ZHfstOspeller& speller, size_t suggs) {
words
[
0
].
buffer
.
tempSubString
(
words
[
k
].
start
+
words
[
k
].
count
).
toUTF8String
(
buffer
);
words
[
0
].
buffer
.
tempSubString
(
words
[
k
].
start
+
words
[
k
].
count
).
toUTF8String
(
buffer
);
}
}
if
(
debug
)
{
wbuf
.
resize
(
64
);
wbuf
.
resize
(
sprintf
(
&
wbuf
[
0
],
" (%.2f;%zu)"
,
corrections
.
top
().
second
,
k
));
buffer
+=
wbuf
;
}
if
(
outputs
.
count
(
buffer
)
==
0
)
{
if
(
outputs
.
count
(
buffer
)
==
0
)
{
alts
.
push_back
(
buffer
);
alts
.
push_back
({
w
,
buffer
});
std
::
sort
(
alts
.
begin
(),
alts
.
end
());
while
(
alts
.
size
()
>
suggs
)
{
alts
.
pop_back
();
}
}
}
outputs
.
insert
(
buffer
);
outputs
.
insert
(
buffer
);
corrections
.
pop
();
corrections
.
pop
();
...
@@ -123,7 +143,7 @@ bool find_alternatives(ZHfstOspeller& speller, size_t suggs) {
...
@@ -123,7 +143,7 @@ bool find_alternatives(ZHfstOspeller& speller, size_t suggs) {
if
(
!
alts
.
empty
())
{
if
(
!
alts
.
empty
())
{
std
::
cout
<<
"&"
;
std
::
cout
<<
"&"
;
for
(
auto
&
alt
:
alts
)
{
for
(
auto
&
alt
:
alts
)
{
std
::
cout
<<
"
\t
"
<<
alt
;
std
::
cout
<<
"
\t
"
<<
alt
.
second
;
}
}
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
return
true
;
return
true
;
...
@@ -167,7 +187,7 @@ bool is_valid_word(ZHfstOspeller& speller, const std::string& word, size_t suggs
...
@@ -167,7 +187,7 @@ bool is_valid_word(ZHfstOspeller& speller, const std::string& word, size_t suggs
}
}
size_t
ichStart
=
0
,
cchUse
=
ubuffer
.
length
();
size_t
ichStart
=
0
,
cchUse
=
ubuffer
.
length
();
const
UChar
*
pwsz
=
ubuffer
.
getTerminatedBuffer
();
auto
pwsz
=
ubuffer
.
getTerminatedBuffer
();
// Always test the full given input
// Always test the full given input
words
[
0
].
buffer
.
remove
();
words
[
0
].
buffer
.
remove
();
...
@@ -216,7 +236,7 @@ bool is_valid_word(ZHfstOspeller& speller, const std::string& word, size_t suggs
...
@@ -216,7 +236,7 @@ bool is_valid_word(ZHfstOspeller& speller, const std::string& word, size_t suggs
for
(
size_t
i
=
0
,
e
=
cw
;
i
<
e
;
++
i
)
{
for
(
size_t
i
=
0
,
e
=
cw
;
i
<
e
;
++
i
)
{
// If we are looking for suggestions, don't use the cache
// If we are looking for suggestions, don't use the cache
valid_words_t
::
iter
ato
r
it
=
suggs
?
valid_words
.
end
()
:
valid_words
.
find
(
words
[
i
].
buffer
);
a
u
to
it
=
suggs
?
valid_words
.
end
()
:
valid_words
.
find
(
words
[
i
].
buffer
);
if
(
it
==
valid_words
.
end
())
{
if
(
it
==
valid_words
.
end
())
{
buffer
.
clear
();
buffer
.
clear
();
...
@@ -224,49 +244,21 @@ bool is_valid_word(ZHfstOspeller& speller, const std::string& word, size_t suggs
...
@@ -224,49 +244,21 @@ bool is_valid_word(ZHfstOspeller& speller, const std::string& word, size_t suggs
bool
valid
=
speller
.
spell
(
buffer
);
bool
valid
=
speller
.
spell
(
buffer
);
it
=
valid_words
.
insert
(
std
::
make_pair
(
words
[
i
].
buffer
,
valid
)).
first
;
it
=
valid_words
.
insert
(
std
::
make_pair
(
words
[
i
].
buffer
,
valid
)).
first
;
if
(
!
valid
&&
!
verbatim
)
{
if
(
!
valid
&&
!
verbatim
&&
uc_first
)
{
// If the word was not valid, fold it to lower case and try again
// If the word was not valid, try a first-lower variant
buffer
.
clear
();
ubuffer
=
words
[
i
].
buffer
;
ubuffer
.
toLower
();
ubuffer
.
toUTF8String
(
buffer
);
// Add the lower case variant to the list so that we get suggestions using that, if need be
words
[
cw
].
start
=
words
[
i
].
start
;
words
[
cw
].
count
=
words
[
i
].
count
;
words
[
cw
].
buffer
=
ubuffer
;
++
cw
;
// Don't try again if the lower cased variant has already been tried
valid_words_t
::
iterator
itl
=
suggs
?
valid_words
.
end
()
:
valid_words
.
find
(
ubuffer
);
if
(
itl
!=
valid_words
.
end
())
{
it
->
second
=
itl
->
second
;
it
=
itl
;
}
else
{
valid
=
speller
.
spell
(
buffer
);
it
->
second
=
valid
;
// Also mark the original mixed case variant as whatever the lower cased one was
it
=
valid_words
.
insert
(
std
::
make_pair
(
words
[
i
].
buffer
,
valid
)).
first
;
}
}
if
(
!
valid
&&
!
verbatim
&&
(
uc_all
||
uc_first
))
{
// If the word was still not valid but had upper case, try a first-upper variant
buffer
.
clear
();
buffer
.
clear
();
ubuffer
.
setTo
(
words
[
i
].
buffer
,
0
,
1
);
ubuffer
.
setTo
(
words
[
i
].
buffer
,
0
,
1
);
ubuffer
.
toUpper
();
ubuffer
.
toLower
();
uc_buffer
.
setTo
(
words
[
i
].
buffer
,
1
);
ubuffer
.
append
(
words
[
i
].
buffer
,
1
,
words
[
i
].
buffer
.
length
()
-
1
);
uc_buffer
.
toLower
();
ubuffer
.
append
(
uc_buffer
);
ubuffer
.
toUTF8String
(
buffer
);
ubuffer
.
toUTF8String
(
buffer
);
// Add the first-
upper
variant to the list so that we get suggestions using that, if need be
// Add the first-
lower case
variant to the list so that we get suggestions using that, if need be
words
[
cw
].
start
=
words
[
i
].
start
;
words
[
cw
].
start
=
words
[
i
].
start
;
words
[
cw
].
count
=
words
[
i
].
count
;
words
[
cw
].
count
=
words
[
i
].
count
;
words
[
cw
].
buffer
=
ubuffer
;
words
[
cw
].
buffer
=
ubuffer
;
++
cw
;
++
cw
;
// Don't try again if the first-
upp
er variant has already been tried
// Don't try again if the first-
low
er variant has already been tried
valid_words_t
::
iterator
itl
=
suggs
?
valid_words
.
end
()
:
valid_words
.
find
(
ubuffer
);
valid_words_t
::
iterator
itl
=
suggs
?
valid_words
.
end
()
:
valid_words
.
find
(
ubuffer
);
if
(
itl
!=
valid_words
.
end
())
{
if
(
itl
!=
valid_words
.
end
())
{
it
->
second
=
itl
->
second
;
it
->
second
=
itl
->
second
;
...
@@ -274,7 +266,7 @@ bool is_valid_word(ZHfstOspeller& speller, const std::string& word, size_t suggs
...
@@ -274,7 +266,7 @@ bool is_valid_word(ZHfstOspeller& speller, const std::string& word, size_t suggs
}
}
else
{
else
{
valid
=
speller
.
spell
(
buffer
);
valid
=
speller
.
spell
(
buffer
);
it
->
second
=
valid
;
// Also mark the original mixed case variant as whatever the first-
upp
er one was
it
->
second
=
valid
;
// Also mark the original mixed case variant as whatever the first-
low
er one was
it
=
valid_words
.
insert
(
std
::
make_pair
(
words
[
i
].
buffer
,
valid
)).
first
;
it
=
valid_words
.
insert
(
std
::
make_pair
(
words
[
i
].
buffer
,
valid
)).
first
;
}
}
}
}
...
@@ -291,8 +283,13 @@ bool is_valid_word(ZHfstOspeller& speller, const std::string& word, size_t suggs
...
@@ -291,8 +283,13 @@ bool is_valid_word(ZHfstOspeller& speller, const std::string& word, size_t suggs
int
zhfst_spell
(
const
char
*
zhfst_filename
)
{
int
zhfst_spell
(
const
char
*
zhfst_filename
)
{
ZHfstOspeller
speller
;
ZHfstOspeller
speller
;
try
{
try
{
if
(
debug
)
{
std
::
cout
<<
"@@ Loading "
<<
zhfst_filename
<<
" with args max-weight="
<<
max_weight
<<
", beam="
<<
beam
<<
", time-cutoff="
<<
time_cutoff
<<
std
::
endl
;
}
speller
.
read_zhfst
(
zhfst_filename
);
speller
.
read_zhfst
(
zhfst_filename
);
speller
.
set_time_cutoff
(
6.0
);
speller
.
set_weight_limit
(
max_weight
);
speller
.
set_beam
(
beam
);
speller
.
set_time_cutoff
(
time_cutoff
);
}
}
catch
(
hfst_ospell
::
ZHfstMetaDataParsingError
zhmdpe
)
{
catch
(
hfst_ospell
::
ZHfstMetaDataParsingError
zhmdpe
)
{
fprintf
(
stderr
,
"cannot finish reading zhfst archive %s:
\n
%s.
\n
"
,
zhfst_filename
,
zhmdpe
.
what
());
fprintf
(
stderr
,
"cannot finish reading zhfst archive %s:
\n
%s.
\n
"
,
zhfst_filename
,
zhmdpe
.
what
());
...
@@ -319,6 +316,38 @@ int zhfst_spell(const char* zhfst_filename) {
...
@@ -319,6 +316,38 @@ int zhfst_spell(const char* zhfst_filename) {
if
(
line
.
empty
())
{
if
(
line
.
empty
())
{
continue
;
continue
;
}
}
if
(
line
.
size
()
>=
5
&&
line
[
0
]
==
'$'
&&
line
[
1
]
==
'$'
&&
line
[
3
]
==
' '
)
{
if
(
line
[
2
]
==
'd'
&&
isdigit
(
line
[
4
])
&&
line
.
size
()
==
5
)
{
debug
=
(
line
[
4
]
!=
'0'
);
std
::
cout
<<
"@@ Option debug changed to "
<<
debug
<<
std
::
endl
;
continue
;
}
if
(
line
[
2
]
==
'T'
&&
isdigit
(
line
[
4
])
&&
line
.
size
()
==
5
)
{
verbatim
=
(
line
[
4
]
!=
'0'
);
std
::
cout
<<
"@@ Option verbatim changed to "
<<
verbatim
<<
std
::
endl
;
continue
;
}
if
(
line
[
2
]
==
'w'
&&
isdigit
(
line
[
4
]))
{
max_weight
=
std
::
stof
(
&
line
[
4
]);
speller
.
set_weight_limit
(
max_weight
);
std
::
cout
<<
"@@ Option max-weight changed to "
<<
max_weight
<<
std
::
endl
;
continue
;
}
if
(
line
[
2
]
==
'b'
&&
isdigit
(
line
[
4
]))
{
beam
=
std
::
stof
(
&
line
[
4
]);
speller
.
set_beam
(
beam
);
std
::
cout
<<
"@@ Option beam changed to "
<<
beam
<<
std
::
endl
;
continue
;
}
if
(
line
[
2
]
==
't'
&&
isdigit
(
line
[
4
]))
{
time_cutoff
=
std
::
stof
(
&
line
[
4
]);
speller
.
set_time_cutoff
(
time_cutoff
);
std
::
cout
<<
"@@ Option time-cutoff changed to "
<<
time_cutoff
<<
std
::
endl
;
continue
;
}
}
// Just in case anyone decides to use the speller for a minor eternity
// Just in case anyone decides to use the speller for a minor eternity
if
(
valid_words
.
size
()
>
20480
)
{
if
(
valid_words
.
size
()
>
20480
)
{
valid_words
.
clear
();
valid_words
.
clear
();
...
@@ -345,6 +374,19 @@ int zhfst_spell(const char* zhfst_filename) {
...
@@ -345,6 +374,19 @@ int zhfst_spell(const char* zhfst_filename) {
return
EXIT_SUCCESS
;
return
EXIT_SUCCESS
;
}
}
void
print_help
()
{
std
::
cout
<<
"Usage: hfst-ospell [options] zhfst-archive
\n
"
<<
"
\n
"
<<
" -h, --help Shows this help
\n
"
<<
" -d, --debug Debug output with weights attached to results
\n
"
<<
" -T, --verbatim Disables case-folding and non-alphanumeric trimming
\n
"
<<
" -w, --max-weight=W Suppress corrections with weights above W
\n
"
<<
" -b, --beam=W Suppress corrections worse than best candidate by more than W
\n
"
<<
" -t, --time-cutoff=T Stop trying to find better corrections after T seconds; defaults to 6.0
\n
"
<<
std
::
flush
;
}
int
main
(
int
argc
,
char
**
argv
)
{
int
main
(
int
argc
,
char
**
argv
)
{
UErrorCode
status
=
U_ZERO_ERROR
;
UErrorCode
status
=
U_ZERO_ERROR
;
u_init
(
&
status
);
u_init
(
&
status
);
...
@@ -356,22 +398,60 @@ int main(int argc, char **argv) {
...
@@ -356,22 +398,60 @@ int main(int argc, char **argv) {
ucnv_setDefaultName
(
"UTF-8"
);
ucnv_setDefaultName
(
"UTF-8"
);
uloc_setDefault
(
"en_US_POSIX"
,
&
status
);
uloc_setDefault
(
"en_US_POSIX"
,
&
status
);
std
::
vector
<
std
::
string
>
args
(
argv
,
argv
+
argc
);
struct
option
long_options
[]
=
for
(
std
::
vector
<
std
::
string
>::
iterator
it
=
args
.
begin
()
;
it
!=
args
.
end
()
;
)
{
{
if
(
*
it
==
"--verbatim"
)
{
{
"help"
,
no_argument
,
0
,
'h'
},
verbatim
=
true
;
{
"debug"
,
no_argument
,
0
,
'd'
},
it
=
args
.
erase
(
it
);
{
"verbatim"
,
no_argument
,
0
,
'T'
},
{
"max-weight"
,
required_argument
,
0
,
'w'
},
{
"beam"
,
required_argument
,
0
,
'b'
},
{
"time-cutoff"
,
required_argument
,
0
,
't'
},
{
0
,
0
,
0
,
0
}
};
int
c
=
0
;
while
(
true
)
{
int
option_index
=
0
;
c
=
getopt_long
(
argc
,
argv
,
"hdTw:b:t:"
,
long_options
,
&
option_index
);
if
(
c
==
-
1
)
{
break
;
}
}
else
{
++
it
;
switch
(
c
)
{
case
'h'
:
print_help
();
return
EXIT_SUCCESS
;
case
'd'
:
debug
=
true
;
break
;
case
'T'
:
verbatim
=
true
;
break
;
case
'w'
:
max_weight
=
std
::
stof
(
optarg
);
break
;
case
'b'
:
beam
=
std
::
stof
(
optarg
);
break
;
case
't'
:
time_cutoff
=
std
::
stof
(
optarg
);
break
;
}
}
}
}
if
(
args
.
size
()
<
2
)
{
if
(
optind
>=
argc
)
{
throw
std
::
invalid_argument
(
"Must pass a zhfst as argument"
);
throw
std
::
invalid_argument
(
"Must pass a zhfst as argument"
);
}
}
int
rv
=
zhfst_spell
(
args
[
1
].
c_str
());
std
::
cerr
<<
std
::
fixed
<<
std
::
setprecision
(
2
);
std
::
cout
<<
std
::
fixed
<<
std
::
setprecision
(
2
);
int
rv
=
zhfst_spell
(
argv
[
optind
]);
u_cleanup
();
u_cleanup
();
return
rv
;
return
rv
;
...
...
This diff is collapsed.
Click to expand it.
ol-exceptions.h
+
5
−
4
View file @
47508373
...
@@ -4,6 +4,7 @@
...
@@ -4,6 +4,7 @@
#include
"hfstol-stdafx.h"
#include
"hfstol-stdafx.h"
#include
<string>
#include
<string>
#include
<sstream>
#include
<sstream>
#include
<cstring>
namespace
hfst_ospell
namespace
hfst_ospell
{
{
...
@@ -21,7 +22,7 @@ struct OspellException
...
@@ -21,7 +22,7 @@ struct OspellException
size_t
line
;
//!< line number of exception
size_t
line
;
//!< line number of exception
OspellException
(
void
)
{}
OspellException
(
void
)
{}
//!
//!
//! construct exception with name, file and location
//! construct exception with name, file and location
OspellException
(
const
std
::
string
&
name
,
const
std
::
string
&
file
,
size_t
line
)
:
OspellException
(
const
std
::
string
&
name
,
const
std
::
string
&
file
,
size_t
line
)
:
...
@@ -29,7 +30,7 @@ struct OspellException
...
@@ -29,7 +30,7 @@ struct OspellException
file
(
file
),
file
(
file
),
line
(
line
)
line
(
line
)
{}
{}
//!
//!
//! create string representation of exception for output
//! create string representation of exception for output
std
::
string
operator
()
(
void
)
const
std
::
string
operator
()
(
void
)
const
...
@@ -45,7 +46,7 @@ struct OspellException
...
@@ -45,7 +46,7 @@ struct OspellException
{
{
std
::
ostringstream
o
;
std
::
ostringstream
o
;
o
<<
file
<<
":"
<<
line
<<
":"
<<
name
;
o
<<
file
<<
":"
<<
line
<<
":"
<<
name
;
return
o
.
str
().
c_str
();
return
strdup
(
o
.
str
().
c_str
()
)
;
}
}
};
};
...
@@ -59,7 +60,7 @@ struct OspellException
...
@@ -59,7 +60,7 @@ struct OspellException
#define HFSTOSPELL_EXCEPTION_CHILD_DECLARATION(CHILD) \
#define HFSTOSPELL_EXCEPTION_CHILD_DECLARATION(CHILD) \
struct CHILD : public OspellException \
struct CHILD : public OspellException \
{ CHILD(const std::string &name,const std::string &file,size_t line):\
{ CHILD(const std::string &name,const std::string &file,size_t line):\
OspellException(name,file,line) {}}
OspellException(name,file,line) {}}
#define HFST_CATCH(E) \
#define HFST_CATCH(E) \
catch (const E &e) \
catch (const E &e) \
...
...
This diff is collapsed.
Click to expand it.
ospell.cc
+
33
−
25
View file @
47508373
...
@@ -152,31 +152,31 @@ TreeNode TreeNode::update(SymbolNumber symbol,
...
@@ -152,31 +152,31 @@ TreeNode TreeNode::update(SymbolNumber symbol,
bool
TreeNode
::
try_compatible_with
(
FlagDiacriticOperation
op
)
bool
TreeNode
::
try_compatible_with
(
FlagDiacriticOperation
op
)
{
{
switch
(
op
.
Operation
())
{
switch
(
op
.
Operation
())
{
case
P
:
// positive set
case
P
:
// positive set
flag_state
[
op
.
Feature
()]
=
op
.
Value
();
flag_state
[
op
.
Feature
()]
=
op
.
Value
();
return
true
;
return
true
;
case
N
:
// negative set (literally, in this implementation)
case
N
:
// negative set (literally, in this implementation)
flag_state
[
op
.
Feature
()]
=
-
1
*
op
.
Value
();
flag_state
[
op
.
Feature
()]
=
-
1
*
op
.
Value
();
return
true
;
return
true
;
case
R
:
// require
case
R
:
// require
if
(
op
.
Value
()
==
0
)
{
// "plain" require, return false if unset
if
(
op
.
Value
()
==
0
)
{
// "plain" require, return false if unset
return
(
flag_state
[
op
.
Feature
()]
!=
0
);
return
(
flag_state
[
op
.
Feature
()]
!=
0
);
}
}
return
(
flag_state
[
op
.
Feature
()]
==
op
.
Value
());
return
(
flag_state
[
op
.
Feature
()]
==
op
.
Value
());
case
D
:
// disallow
case
D
:
// disallow
if
(
op
.
Value
()
==
0
)
{
// "plain" disallow, return true if unset
if
(
op
.
Value
()
==
0
)
{
// "plain" disallow, return true if unset
return
(
flag_state
[
op
.
Feature
()]
==
0
);
return
(
flag_state
[
op
.
Feature
()]
==
0
);
}
}
return
(
flag_state
[
op
.
Feature
()]
!=
op
.
Value
());
return
(
flag_state
[
op
.
Feature
()]
!=
op
.
Value
());
case
C
:
// clear
case
C
:
// clear
flag_state
[
op
.
Feature
()]
=
0
;
flag_state
[
op
.
Feature
()]
=
0
;
return
true
;
return
true
;
case
U
:
// unification
case
U
:
// unification
/* if the feature is unset OR the feature is to this value already OR
/* if the feature is unset OR the feature is to this value already OR
the feature is negatively set to something else than this value */
the feature is negatively set to something else than this value */
...
@@ -190,7 +190,7 @@ bool TreeNode::try_compatible_with(FlagDiacriticOperation op)
...
@@ -190,7 +190,7 @@ bool TreeNode::try_compatible_with(FlagDiacriticOperation op)
}
}
return
false
;
return
false
;
}
}
return
false
;
// to make the compiler happy
return
false
;
// to make the compiler happy
}
}
...
@@ -204,7 +204,11 @@ Speller::Speller(Transducer* mutator_ptr, Transducer* lexicon_ptr):
...
@@ -204,7 +204,11 @@ Speller::Speller(Transducer* mutator_ptr, Transducer* lexicon_ptr):
alphabet_translator
(
SymbolVector
()),
alphabet_translator
(
SymbolVector
()),
operations
(
lexicon
->
get_operations
()),
operations
(
lexicon
->
get_operations
()),
limiting
(
None
),
limiting
(
None
),
mode
(
Correct
)
mode
(
Correct
),
max_time
(
-
1.0
),
start_clock
(
0
),
call_counter
(
0
),
limit_reached
(
false
)
{
{
if
(
mutator
!=
NULL
)
{
if
(
mutator
!=
NULL
)
{
build_alphabet_translator
();
build_alphabet_translator
();
...
@@ -228,7 +232,7 @@ void Speller::lexicon_epsilons(void)
...
@@ -228,7 +232,7 @@ void Speller::lexicon_epsilons(void)
}
}
TransitionTableIndex
next
=
lexicon
->
next
(
next_node
.
lexicon_state
,
0
);
TransitionTableIndex
next
=
lexicon
->
next
(
next_node
.
lexicon_state
,
0
);
STransition
i_s
=
lexicon
->
take_epsilons_and_flags
(
next
);
STransition
i_s
=
lexicon
->
take_epsilons_and_flags
(
next
);
while
(
i_s
.
symbol
!=
NO_SYMBOL
)
{
while
(
i_s
.
symbol
!=
NO_SYMBOL
)
{
if
(
is_under_weight_limit
(
next_node
.
weight
+
i_s
.
weight
))
{
if
(
is_under_weight_limit
(
next_node
.
weight
+
i_s
.
weight
))
{
if
(
lexicon
->
transitions
.
input_symbol
(
next
)
==
0
)
{
if
(
lexicon
->
transitions
.
input_symbol
(
next
)
==
0
)
{
...
@@ -326,7 +330,7 @@ void Speller::mutator_epsilons(void)
...
@@ -326,7 +330,7 @@ void Speller::mutator_epsilons(void)
}
}
TransitionTableIndex
next_m
=
mutator
->
next
(
next_node
.
mutator_state
,
0
);
TransitionTableIndex
next_m
=
mutator
->
next
(
next_node
.
mutator_state
,
0
);
STransition
mutator_i_s
=
mutator
->
take_epsilons
(
next_m
);
STransition
mutator_i_s
=
mutator
->
take_epsilons
(
next_m
);
while
(
mutator_i_s
.
symbol
!=
NO_SYMBOL
)
{
while
(
mutator_i_s
.
symbol
!=
NO_SYMBOL
)
{
if
(
mutator_i_s
.
symbol
==
0
)
{
if
(
mutator_i_s
.
symbol
==
0
)
{
if
(
is_under_weight_limit
(
if
(
is_under_weight_limit
(
...
@@ -460,12 +464,9 @@ bool Transducer::initialize_input_vector(SymbolVector & input_vector,
...
@@ -460,12 +464,9 @@ bool Transducer::initialize_input_vector(SymbolVector & input_vector,
char
*
line
)
char
*
line
)
{
{
input_vector
.
clear
();
input_vector
.
clear
();
SymbolNumber
k
=
NO_SYMBOL
;
char
**
inpointer
=
&
line
;
char
**
inpointer
=
&
line
;
char
*
oldpointer
;
while
(
**
inpointer
!=
'\0'
)
{
while
(
**
inpointer
!=
'\0'
)
{
oldpointer
=
*
inpointer
;
SymbolNumber
k
=
encoder
->
find_key
(
inpointer
);
k
=
encoder
->
find_key
(
inpointer
);
if
(
k
==
NO_SYMBOL
)
{
// no tokenization from alphabet
if
(
k
==
NO_SYMBOL
)
{
// no tokenization from alphabet
// for real handling of other and identity for unseen symbols,
// for real handling of other and identity for unseen symbols,
// use the Speller interface analyse()!
// use the Speller interface analyse()!
...
@@ -532,18 +533,18 @@ AnalysisQueue Transducer::lookup(char * line)
...
@@ -532,18 +533,18 @@ AnalysisQueue Transducer::lookup(char * line)
i_s
=
take_epsilons_and_flags
(
next_index
);
i_s
=
take_epsilons_and_flags
(
next_index
);
}
}
}
}
// input consumption loop
// input consumption loop
unsigned
int
input_state
=
next_node
.
input_state
;
unsigned
int
input_state
=
next_node
.
input_state
;
if
(
input_state
<
input
.
size
()
&&
if
(
input_state
<
input
.
size
()
&&
has_transitions
(
has_transitions
(
next_node
.
lexicon_state
+
1
,
input
[
input_state
]))
{
next_node
.
lexicon_state
+
1
,
input
[
input_state
]))
{
next_index
=
next
(
next_node
.
lexicon_state
,
next_index
=
next
(
next_node
.
lexicon_state
,
input
[
input_state
]);
input
[
input_state
]);
STransition
i_s
=
take_non_epsilons
(
next_index
,
STransition
i_s
=
take_non_epsilons
(
next_index
,
input
[
input_state
]);
input
[
input_state
]);
while
(
i_s
.
symbol
!=
NO_SYMBOL
)
{
while
(
i_s
.
symbol
!=
NO_SYMBOL
)
{
queue
.
push_back
(
next_node
.
update
(
queue
.
push_back
(
next_node
.
update
(
i_s
.
symbol
,
i_s
.
symbol
,
...
@@ -551,18 +552,18 @@ AnalysisQueue Transducer::lookup(char * line)
...
@@ -551,18 +552,18 @@ AnalysisQueue Transducer::lookup(char * line)
next_node
.
mutator_state
,
next_node
.
mutator_state
,
i_s
.
index
,
i_s
.
index
,
i_s
.
weight
));
i_s
.
weight
));
++
next_index
;
++
next_index
;
i_s
=
take_non_epsilons
(
next_index
,
input
[
input_state
]);
i_s
=
take_non_epsilons
(
next_index
,
input
[
input_state
]);
}
}
}
}
}
}
for
(
auto
&
it
:
outputs
)
{
for
(
auto
&
it
:
outputs
)
{
analyses
.
push
(
StringWeightPair
(
it
.
first
,
it
.
second
));
analyses
.
push
(
StringWeightPair
(
it
.
first
,
it
.
second
));
}
}
return
analyses
;
return
analyses
;
}
}
...
@@ -729,7 +730,7 @@ Weight Transducer::final_weight(const TransitionTableIndex i) const
...
@@ -729,7 +730,7 @@ Weight Transducer::final_weight(const TransitionTableIndex i) const
bool
bool
Transducer
::
is_flag
(
const
SymbolNumber
symbol
)
Transducer
::
is_flag
(
const
SymbolNumber
symbol
)
{
{
return
alphabet
.
is_flag
(
symbol
);
return
alphabet
.
is_flag
(
symbol
);
}
}
bool
bool
...
@@ -888,7 +889,7 @@ CorrectionQueue Speller::correct(char * line, int nbest,
...
@@ -888,7 +889,7 @@ CorrectionQueue Speller::correct(char * line, int nbest,
std
::
map
<
std
::
string
,
Weight
>
corrections
;
std
::
map
<
std
::
string
,
Weight
>
corrections
;
SymbolNumber
first_input
=
(
input
.
size
()
==
0
)
?
0
:
input
[
0
];
SymbolNumber
first_input
=
(
input
.
size
()
==
0
)
?
0
:
input
[
0
];
if
(
cache
[
first_input
].
empty
)
{
if
(
cache
[
first_input
].
empty
)
{
build_cache
(
first_input
);
build_cache
(
first_input
);
// XXX: cache corrupts limit!
}
}
if
(
input
.
size
()
<=
1
)
{
if
(
input
.
size
()
<=
1
)
{
// get the cached results and we're done
// get the cached results and we're done
...
@@ -908,6 +909,7 @@ CorrectionQueue Speller::correct(char * line, int nbest,
...
@@ -908,6 +909,7 @@ CorrectionQueue Speller::correct(char * line, int nbest,
}
}
}
}
}
}
set_limiting_behaviour
(
nbest
,
maxweight
,
beam
);
adjust_weight_limits
(
nbest
,
beam
);
adjust_weight_limits
(
nbest
,
beam
);
for
(
auto
&
it
:
*
results
)
{
for
(
auto
&
it
:
*
results
)
{
// Then collect the results
// Then collect the results
...
@@ -946,6 +948,7 @@ CorrectionQueue Speller::correct(char * line, int nbest,
...
@@ -946,6 +948,7 @@ CorrectionQueue Speller::correct(char * line, int nbest,
*/
*/
next_node
=
queue
.
back
();
next_node
=
queue
.
back
();
queue
.
pop_back
();
queue
.
pop_back
();
set_limiting_behaviour
(
nbest
,
maxweight
,
beam
);
// XXX: need to reset
adjust_weight_limits
(
nbest
,
beam
);
adjust_weight_limits
(
nbest
,
beam
);
// if we can't get an acceptable result, never mind
// if we can't get an acceptable result, never mind
if
(
next_node
.
weight
>
limit
)
{
if
(
next_node
.
weight
>
limit
)
{
...
@@ -1005,6 +1008,7 @@ CorrectionQueue Speller::correct(char * line, int nbest,
...
@@ -1005,6 +1008,7 @@ CorrectionQueue Speller::correct(char * line, int nbest,
}
}
}
}
}
}
//cache[first_input].clear();
return
correction_queue
;
return
correction_queue
;
}
}
...
@@ -1031,12 +1035,16 @@ void Speller::set_limiting_behaviour(int nbest, Weight maxweight, Weight beam)
...
@@ -1031,12 +1035,16 @@ void Speller::set_limiting_behaviour(int nbest, Weight maxweight, Weight beam)
limiting
=
Nbest
;
limiting
=
Nbest
;
}
else
if
(
maxweight
<
0.0
&&
nbest
==
0
&&
beam
>=
0.0
)
{
}
else
if
(
maxweight
<
0.0
&&
nbest
==
0
&&
beam
>=
0.0
)
{
limiting
=
Beam
;
limiting
=
Beam
;
}
else
{
return
;
}
}
}
}
void
Speller
::
adjust_weight_limits
(
int
nbest
,
Weight
beam
)
void
Speller
::
adjust_weight_limits
(
int
nbest
,
Weight
beam
)
{
{
if
(
limiting
==
Nbest
&&
nbest_queue
.
size
()
>=
nbest
)
{
if
(
limiting
==
MaxWeight
)
{
return
;
}
else
if
(
limiting
==
Nbest
&&
nbest_queue
.
size
()
>=
nbest
)
{
limit
=
nbest_queue
.
get_highest
();
limit
=
nbest_queue
.
get_highest
();
}
else
if
(
limiting
==
MaxWeightNbest
&&
nbest_queue
.
size
()
>=
nbest
)
{
}
else
if
(
limiting
==
MaxWeightNbest
&&
nbest_queue
.
size
()
>=
nbest
)
{
limit
=
std
::
min
(
limit
,
nbest_queue
.
get_lowest
());
limit
=
std
::
min
(
limit
,
nbest_queue
.
get_lowest
());
...
@@ -1201,7 +1209,7 @@ void Speller::add_symbol_to_alphabet_translator(SymbolNumber to_sym)
...
@@ -1201,7 +1209,7 @@ void Speller::add_symbol_to_alphabet_translator(SymbolNumber to_sym)
}
}
}
// namespace hfst_ospell
}
// namespace hfst_ospell
char
*
char
*
hfst_strndup
(
const
char
*
s
,
size_t
n
)
hfst_strndup
(
const
char
*
s
,
size_t
n
)
{
{
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment