1// © 2021 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#ifndef LSTMBE_H
5#define LSTMBE_H
6
7#include "unicode/utypes.h"
8
9#if !UCONFIG_NO_BREAK_ITERATION
10
11#include "unicode/uniset.h"
12#include "unicode/ures.h"
13#include "unicode/utext.h"
14#include "unicode/utypes.h"
15
16#include "brkeng.h"
17#include "dictbe.h"
18#include "uvectr32.h"
19
20U_NAMESPACE_BEGIN
21
22class Vectorizer;
23struct LSTMData;
24
25/*******************************************************************
26 * LSTMBreakEngine
27 */
28
29/**
30 * <p>LSTMBreakEngine is a kind of DictionaryBreakEngine that uses a
31 * LSTM to determine language-specific breaks.</p>
32 *
33 * <p>After it is constructed a LSTMBreakEngine may be shared between
34 * threads without synchronization.</p>
35 */
36class LSTMBreakEngine : public DictionaryBreakEngine {
37public:
38 /**
39 * <p>Constructor.</p>
40 */
41 LSTMBreakEngine(const LSTMData* data, const UnicodeSet& set, UErrorCode &status);
42
43 /**
44 * <p>Virtual destructor.</p>
45 */
46 virtual ~LSTMBreakEngine();
47
48 virtual const char16_t* name() const;
49
50protected:
51 /**
52 * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
53 *
54 * @param text A UText representing the text
55 * @param rangeStart The start of the range of dictionary characters
56 * @param rangeEnd The end of the range of dictionary characters
57 * @param foundBreaks Output of C array of int32_t break positions, or 0
58 * @param status Information on any errors encountered.
59 * @return The number of breaks found
60 */
61 virtual int32_t divideUpDictionaryRange(UText *text,
62 int32_t rangeStart,
63 int32_t rangeEnd,
64 UVector32 &foundBreaks,
65 UBool isPhraseBreaking,
66 UErrorCode& status) const override;
67private:
68 const LSTMData* fData;
69 const Vectorizer* fVectorizer;
70};
71
72U_CAPI const LanguageBreakEngine* U_EXPORT2 CreateLSTMBreakEngine(
73 UScriptCode script, const LSTMData* data, UErrorCode& status);
74
75U_CAPI const LSTMData* U_EXPORT2 CreateLSTMData(
76 UResourceBundle* rb, UErrorCode& status);
77
78U_CAPI const LSTMData* U_EXPORT2 CreateLSTMDataForScript(
79 UScriptCode script, UErrorCode& status);
80
81U_CAPI void U_EXPORT2 DeleteLSTMData(const LSTMData* data);
82U_CAPI const char16_t* U_EXPORT2 LSTMDataName(const LSTMData* data);
83
84U_NAMESPACE_END
85
86#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
87
88#endif /* LSTMBE_H */
89