1/*
2 * Copyright (c) 2015, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Character class in UTF-8 mode.
31 */
32
33#ifndef UTF8_COMPONENT_CLASS_H
34#define UTF8_COMPONENT_CLASS_H
35
36#include "ComponentClass.h"
37#include "ue2common.h"
38#include "util/unicode_set.h"
39
40#include <map>
41#include <set>
42#include <string>
43#include <vector>
44
45namespace ue2 {
46
47class UTF8ComponentClass : public ComponentClass {
48 friend class DumpVisitor;
49 friend class PrintVisitor;
50 friend class CaselessVisitor;
51 friend class SimplifyVisitor;
52 friend class SimplifyCandidatesVisitor;
53public:
54 explicit UTF8ComponentClass(const ParseMode &mode);
55 ~UTF8ComponentClass() override {}
56 UTF8ComponentClass *clone() const override;
57
58 Component *accept(ComponentVisitor &v) override {
59 Component *c = v.visit(this);
60 v.post(this);
61 return c;
62 }
63
64 void accept(ConstComponentVisitor &v) const override {
65 v.pre(*this);
66 v.during(*this);
67 v.post(*this);
68 }
69
70 bool class_empty(void) const override;
71 void add(PredefinedClass c, bool negative) override;
72 void add(unichar c) override;
73 void finalize(void) override;
74 void notePositions(GlushkovBuildState &bs) override;
75 void buildFollowSet(GlushkovBuildState &bs,
76 const std::vector<PositionInfo> &) override;
77 std::vector<PositionInfo> first(void) const override;
78 std::vector<PositionInfo> last(void) const override;
79
80protected:
81 void createRange(unichar to) override;
82
83private:
84 Position getHead(NFABuilder &builder, u8 first_byte);
85 void addToTail(GlushkovBuildState &bs, std::map<Position, Position> &finals,
86 Position prev, unichar b, unichar e);
87 void ensureDotTrailer(GlushkovBuildState &bs);
88 void ensureTwoDotTrailer(GlushkovBuildState &bs);
89 void ensureThreeDotTrailer(GlushkovBuildState &bs);
90 void buildOneByte(GlushkovBuildState &bs);
91 void buildTwoByte(GlushkovBuildState &bs);
92 void buildThreeByte(GlushkovBuildState &bs);
93 void buildFourByte(GlushkovBuildState &bs);
94
95 CodePointSet cps;
96
97 std::map<u8, Position> heads;
98 Position single_pos;
99 Position one_dot_trailer;
100 Position two_dot_trailer;
101 Position three_dot_trailer;
102
103 Position two_char_dot_head;
104 Position three_char_dot_head;
105 Position four_char_dot_head;
106 std::set<Position> tails;
107};
108
109PredefinedClass translateForUcpMode(PredefinedClass in, const ParseMode &mode);
110
111CodePointSet getPredefinedCodePointSet(PredefinedClass c,
112 const ParseMode &mode);
113
114} // namespace
115
116#endif // UTF8_COMPONENT_CLASS_H
117