rbbiscan.h source code [Skia/third_party/externals/icu/source/common/rbbiscan.h]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	//
4	// rbbiscan.h
5	//
6	// Copyright (C) 2002-2016, International Business Machines Corporation and others.
7	// All Rights Reserved.
8	//
9	// This file contains declarations for class RBBIRuleScanner
10	//
11
12
13	#ifndef RBBISCAN_H
14	#define RBBISCAN_H
15
16	#include "unicode/utypes.h"
17	#include "unicode/uobject.h"
18	#include "unicode/rbbi.h"
19	#include "unicode/uniset.h"
20	#include "unicode/parseerr.h"
21	#include "uhash.h"
22	#include "uvector.h"
23	#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
24	// looks up references to $variables within a set.
25	#include "rbbinode.h"
26	#include "rbbirpt.h"
27
28	U_NAMESPACE_BEGIN
29
30	class RBBIRuleBuilder;
31	class RBBISymbolTable;
32
33
34	//--------------------------------------------------------------------------------
35	//
36	// class RBBIRuleScanner does the lowest level, character-at-a-time
37	// scanning of break iterator rules.
38	//
39	// The output of the scanner is parse trees for
40	// the rule expressions and a list of all Unicode Sets
41	// encountered.
42	//
43	//--------------------------------------------------------------------------------
44
45	class RBBIRuleScanner : public UMemory {
46	public:
47
48	enum {
49	kStackSize = `100` // The size of the state stack for
50	}; // rules parsing. Corresponds roughly
51	// to the depth of parentheses nesting
52	// that is allowed in the rules.
53
54	struct RBBIRuleChar {
55	UChar32 fChar;
56	UBool fEscaped;
57	RBBIRuleChar() : fChar(`0`), fEscaped(FALSE) {}
58	};
59
60	RBBIRuleScanner(RBBIRuleBuilder *rb);
61
62
63	virtual ~RBBIRuleScanner();
64
65	void nextChar(RBBIRuleChar &c); // Get the next char from the input stream.
66	// Return false if at end.
67
68	UBool push(const RBBIRuleChar &c); // Push (unget) one character.
69	// Only a single character may be pushed.
70
71	void parse(); // Parse the rules, generating two parse
72	// trees, one each for the forward and
73	// reverse rules,
74	// and a list of UnicodeSets encountered.
75
76	/**
77	* Return a rules string without unnecessary
78	* characters.
79	*/
80	static UnicodeString stripRules(const UnicodeString &rules);
81	private:
82
83	UBool doParseActions(int32_t a);
84	void error(UErrorCode e); // error reporting convenience function.
85	void fixOpStack(RBBINode::OpPrecedence p);
86	// a character.
87	void findSetFor(const UnicodeString &s, RBBINode node, UnicodeSet setToAdopt = NULL);
88
89	UChar32 nextCharLL();
90	#ifdef RBBI_DEBUG
91	void printNodeStack(const char *title);
92	#endif
93	RBBINode *pushNewNode(RBBINode::NodeType t);
94	void scanSet();
95
96
97	RBBIRuleBuilder fRB; // The rule builder that we are part of.*
98
99	int32_t fScanIndex; // Index of current character being processed
100	// in the rule input string.
101	int32_t fNextIndex; // Index of the next character, which
102	// is the first character not yet scanned.
103	UBool fQuoteMode; // Scan is in a 'quoted region'
104	int32_t fLineNum; // Line number in input file.
105	int32_t fCharNum; // Char position within the line.
106	UChar32 fLastChar; // Previous char, needed to count CR-LF
107	// as a single line, not two.
108
109	RBBIRuleChar fC; // Current char for parse state machine
110	// processing.
111	UnicodeString fVarName; // $variableName, valid when we've just
112	// scanned one.
113
114	RBBIRuleTableEl *fStateTable; // State Transition Table for RBBI Rule*
115	// parsing. index by p[state][char-class]
116
117	uint16_t fStack[kStackSize]; // State stack, holds state pushes
118	int32_t fStackPtr; // and pops as specified in the state
119	// transition rules.
120
121	RBBINode fNodeStack[kStackSize]; // Node stack, holds nodes created*
122	// during the parse of a rule
123	int32_t fNodeStackPtr;
124
125
126	UBool fReverseRule; // True if the rule currently being scanned
127	// is a reverse direction rule (if it
128	// starts with a '!')
129
130	UBool fLookAheadRule; // True if the rule includes a '/'
131	// somewhere within it.
132
133	UBool fNoChainInRule; // True if the current rule starts with a '^'.
134
135	RBBISymbolTable fSymbolTable; // symbol table, holds definitions of*
136	// $variable symbols.
137
138	UHashtable fSetTable; // UnicocodeSet hash table, holds indexes to*
139	// the sets created while parsing rules.
140	// The key is the string used for creating
141	// the set.
142
143	UnicodeSet fRuleSets[`10`]; // Unicode Sets that are needed during
144	// the scanning of RBBI rules. The
145	// indicies for these are assigned by the
146	// perl script that builds the state tables.
147	// See rbbirpt.h.
148
149	int32_t fRuleNum; // Counts each rule as it is scanned.
150
151	int32_t fOptionStart; // Input index of start of a !!option
152	// keyword, while being scanned.
153
154	UnicodeSet *gRuleSet_rule_char;
155	UnicodeSet *gRuleSet_white_space;
156	UnicodeSet *gRuleSet_name_char;
157	UnicodeSet *gRuleSet_name_start_char;
158
159	RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class
160	RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class
161	};
162
163	U_NAMESPACE_END
164
165	#endif
166

Browse the source code of Skia/third_party/externals/icu/source/common/rbbiscan.h