1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * Copyright (C) 2007-2016, International Business Machines Corporation and |
6 | * others. All Rights Reserved. |
7 | ******************************************************************************* |
8 | * |
* File DTPTNGEN_IMPL.H
10 | * |
11 | ******************************************************************************* |
12 | */ |
13 | |
14 | #ifndef __DTPTNGEN_IMPL_H__ |
15 | #define __DTPTNGEN_IMPL_H__ |
16 | |
17 | #include "unicode/udatpg.h" |
18 | |
19 | #include "unicode/strenum.h" |
20 | #include "unicode/unistr.h" |
21 | #include "uvector.h" |
22 | |
23 | // TODO(claireho): Split off Builder class. |
24 | // TODO(claireho): If splitting off Builder class: As subclass or independent? |
25 | |
// Capacity limits. Within this header, MAX_PATTERN_ENTRIES sizes PatternMap::boot
// and MAX_DT_TOKEN sizes FormatParser::items; the remaining limits are consumed
// by the implementation file.
#define MAX_PATTERN_ENTRIES 52
#define MAX_CLDR_FIELD_LEN 60
#define MAX_DT_TOKEN 50
#define MAX_RESOURCE_FIELD 12
#define MAX_AVAILABLE_FORMATS 12
#define MAX_STRING_ENUMERATION 200

// Flag / penalty values. NOTE(review): EXTRA_FIELD and MISSING_FIELD presumably
// weight extra vs. missing fields when computing a match distance -- confirm
// against the implementation.
#define NONE 0
#define EXTRA_FIELD 0x10000
#define MISSING_FIELD 0x1000
// UTF-16 code units for the ASCII characters referenced by the pattern
// generator. Each macro expands to a UChar-typed constant.

// Punctuation and spacing.
#define SPACE          (static_cast<UChar>(0x0020))  // U+0020 SPACE
#define QUOTATION_MARK (static_cast<UChar>(0x0022))  // U+0022 QUOTATION MARK
#define SINGLE_QUOTE   (static_cast<UChar>(0x0027))  // U+0027 APOSTROPHE
#define ASTERISK       (static_cast<UChar>(0x002A))  // U+002A ASTERISK
#define PLUSSITN       (static_cast<UChar>(0x002B))  // U+002B PLUS SIGN
#define COMMA          (static_cast<UChar>(0x002C))  // U+002C COMMA
#define HYPHEN         (static_cast<UChar>(0x002D))  // U+002D HYPHEN-MINUS
#define DOT            (static_cast<UChar>(0x002E))  // U+002E FULL STOP
#define FORWARDSLASH   (static_cast<UChar>(0x002F))  // U+002F SOLIDUS
#define COLON          (static_cast<UChar>(0x003A))  // U+003A COLON
#define BACKSLASH      (static_cast<UChar>(0x005C))  // U+005C REVERSE SOLIDUS
#define LOWLINE        (static_cast<UChar>(0x005F))  // U+005F LOW LINE

// Upper-case Latin letters. Not every letter is defined here: there is no
// CAP_I, CAP_N, CAP_P or CAP_R.
#define CAP_A (static_cast<UChar>(0x0041))  // 'A'
#define CAP_B (static_cast<UChar>(0x0042))  // 'B'
#define CAP_C (static_cast<UChar>(0x0043))  // 'C'
#define CAP_D (static_cast<UChar>(0x0044))  // 'D'
#define CAP_E (static_cast<UChar>(0x0045))  // 'E'
#define CAP_F (static_cast<UChar>(0x0046))  // 'F'
#define CAP_G (static_cast<UChar>(0x0047))  // 'G'
#define CAP_H (static_cast<UChar>(0x0048))  // 'H'
#define CAP_J (static_cast<UChar>(0x004A))  // 'J'
#define CAP_K (static_cast<UChar>(0x004B))  // 'K'
#define CAP_L (static_cast<UChar>(0x004C))  // 'L'
#define CAP_M (static_cast<UChar>(0x004D))  // 'M'
#define CAP_O (static_cast<UChar>(0x004F))  // 'O'
#define CAP_Q (static_cast<UChar>(0x0051))  // 'Q'
#define CAP_S (static_cast<UChar>(0x0053))  // 'S'
#define CAP_T (static_cast<UChar>(0x0054))  // 'T'
#define CAP_U (static_cast<UChar>(0x0055))  // 'U'
#define CAP_V (static_cast<UChar>(0x0056))  // 'V'
#define CAP_W (static_cast<UChar>(0x0057))  // 'W'
#define CAP_X (static_cast<UChar>(0x0058))  // 'X'
#define CAP_Y (static_cast<UChar>(0x0059))  // 'Y'
#define CAP_Z (static_cast<UChar>(0x005A))  // 'Z'

// Lower-case Latin letters, 'a' through 'z'.
#define LOW_A (static_cast<UChar>(0x0061))  // 'a'
#define LOW_B (static_cast<UChar>(0x0062))  // 'b'
#define LOW_C (static_cast<UChar>(0x0063))  // 'c'
#define LOW_D (static_cast<UChar>(0x0064))  // 'd'
#define LOW_E (static_cast<UChar>(0x0065))  // 'e'
#define LOW_F (static_cast<UChar>(0x0066))  // 'f'
#define LOW_G (static_cast<UChar>(0x0067))  // 'g'
#define LOW_H (static_cast<UChar>(0x0068))  // 'h'
#define LOW_I (static_cast<UChar>(0x0069))  // 'i'
#define LOW_J (static_cast<UChar>(0x006A))  // 'j'
#define LOW_K (static_cast<UChar>(0x006B))  // 'k'
#define LOW_L (static_cast<UChar>(0x006C))  // 'l'
#define LOW_M (static_cast<UChar>(0x006D))  // 'm'
#define LOW_N (static_cast<UChar>(0x006E))  // 'n'
#define LOW_O (static_cast<UChar>(0x006F))  // 'o'
#define LOW_P (static_cast<UChar>(0x0070))  // 'p'
#define LOW_Q (static_cast<UChar>(0x0071))  // 'q'
#define LOW_R (static_cast<UChar>(0x0072))  // 'r'
#define LOW_S (static_cast<UChar>(0x0073))  // 's'
#define LOW_T (static_cast<UChar>(0x0074))  // 't'
#define LOW_U (static_cast<UChar>(0x0075))  // 'u'
#define LOW_V (static_cast<UChar>(0x0076))  // 'v'
#define LOW_W (static_cast<UChar>(0x0077))  // 'w'
#define LOW_X (static_cast<UChar>(0x0078))  // 'x'
#define LOW_Y (static_cast<UChar>(0x0079))  // 'y'
#define LOW_Z (static_cast<UChar>(0x007A))  // 'z'
// Type codes and step size for date/time field widths.
// NOTE(review): presumably these are the values stored in dtTypeElem::type,
// with negative codes for textual widths and DT_NUMERIC for numeric ones --
// confirm against the implementation's type table.
// Negative values are parenthesized so the macros stay a single operand
// wherever they are expanded.
#define DT_NARROW  (-0x101)
#define DT_SHORTER (-0x102)
#define DT_SHORT   (-0x103)
#define DT_LONG    (-0x104)
#define DT_NUMERIC 0x100
#define DT_DELTA   0x10
101 | |
102 | U_NAMESPACE_BEGIN |
103 | |
104 | const int32_t UDATPG_FRACTIONAL_MASK = 1<<UDATPG_FRACTIONAL_SECOND_FIELD; |
105 | const int32_t UDATPG_SECOND_AND_FRACTIONAL_MASK = (1<<UDATPG_SECOND_FIELD) | (1<<UDATPG_FRACTIONAL_SECOND_FIELD); |
106 | |
// Selects which kind of string an enumeration is built from; passed to the
// DTSkeletonEnumeration constructor.
enum dtStrEnum {
    DT_BASESKELETON = 0,
    DT_SKELETON     = 1,
    DT_PATTERN      = 2
};
112 | |
113 | typedef struct dtTypeElem { |
114 | UChar patternChar; |
115 | UDateTimePatternField field; |
116 | int16_t type; |
117 | int16_t minLen; |
118 | int16_t weight; |
119 | } dtTypeElem; |
120 | |
121 | // A compact storage mechanism for skeleton field strings. Several dozen of these will be created |
122 | // for a typical DateTimePatternGenerator instance. |
123 | class SkeletonFields : public UMemory { |
124 | public: |
125 | SkeletonFields(); |
126 | void clear(); |
127 | void copyFrom(const SkeletonFields& other); |
128 | void clearField(int32_t field); |
129 | UChar getFieldChar(int32_t field) const; |
130 | int32_t getFieldLength(int32_t field) const; |
131 | void populate(int32_t field, const UnicodeString& value); |
132 | void populate(int32_t field, UChar repeatChar, int32_t repeatCount); |
133 | UBool isFieldEmpty(int32_t field) const; |
134 | UnicodeString& appendTo(UnicodeString& string) const; |
135 | UnicodeString& appendFieldTo(int32_t field, UnicodeString& string) const; |
136 | UChar getFirstChar() const; |
137 | inline UBool operator==(const SkeletonFields& other) const; |
138 | inline UBool operator!=(const SkeletonFields& other) const; |
139 | |
140 | private: |
141 | int8_t chars[UDATPG_FIELD_COUNT]; |
142 | int8_t lengths[UDATPG_FIELD_COUNT]; |
143 | }; |
144 | |
145 | inline UBool SkeletonFields::operator==(const SkeletonFields& other) const { |
146 | return (uprv_memcmp(chars, other.chars, sizeof(chars)) == 0 |
147 | && uprv_memcmp(lengths, other.lengths, sizeof(lengths)) == 0); |
148 | } |
149 | |
150 | inline UBool SkeletonFields::operator!=(const SkeletonFields& other) const { |
151 | return (! operator==(other)); |
152 | } |
153 | |
154 | class PtnSkeleton : public UMemory { |
155 | public: |
156 | int32_t type[UDATPG_FIELD_COUNT]; |
157 | SkeletonFields original; |
158 | SkeletonFields baseOriginal; |
159 | UBool addedDefaultDayPeriod; |
160 | |
161 | PtnSkeleton(); |
162 | PtnSkeleton(const PtnSkeleton& other); |
163 | void copyFrom(const PtnSkeleton& other); |
164 | void clear(); |
165 | UBool equals(const PtnSkeleton& other) const; |
166 | UnicodeString getSkeleton() const; |
167 | UnicodeString getBaseSkeleton() const; |
168 | UChar getFirstChar() const; |
169 | |
170 | // TODO: Why is this virtual, as well as the other destructors in this file? We don't want |
171 | // vtables when we don't use class objects polymorphically. |
172 | virtual ~PtnSkeleton(); |
173 | }; |
174 | |
175 | class PtnElem : public UMemory { |
176 | public: |
177 | UnicodeString basePattern; |
178 | LocalPointer<PtnSkeleton> skeleton; |
179 | UnicodeString pattern; |
180 | UBool skeletonWasSpecified; // if specified in availableFormats, not derived |
181 | LocalPointer<PtnElem> next; |
182 | |
183 | PtnElem(const UnicodeString &basePattern, const UnicodeString &pattern); |
184 | virtual ~PtnElem(); |
185 | }; |
186 | |
187 | class FormatParser : public UMemory { |
188 | public: |
189 | UnicodeString items[MAX_DT_TOKEN]; |
190 | int32_t itemNumber; |
191 | |
192 | FormatParser(); |
193 | virtual ~FormatParser(); |
194 | void set(const UnicodeString& patternString); |
195 | void getQuoteLiteral(UnicodeString& quote, int32_t *itemIndex); |
196 | UBool isPatternSeparator(const UnicodeString& field) const; |
197 | static UBool isQuoteLiteral(const UnicodeString& s); |
198 | static int32_t getCanonicalIndex(const UnicodeString& s) { return getCanonicalIndex(s, TRUE); } |
199 | static int32_t getCanonicalIndex(const UnicodeString& s, UBool strict); |
200 | |
201 | private: |
202 | typedef enum TokenStatus { |
203 | START, |
204 | ADD_TOKEN, |
205 | SYNTAX_ERROR, |
206 | DONE |
207 | } TokenStatus; |
208 | |
209 | TokenStatus status; |
210 | virtual TokenStatus setTokens(const UnicodeString& pattern, int32_t startPos, int32_t *len); |
211 | }; |
212 | |
213 | class DistanceInfo : public UMemory { |
214 | public: |
215 | int32_t missingFieldMask; |
216 | int32_t ; |
217 | |
218 | DistanceInfo() {} |
219 | virtual ~DistanceInfo(); |
220 | void clear() { missingFieldMask = extraFieldMask = 0; } |
221 | void setTo(const DistanceInfo& other); |
222 | void addMissing(int32_t field) { missingFieldMask |= (1<<field); } |
223 | void (int32_t field) { extraFieldMask |= (1<<field); } |
224 | }; |
225 | |
226 | class DateTimeMatcher: public UMemory { |
227 | public: |
228 | PtnSkeleton skeleton; |
229 | |
230 | void getBasePattern(UnicodeString& basePattern); |
231 | UnicodeString getPattern(); |
232 | void set(const UnicodeString& pattern, FormatParser* fp); |
233 | void set(const UnicodeString& pattern, FormatParser* fp, PtnSkeleton& skeleton); |
234 | void copyFrom(const PtnSkeleton& skeleton); |
235 | void copyFrom(); |
236 | PtnSkeleton* getSkeletonPtr(); |
237 | UBool equals(const DateTimeMatcher* other) const; |
238 | int32_t getDistance(const DateTimeMatcher& other, int32_t includeMask, DistanceInfo& distanceInfo) const; |
239 | DateTimeMatcher(); |
240 | DateTimeMatcher(const DateTimeMatcher& other); |
241 | virtual ~DateTimeMatcher(); |
242 | int32_t getFieldMask() const; |
243 | }; |
244 | |
245 | class PatternMap : public UMemory { |
246 | public: |
247 | PtnElem *boot[MAX_PATTERN_ENTRIES]; |
248 | PatternMap(); |
249 | virtual ~PatternMap(); |
250 | void add(const UnicodeString& basePattern, const PtnSkeleton& skeleton, const UnicodeString& value, UBool skeletonWasSpecified, UErrorCode& status); |
251 | const UnicodeString* getPatternFromBasePattern(const UnicodeString& basePattern, UBool& skeletonWasSpecified) const; |
252 | const UnicodeString* getPatternFromSkeleton(const PtnSkeleton& skeleton, const PtnSkeleton** specifiedSkeletonPtr = 0) const; |
253 | void copyFrom(const PatternMap& other, UErrorCode& status); |
254 | PtnElem* (UChar baseChar) const; |
255 | UBool equals(const PatternMap& other) const; |
256 | private: |
257 | UBool isDupAllowed; |
258 | PtnElem* getDuplicateElem(const UnicodeString& basePattern, const PtnSkeleton& skeleton, PtnElem *baseElem); |
259 | }; // end PatternMap |
260 | |
261 | class PatternMapIterator : public UMemory { |
262 | public: |
263 | PatternMapIterator(UErrorCode &status); |
264 | virtual ~PatternMapIterator(); |
265 | void set(PatternMap& patternMap); |
266 | PtnSkeleton* getSkeleton() const; |
267 | UBool hasNext() const; |
268 | DateTimeMatcher& next(); |
269 | private: |
270 | int32_t bootIndex; |
271 | PtnElem *nodePtr; |
272 | LocalPointer<DateTimeMatcher> matcher; |
273 | PatternMap *patternMap; |
274 | }; |
275 | |
276 | class DTSkeletonEnumeration : public StringEnumeration { |
277 | public: |
278 | DTSkeletonEnumeration(PatternMap& patternMap, dtStrEnum type, UErrorCode& status); |
279 | virtual ~DTSkeletonEnumeration(); |
280 | static UClassID U_EXPORT2 getStaticClassID(void); |
281 | virtual UClassID getDynamicClassID(void) const; |
282 | virtual const UnicodeString* snext(UErrorCode& status); |
283 | virtual void reset(UErrorCode& status); |
284 | virtual int32_t count(UErrorCode& status) const; |
285 | private: |
286 | int32_t pos; |
287 | UBool isCanonicalItem(const UnicodeString& item); |
288 | LocalPointer<UVector> fSkeletons; |
289 | }; |
290 | |
291 | class DTRedundantEnumeration : public StringEnumeration { |
292 | public: |
293 | DTRedundantEnumeration(); |
294 | virtual ~DTRedundantEnumeration(); |
295 | static UClassID U_EXPORT2 getStaticClassID(void); |
296 | virtual UClassID getDynamicClassID(void) const; |
297 | virtual const UnicodeString* snext(UErrorCode& status); |
298 | virtual void reset(UErrorCode& status); |
299 | virtual int32_t count(UErrorCode& status) const; |
300 | void add(const UnicodeString &pattern, UErrorCode& status); |
301 | private: |
302 | int32_t pos; |
303 | UBool isCanonicalItem(const UnicodeString& item) const; |
304 | LocalPointer<UVector> fPatterns; |
305 | }; |
306 | |
307 | U_NAMESPACE_END |
308 | |
309 | #endif |
310 | |