regexp_assembler.cc source code [engine/third_party/dart/runtime/vm/regexp_assembler.cc]

1	// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2	// for details. All rights reserved. Use of this source code is governed by a
3	// BSD-style license that can be found in the LICENSE file.
4
5	#include "vm/regexp_assembler.h"
6
7	#include "unicode/uchar.h"
8
9	#include "platform/unicode.h"
10
11	#include "vm/flags.h"
12	#include "vm/regexp.h"
13	#include "vm/runtime_entry.h"
14	#include "vm/unibrow-inl.h"
15
16	namespace dart {
17
18	void PrintUtf16(uint16_t c) {
19	const char* format =
20	(`0x20` <= c && c <= `0x7F`) ? "%c" : (c <= `0xff`) ? "\\x%02x" : "\\u%04x";
21	OS::PrintErr(format, c);
22	}
23
24	uword /BoolPtr/ CaseInsensitiveCompareUCS2(uword /StringPtr/ str_raw,
25	uword /SmiPtr/ lhs_index_raw,
26	uword /SmiPtr/ rhs_index_raw,
27	uword /SmiPtr/ length_raw) {
28	const String& str = String::Handle(static_cast<StringPtr>(str_raw));
29	const Smi& lhs_index = Smi::Handle(static_cast<SmiPtr>(lhs_index_raw));
30	const Smi& rhs_index = Smi::Handle(static_cast<SmiPtr>(rhs_index_raw));
31	const Smi& length = Smi::Handle(static_cast<SmiPtr>(length_raw));
32
33	// TODO(zerny): Optimize as single instance. V8 has this as an
34	// isolate member.
35	unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
36
37	for (intptr_t i = `0`; i < length.Value(); i++) {
38	int32_t c1 = str.CharAt(lhs_index.Value() + i);
39	int32_t c2 = str.CharAt(rhs_index.Value() + i);
40	if (c1 != c2) {
41	int32_t s1[`1`] = {c1};
42	canonicalize.get(c1, `'\0'`, s1);
43	if (s1[`0`] != c2) {
44	int32_t s2[`1`] = {c2};
45	canonicalize.get(c2, `'\0'`, s2);
46	if (s1[`0`] != s2[`0`]) {
47	return static_cast<uword>(Bool::False().raw());
48	}
49	}
50	}
51	}
52	return static_cast<uword>(Bool::True().raw());
53	}
54
55	uword /BoolPtr/ CaseInsensitiveCompareUTF16(uword /StringPtr/ str_raw,
56	uword /SmiPtr/ lhs_index_raw,
57	uword /SmiPtr/ rhs_index_raw,
58	uword /SmiPtr/ length_raw) {
59	const String& str = String::Handle(static_cast<StringPtr>(str_raw));
60	const Smi& lhs_index = Smi::Handle(static_cast<SmiPtr>(lhs_index_raw));
61	const Smi& rhs_index = Smi::Handle(static_cast<SmiPtr>(rhs_index_raw));
62	const Smi& length = Smi::Handle(static_cast<SmiPtr>(length_raw));
63
64	for (intptr_t i = `0`; i < length.Value(); i++) {
65	int32_t c1 = str.CharAt(lhs_index.Value() + i);
66	int32_t c2 = str.CharAt(rhs_index.Value() + i);
67	if (Utf16::IsLeadSurrogate(c1)) {
68	// Non-BMP characters do not have case-equivalents in the BMP.
69	// Both have to be non-BMP for them to be able to match.
70	if (!Utf16::IsLeadSurrogate(c2))
71	return static_cast<uword>(Bool::False().raw());
72	if (i + `1` < length.Value()) {
73	uint16_t c1t = str.CharAt(lhs_index.Value() + i + `1`);
74	uint16_t c2t = str.CharAt(rhs_index.Value() + i + `1`);
75	if (Utf16::IsTrailSurrogate(c1t) && Utf16::IsTrailSurrogate(c2t)) {
76	c1 = Utf16::Decode(c1, c1t);
77	c2 = Utf16::Decode(c2, c2t);
78	i++;
79	}
80	}
81	}
82	c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT);
83	c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT);
84	if (c1 != c2) return static_cast<uword>(Bool::False().raw());
85	}
86	return static_cast<uword>(Bool::True().raw());
87	}
88
89	DEFINE_RAW_LEAF_RUNTIME_ENTRY(
90	CaseInsensitiveCompareUCS2,
91	`4`,
92	false / is_float /,
93	reinterpret_cast<RuntimeFunction>(&CaseInsensitiveCompareUCS2));
94
95	DEFINE_RAW_LEAF_RUNTIME_ENTRY(
96	CaseInsensitiveCompareUTF16,
97	`4`,
98	false / is_float /,
99	reinterpret_cast<RuntimeFunction>(&CaseInsensitiveCompareUTF16));
100
101	BlockLabel::BlockLabel() {
102	#if !defined(DART_PRECOMPILED_RUNTIME)
103	if (!FLAG_interpret_irregexp) {
104	// Only needed by the compiled IR backend.
105	block_ =
106	new JoinEntryInstr (-`1`, -`1`, CompilerState::Current().GetNextDeoptId());
107	}
108	#endif
109	}
110
111	RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone)
112	: slow_safe_compiler_(false), global_mode_(NOT_GLOBAL), zone_(zone) {}
113
114	RegExpMacroAssembler::~RegExpMacroAssembler() {}
115
116	void RegExpMacroAssembler::CheckNotInSurrogatePair(intptr_t cp_offset,
117	BlockLabel* on_failure) {
118	BlockLabel ok;
119	// Check that current character is not a trail surrogate.
120	LoadCurrentCharacter(cp_offset, &ok);
121	CheckCharacterNotInRange(Utf16::kTrailSurrogateStart,
122	Utf16::kTrailSurrogateEnd, &ok);
123	// Check that previous character is not a lead surrogate.
124	LoadCurrentCharacter(cp_offset - `1`, &ok);
125	CheckCharacterInRange(Utf16::kLeadSurrogateStart, Utf16::kLeadSurrogateEnd,
126	on_failure);
127	BindBlock(&ok);
128	}
129
130	} // namespace dart
131

Browse the source code of engine/third_party/dart/runtime/vm/regexp_assembler.cc