1#pragma once
2
3#include <string>
4#include <cassert>
5#include <cstring>
6
7namespace duckdb {
8
//! Classification returned by the Utf8Proc::Analyze overloads.
//! NOTE(review): the per-value meanings below are inferred from the enumerator
//! names only; confirm against the out-of-line Analyze implementation.
enum class UnicodeType {
	INVALID, //!< presumably: the input is not valid UTF-8
	ASCII,   //!< presumably: the input consists solely of 7-bit ASCII bytes
	UNICODE  //!< presumably: valid UTF-8 containing at least one non-ASCII character
};
10
11
12class Utf8Proc {
13public:
14 static UnicodeType Analyze(const char* s) {
15 return Analyze(s, std::strlen(s));
16 }
17
18 static UnicodeType Analyze(std::string s) {
19 return Analyze(s.c_str(), s.size());
20 }
21
22 static UnicodeType Analyze(const char *s, size_t len);
23
24
25 static std::string Normalize(std::string s);
26
27 static char* Normalize(const char* s);
28
29 //! Returns whether or not the UTF8 string is valid
30 static bool IsValid(const char *s, size_t len);
31 //! Returns the position (in bytes) of the next grapheme cluster
32 static size_t NextGraphemeCluster(const char *s, size_t len, size_t pos);
33 //! Returns the position (in bytes) of the previous grapheme cluster
34 static size_t PreviousGraphemeCluster(const char *s, size_t len, size_t pos);
35};
36
37}
38