1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 2008-2011, International Business Machines
7* Corporation, Google and others. All Rights Reserved.
8*
9*******************************************************************************
10*/
11/*
12 * Author : eldawy@google.com (Mohamed Eldawy)
13 * ucnvsel.h
14 *
15 * Purpose: To generate a list of encodings capable of handling
16 * a given Unicode text
17 *
18 * Started 09-April-2008
19 */
20
21#ifndef __ICU_UCNV_SEL_H__
22#define __ICU_UCNV_SEL_H__
23
24#include "unicode/utypes.h"
25
26#if !UCONFIG_NO_CONVERSION
27
28#include "unicode/uset.h"
29#include "unicode/utf16.h"
30#include "unicode/uenum.h"
31#include "unicode/ucnv.h"
32
33#if U_SHOW_CPLUSPLUS_API
34#include "unicode/localpointer.h"
35#endif // U_SHOW_CPLUSPLUS_API
36
37/**
38 * \file
39 * \brief C API: Encoding/charset converter selector
40 *
41 * A converter selector is built with a set of encoding/charset names
42 * and given an input string returns the set of names of the
43 * corresponding converters which can convert the string.
44 *
45 * A converter selector can be serialized into a buffer and reopened
46 * from the serialized form.
47 */
48
49struct UConverterSelector;
50/**
51 * @{
52 * Typedef for the selector data structure, which is only forward-declared in this header.
53 */
54typedef struct UConverterSelector UConverterSelector;
55/** @} */
56
57/**
58 * Open a selector.
59 * If converterListSize is 0, build for all available converters.
60 * If excludedCodePoints is NULL, don't exclude any code points.
61 *
62 * @param converterList an array of the encoding names to select among.
63 * Can be NULL if converterListSize==0.
64 * The list and the names will be cloned, and the caller
65 * retains ownership of the original.
66 * @param converterListSize number of encoding names in converterList.
67 * If 0, builds a selector for all available converters.
68 * @param excludedCodePoints a set of code points to be excluded from consideration.
69 * That is, excluded code points in a string do not change
70 * the selection result. (They might be handled by a callback.)
71 * Use NULL to exclude nothing.
72 * @param whichSet which converter set to use; this determines whether
73 * to consider only roundtrip mappings or also fallbacks.
74 * @param status an in/out ICU UErrorCode
75 * @return the new selector; close it with ucnvsel_close()
76 *
77 * @stable ICU 4.2
78 */
79U_CAPI UConverterSelector* U_EXPORT2
80ucnvsel_open(const char* const* converterList, int32_t converterListSize,
81 const USet* excludedCodePoints,
82 const UConverterUnicodeSet whichSet, UErrorCode* status);
83
84/**
85 * Closes a selector.
86 * If any enumerations were returned by ucnvsel_select*, they become invalid.
87 * They can be closed before or after calling ucnvsel_close,
88 * but should never be used after the selector is closed.
89 *
90 * @see ucnvsel_selectForString
91 * @see ucnvsel_selectForUTF8
92 *
93 * @param sel selector to close
94 *
95 * @stable ICU 4.2
96 */
97U_CAPI void U_EXPORT2
98ucnvsel_close(UConverterSelector *sel);
99
100#if U_SHOW_CPLUSPLUS_API
101
102U_NAMESPACE_BEGIN
103
104/**
105 * \class LocalUConverterSelectorPointer
106 * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close().
107 * Only declared when U_SHOW_CPLUSPLUS_API is enabled.
108 * For most methods see the LocalPointerBase base class.
109 * @see LocalPointerBase
110 * @see LocalPointer
111 * @stable ICU 4.4
112 */
113U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);
114
115U_NAMESPACE_END
116
117#endif // U_SHOW_CPLUSPLUS_API
118
119/**
120 * Open a selector from its serialized form.
121 * The buffer must remain valid and unchanged for the lifetime of the selector.
122 * This is much faster than creating a selector from scratch.
123 * Using a serialized form from a different machine (endianness/charset) is supported.
124 *
125 * @param buffer pointer to the serialized form of a converter selector
126 * (see ucnvsel_serialize()); must be 32-bit-aligned
127 * @param length the capacity of this buffer (can be equal to or larger than
128 * the actual data length)
129 * @param status an in/out ICU UErrorCode
130 * @return the new selector; close it with ucnvsel_close()
131 *
132 * @stable ICU 4.2
133 */
134U_CAPI UConverterSelector* U_EXPORT2
135ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
136
137/**
138 * Serialize a selector into a linear buffer.
139 * The serialized form is portable to different machines.
140 * It can be reopened with ucnvsel_openFromSerialized().
141 * @param sel selector to serialize
142 * @param buffer pointer to 32-bit-aligned memory to be filled with the
143 * serialized form of this converter selector
144 * @param bufferCapacity the capacity of this buffer
145 * @param status an in/out ICU UErrorCode
146 * @return the required buffer capacity to hold the serialized data (even if the call fails
147 * with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
148 *
149 * @stable ICU 4.2
150 */
151U_CAPI int32_t U_EXPORT2
152ucnvsel_serialize(const UConverterSelector* sel,
153 void* buffer, int32_t bufferCapacity, UErrorCode* status);
154
155/**
156 * Select converters that can map all characters in a UTF-16 string,
157 * ignoring the excluded code points.
158 *
159 * @param sel a selector
160 * @param s UTF-16 string
161 * @param length length of the string, or -1 if NUL-terminated
162 * @param status an in/out ICU UErrorCode
163 * @return an enumeration containing encoding names; the names and their
164 * order are the same as supplied when building the selector.
165 * It must not be used after the selector is closed.
166 *
167 * @stable ICU 4.2
168 */
169U_CAPI UEnumeration * U_EXPORT2
170ucnvsel_selectForString(const UConverterSelector* sel,
171 const UChar *s, int32_t length, UErrorCode *status);
172
173/**
174 * Select converters that can map all characters in a UTF-8 string,
175 * ignoring the excluded code points.
176 *
177 * @param sel a selector
178 * @param s UTF-8 string
179 * @param length length of the string, or -1 if NUL-terminated
180 * @param status an in/out ICU UErrorCode
181 * @return an enumeration containing encoding names; the names and their
182 * order are the same as supplied when building the selector.
183 * It must not be used after the selector is closed.
184 *
185 * @stable ICU 4.2
186 */
187U_CAPI UEnumeration * U_EXPORT2
188ucnvsel_selectForUTF8(const UConverterSelector* sel,
189 const char *s, int32_t length, UErrorCode *status);
190
191#endif /* !UCONFIG_NO_CONVERSION */
192
193#endif /* __ICU_UCNV_SEL_H__ */
194