| 1 | // © 2016 and later: Unicode, Inc. and others. | 
|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 3 | /* | 
|---|
| 4 | ******************************************************************************* | 
|---|
| 5 | * | 
|---|
| 6 | *   Copyright (C) 2008-2011, International Business Machines | 
|---|
| 7 | *   Corporation, Google and others.  All Rights Reserved. | 
|---|
| 8 | * | 
|---|
| 9 | ******************************************************************************* | 
|---|
| 10 | */ | 
|---|
| 11 | /* | 
|---|
| 12 | * Author : eldawy@google.com (Mohamed Eldawy) | 
|---|
| 13 | * ucnvsel.h | 
|---|
| 14 | * | 
|---|
| 15 | * Purpose: To generate a list of encodings capable of handling | 
|---|
| 16 | * a given Unicode text | 
|---|
| 17 | * | 
|---|
| 18 | * Started 09-April-2008 | 
|---|
| 19 | */ | 
|---|
| 20 |  | 
|---|
| 21 | #ifndef __ICU_UCNV_SEL_H__ | 
|---|
| 22 | #define __ICU_UCNV_SEL_H__ | 
|---|
| 23 |  | 
|---|
| 24 | #include "unicode/utypes.h" | 
|---|
| 25 |  | 
|---|
| 26 | #if !UCONFIG_NO_CONVERSION | 
|---|
| 27 |  | 
|---|
| 28 | #include "unicode/uset.h" | 
|---|
| 29 | #include "unicode/utf16.h" | 
|---|
| 30 | #include "unicode/uenum.h" | 
|---|
| 31 | #include "unicode/ucnv.h" | 
|---|
| 32 | #include "unicode/localpointer.h" | 
|---|
| 33 |  | 
|---|
| 34 | /** | 
|---|
| 35 | * \file | 
|---|
| 36 | * | 
|---|
| 37 | * A converter selector is built with a set of encoding/charset names | 
|---|
| 38 | * and given an input string returns the set of names of the | 
|---|
| 39 | * corresponding converters which can convert the string. | 
|---|
| 40 | * | 
|---|
| 41 | * A converter selector can be serialized into a buffer and reopened | 
|---|
| 42 | * from the serialized form. | 
|---|
| 43 | */ | 
|---|
| 44 |  | 
|---|
| 45 | /** | 
|---|
| 46 | * @{ | 
|---|
| 47 | * The selector data structure. Only forward-declared in this header; used through pointers. | 
|---|
| 48 | */ | 
|---|
| 49 | struct UConverterSelector; | 
|---|
| 50 | typedef struct UConverterSelector UConverterSelector; | 
|---|
| 51 | /** @} */ | 
|---|
| 52 |  | 
|---|
| 53 | /** | 
|---|
| 54 | * Open a selector. | 
|---|
| 55 | * If converterListSize is 0, build for all available converters. | 
|---|
| 56 | * If excludedCodePoints is NULL, don't exclude any code points. | 
|---|
| 57 | * | 
|---|
| 58 | * @param converterList a pointer to an array of encoding names to build the selector for. | 
|---|
| 59 | *                      Can be NULL if converterListSize==0. | 
|---|
| 60 | *                      The list and the names will be cloned, and the caller | 
|---|
| 61 | *                      retains ownership of the original. | 
|---|
| 62 | * @param converterListSize number of encodings in above list. | 
|---|
| 63 | *                          If 0, builds a selector for all available converters. | 
|---|
| 64 | * @param excludedCodePoints a set of code points to be excluded from consideration. | 
|---|
| 65 | *                           That is, excluded code points in a string do not change | 
|---|
| 66 | *                           the selection result. (They might be handled by a callback.) | 
|---|
| 67 | *                           Use NULL to exclude nothing. | 
|---|
| 68 | * @param whichSet which converter set to use. Use this to determine whether | 
|---|
| 69 | *                 to consider only roundtrip mappings or also fallbacks. | 
|---|
| 70 | * @param status an in/out ICU UErrorCode | 
|---|
| 71 | * @return the new selector; see ucnvsel_close() | 
|---|
| 72 | * | 
|---|
| 73 | * @stable ICU 4.2 | 
|---|
| 74 | */ | 
|---|
| 75 | U_STABLE UConverterSelector* U_EXPORT2 | 
|---|
| 76 | ucnvsel_open(const char* const*  converterList, int32_t converterListSize, | 
|---|
| 77 | const USet* excludedCodePoints, | 
|---|
| 78 | const UConverterUnicodeSet whichSet, UErrorCode* status); | 
|---|
| 79 |  | 
|---|
| 80 | /** | 
|---|
| 81 | * Closes a selector. | 
|---|
| 82 | * If any Enumerations were returned by ucnvsel_select*, they become invalid. | 
|---|
| 83 | * They can be closed before or after calling ucnvsel_close, | 
|---|
| 84 | * but should never be used after the selector is closed. | 
|---|
| 85 | * | 
|---|
| 86 | * @see ucnvsel_selectForString | 
|---|
| 87 | * @see ucnvsel_selectForUTF8 | 
|---|
| 88 | * | 
|---|
| 89 | * @param sel selector to close | 
|---|
| 90 | * | 
|---|
| 91 | * @stable ICU 4.2 | 
|---|
| 92 | */ | 
|---|
| 93 | U_STABLE void U_EXPORT2 | 
|---|
| 94 | ucnvsel_close(UConverterSelector *sel); | 
|---|
| 95 |  | 
|---|
| 96 | #if U_SHOW_CPLUSPLUS_API | 
|---|
| 97 |  | 
|---|
| 98 | U_NAMESPACE_BEGIN | 
|---|
| 99 |  | 
|---|
| 100 | /** | 
|---|
| 101 | * \class LocalUConverterSelectorPointer | 
|---|
| 102 | * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close(). | 
|---|
| 103 | * For most methods see the LocalPointerBase base class. | 
|---|
| 104 | * | 
|---|
| 105 | * @see LocalPointerBase | 
|---|
| 106 | * @see LocalPointer | 
|---|
| 107 | * @stable ICU 4.4 | 
|---|
| 108 | */ | 
|---|
| 109 | U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close); | 
|---|
| 110 |  | 
|---|
| 111 | U_NAMESPACE_END | 
|---|
| 112 |  | 
|---|
| 113 | #endif  /* U_SHOW_CPLUSPLUS_API */ | 
|---|
| 114 |  | 
|---|
| 115 | /** | 
|---|
| 116 | * Open a selector from its serialized form. | 
|---|
| 117 | * The buffer must remain valid and unchanged for the lifetime of the selector. | 
|---|
| 118 | * This is much faster than creating a selector from scratch. | 
|---|
| 119 | * Using a serialized form from a different machine (endianness/charset) is supported. | 
|---|
| 120 | * | 
|---|
| 121 | * @param buffer pointer to the serialized form of a converter selector; | 
|---|
| 122 | *               must be 32-bit-aligned | 
|---|
| 123 | * @param length the capacity of this buffer (can be equal to or larger than | 
|---|
| 124 | *               the actual data length) | 
|---|
| 125 | * @param status an in/out ICU UErrorCode | 
|---|
| 126 | * @return the new selector; see ucnvsel_close() | 
|---|
| 127 | * | 
|---|
| 128 | * @stable ICU 4.2 | 
|---|
| 129 | */ | 
|---|
| 130 | U_STABLE UConverterSelector* U_EXPORT2 | 
|---|
| 131 | ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status); | 
|---|
| 132 |  | 
|---|
| 133 | /** | 
|---|
| 134 | * Serialize a selector into a linear buffer. | 
|---|
| 135 | * The serialized form is portable to different machines. | 
|---|
| 136 | * | 
|---|
| 137 | * @param sel selector to serialize | 
|---|
| 138 | * @param buffer pointer to 32-bit-aligned memory to be filled with the | 
|---|
| 139 | *               serialized form of this converter selector | 
|---|
| 140 | * @param bufferCapacity the capacity of this buffer | 
|---|
| 141 | * @param status an in/out ICU UErrorCode | 
|---|
| 142 | * @return the required buffer capacity to hold the serialized data (even if the call fails | 
|---|
| 143 | *         with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity) | 
|---|
| 144 | * | 
|---|
| 145 | * @stable ICU 4.2 | 
|---|
| 146 | */ | 
|---|
| 147 | U_STABLE int32_t U_EXPORT2 | 
|---|
| 148 | ucnvsel_serialize(const UConverterSelector* sel, | 
|---|
| 149 | void* buffer, int32_t bufferCapacity, UErrorCode* status); | 
|---|
| 150 |  | 
|---|
| 151 | /** | 
|---|
| 152 | * Select converters that can map all characters in a UTF-16 string, | 
|---|
| 153 | * ignoring the excluded code points. | 
|---|
| 154 | * | 
|---|
| 155 | * @param sel a selector | 
|---|
| 156 | * @param s UTF-16 string | 
|---|
| 157 | * @param length length of the string, or -1 if NUL-terminated | 
|---|
| 158 | * @param status an in/out ICU UErrorCode | 
|---|
| 159 | * @return an enumeration containing encoding names. | 
|---|
| 160 | *         The returned encoding names and their order will be the same as | 
|---|
| 161 | *         supplied when building the selector. Do not use it after the selector is closed (see ucnvsel_close()). | 
|---|
| 162 | * | 
|---|
| 163 | * @stable ICU 4.2 | 
|---|
| 164 | */ | 
|---|
| 165 | U_STABLE UEnumeration * U_EXPORT2 | 
|---|
| 166 | ucnvsel_selectForString(const UConverterSelector* sel, | 
|---|
| 167 | const UChar *s, int32_t length, UErrorCode *status); | 
|---|
| 168 |  | 
|---|
| 169 | /** | 
|---|
| 170 | * Select converters that can map all characters in a UTF-8 string, | 
|---|
| 171 | * ignoring the excluded code points. | 
|---|
| 172 | * | 
|---|
| 173 | * @param sel a selector | 
|---|
| 174 | * @param s UTF-8 string | 
|---|
| 175 | * @param length length of the string, or -1 if NUL-terminated | 
|---|
| 176 | * @param status an in/out ICU UErrorCode | 
|---|
| 177 | * @return an enumeration containing encoding names. | 
|---|
| 178 | *         The returned encoding names and their order will be the same as | 
|---|
| 179 | *         supplied when building the selector. Do not use it after the selector is closed (see ucnvsel_close()). | 
|---|
| 180 | * | 
|---|
| 181 | * @stable ICU 4.2 | 
|---|
| 182 | */ | 
|---|
| 183 | U_STABLE UEnumeration * U_EXPORT2 | 
|---|
| 184 | ucnvsel_selectForUTF8(const UConverterSelector* sel, | 
|---|
| 185 | const char *s, int32_t length, UErrorCode *status); | 
|---|
| 186 |  | 
|---|
| 187 | #endif  /* !UCONFIG_NO_CONVERSION */ | 
|---|
| 188 |  | 
|---|
| 189 | #endif  /* __ICU_UCNV_SEL_H__ */ | 
|---|
| 190 |  | 
|---|