| 1 | // © 2016 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | /* |
| 4 | ********************************************************************** |
| 5 | * Copyright (C) 2005-2008, International Business Machines |
| 6 | * Corporation and others. All Rights Reserved. |
| 7 | ********************************************************************** |
| 8 | */ |
| 9 | |
| 10 | #ifndef __INPUTEXT_H |
| 11 | #define __INPUTEXT_H |
| 12 | |
| 13 | /** |
| 14 | * \file |
| 15 | * \internal |
| 16 | * |
| 17 | * This is an internal header for the Character Set Detection code. The |
| 18 | * name is probably too generic... |
| 19 | */ |
| 20 | |
| 21 | |
| 22 | #include "unicode/uobject.h" |
| 23 | |
| 24 | #if !UCONFIG_NO_CONVERSION |
| 25 | |
| 26 | U_NAMESPACE_BEGIN |
| 27 | |
| 28 | class InputText : public UMemory |
| 29 | { |
| 30 | // Prevent copying |
| 31 | InputText(const InputText &); |
| 32 | public: |
| 33 | InputText(UErrorCode &status); |
| 34 | ~InputText(); |
| 35 | |
| 36 | void setText(const char *in, int32_t len); |
| 37 | void setDeclaredEncoding(const char *encoding, int32_t len); |
| 38 | UBool isSet() const; |
| 39 | void MungeInput(UBool fStripTags); |
| 40 | |
| 41 | // The text to be checked. Markup will have been |
| 42 | // removed if appropriate. |
| 43 | uint8_t *fInputBytes; |
| 44 | int32_t fInputLen; // Length of the byte data in fInputBytes. |
| 45 | // byte frequency statistics for the input text. |
| 46 | // Value is percent, not absolute. |
| 47 | // Value is rounded up, so zero really means zero occurences. |
| 48 | int16_t *fByteStats; |
| 49 | UBool fC1Bytes; // True if any bytes in the range 0x80 - 0x9F are in the input;false by default |
| 50 | char *fDeclaredEncoding; |
| 51 | |
| 52 | const uint8_t *fRawInput; // Original, untouched input bytes. |
| 53 | // If user gave us a byte array, this is it. |
| 54 | // If user gave us a stream, it's read to a |
| 55 | // buffer here. |
| 56 | int32_t fRawLength; // Length of data in fRawInput array. |
| 57 | |
| 58 | }; |
| 59 | |
| 60 | U_NAMESPACE_END |
| 61 | |
| 62 | #endif |
| 63 | #endif /* __INPUTEXT_H */ |
| 64 | |