| 1 | /* |
| 2 | * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions |
| 6 | * are met: |
| 7 | * |
| 8 | * - Redistributions of source code must retain the above copyright |
| 9 | * notice, this list of conditions and the following disclaimer. |
| 10 | * |
| 11 | * - Redistributions in binary form must reproduce the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer in the |
| 13 | * documentation and/or other materials provided with the distribution. |
| 14 | * |
| 15 | * - Neither the name of Oracle nor the names of its |
| 16 | * contributors may be used to endorse or promote products derived |
| 17 | * from this software without specific prior written permission. |
| 18 | * |
| 19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS |
| 20 | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
| 21 | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
| 23 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 24 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 25 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 26 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 27 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 28 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 29 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 30 | */ |
| 31 | |
| 32 | #include "jni.h" |
| 33 | #include "imageDecompressor.hpp" |
| 34 | #include "endian.hpp" |
| 35 | #ifdef WIN32 |
| 36 | #include <windows.h> |
| 37 | #else |
| 38 | #include <dlfcn.h> |
| 39 | #endif |
| 40 | |
| 41 | typedef jboolean (*ZipInflateFully_t)(void *inBuf, jlong inLen, |
| 42 | void *outBuf, jlong outLen, char **pmsg); |
| 43 | static ZipInflateFully_t ZipInflateFully = NULL; |
| 44 | |
/*
 * File-name decoration for shared libraries on non-Windows platforms:
 * "lib" in front, ".dylib" behind on Apple platforms and ".so" elsewhere.
 */
#if !defined(WIN32)
#  define JNI_LIB_PREFIX "lib"
#  if defined(__APPLE__)
#    define JNI_LIB_SUFFIX ".dylib"
#  else
#    define JNI_LIB_SUFFIX ".so"
#  endif
#endif
| 53 | |
| 54 | /** |
| 55 | * Return the address of the entry point named in the zip shared library. |
| 56 | * @param name - the name of the entry point |
| 57 | * @return the address of the entry point or NULL |
| 58 | */ |
| 59 | static void* findEntry(const char* name) { |
| 60 | void *addr = NULL; |
| 61 | #ifdef WIN32 |
| 62 | HMODULE handle = GetModuleHandle("zip.dll" ); |
| 63 | if (handle == NULL) { |
| 64 | return NULL; |
| 65 | } |
| 66 | addr = (void*) GetProcAddress(handle, name); |
| 67 | return addr; |
| 68 | #else |
| 69 | addr = dlopen(JNI_LIB_PREFIX "zip" JNI_LIB_SUFFIX, RTLD_GLOBAL|RTLD_LAZY); |
| 70 | if (addr == NULL) { |
| 71 | return NULL; |
| 72 | } |
| 73 | addr = dlsym(addr, name); |
| 74 | return addr; |
| 75 | #endif |
| 76 | } |
| 77 | |
| 78 | /* |
| 79 | * Initialize the array of decompressors. |
| 80 | */ |
| 81 | int ImageDecompressor::_decompressors_num = 0; |
| 82 | ImageDecompressor** ImageDecompressor::_decompressors = NULL; |
| 83 | void ImageDecompressor::image_decompressor_init() { |
| 84 | if (_decompressors == NULL) { |
| 85 | ZipInflateFully = (ZipInflateFully_t) findEntry("ZIP_InflateFully" ); |
| 86 | assert(ZipInflateFully != NULL && "ZIP decompressor not found." ); |
| 87 | _decompressors_num = 2; |
| 88 | _decompressors = new ImageDecompressor*[_decompressors_num]; |
| 89 | _decompressors[0] = new ZipDecompressor("zip" ); |
| 90 | _decompressors[1] = new SharedStringDecompressor("compact-cp" ); |
| 91 | } |
| 92 | } |
| 93 | |
| 94 | void ImageDecompressor::image_decompressor_close() { |
| 95 | delete[] _decompressors; |
| 96 | } |
| 97 | |
| 98 | /* |
| 99 | * Locate decompressor. |
| 100 | */ |
| 101 | ImageDecompressor* ImageDecompressor::get_decompressor(const char * decompressor_name) { |
| 102 | image_decompressor_init(); |
| 103 | for (int i = 0; i < _decompressors_num; i++) { |
| 104 | ImageDecompressor* decompressor = _decompressors[i]; |
| 105 | assert(decompressor != NULL && "Decompressors not initialized." ); |
| 106 | if (strcmp(decompressor->get_name(), decompressor_name) == 0) { |
| 107 | return decompressor; |
| 108 | } |
| 109 | } |
| 110 | assert(false && "No decompressor found." ); |
| 111 | return NULL; |
| 112 | } |
| 113 | |
| 114 | // Sparc to read unaligned content |
| 115 | // u8 l = (*(u8*) ptr); |
| 116 | // If ptr is not aligned, sparc will fail. |
| 117 | u8 ImageDecompressor::getU8(u1* ptr, Endian *endian) { |
| 118 | u8 ret; |
| 119 | if (endian->is_big_endian()) { |
| 120 | ret = (u8)ptr[0] << 56 | (u8)ptr[1] << 48 | (u8)ptr[2]<<40 | (u8)ptr[3]<<32 | |
| 121 | ptr[4]<<24 | ptr[5]<<16 | ptr[6]<<8 | ptr[7]; |
| 122 | } else { |
| 123 | ret = ptr[0] | ptr[1]<<8 | ptr[2]<<16 | ptr[3]<<24 | (u8)ptr[4]<<32 | |
| 124 | (u8)ptr[5]<<40 | (u8)ptr[6]<<48 | (u8)ptr[7]<<56; |
| 125 | } |
| 126 | return ret; |
| 127 | } |
| 128 | |
| 129 | u4 ImageDecompressor::getU4(u1* ptr, Endian *endian) { |
| 130 | u4 ret; |
| 131 | if (endian->is_big_endian()) { |
| 132 | ret = ptr[0] << 24 | ptr[1]<<16 | (ptr[2]<<8) | ptr[3]; |
| 133 | } else { |
| 134 | ret = ptr[0] | ptr[1]<<8 | (ptr[2]<<16) | ptr[3]<<24; |
| 135 | } |
| 136 | return ret; |
| 137 | } |
| 138 | |
| 139 | /* |
| 140 | * Decompression entry point. Called from ImageFileReader::get_resource. |
| 141 | */ |
| 142 | void ImageDecompressor::decompress_resource(u1* compressed, u1* uncompressed, |
| 143 | u8 uncompressed_size, const ImageStrings* strings, Endian *endian) { |
| 144 | bool = false; |
| 145 | u1* decompressed_resource = compressed; |
| 146 | u1* compressed_resource = compressed; |
| 147 | // Resource could have been transformed by a stack of decompressors. |
| 148 | // Iterate and decompress resources until there is no more header. |
| 149 | do { |
| 150 | ResourceHeader ; |
| 151 | u1* compressed_resource_base = compressed_resource; |
| 152 | _header._magic = getU4(compressed_resource, endian); |
| 153 | compressed_resource += 4; |
| 154 | _header._size = getU8(compressed_resource, endian); |
| 155 | compressed_resource += 8; |
| 156 | _header._uncompressed_size = getU8(compressed_resource, endian); |
| 157 | compressed_resource += 8; |
| 158 | _header._decompressor_name_offset = getU4(compressed_resource, endian); |
| 159 | compressed_resource += 4; |
| 160 | _header._decompressor_config_offset = getU4(compressed_resource, endian); |
| 161 | compressed_resource += 4; |
| 162 | _header._is_terminal = *compressed_resource; |
| 163 | compressed_resource += 1; |
| 164 | has_header = _header._magic == ResourceHeader::resource_header_magic; |
| 165 | if (has_header) { |
| 166 | // decompressed_resource array contains the result of decompression |
| 167 | decompressed_resource = new u1[(size_t) _header._uncompressed_size]; |
| 168 | // Retrieve the decompressor name |
| 169 | const char* decompressor_name = strings->get(_header._decompressor_name_offset); |
| 170 | assert(decompressor_name && "image decompressor not found" ); |
| 171 | // Retrieve the decompressor instance |
| 172 | ImageDecompressor* decompressor = get_decompressor(decompressor_name); |
| 173 | assert(decompressor && "image decompressor not found" ); |
| 174 | // Ask the decompressor to decompress the compressed content |
| 175 | decompressor->decompress_resource(compressed_resource, decompressed_resource, |
| 176 | &_header, strings); |
| 177 | if (compressed_resource_base != compressed) { |
| 178 | delete[] compressed_resource_base; |
| 179 | } |
| 180 | compressed_resource = decompressed_resource; |
| 181 | } |
| 182 | } while (has_header); |
| 183 | memcpy(uncompressed, decompressed_resource, (size_t) uncompressed_size); |
| 184 | delete[] decompressed_resource; |
| 185 | } |
| 186 | |
| 187 | // Zip decompressor |
| 188 | |
| 189 | void ZipDecompressor::(u1* data, u1* uncompressed, |
| 190 | ResourceHeader* , const ImageStrings* strings) { |
| 191 | char* msg = NULL; |
| 192 | jboolean res = ZipDecompressor::decompress(data, header->_size, uncompressed, |
| 193 | header->_uncompressed_size, &msg); |
| 194 | assert(res && "decompression failed" ); |
| 195 | } |
| 196 | |
| 197 | jboolean ZipDecompressor::decompress(void *in, u8 inSize, void *out, u8 outSize, char **pmsg) { |
| 198 | return (*ZipInflateFully)(in, inSize, out, outSize, pmsg); |
| 199 | } |
| 200 | |
| 201 | // END Zip Decompressor |
| 202 | |
| 203 | // Shared String decompressor |
| 204 | |
| 205 | // array index is the constant pool tag. value is size. |
| 206 | // eg: array[5] = 8; means size of long is 8 bytes. |
| 207 | const u1 SharedStringDecompressor::sizes[] = { |
| 208 | 0, 0, 0, 4, 4, 8, 8, 2, 2, 4, 4, 4, 4, 0, 0, 3, 2, 0, 4 |
| 209 | }; |
| 210 | /** |
| 211 | * Recreate the class by reconstructing the constant pool. |
| 212 | */ |
| 213 | void SharedStringDecompressor::(u1* data, |
| 214 | u1* uncompressed_resource, |
| 215 | ResourceHeader* , const ImageStrings* strings) { |
| 216 | u1* uncompressed_base = uncompressed_resource; |
| 217 | u1* data_base = data; |
| 218 | int = 8; // magic + major + minor |
| 219 | memcpy(uncompressed_resource, data, header_size + 2); //+ cp count |
| 220 | uncompressed_resource += header_size + 2; |
| 221 | data += header_size; |
| 222 | u2 cp_count = Endian::get_java(data); |
| 223 | data += 2; |
| 224 | for (int i = 1; i < cp_count; i++) { |
| 225 | u1 tag = *data; |
| 226 | data += 1; |
| 227 | switch (tag) { |
| 228 | |
| 229 | case externalized_string: |
| 230 | { // String in Strings table |
| 231 | *uncompressed_resource = 1; |
| 232 | uncompressed_resource += 1; |
| 233 | int k = decompress_int(data); |
| 234 | const char * string = strings->get(k); |
| 235 | int str_length = (int) strlen(string); |
| 236 | Endian::set_java(uncompressed_resource, str_length); |
| 237 | uncompressed_resource += 2; |
| 238 | memcpy(uncompressed_resource, string, str_length); |
| 239 | uncompressed_resource += str_length; |
| 240 | break; |
| 241 | } |
| 242 | // Descriptor String has been split and types added to Strings table |
| 243 | case externalized_string_descriptor: |
| 244 | { |
| 245 | *uncompressed_resource = 1; |
| 246 | uncompressed_resource += 1; |
| 247 | int descriptor_index = decompress_int(data); |
| 248 | int indexes_length = decompress_int(data); |
| 249 | u1* length_address = uncompressed_resource; |
| 250 | uncompressed_resource += 2; |
| 251 | int desc_length = 0; |
| 252 | const char * desc_string = strings->get(descriptor_index); |
| 253 | if (indexes_length > 0) { |
| 254 | u1* indexes_base = data; |
| 255 | data += indexes_length; |
| 256 | char c = *desc_string; |
| 257 | do { |
| 258 | *uncompressed_resource = c; |
| 259 | uncompressed_resource++; |
| 260 | desc_length += 1; |
| 261 | /* |
| 262 | * Every L character is the marker we are looking at in order |
| 263 | * to reconstruct the descriptor. Each time an L is found, then |
| 264 | * we retrieve the couple token/token at the current index and |
| 265 | * add it to the descriptor. |
| 266 | * "(L;I)V" and "java/lang","String" couple of tokens, |
| 267 | * this becomes "(Ljava/lang/String;I)V" |
| 268 | */ |
| 269 | if (c == 'L') { |
| 270 | int index = decompress_int(indexes_base); |
| 271 | const char * pkg = strings->get(index); |
| 272 | int str_length = (int) strlen(pkg); |
| 273 | // the case where we have a package. |
| 274 | // reconstruct the type full name |
| 275 | if (str_length > 0) { |
| 276 | int len = str_length + 1; |
| 277 | char* fullpkg = new char[len]; |
| 278 | char* pkg_base = fullpkg; |
| 279 | memcpy(fullpkg, pkg, str_length); |
| 280 | fullpkg += str_length; |
| 281 | *fullpkg = '/'; |
| 282 | memcpy(uncompressed_resource, pkg_base, len); |
| 283 | uncompressed_resource += len; |
| 284 | delete[] pkg_base; |
| 285 | desc_length += len; |
| 286 | } else { // Empty package |
| 287 | // Nothing to do. |
| 288 | } |
| 289 | int classIndex = decompress_int(indexes_base); |
| 290 | const char * clazz = strings->get(classIndex); |
| 291 | int clazz_length = (int) strlen(clazz); |
| 292 | memcpy(uncompressed_resource, clazz, clazz_length); |
| 293 | uncompressed_resource += clazz_length; |
| 294 | desc_length += clazz_length; |
| 295 | } |
| 296 | desc_string += 1; |
| 297 | c = *desc_string; |
| 298 | } while (c != '\0'); |
| 299 | } else { |
| 300 | desc_length = (int) strlen(desc_string); |
| 301 | memcpy(uncompressed_resource, desc_string, desc_length); |
| 302 | uncompressed_resource += desc_length; |
| 303 | } |
| 304 | Endian::set_java(length_address, desc_length); |
| 305 | break; |
| 306 | } |
| 307 | |
| 308 | case constant_utf8: |
| 309 | { // UTF-8 |
| 310 | *uncompressed_resource = tag; |
| 311 | uncompressed_resource += 1; |
| 312 | u2 str_length = Endian::get_java(data); |
| 313 | int len = str_length + 2; |
| 314 | memcpy(uncompressed_resource, data, len); |
| 315 | uncompressed_resource += len; |
| 316 | data += len; |
| 317 | break; |
| 318 | } |
| 319 | |
| 320 | case constant_long: |
| 321 | case constant_double: |
| 322 | { |
| 323 | i++; |
| 324 | } |
| 325 | /* fall through */ |
| 326 | default: |
| 327 | { |
| 328 | *uncompressed_resource = tag; |
| 329 | uncompressed_resource += 1; |
| 330 | int size = sizes[tag]; |
| 331 | memcpy(uncompressed_resource, data, size); |
| 332 | uncompressed_resource += size; |
| 333 | data += size; |
| 334 | } |
| 335 | } |
| 336 | } |
| 337 | u8 remain = header->_size - (int)(data - data_base); |
| 338 | u8 computed = (u8)(uncompressed_resource - uncompressed_base) + remain; |
| 339 | if (header->_uncompressed_size != computed) |
| 340 | printf("Failure, expecting %llu but getting %llu\n" , header->_uncompressed_size, |
| 341 | computed); |
| 342 | assert(header->_uncompressed_size == computed && |
| 343 | "Constant Pool reconstruction failed" ); |
| 344 | memcpy(uncompressed_resource, data, (size_t) remain); |
| 345 | } |
| 346 | |
| 347 | /* |
| 348 | * Decompress integers. Compressed integers are negative. |
| 349 | * If positive, the integer is not decompressed. |
| 350 | * If negative, length extracted from the first byte, then reconstruct the integer |
| 351 | * from the following bytes. |
| 352 | * Example of compression: 1 is compressed on 1 byte: 10100001 |
| 353 | */ |
| 354 | int SharedStringDecompressor::decompress_int(unsigned char*& value) { |
| 355 | int len = 4; |
| 356 | int res = 0; |
| 357 | char b1 = *value; |
| 358 | if (is_compressed((signed char)b1)) { // compressed |
| 359 | len = get_compressed_length(b1); |
| 360 | char clearedValue = b1 &= 0x1F; |
| 361 | if (len == 1) { |
| 362 | res = clearedValue; |
| 363 | } else { |
| 364 | res = (clearedValue & 0xFF) << 8 * (len - 1); |
| 365 | for (int i = 1; i < len; i++) { |
| 366 | res |= (value[i]&0xFF) << 8 * (len - i - 1); |
| 367 | } |
| 368 | } |
| 369 | } else { |
| 370 | res = (value[0] & 0xFF) << 24 | (value[1]&0xFF) << 16 | |
| 371 | (value[2]&0xFF) << 8 | (value[3]&0xFF); |
| 372 | } |
| 373 | value += len; |
| 374 | return res; |
| 375 | } |
| 376 | // END Shared String decompressor |
| 377 | |