| 1 | // basisu_enc.cpp | 
|---|
| 2 | // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. | 
|---|
| 3 | // | 
|---|
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|---|
| 5 | // you may not use this file except in compliance with the License. | 
|---|
| 6 | // You may obtain a copy of the License at | 
|---|
| 7 | // | 
|---|
| 8 | //    http://www.apache.org/licenses/LICENSE-2.0 | 
|---|
| 9 | // | 
|---|
| 10 | // Unless required by applicable law or agreed to in writing, software | 
|---|
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, | 
|---|
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|---|
| 13 | // See the License for the specific language governing permissions and | 
|---|
| 14 | // limitations under the License. | 
|---|
| 15 | #include "basisu_enc.h" | 
|---|
| 16 | #include "basisu_resampler.h" | 
|---|
| 17 | #include "basisu_resampler_filters.h" | 
|---|
| 18 | #include "basisu_etc.h" | 
|---|
| 19 | #include "../transcoder/basisu_transcoder.h" | 
|---|
| 20 | #include "basisu_bc7enc.h" | 
|---|
| 21 | #include "jpgd.h" | 
|---|
| 22 | #include "pvpngreader.h" | 
|---|
| 23 | #include "basisu_opencl.h" | 
|---|
| 24 | #include <vector> | 
|---|
| 25 |  | 
|---|
| 26 | #define | 
|---|
| 27 | #define MINIZ_NO_ZLIB_COMPATIBLE_NAMES | 
|---|
| 28 | #include "basisu_miniz.h" | 
|---|
| 29 |  | 
|---|
| 30 | #if defined(_WIN32) | 
|---|
| 31 | // For QueryPerformanceCounter/QueryPerformanceFrequency | 
|---|
| 32 | #define WIN32_LEAN_AND_MEAN | 
|---|
| 33 | #include <windows.h> | 
|---|
| 34 | #endif | 
|---|
| 35 |  | 
|---|
| 36 | namespace basisu | 
|---|
| 37 | { | 
|---|
| 38 | uint64_t interval_timer::g_init_ticks, interval_timer::g_freq; | 
|---|
| 39 | double interval_timer::g_timer_freq; | 
|---|
| 40 | #if BASISU_SUPPORT_SSE | 
|---|
| 41 | bool g_cpu_supports_sse41; | 
|---|
| 42 | #endif | 
|---|
| 43 |  | 
|---|
// Bit-count lookup table: g_hamming_dist[v] is the number of set bits in the byte v.
// Indexing it with (a ^ b) therefore gives the Hamming distance between bytes a and b.
// Each row below covers 16 consecutive byte values.
uint8_t g_hamming_dist[256] =
{
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, // 0x00-0x0F
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x10-0x1F
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x20-0x2F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x30-0x3F
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x40-0x4F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x50-0x5F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x60-0x6F
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0x70-0x7F
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x80-0x8F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x90-0x9F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0xA0-0xAF
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xB0-0xBF
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0xC0-0xCF
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xD0-0xDF
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xE0-0xEF
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8  // 0xF0-0xFF
};
|---|
| 63 |  | 
|---|
// Debug text font: a Public Domain 8x8 bitmap font taken from
// https://github.com/dhepper/font8x8/blob/master/font8x8_basic.h
// The table holds one 8-byte entry per character for codes 32 (space) through 127,
// so the entry for character c lives at index (c - 32).
// NOTE(review): presumably one byte per pixel row of the glyph - confirm against the text drawing code.
const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8] =
{
    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, // 0x20 space
    { 0x18, 0x3C, 0x3C, 0x18, 0x18, 0x00, 0x18, 0x00 }, // 0x21 '!'
    { 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, // 0x22 '"'
    { 0x36, 0x36, 0x7F, 0x36, 0x7F, 0x36, 0x36, 0x00 }, // 0x23 '#'
    { 0x0C, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x0C, 0x00 }, // 0x24 '$'
    { 0x00, 0x63, 0x33, 0x18, 0x0C, 0x66, 0x63, 0x00 }, // 0x25 '%'
    { 0x1C, 0x36, 0x1C, 0x6E, 0x3B, 0x33, 0x6E, 0x00 }, // 0x26 '&'
    { 0x06, 0x06, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00 }, // 0x27 apostrophe
    { 0x18, 0x0C, 0x06, 0x06, 0x06, 0x0C, 0x18, 0x00 }, // 0x28 '('
    { 0x06, 0x0C, 0x18, 0x18, 0x18, 0x0C, 0x06, 0x00 }, // 0x29 ')'
    { 0x00, 0x66, 0x3C, 0xFF, 0x3C, 0x66, 0x00, 0x00 }, // 0x2A '*'
    { 0x00, 0x0C, 0x0C, 0x3F, 0x0C, 0x0C, 0x00, 0x00 }, // 0x2B '+'
    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x06 }, // 0x2C ','
    { 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x00 }, // 0x2D '-'
    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x00 }, // 0x2E '.'
    { 0x60, 0x30, 0x18, 0x0C, 0x06, 0x03, 0x01, 0x00 }, // 0x2F '/'
    { 0x3E, 0x63, 0x73, 0x7B, 0x6F, 0x67, 0x3E, 0x00 }, // 0x30 '0'
    { 0x0C, 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x3F, 0x00 }, // 0x31 '1'
    { 0x1E, 0x33, 0x30, 0x1C, 0x06, 0x33, 0x3F, 0x00 }, // 0x32 '2'
    { 0x1E, 0x33, 0x30, 0x1C, 0x30, 0x33, 0x1E, 0x00 }, // 0x33 '3'
    { 0x38, 0x3C, 0x36, 0x33, 0x7F, 0x30, 0x78, 0x00 }, // 0x34 '4'
    { 0x3F, 0x03, 0x1F, 0x30, 0x30, 0x33, 0x1E, 0x00 }, // 0x35 '5'
    { 0x1C, 0x06, 0x03, 0x1F, 0x33, 0x33, 0x1E, 0x00 }, // 0x36 '6'
    { 0x3F, 0x33, 0x30, 0x18, 0x0C, 0x0C, 0x0C, 0x00 }, // 0x37 '7'
    { 0x1E, 0x33, 0x33, 0x1E, 0x33, 0x33, 0x1E, 0x00 }, // 0x38 '8'
    { 0x1E, 0x33, 0x33, 0x3E, 0x30, 0x18, 0x0E, 0x00 }, // 0x39 '9'
    { 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x00 }, // 0x3A ':'
    { 0x00, 0x0C, 0x0C, 0x00, 0x00, 0x0C, 0x0C, 0x06 }, // 0x3B ';'
    { 0x18, 0x0C, 0x06, 0x03, 0x06, 0x0C, 0x18, 0x00 }, // 0x3C '<'
    { 0x00, 0x00, 0x3F, 0x00, 0x00, 0x3F, 0x00, 0x00 }, // 0x3D '='
    { 0x06, 0x0C, 0x18, 0x30, 0x18, 0x0C, 0x06, 0x00 }, // 0x3E '>'
    { 0x1E, 0x33, 0x30, 0x18, 0x0C, 0x00, 0x0C, 0x00 }, // 0x3F '?'
    { 0x3E, 0x63, 0x7B, 0x7B, 0x7B, 0x03, 0x1E, 0x00 }, // 0x40 '@'
    { 0x0C, 0x1E, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x00 }, // 0x41 'A'
    { 0x3F, 0x66, 0x66, 0x3E, 0x66, 0x66, 0x3F, 0x00 }, // 0x42 'B'
    { 0x3C, 0x66, 0x03, 0x03, 0x03, 0x66, 0x3C, 0x00 }, // 0x43 'C'
    { 0x1F, 0x36, 0x66, 0x66, 0x66, 0x36, 0x1F, 0x00 }, // 0x44 'D'
    { 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x46, 0x7F, 0x00 }, // 0x45 'E'
    { 0x7F, 0x46, 0x16, 0x1E, 0x16, 0x06, 0x0F, 0x00 }, // 0x46 'F'
    { 0x3C, 0x66, 0x03, 0x03, 0x73, 0x66, 0x7C, 0x00 }, // 0x47 'G'
    { 0x33, 0x33, 0x33, 0x3F, 0x33, 0x33, 0x33, 0x00 }, // 0x48 'H'
    { 0x1E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00 }, // 0x49 'I'
    { 0x78, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E, 0x00 }, // 0x4A 'J'
    { 0x67, 0x66, 0x36, 0x1E, 0x36, 0x66, 0x67, 0x00 }, // 0x4B 'K'
    { 0x0F, 0x06, 0x06, 0x06, 0x46, 0x66, 0x7F, 0x00 }, // 0x4C 'L'
    { 0x63, 0x77, 0x7F, 0x7F, 0x6B, 0x63, 0x63, 0x00 }, // 0x4D 'M'
    { 0x63, 0x67, 0x6F, 0x7B, 0x73, 0x63, 0x63, 0x00 }, // 0x4E 'N'
    { 0x1C, 0x36, 0x63, 0x63, 0x63, 0x36, 0x1C, 0x00 }, // 0x4F 'O'
    { 0x3F, 0x66, 0x66, 0x3E, 0x06, 0x06, 0x0F, 0x00 }, // 0x50 'P'
    { 0x1E, 0x33, 0x33, 0x33, 0x3B, 0x1E, 0x38, 0x00 }, // 0x51 'Q'
    { 0x3F, 0x66, 0x66, 0x3E, 0x36, 0x66, 0x67, 0x00 }, // 0x52 'R'
    { 0x1E, 0x33, 0x07, 0x0E, 0x38, 0x33, 0x1E, 0x00 }, // 0x53 'S'
    { 0x3F, 0x2D, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00 }, // 0x54 'T'
    { 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x3F, 0x00 }, // 0x55 'U'
    { 0x33, 0x33, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00 }, // 0x56 'V'
    { 0x63, 0x63, 0x63, 0x6B, 0x7F, 0x77, 0x63, 0x00 }, // 0x57 'W'
    { 0x63, 0x63, 0x36, 0x1C, 0x1C, 0x36, 0x63, 0x00 }, // 0x58 'X'
    { 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x0C, 0x1E, 0x00 }, // 0x59 'Y'
    { 0x7F, 0x63, 0x31, 0x18, 0x4C, 0x66, 0x7F, 0x00 }, // 0x5A 'Z'
    { 0x1E, 0x06, 0x06, 0x06, 0x06, 0x06, 0x1E, 0x00 }, // 0x5B '['
    { 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0x40, 0x00 }, // 0x5C backslash
    { 0x1E, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1E, 0x00 }, // 0x5D ']'
    { 0x08, 0x1C, 0x36, 0x63, 0x00, 0x00, 0x00, 0x00 }, // 0x5E '^'
    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF }, // 0x5F '_'
    { 0x0C, 0x0C, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00 }, // 0x60 '`'
    { 0x00, 0x00, 0x1E, 0x30, 0x3E, 0x33, 0x6E, 0x00 }, // 0x61 'a'
    { 0x07, 0x06, 0x06, 0x3E, 0x66, 0x66, 0x3B, 0x00 }, // 0x62 'b'
    { 0x00, 0x00, 0x1E, 0x33, 0x03, 0x33, 0x1E, 0x00 }, // 0x63 'c'
    { 0x38, 0x30, 0x30, 0x3E, 0x33, 0x33, 0x6E, 0x00 }, // 0x64 'd'
    { 0x00, 0x00, 0x1E, 0x33, 0x3F, 0x03, 0x1E, 0x00 }, // 0x65 'e'
    { 0x1C, 0x36, 0x06, 0x0F, 0x06, 0x06, 0x0F, 0x00 }, // 0x66 'f'
    { 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x1F }, // 0x67 'g'
    { 0x07, 0x06, 0x36, 0x6E, 0x66, 0x66, 0x67, 0x00 }, // 0x68 'h'
    { 0x0C, 0x00, 0x0E, 0x0C, 0x0C, 0x0C, 0x1E, 0x00 }, // 0x69 'i'
    { 0x30, 0x00, 0x30, 0x30, 0x30, 0x33, 0x33, 0x1E }, // 0x6A 'j'
    { 0x07, 0x06, 0x66, 0x36, 0x1E, 0x36, 0x67, 0x00 }, // 0x6B 'k'
    { 0x0E, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x1E, 0x00 }, // 0x6C 'l'
    { 0x00, 0x00, 0x33, 0x7F, 0x7F, 0x6B, 0x63, 0x00 }, // 0x6D 'm'
    { 0x00, 0x00, 0x1F, 0x33, 0x33, 0x33, 0x33, 0x00 }, // 0x6E 'n'
    { 0x00, 0x00, 0x1E, 0x33, 0x33, 0x33, 0x1E, 0x00 }, // 0x6F 'o'
    { 0x00, 0x00, 0x3B, 0x66, 0x66, 0x3E, 0x06, 0x0F }, // 0x70 'p'
    { 0x00, 0x00, 0x6E, 0x33, 0x33, 0x3E, 0x30, 0x78 }, // 0x71 'q'
    { 0x00, 0x00, 0x3B, 0x6E, 0x66, 0x06, 0x0F, 0x00 }, // 0x72 'r'
    { 0x00, 0x00, 0x3E, 0x03, 0x1E, 0x30, 0x1F, 0x00 }, // 0x73 's'
    { 0x08, 0x0C, 0x3E, 0x0C, 0x0C, 0x2C, 0x18, 0x00 }, // 0x74 't'
    { 0x00, 0x00, 0x33, 0x33, 0x33, 0x33, 0x6E, 0x00 }, // 0x75 'u'
    { 0x00, 0x00, 0x33, 0x33, 0x33, 0x1E, 0x0C, 0x00 }, // 0x76 'v'
    { 0x00, 0x00, 0x63, 0x6B, 0x7F, 0x7F, 0x36, 0x00 }, // 0x77 'w'
    { 0x00, 0x00, 0x63, 0x36, 0x1C, 0x36, 0x63, 0x00 }, // 0x78 'x'
    { 0x00, 0x00, 0x33, 0x33, 0x33, 0x3E, 0x30, 0x1F }, // 0x79 'y'
    { 0x00, 0x00, 0x3F, 0x19, 0x0C, 0x26, 0x3F, 0x00 }, // 0x7A 'z'
    { 0x38, 0x0C, 0x0C, 0x07, 0x0C, 0x0C, 0x38, 0x00 }, // 0x7B '{'
    { 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x00 }, // 0x7C '|'
    { 0x07, 0x0C, 0x0C, 0x38, 0x0C, 0x0C, 0x07, 0x00 }, // 0x7D '}'
    { 0x6E, 0x3B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, // 0x7E '~'
    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }  // 0x7F (blank)
};
|---|
| 165 |  | 
|---|
// True once basisu_encoder_init() has run to completion; reset by basisu_encoder_deinit().
bool g_library_initialized = false;

// Held by basisu_encoder_init() for its whole duration so that concurrent callers
// cannot run the one-time setup twice.
std::mutex g_encoder_init_mutex;
|---|
| 168 |  | 
|---|
| 169 | // Encoder library initialization (just call once at startup) | 
|---|
| 170 | void basisu_encoder_init(bool use_opencl, bool opencl_force_serialization) | 
|---|
| 171 | { | 
|---|
| 172 | std::lock_guard<std::mutex> lock(g_encoder_init_mutex); | 
|---|
| 173 |  | 
|---|
| 174 | if (g_library_initialized) | 
|---|
| 175 | return; | 
|---|
| 176 |  | 
|---|
| 177 | detect_sse41(); | 
|---|
| 178 |  | 
|---|
| 179 | basist::basisu_transcoder_init(); | 
|---|
| 180 | pack_etc1_solid_color_init(); | 
|---|
| 181 | //uastc_init(); | 
|---|
| 182 | bc7enc_compress_block_init(); // must be after uastc_init() | 
|---|
| 183 |  | 
|---|
| 184 | // Don't bother initializing the OpenCL module at all if it's been completely disabled. | 
|---|
| 185 | if (use_opencl) | 
|---|
| 186 | { | 
|---|
| 187 | opencl_init(opencl_force_serialization); | 
|---|
| 188 | } | 
|---|
| 189 |  | 
|---|
| 190 | interval_timer::init(); // make sure interval_timer globals are initialized from main thread to avoid TSAN reports | 
|---|
| 191 |  | 
|---|
| 192 | g_library_initialized = true; | 
|---|
| 193 | } | 
|---|
| 194 |  | 
|---|
| 195 | void basisu_encoder_deinit() | 
|---|
| 196 | { | 
|---|
| 197 | opencl_deinit(); | 
|---|
| 198 |  | 
|---|
| 199 | g_library_initialized = false; | 
|---|
| 200 | } | 
|---|
| 201 |  | 
|---|
// Formats a printf-style message and writes it to stderr with an "ERROR: " prefix.
//   pFmt - printf-style format string
//   args - argument list matching pFmt (the caller owns va_start/va_end)
// Messages that do not fit the 8192-byte scratch buffer are truncated.
void error_vprintf(const char* pFmt, va_list args)
{
    char buf[8192];
    buf[0] = '\0';

    // vsnprintf truncates on every platform. The previous Windows-only vsprintf_s call
    // invokes the CRT invalid-parameter handler when the text does not fit, which would
    // take the process down while it is merely trying to report an error.
    const int res = vsnprintf(buf, sizeof(buf), pFmt, args);
    if (res < 0)
        buf[0] = '\0'; // formatting failed; don't print whatever was left in the buffer

    // Belt and braces for C runtimes whose vsnprintf does not terminate on truncation.
    buf[sizeof(buf) - 1] = '\0';

    fprintf(stderr, "ERROR: %s", buf);
}
|---|
| 214 |  | 
|---|
| 215 | void error_printf(const char *pFmt, ...) | 
|---|
| 216 | { | 
|---|
| 217 | va_list args; | 
|---|
| 218 | va_start(args, pFmt); | 
|---|
| 219 | error_vprintf(pFmt, args); | 
|---|
| 220 | va_end(args); | 
|---|
| 221 | } | 
|---|
| 222 |  | 
|---|
| 223 | #if defined(_WIN32) | 
|---|
| 224 | inline void query_counter(timer_ticks* pTicks) | 
|---|
| 225 | { | 
|---|
| 226 | QueryPerformanceCounter(reinterpret_cast<LARGE_INTEGER*>(pTicks)); | 
|---|
| 227 | } | 
|---|
| 228 | inline void query_counter_frequency(timer_ticks* pTicks) | 
|---|
| 229 | { | 
|---|
| 230 | QueryPerformanceFrequency(reinterpret_cast<LARGE_INTEGER*>(pTicks)); | 
|---|
| 231 | } | 
|---|
| 232 | #elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__EMSCRIPTEN__) | 
|---|
| 233 | #include <sys/time.h> | 
|---|
| 234 | inline void query_counter(timer_ticks* pTicks) | 
|---|
| 235 | { | 
|---|
| 236 | struct timeval cur_time; | 
|---|
| 237 | gettimeofday(&cur_time, NULL); | 
|---|
| 238 | *pTicks = static_cast<unsigned long long>(cur_time.tv_sec) * 1000000ULL + static_cast<unsigned long long>(cur_time.tv_usec); | 
|---|
| 239 | } | 
|---|
| 240 | inline void query_counter_frequency(timer_ticks* pTicks) | 
|---|
| 241 | { | 
|---|
| 242 | *pTicks = 1000000; | 
|---|
| 243 | } | 
|---|
| 244 | #elif defined(__GNUC__) | 
|---|
| 245 | #include <sys/timex.h> | 
|---|
| 246 | inline void query_counter(timer_ticks* pTicks) | 
|---|
| 247 | { | 
|---|
| 248 | struct timeval cur_time; | 
|---|
| 249 | gettimeofday(&cur_time, NULL); | 
|---|
| 250 | *pTicks = static_cast<unsigned long long>(cur_time.tv_sec) * 1000000ULL + static_cast<unsigned long long>(cur_time.tv_usec); | 
|---|
| 251 | } | 
|---|
| 252 | inline void query_counter_frequency(timer_ticks* pTicks) | 
|---|
| 253 | { | 
|---|
| 254 | *pTicks = 1000000; | 
|---|
| 255 | } | 
|---|
| 256 | #else | 
|---|
| 257 | #error TODO | 
|---|
| 258 | #endif | 
|---|
| 259 |  | 
|---|
| 260 | interval_timer::interval_timer() : m_start_time(0), m_stop_time(0), m_started(false), m_stopped(false) | 
|---|
| 261 | { | 
|---|
| 262 | if (!g_timer_freq) | 
|---|
| 263 | init(); | 
|---|
| 264 | } | 
|---|
| 265 |  | 
|---|
| 266 | void interval_timer::start() | 
|---|
| 267 | { | 
|---|
| 268 | query_counter(&m_start_time); | 
|---|
| 269 | m_started = true; | 
|---|
| 270 | m_stopped = false; | 
|---|
| 271 | } | 
|---|
| 272 |  | 
|---|
| 273 | void interval_timer::stop() | 
|---|
| 274 | { | 
|---|
| 275 | assert(m_started); | 
|---|
| 276 | query_counter(&m_stop_time); | 
|---|
| 277 | m_stopped = true; | 
|---|
| 278 | } | 
|---|
| 279 |  | 
|---|
| 280 | double interval_timer::get_elapsed_secs() const | 
|---|
| 281 | { | 
|---|
| 282 | assert(m_started); | 
|---|
| 283 | if (!m_started) | 
|---|
| 284 | return 0; | 
|---|
| 285 |  | 
|---|
| 286 | timer_ticks stop_time = m_stop_time; | 
|---|
| 287 | if (!m_stopped) | 
|---|
| 288 | query_counter(&stop_time); | 
|---|
| 289 |  | 
|---|
| 290 | timer_ticks delta = stop_time - m_start_time; | 
|---|
| 291 | return delta * g_timer_freq; | 
|---|
| 292 | } | 
|---|
| 293 |  | 
|---|
| 294 | void interval_timer::init() | 
|---|
| 295 | { | 
|---|
| 296 | if (!g_timer_freq) | 
|---|
| 297 | { | 
|---|
| 298 | query_counter_frequency(&g_freq); | 
|---|
| 299 | g_timer_freq = 1.0f / g_freq; | 
|---|
| 300 | query_counter(&g_init_ticks); | 
|---|
| 301 | } | 
|---|
| 302 | } | 
|---|
| 303 |  | 
|---|
| 304 | timer_ticks interval_timer::get_ticks() | 
|---|
| 305 | { | 
|---|
| 306 | if (!g_timer_freq) | 
|---|
| 307 | init(); | 
|---|
| 308 | timer_ticks ticks; | 
|---|
| 309 | query_counter(&ticks); | 
|---|
| 310 | return ticks - g_init_ticks; | 
|---|
| 311 | } | 
|---|
| 312 |  | 
|---|
| 313 | double interval_timer::ticks_to_secs(timer_ticks ticks) | 
|---|
| 314 | { | 
|---|
| 315 | if (!g_timer_freq) | 
|---|
| 316 | init(); | 
|---|
| 317 | return ticks * g_timer_freq; | 
|---|
| 318 | } | 
|---|
| 319 |  | 
|---|
// Largest decoded image size, in bytes (250,000,000), that load_tga() accepts when
// pointers are 32 bits wide.
const uint32_t MAX_32BIT_ALLOC_SIZE = 250u * 1000u * 1000u;
|---|
| 321 |  | 
|---|
| 322 | bool load_tga(const char* pFilename, image& img) | 
|---|
| 323 | { | 
|---|
| 324 | int w = 0, h = 0, n_chans = 0; | 
|---|
| 325 | uint8_t* pImage_data = read_tga(pFilename, w, h, n_chans); | 
|---|
| 326 |  | 
|---|
| 327 | if ((!pImage_data) || (!w) || (!h) || ((n_chans != 3) && (n_chans != 4))) | 
|---|
| 328 | { | 
|---|
| 329 | error_printf( "Failed loading .TGA image \"%s\"!\n", pFilename); | 
|---|
| 330 |  | 
|---|
| 331 | if (pImage_data) | 
|---|
| 332 | free(pImage_data); | 
|---|
| 333 |  | 
|---|
| 334 | return false; | 
|---|
| 335 | } | 
|---|
| 336 |  | 
|---|
| 337 | if (sizeof(void *) == sizeof(uint32_t)) | 
|---|
| 338 | { | 
|---|
| 339 | if ((w * h * n_chans) > MAX_32BIT_ALLOC_SIZE) | 
|---|
| 340 | { | 
|---|
| 341 | error_printf( "Image \"%s\" is too large (%ux%u) to process in a 32-bit build!\n", pFilename, w, h); | 
|---|
| 342 |  | 
|---|
| 343 | if (pImage_data) | 
|---|
| 344 | free(pImage_data); | 
|---|
| 345 |  | 
|---|
| 346 | return false; | 
|---|
| 347 | } | 
|---|
| 348 | } | 
|---|
| 349 |  | 
|---|
| 350 | img.resize(w, h); | 
|---|
| 351 |  | 
|---|
| 352 | const uint8_t *pSrc = pImage_data; | 
|---|
| 353 | for (int y = 0; y < h; y++) | 
|---|
| 354 | { | 
|---|
| 355 | color_rgba *pDst = &img(0, y); | 
|---|
| 356 |  | 
|---|
| 357 | for (int x = 0; x < w; x++) | 
|---|
| 358 | { | 
|---|
| 359 | pDst->r = pSrc[0]; | 
|---|
| 360 | pDst->g = pSrc[1]; | 
|---|
| 361 | pDst->b = pSrc[2]; | 
|---|
| 362 | pDst->a = (n_chans == 3) ? 255 : pSrc[3]; | 
|---|
| 363 |  | 
|---|
| 364 | pSrc += n_chans; | 
|---|
| 365 | ++pDst; | 
|---|
| 366 | } | 
|---|
| 367 | } | 
|---|
| 368 |  | 
|---|
| 369 | free(pImage_data); | 
|---|
| 370 |  | 
|---|
| 371 | return true; | 
|---|
| 372 | } | 
|---|
| 373 |  | 
|---|
| 374 | bool load_png(const uint8_t *pBuf, size_t buf_size, image &img, const char *pFilename) | 
|---|
| 375 | { | 
|---|
| 376 | interval_timer tm; | 
|---|
| 377 | tm.start(); | 
|---|
| 378 |  | 
|---|
| 379 | if (!buf_size) | 
|---|
| 380 | return false; | 
|---|
| 381 |  | 
|---|
| 382 | uint32_t width = 0, height = 0, num_chans = 0; | 
|---|
| 383 | void* pImage = pv_png::load_png(pBuf, buf_size, 4, width, height, num_chans); | 
|---|
| 384 | if (!pBuf) | 
|---|
| 385 | { | 
|---|
| 386 | error_printf( "pv_png::load_png failed while loading image \"%s\"\n", pFilename); | 
|---|
| 387 | return false; | 
|---|
| 388 | } | 
|---|
| 389 |  | 
|---|
| 390 | img.grant_ownership(reinterpret_cast<color_rgba*>(pImage), width, height); | 
|---|
| 391 |  | 
|---|
| 392 | //debug_printf("Total load_png() time: %3.3f secs\n", tm.get_elapsed_secs()); | 
|---|
| 393 |  | 
|---|
| 394 | return true; | 
|---|
| 395 | } | 
|---|
| 396 |  | 
|---|
| 397 | bool load_png(const char* pFilename, image& img) | 
|---|
| 398 | { | 
|---|
| 399 | uint8_vec buffer; | 
|---|
| 400 | if (!read_file_to_vec(pFilename, buffer)) | 
|---|
| 401 | { | 
|---|
| 402 | error_printf( "load_png: Failed reading file \"%s\"!\n", pFilename); | 
|---|
| 403 | return false; | 
|---|
| 404 | } | 
|---|
| 405 |  | 
|---|
| 406 | return load_png(buffer.data(), buffer.size(), img, pFilename); | 
|---|
| 407 | } | 
|---|
| 408 |  | 
|---|
| 409 | bool load_jpg(const char *pFilename, image& img) | 
|---|
| 410 | { | 
|---|
| 411 | int width = 0, height = 0, actual_comps = 0; | 
|---|
| 412 | uint8_t *pImage_data = jpgd::decompress_jpeg_image_from_file(pFilename, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering); | 
|---|
| 413 | if (!pImage_data) | 
|---|
| 414 | return false; | 
|---|
| 415 |  | 
|---|
| 416 | img.init(pImage_data, width, height, 4); | 
|---|
| 417 |  | 
|---|
| 418 | free(pImage_data); | 
|---|
| 419 |  | 
|---|
| 420 | return true; | 
|---|
| 421 | } | 
|---|
| 422 |  | 
|---|
| 423 | bool load_image(const char* pFilename, image& img) | 
|---|
| 424 | { | 
|---|
| 425 | std::string ext(string_get_extension(std::string(pFilename))); | 
|---|
| 426 |  | 
|---|
| 427 | if (ext.length() == 0) | 
|---|
| 428 | return false; | 
|---|
| 429 |  | 
|---|
| 430 | const char *pExt = ext.c_str(); | 
|---|
| 431 |  | 
|---|
| 432 | if (strcasecmp(pExt, "png") == 0) | 
|---|
| 433 | return load_png(pFilename, img); | 
|---|
| 434 | if (strcasecmp(pExt, "tga") == 0) | 
|---|
| 435 | return load_tga(pFilename, img); | 
|---|
| 436 | if ( (strcasecmp(pExt, "jpg") == 0) || (strcasecmp(pExt, "jfif") == 0) || (strcasecmp(pExt, "jpeg") == 0) ) | 
|---|
| 437 | return load_jpg(pFilename, img); | 
|---|
| 438 |  | 
|---|
| 439 | return false; | 
|---|
| 440 | } | 
|---|
| 441 |  | 
|---|
| 442 | bool save_png(const char* pFilename, const image &img, uint32_t image_save_flags, uint32_t grayscale_comp) | 
|---|
| 443 | { | 
|---|
| 444 | if (!img.get_total_pixels()) | 
|---|
| 445 | return false; | 
|---|
| 446 |  | 
|---|
| 447 | void* pPNG_data = nullptr; | 
|---|
| 448 | size_t PNG_data_size = 0; | 
|---|
| 449 |  | 
|---|
| 450 | if (image_save_flags & cImageSaveGrayscale) | 
|---|
| 451 | { | 
|---|
| 452 | uint8_vec g_pixels(img.get_total_pixels()); | 
|---|
| 453 | uint8_t* pDst = &g_pixels[0]; | 
|---|
| 454 |  | 
|---|
| 455 | for (uint32_t y = 0; y < img.get_height(); y++) | 
|---|
| 456 | for (uint32_t x = 0; x < img.get_width(); x++) | 
|---|
| 457 | *pDst++ = img(x, y)[grayscale_comp]; | 
|---|
| 458 |  | 
|---|
| 459 | pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(g_pixels.data(), img.get_width(), img.get_height(), 1, &PNG_data_size, 1, false); | 
|---|
| 460 | } | 
|---|
| 461 | else | 
|---|
| 462 | { | 
|---|
| 463 | bool has_alpha = false; | 
|---|
| 464 |  | 
|---|
| 465 | if ((image_save_flags & cImageSaveIgnoreAlpha) == 0) | 
|---|
| 466 | has_alpha = img.has_alpha(); | 
|---|
| 467 |  | 
|---|
| 468 | if (!has_alpha) | 
|---|
| 469 | { | 
|---|
| 470 | uint8_vec rgb_pixels(img.get_total_pixels() * 3); | 
|---|
| 471 | uint8_t* pDst = &rgb_pixels[0]; | 
|---|
| 472 |  | 
|---|
| 473 | for (uint32_t y = 0; y < img.get_height(); y++) | 
|---|
| 474 | { | 
|---|
| 475 | const color_rgba* pSrc = &img(0, y); | 
|---|
| 476 | for (uint32_t x = 0; x < img.get_width(); x++) | 
|---|
| 477 | { | 
|---|
| 478 | pDst[0] = pSrc->r; | 
|---|
| 479 | pDst[1] = pSrc->g; | 
|---|
| 480 | pDst[2] = pSrc->b; | 
|---|
| 481 |  | 
|---|
| 482 | pSrc++; | 
|---|
| 483 | pDst += 3; | 
|---|
| 484 | } | 
|---|
| 485 | } | 
|---|
| 486 |  | 
|---|
| 487 | pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(rgb_pixels.data(), img.get_width(), img.get_height(), 3, &PNG_data_size, 1, false); | 
|---|
| 488 | } | 
|---|
| 489 | else | 
|---|
| 490 | { | 
|---|
| 491 | pPNG_data = buminiz::tdefl_write_image_to_png_file_in_memory_ex(img.get_ptr(), img.get_width(), img.get_height(), 4, &PNG_data_size, 1, false); | 
|---|
| 492 | } | 
|---|
| 493 | } | 
|---|
| 494 |  | 
|---|
| 495 | if (!pPNG_data) | 
|---|
| 496 | return false; | 
|---|
| 497 |  | 
|---|
| 498 | bool status = write_data_to_file(pFilename, pPNG_data, PNG_data_size); | 
|---|
| 499 | if (!status) | 
|---|
| 500 | { | 
|---|
| 501 | error_printf( "save_png: Failed writing to filename \"%s\"!\n", pFilename); | 
|---|
| 502 | } | 
|---|
| 503 |  | 
|---|
| 504 | free(pPNG_data); | 
|---|
| 505 |  | 
|---|
| 506 | return status; | 
|---|
| 507 | } | 
|---|
| 508 |  | 
|---|
| 509 | bool read_file_to_vec(const char* pFilename, uint8_vec& data) | 
|---|
| 510 | { | 
|---|
| 511 | FILE* pFile = nullptr; | 
|---|
| 512 | #ifdef _WIN32 | 
|---|
| 513 | fopen_s(&pFile, pFilename, "rb"); | 
|---|
| 514 | #else | 
|---|
| 515 | pFile = fopen(pFilename, "rb"); | 
|---|
| 516 | #endif | 
|---|
| 517 | if (!pFile) | 
|---|
| 518 | return false; | 
|---|
| 519 |  | 
|---|
| 520 | fseek(pFile, 0, SEEK_END); | 
|---|
| 521 | #ifdef _WIN32 | 
|---|
| 522 | int64_t filesize = _ftelli64(pFile); | 
|---|
| 523 | #else | 
|---|
| 524 | int64_t filesize = ftello(pFile); | 
|---|
| 525 | #endif | 
|---|
| 526 | if (filesize < 0) | 
|---|
| 527 | { | 
|---|
| 528 | fclose(pFile); | 
|---|
| 529 | return false; | 
|---|
| 530 | } | 
|---|
| 531 | fseek(pFile, 0, SEEK_SET); | 
|---|
| 532 |  | 
|---|
| 533 | if (sizeof(size_t) == sizeof(uint32_t)) | 
|---|
| 534 | { | 
|---|
| 535 | if (filesize > 0x70000000) | 
|---|
| 536 | { | 
|---|
| 537 | // File might be too big to load safely in one alloc | 
|---|
| 538 | fclose(pFile); | 
|---|
| 539 | return false; | 
|---|
| 540 | } | 
|---|
| 541 | } | 
|---|
| 542 |  | 
|---|
| 543 | if (!data.try_resize((size_t)filesize)) | 
|---|
| 544 | { | 
|---|
| 545 | fclose(pFile); | 
|---|
| 546 | return false; | 
|---|
| 547 | } | 
|---|
| 548 |  | 
|---|
| 549 | if (filesize) | 
|---|
| 550 | { | 
|---|
| 551 | if (fread(&data[0], 1, (size_t)filesize, pFile) != (size_t)filesize) | 
|---|
| 552 | { | 
|---|
| 553 | fclose(pFile); | 
|---|
| 554 | return false; | 
|---|
| 555 | } | 
|---|
| 556 | } | 
|---|
| 557 |  | 
|---|
| 558 | fclose(pFile); | 
|---|
| 559 | return true; | 
|---|
| 560 | } | 
|---|
| 561 |  | 
|---|
// Writes len bytes starting at pData to pFilename, creating or truncating the file.
// A zero len just produces an empty file.
// Returns true only if the file could be opened, every byte was written, and the file
// closed without error.
bool write_data_to_file(const char* pFilename, const void* pData, size_t len)
{
    FILE* pFile = nullptr;
#ifdef _WIN32
    fopen_s(&pFile, pFilename, "wb");
#else
    pFile = fopen(pFilename, "wb");
#endif
    if (!pFile)
        return false;

    // Nothing is written when len is zero.
    const bool wrote_everything = (len == 0) || (fwrite(pData, 1, len, pFile) == len);

    // Always close; a failing close (e.g. the final flush) also counts as a failed write.
    const bool closed_cleanly = (fclose(pFile) != EOF);

    return wrote_everything && closed_cleanly;
}
|---|
| 584 |  | 
|---|
| 585 | float linear_to_srgb(float l) | 
|---|
| 586 | { | 
|---|
| 587 | assert(l >= 0.0f && l <= 1.0f); | 
|---|
| 588 | if (l < .0031308f) | 
|---|
| 589 | return saturate(l * 12.92f); | 
|---|
| 590 | else | 
|---|
| 591 | return saturate(1.055f * powf(l, 1.0f/2.4f) - .055f); | 
|---|
| 592 | } | 
|---|
| 593 |  | 
|---|
| 594 | float srgb_to_linear(float s) | 
|---|
| 595 | { | 
|---|
| 596 | assert(s >= 0.0f && s <= 1.0f); | 
|---|
| 597 | if (s < .04045f) | 
|---|
| 598 | return saturate(s * (1.0f/12.92f)); | 
|---|
| 599 | else | 
|---|
| 600 | return saturate(powf((s + .055f) * (1.0f/1.055f), 2.4f)); | 
|---|
| 601 | } | 
|---|
| 602 |  | 
|---|
| 603 | bool image_resample(const image &src, image &dst, bool srgb, | 
|---|
| 604 | const char *pFilter, float filter_scale, | 
|---|
| 605 | bool wrapping, | 
|---|
| 606 | uint32_t first_comp, uint32_t num_comps) | 
|---|
| 607 | { | 
|---|
| 608 | assert((first_comp + num_comps) <= 4); | 
|---|
| 609 |  | 
|---|
| 610 | const int cMaxComps = 4; | 
|---|
| 611 |  | 
|---|
| 612 | const uint32_t src_w = src.get_width(), src_h = src.get_height(); | 
|---|
| 613 | const uint32_t dst_w = dst.get_width(), dst_h = dst.get_height(); | 
|---|
| 614 |  | 
|---|
| 615 | if (maximum(src_w, src_h) > BASISU_RESAMPLER_MAX_DIMENSION) | 
|---|
| 616 | { | 
|---|
| 617 | printf( "Image is too large!\n"); | 
|---|
| 618 | return false; | 
|---|
| 619 | } | 
|---|
| 620 |  | 
|---|
| 621 | if (!src_w || !src_h || !dst_w || !dst_h) | 
|---|
| 622 | return false; | 
|---|
| 623 |  | 
|---|
| 624 | if ((num_comps < 1) || (num_comps > cMaxComps)) | 
|---|
| 625 | return false; | 
|---|
| 626 |  | 
|---|
| 627 | if ((minimum(dst_w, dst_h) < 1) || (maximum(dst_w, dst_h) > BASISU_RESAMPLER_MAX_DIMENSION)) | 
|---|
| 628 | { | 
|---|
| 629 | printf( "Image is too large!\n"); | 
|---|
| 630 | return false; | 
|---|
| 631 | } | 
|---|
| 632 |  | 
|---|
| 633 | if ((src_w == dst_w) && (src_h == dst_h)) | 
|---|
| 634 | { | 
|---|
| 635 | dst = src; | 
|---|
| 636 | return true; | 
|---|
| 637 | } | 
|---|
| 638 |  | 
|---|
| 639 | float srgb_to_linear_table[256]; | 
|---|
| 640 | if (srgb) | 
|---|
| 641 | { | 
|---|
| 642 | for (int i = 0; i < 256; ++i) | 
|---|
| 643 | srgb_to_linear_table[i] = srgb_to_linear((float)i * (1.0f/255.0f)); | 
|---|
| 644 | } | 
|---|
| 645 |  | 
|---|
| 646 | const int LINEAR_TO_SRGB_TABLE_SIZE = 8192; | 
|---|
| 647 | uint8_t linear_to_srgb_table[LINEAR_TO_SRGB_TABLE_SIZE]; | 
|---|
| 648 |  | 
|---|
| 649 | if (srgb) | 
|---|
| 650 | { | 
|---|
| 651 | for (int i = 0; i < LINEAR_TO_SRGB_TABLE_SIZE; ++i) | 
|---|
| 652 | linear_to_srgb_table[i] = (uint8_t)clamp<int>((int)(255.0f * linear_to_srgb((float)i * (1.0f / (LINEAR_TO_SRGB_TABLE_SIZE - 1))) + .5f), 0, 255); | 
|---|
| 653 | } | 
|---|
| 654 |  | 
|---|
| 655 | std::vector<float> samples[cMaxComps]; | 
|---|
| 656 | Resampler *resamplers[cMaxComps]; | 
|---|
| 657 |  | 
|---|
| 658 | resamplers[0] = new Resampler(src_w, src_h, dst_w, dst_h, | 
|---|
| 659 | wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, | 
|---|
| 660 | pFilter, nullptr, nullptr, filter_scale, filter_scale, 0, 0); | 
|---|
| 661 | samples[0].resize(src_w); | 
|---|
| 662 |  | 
|---|
| 663 | for (uint32_t i = 1; i < num_comps; ++i) | 
|---|
| 664 | { | 
|---|
| 665 | resamplers[i] = new Resampler(src_w, src_h, dst_w, dst_h, | 
|---|
| 666 | wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, | 
|---|
| 667 | pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), filter_scale, filter_scale, 0, 0); | 
|---|
| 668 | samples[i].resize(src_w); | 
|---|
| 669 | } | 
|---|
| 670 |  | 
|---|
| 671 | uint32_t dst_y = 0; | 
|---|
| 672 |  | 
|---|
| 673 | for (uint32_t src_y = 0; src_y < src_h; ++src_y) | 
|---|
| 674 | { | 
|---|
| 675 | const color_rgba *pSrc = &src(0, src_y); | 
|---|
| 676 |  | 
|---|
| 677 | // Put source lines into resampler(s) | 
|---|
| 678 | for (uint32_t x = 0; x < src_w; ++x) | 
|---|
| 679 | { | 
|---|
| 680 | for (uint32_t c = 0; c < num_comps; ++c) | 
|---|
| 681 | { | 
|---|
| 682 | const uint32_t comp_index = first_comp + c; | 
|---|
| 683 | const uint32_t v = (*pSrc)[comp_index]; | 
|---|
| 684 |  | 
|---|
| 685 | if (!srgb || (comp_index == 3)) | 
|---|
| 686 | samples[c][x] = v * (1.0f / 255.0f); | 
|---|
| 687 | else | 
|---|
| 688 | samples[c][x] = srgb_to_linear_table[v]; | 
|---|
| 689 | } | 
|---|
| 690 |  | 
|---|
| 691 | pSrc++; | 
|---|
| 692 | } | 
|---|
| 693 |  | 
|---|
| 694 | for (uint32_t c = 0; c < num_comps; ++c) | 
|---|
| 695 | { | 
|---|
| 696 | if (!resamplers[c]->put_line(&samples[c][0])) | 
|---|
| 697 | { | 
|---|
| 698 | for (uint32_t i = 0; i < num_comps; i++) | 
|---|
| 699 | delete resamplers[i]; | 
|---|
| 700 | return false; | 
|---|
| 701 | } | 
|---|
| 702 | } | 
|---|
| 703 |  | 
|---|
| 704 | // Now retrieve any output lines | 
|---|
| 705 | for (;;) | 
|---|
| 706 | { | 
|---|
| 707 | uint32_t c; | 
|---|
| 708 | for (c = 0; c < num_comps; ++c) | 
|---|
| 709 | { | 
|---|
| 710 | const uint32_t comp_index = first_comp + c; | 
|---|
| 711 |  | 
|---|
| 712 | const float *pOutput_samples = resamplers[c]->get_line(); | 
|---|
| 713 | if (!pOutput_samples) | 
|---|
| 714 | break; | 
|---|
| 715 |  | 
|---|
| 716 | const bool linear_flag = !srgb || (comp_index == 3); | 
|---|
| 717 |  | 
|---|
| 718 | color_rgba *pDst = &dst(0, dst_y); | 
|---|
| 719 |  | 
|---|
| 720 | for (uint32_t x = 0; x < dst_w; x++) | 
|---|
| 721 | { | 
|---|
| 722 | // TODO: Add dithering | 
|---|
| 723 | if (linear_flag) | 
|---|
| 724 | { | 
|---|
| 725 | int j = (int)(255.0f * pOutput_samples[x] + .5f); | 
|---|
| 726 | (*pDst)[comp_index] = (uint8_t)clamp<int>(j, 0, 255); | 
|---|
| 727 | } | 
|---|
| 728 | else | 
|---|
| 729 | { | 
|---|
| 730 | int j = (int)((LINEAR_TO_SRGB_TABLE_SIZE - 1) * pOutput_samples[x] + .5f); | 
|---|
| 731 | (*pDst)[comp_index] = linear_to_srgb_table[clamp<int>(j, 0, LINEAR_TO_SRGB_TABLE_SIZE - 1)]; | 
|---|
| 732 | } | 
|---|
| 733 |  | 
|---|
| 734 | pDst++; | 
|---|
| 735 | } | 
|---|
| 736 | } | 
|---|
| 737 | if (c < num_comps) | 
|---|
| 738 | break; | 
|---|
| 739 |  | 
|---|
| 740 | ++dst_y; | 
|---|
| 741 | } | 
|---|
| 742 | } | 
|---|
| 743 |  | 
|---|
| 744 | for (uint32_t i = 0; i < num_comps; ++i) | 
|---|
| 745 | delete resamplers[i]; | 
|---|
| 746 |  | 
|---|
| 747 | return true; | 
|---|
| 748 | } | 
|---|
| 749 |  | 
|---|
| 750 | void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms) | 
|---|
| 751 | { | 
|---|
| 752 | // See the paper "In-Place Calculation of Minimum Redundancy Codes" by Moffat and Katajainen | 
|---|
| 753 | if (!num_syms) | 
|---|
| 754 | return; | 
|---|
| 755 |  | 
|---|
| 756 | if (1 == num_syms) | 
|---|
| 757 | { | 
|---|
| 758 | A[0].m_key = 1; | 
|---|
| 759 | return; | 
|---|
| 760 | } | 
|---|
| 761 |  | 
|---|
| 762 | A[0].m_key += A[1].m_key; | 
|---|
| 763 |  | 
|---|
| 764 | int s = 2, r = 0, next; | 
|---|
| 765 | for (next = 1; next < (num_syms - 1); ++next) | 
|---|
| 766 | { | 
|---|
| 767 | if ((s >= num_syms) || (A[r].m_key < A[s].m_key)) | 
|---|
| 768 | { | 
|---|
| 769 | A[next].m_key = A[r].m_key; | 
|---|
| 770 | A[r].m_key = next; | 
|---|
| 771 | ++r; | 
|---|
| 772 | } | 
|---|
| 773 | else | 
|---|
| 774 | { | 
|---|
| 775 | A[next].m_key = A[s].m_key; | 
|---|
| 776 | ++s; | 
|---|
| 777 | } | 
|---|
| 778 |  | 
|---|
| 779 | if ((s >= num_syms) || ((r < next) && A[r].m_key < A[s].m_key)) | 
|---|
| 780 | { | 
|---|
| 781 | A[next].m_key = A[next].m_key + A[r].m_key; | 
|---|
| 782 | A[r].m_key = next; | 
|---|
| 783 | ++r; | 
|---|
| 784 | } | 
|---|
| 785 | else | 
|---|
| 786 | { | 
|---|
| 787 | A[next].m_key = A[next].m_key + A[s].m_key; | 
|---|
| 788 | ++s; | 
|---|
| 789 | } | 
|---|
| 790 | } | 
|---|
| 791 | A[num_syms - 2].m_key = 0; | 
|---|
| 792 |  | 
|---|
| 793 | for (next = num_syms - 3; next >= 0; --next) | 
|---|
| 794 | { | 
|---|
| 795 | A[next].m_key = 1 + A[A[next].m_key].m_key; | 
|---|
| 796 | } | 
|---|
| 797 |  | 
|---|
| 798 | int num_avail = 1, num_used = 0, depth = 0; | 
|---|
| 799 | r = num_syms - 2; | 
|---|
| 800 | next = num_syms - 1; | 
|---|
| 801 | while (num_avail > 0) | 
|---|
| 802 | { | 
|---|
| 803 | for ( ; (r >= 0) && ((int)A[r].m_key == depth); ++num_used, --r ) | 
|---|
| 804 | ; | 
|---|
| 805 |  | 
|---|
| 806 | for ( ; num_avail > num_used; --next, --num_avail) | 
|---|
| 807 | A[next].m_key = depth; | 
|---|
| 808 |  | 
|---|
| 809 | num_avail = 2 * num_used; | 
|---|
| 810 | num_used = 0; | 
|---|
| 811 | ++depth; | 
|---|
| 812 | } | 
|---|
| 813 | } | 
|---|
| 814 |  | 
|---|
| 815 | void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size) | 
|---|
| 816 | { | 
|---|
| 817 | int i; | 
|---|
| 818 | uint32_t total = 0; | 
|---|
| 819 | if (code_list_len <= 1) | 
|---|
| 820 | return; | 
|---|
| 821 |  | 
|---|
| 822 | for (i = max_code_size + 1; i <= cHuffmanMaxSupportedInternalCodeSize; i++) | 
|---|
| 823 | pNum_codes[max_code_size] += pNum_codes[i]; | 
|---|
| 824 |  | 
|---|
| 825 | for (i = max_code_size; i > 0; i--) | 
|---|
| 826 | total += (((uint32_t)pNum_codes[i]) << (max_code_size - i)); | 
|---|
| 827 |  | 
|---|
| 828 | while (total != (1UL << max_code_size)) | 
|---|
| 829 | { | 
|---|
| 830 | pNum_codes[max_code_size]--; | 
|---|
| 831 | for (i = max_code_size - 1; i > 0; i--) | 
|---|
| 832 | { | 
|---|
| 833 | if (pNum_codes[i]) | 
|---|
| 834 | { | 
|---|
| 835 | pNum_codes[i]--; | 
|---|
| 836 | pNum_codes[i + 1] += 2; | 
|---|
| 837 | break; | 
|---|
| 838 | } | 
|---|
| 839 | } | 
|---|
| 840 |  | 
|---|
| 841 | total--; | 
|---|
| 842 | } | 
|---|
| 843 | } | 
|---|
| 844 |  | 
|---|
| 845 | sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1) | 
|---|
| 846 | { | 
|---|
| 847 | uint32_t total_passes = 2, pass_shift, pass, i, hist[256 * 2]; | 
|---|
| 848 | sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1; | 
|---|
| 849 |  | 
|---|
| 850 | clear_obj(hist); | 
|---|
| 851 |  | 
|---|
| 852 | for (i = 0; i < num_syms; i++) | 
|---|
| 853 | { | 
|---|
| 854 | uint32_t freq = pSyms0[i].m_key; | 
|---|
| 855 |  | 
|---|
| 856 | // We scale all input frequencies to 16-bits. | 
|---|
| 857 | assert(freq <= UINT16_MAX); | 
|---|
| 858 |  | 
|---|
| 859 | hist[freq & 0xFF]++; | 
|---|
| 860 | hist[256 + ((freq >> 8) & 0xFF)]++; | 
|---|
| 861 | } | 
|---|
| 862 |  | 
|---|
| 863 | while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) | 
|---|
| 864 | total_passes--; | 
|---|
| 865 |  | 
|---|
| 866 | for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) | 
|---|
| 867 | { | 
|---|
| 868 | const uint32_t *pHist = &hist[pass << 8]; | 
|---|
| 869 | uint32_t offsets[256], cur_ofs = 0; | 
|---|
| 870 | for (i = 0; i < 256; i++) | 
|---|
| 871 | { | 
|---|
| 872 | offsets[i] = cur_ofs; | 
|---|
| 873 | cur_ofs += pHist[i]; | 
|---|
| 874 | } | 
|---|
| 875 |  | 
|---|
| 876 | for (i = 0; i < num_syms; i++) | 
|---|
| 877 | pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i]; | 
|---|
| 878 |  | 
|---|
| 879 | sym_freq *t = pCur_syms; | 
|---|
| 880 | pCur_syms = pNew_syms; | 
|---|
| 881 | pNew_syms = t; | 
|---|
| 882 | } | 
|---|
| 883 |  | 
|---|
| 884 | return pCur_syms; | 
|---|
| 885 | } | 
|---|
| 886 |  | 
|---|
| 887 | bool huffman_encoding_table::init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size) | 
|---|
| 888 | { | 
|---|
| 889 | if (max_code_size > cHuffmanMaxSupportedCodeSize) | 
|---|
| 890 | return false; | 
|---|
| 891 | if ((!num_syms) || (num_syms > cHuffmanMaxSyms)) | 
|---|
| 892 | return false; | 
|---|
| 893 |  | 
|---|
| 894 | uint32_t total_used_syms = 0; | 
|---|
| 895 | for (uint32_t i = 0; i < num_syms; i++) | 
|---|
| 896 | if (pFreq[i]) | 
|---|
| 897 | total_used_syms++; | 
|---|
| 898 |  | 
|---|
| 899 | if (!total_used_syms) | 
|---|
| 900 | return false; | 
|---|
| 901 |  | 
|---|
| 902 | std::vector<sym_freq> sym_freq0(total_used_syms), sym_freq1(total_used_syms); | 
|---|
| 903 | for (uint32_t i = 0, j = 0; i < num_syms; i++) | 
|---|
| 904 | { | 
|---|
| 905 | if (pFreq[i]) | 
|---|
| 906 | { | 
|---|
| 907 | sym_freq0[j].m_key = pFreq[i]; | 
|---|
| 908 | sym_freq0[j++].m_sym_index = static_cast<uint16_t>(i); | 
|---|
| 909 | } | 
|---|
| 910 | } | 
|---|
| 911 |  | 
|---|
| 912 | sym_freq *pSym_freq = canonical_huffman_radix_sort_syms(total_used_syms, &sym_freq0[0], &sym_freq1[0]); | 
|---|
| 913 |  | 
|---|
| 914 | canonical_huffman_calculate_minimum_redundancy(pSym_freq, total_used_syms); | 
|---|
| 915 |  | 
|---|
| 916 | int num_codes[cHuffmanMaxSupportedInternalCodeSize + 1]; | 
|---|
| 917 | clear_obj(num_codes); | 
|---|
| 918 |  | 
|---|
| 919 | for (uint32_t i = 0; i < total_used_syms; i++) | 
|---|
| 920 | { | 
|---|
| 921 | if (pSym_freq[i].m_key > cHuffmanMaxSupportedInternalCodeSize) | 
|---|
| 922 | return false; | 
|---|
| 923 |  | 
|---|
| 924 | num_codes[pSym_freq[i].m_key]++; | 
|---|
| 925 | } | 
|---|
| 926 |  | 
|---|
| 927 | canonical_huffman_enforce_max_code_size(num_codes, total_used_syms, max_code_size); | 
|---|
| 928 |  | 
|---|
| 929 | m_code_sizes.resize(0); | 
|---|
| 930 | m_code_sizes.resize(num_syms); | 
|---|
| 931 |  | 
|---|
| 932 | m_codes.resize(0); | 
|---|
| 933 | m_codes.resize(num_syms); | 
|---|
| 934 |  | 
|---|
| 935 | for (uint32_t i = 1, j = total_used_syms; i <= max_code_size; i++) | 
|---|
| 936 | for (uint32_t l = num_codes[i]; l > 0; l--) | 
|---|
| 937 | m_code_sizes[pSym_freq[--j].m_sym_index] = static_cast<uint8_t>(i); | 
|---|
| 938 |  | 
|---|
| 939 | uint32_t next_code[cHuffmanMaxSupportedInternalCodeSize + 1]; | 
|---|
| 940 |  | 
|---|
| 941 | next_code[1] = 0; | 
|---|
| 942 | for (uint32_t j = 0, i = 2; i <= max_code_size; i++) | 
|---|
| 943 | next_code[i] = j = ((j + num_codes[i - 1]) << 1); | 
|---|
| 944 |  | 
|---|
| 945 | for (uint32_t i = 0; i < num_syms; i++) | 
|---|
| 946 | { | 
|---|
| 947 | uint32_t rev_code = 0, code, code_size; | 
|---|
| 948 | if ((code_size = m_code_sizes[i]) == 0) | 
|---|
| 949 | continue; | 
|---|
| 950 | if (code_size > cHuffmanMaxSupportedInternalCodeSize) | 
|---|
| 951 | return false; | 
|---|
| 952 | code = next_code[code_size]++; | 
|---|
| 953 | for (uint32_t l = code_size; l > 0; l--, code >>= 1) | 
|---|
| 954 | rev_code = (rev_code << 1) | (code & 1); | 
|---|
| 955 | m_codes[i] = static_cast<uint16_t>(rev_code); | 
|---|
| 956 | } | 
|---|
| 957 |  | 
|---|
| 958 | return true; | 
|---|
| 959 | } | 
|---|
| 960 |  | 
|---|
| 961 | bool huffman_encoding_table::init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size) | 
|---|
| 962 | { | 
|---|
| 963 | if ((!num_syms) || (num_syms > cHuffmanMaxSyms)) | 
|---|
| 964 | return false; | 
|---|
| 965 |  | 
|---|
| 966 | uint16_vec sym_freq(num_syms); | 
|---|
| 967 |  | 
|---|
| 968 | uint32_t max_freq = 0; | 
|---|
| 969 | for (uint32_t i = 0; i < num_syms; i++) | 
|---|
| 970 | max_freq = maximum(max_freq, pSym_freq[i]); | 
|---|
| 971 |  | 
|---|
| 972 | if (max_freq < UINT16_MAX) | 
|---|
| 973 | { | 
|---|
| 974 | for (uint32_t i = 0; i < num_syms; i++) | 
|---|
| 975 | sym_freq[i] = static_cast<uint16_t>(pSym_freq[i]); | 
|---|
| 976 | } | 
|---|
| 977 | else | 
|---|
| 978 | { | 
|---|
| 979 | for (uint32_t i = 0; i < num_syms; i++) | 
|---|
| 980 | { | 
|---|
| 981 | if (pSym_freq[i]) | 
|---|
| 982 | { | 
|---|
| 983 | uint32_t f = static_cast<uint32_t>((static_cast<uint64_t>(pSym_freq[i]) * 65534U + (max_freq >> 1)) / max_freq); | 
|---|
| 984 | sym_freq[i] = static_cast<uint16_t>(clamp<uint32_t>(f, 1, 65534)); | 
|---|
| 985 | } | 
|---|
| 986 | } | 
|---|
| 987 | } | 
|---|
| 988 |  | 
|---|
| 989 | return init(num_syms, &sym_freq[0], max_code_size); | 
|---|
| 990 | } | 
|---|
| 991 |  | 
|---|
| 992 | void bitwise_coder::end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len) | 
|---|
| 993 | { | 
|---|
| 994 | if (run_size) | 
|---|
| 995 | { | 
|---|
| 996 | if (run_size < cHuffmanSmallRepeatSizeMin) | 
|---|
| 997 | { | 
|---|
| 998 | while (run_size--) | 
|---|
| 999 | syms.push_back(static_cast<uint16_t>(len)); | 
|---|
| 1000 | } | 
|---|
| 1001 | else if (run_size <= cHuffmanSmallRepeatSizeMax) | 
|---|
| 1002 | { | 
|---|
| 1003 | syms.push_back(static_cast<uint16_t>(cHuffmanSmallRepeatCode | ((run_size - cHuffmanSmallRepeatSizeMin) << 6))); | 
|---|
| 1004 | } | 
|---|
| 1005 | else | 
|---|
| 1006 | { | 
|---|
| 1007 | assert((run_size >= cHuffmanBigRepeatSizeMin) && (run_size <= cHuffmanBigRepeatSizeMax)); | 
|---|
| 1008 | syms.push_back(static_cast<uint16_t>(cHuffmanBigRepeatCode | ((run_size - cHuffmanBigRepeatSizeMin) << 6))); | 
|---|
| 1009 | } | 
|---|
| 1010 | } | 
|---|
| 1011 |  | 
|---|
| 1012 | run_size = 0; | 
|---|
| 1013 | } | 
|---|
| 1014 |  | 
|---|
| 1015 | void bitwise_coder::end_zero_run(uint16_vec &syms, uint32_t &run_size) | 
|---|
| 1016 | { | 
|---|
| 1017 | if (run_size) | 
|---|
| 1018 | { | 
|---|
| 1019 | if (run_size < cHuffmanSmallZeroRunSizeMin) | 
|---|
| 1020 | { | 
|---|
| 1021 | while (run_size--) | 
|---|
| 1022 | syms.push_back(0); | 
|---|
| 1023 | } | 
|---|
| 1024 | else if (run_size <= cHuffmanSmallZeroRunSizeMax) | 
|---|
| 1025 | { | 
|---|
| 1026 | syms.push_back(static_cast<uint16_t>(cHuffmanSmallZeroRunCode | ((run_size - cHuffmanSmallZeroRunSizeMin) << 6))); | 
|---|
| 1027 | } | 
|---|
| 1028 | else | 
|---|
| 1029 | { | 
|---|
| 1030 | assert((run_size >= cHuffmanBigZeroRunSizeMin) && (run_size <= cHuffmanBigZeroRunSizeMax)); | 
|---|
| 1031 | syms.push_back(static_cast<uint16_t>(cHuffmanBigZeroRunCode | ((run_size - cHuffmanBigZeroRunSizeMin) << 6))); | 
|---|
| 1032 | } | 
|---|
| 1033 | } | 
|---|
| 1034 |  | 
|---|
| 1035 | run_size = 0; | 
|---|
| 1036 | } | 
|---|
| 1037 |  | 
|---|
| 1038 | uint32_t bitwise_coder::emit_huffman_table(const huffman_encoding_table &tab) | 
|---|
| 1039 | { | 
|---|
| 1040 | const uint64_t start_bits = m_total_bits; | 
|---|
| 1041 |  | 
|---|
| 1042 | const uint8_vec &code_sizes = tab.get_code_sizes(); | 
|---|
| 1043 |  | 
|---|
| 1044 | uint32_t total_used = tab.get_total_used_codes(); | 
|---|
| 1045 | put_bits(total_used, cHuffmanMaxSymsLog2); | 
|---|
| 1046 |  | 
|---|
| 1047 | if (!total_used) | 
|---|
| 1048 | return 0; | 
|---|
| 1049 |  | 
|---|
| 1050 | uint16_vec syms; | 
|---|
| 1051 | syms.reserve(total_used + 16); | 
|---|
| 1052 |  | 
|---|
| 1053 | uint32_t prev_code_len = UINT_MAX, zero_run_size = 0, nonzero_run_size = 0; | 
|---|
| 1054 |  | 
|---|
| 1055 | for (uint32_t i = 0; i <= total_used; ++i) | 
|---|
| 1056 | { | 
|---|
| 1057 | const uint32_t code_len = (i == total_used) ? 0xFF : code_sizes[i]; | 
|---|
| 1058 | assert((code_len == 0xFF) || (code_len <= 16)); | 
|---|
| 1059 |  | 
|---|
| 1060 | if (code_len) | 
|---|
| 1061 | { | 
|---|
| 1062 | end_zero_run(syms, zero_run_size); | 
|---|
| 1063 |  | 
|---|
| 1064 | if (code_len != prev_code_len) | 
|---|
| 1065 | { | 
|---|
| 1066 | end_nonzero_run(syms, nonzero_run_size, prev_code_len); | 
|---|
| 1067 | if (code_len != 0xFF) | 
|---|
| 1068 | syms.push_back(static_cast<uint16_t>(code_len)); | 
|---|
| 1069 | } | 
|---|
| 1070 | else if (++nonzero_run_size == cHuffmanBigRepeatSizeMax) | 
|---|
| 1071 | end_nonzero_run(syms, nonzero_run_size, prev_code_len); | 
|---|
| 1072 | } | 
|---|
| 1073 | else | 
|---|
| 1074 | { | 
|---|
| 1075 | end_nonzero_run(syms, nonzero_run_size, prev_code_len); | 
|---|
| 1076 |  | 
|---|
| 1077 | if (++zero_run_size == cHuffmanBigZeroRunSizeMax) | 
|---|
| 1078 | end_zero_run(syms, zero_run_size); | 
|---|
| 1079 | } | 
|---|
| 1080 |  | 
|---|
| 1081 | prev_code_len = code_len; | 
|---|
| 1082 | } | 
|---|
| 1083 |  | 
|---|
| 1084 | histogram h(cHuffmanTotalCodelengthCodes); | 
|---|
| 1085 | for (uint32_t i = 0; i < syms.size(); i++) | 
|---|
| 1086 | h.inc(syms[i] & 63); | 
|---|
| 1087 |  | 
|---|
| 1088 | huffman_encoding_table ct; | 
|---|
| 1089 | if (!ct.init(h, 7)) | 
|---|
| 1090 | return 0; | 
|---|
| 1091 |  | 
|---|
| 1092 | assert(cHuffmanTotalSortedCodelengthCodes == cHuffmanTotalCodelengthCodes); | 
|---|
| 1093 |  | 
|---|
| 1094 | uint32_t total_codelength_codes; | 
|---|
| 1095 | for (total_codelength_codes = cHuffmanTotalSortedCodelengthCodes; total_codelength_codes > 0; total_codelength_codes--) | 
|---|
| 1096 | if (ct.get_code_sizes()[g_huffman_sorted_codelength_codes[total_codelength_codes - 1]]) | 
|---|
| 1097 | break; | 
|---|
| 1098 |  | 
|---|
| 1099 | assert(total_codelength_codes); | 
|---|
| 1100 |  | 
|---|
| 1101 | put_bits(total_codelength_codes, 5); | 
|---|
| 1102 | for (uint32_t i = 0; i < total_codelength_codes; i++) | 
|---|
| 1103 | put_bits(ct.get_code_sizes()[g_huffman_sorted_codelength_codes[i]], 3); | 
|---|
| 1104 |  | 
|---|
| 1105 | for (uint32_t i = 0; i < syms.size(); ++i) | 
|---|
| 1106 | { | 
|---|
| 1107 | const uint32_t l = syms[i] & 63, e = syms[i] >> 6; | 
|---|
| 1108 |  | 
|---|
| 1109 | put_code(l, ct); | 
|---|
| 1110 |  | 
|---|
| 1111 | if (l == cHuffmanSmallZeroRunCode) | 
|---|
| 1112 | put_bits(e, cHuffmanSmallZeroRunExtraBits); | 
|---|
| 1113 | else if (l == cHuffmanBigZeroRunCode) | 
|---|
| 1114 | put_bits(e, cHuffmanBigZeroRunExtraBits); | 
|---|
| 1115 | else if (l == cHuffmanSmallRepeatCode) | 
|---|
| 1116 | put_bits(e, cHuffmanSmallRepeatExtraBits); | 
|---|
| 1117 | else if (l == cHuffmanBigRepeatCode) | 
|---|
| 1118 | put_bits(e, cHuffmanBigRepeatExtraBits); | 
|---|
| 1119 | } | 
|---|
| 1120 |  | 
|---|
| 1121 | return (uint32_t)(m_total_bits - start_bits); | 
|---|
| 1122 | } | 
|---|
| 1123 |  | 
|---|
| 1124 | bool huffman_test(int rand_seed) | 
|---|
| 1125 | { | 
|---|
| 1126 | histogram h(19); | 
|---|
| 1127 |  | 
|---|
| 1128 | // Feed in a fibonacci sequence to force large codesizes | 
|---|
| 1129 | h[0] += 1; h[1] += 1; h[2] += 2; h[3] += 3; | 
|---|
| 1130 | h[4] += 5; h[5] += 8; h[6] += 13; h[7] += 21; | 
|---|
| 1131 | h[8] += 34; h[9] += 55; h[10] += 89; h[11] += 144; | 
|---|
| 1132 | h[12] += 233; h[13] += 377; h[14] += 610; h[15] += 987; | 
|---|
| 1133 | h[16] += 1597; h[17] += 2584; h[18] += 4181; | 
|---|
| 1134 |  | 
|---|
| 1135 | huffman_encoding_table etab; | 
|---|
| 1136 | etab.init(h, 16); | 
|---|
| 1137 |  | 
|---|
| 1138 | { | 
|---|
| 1139 | bitwise_coder c; | 
|---|
| 1140 | c.init(1024); | 
|---|
| 1141 |  | 
|---|
| 1142 | c.emit_huffman_table(etab); | 
|---|
| 1143 | for (int i = 0; i < 19; i++) | 
|---|
| 1144 | c.put_code(i, etab); | 
|---|
| 1145 |  | 
|---|
| 1146 | c.flush(); | 
|---|
| 1147 |  | 
|---|
| 1148 | basist::bitwise_decoder d; | 
|---|
| 1149 | d.init(&c.get_bytes()[0], static_cast<uint32_t>(c.get_bytes().size())); | 
|---|
| 1150 |  | 
|---|
| 1151 | basist::huffman_decoding_table dtab; | 
|---|
| 1152 | bool success = d.read_huffman_table(dtab); | 
|---|
| 1153 | if (!success) | 
|---|
| 1154 | { | 
|---|
| 1155 | assert(0); | 
|---|
| 1156 | printf( "Failure 5\n"); | 
|---|
| 1157 | return false; | 
|---|
| 1158 | } | 
|---|
| 1159 |  | 
|---|
| 1160 | for (uint32_t i = 0; i < 19; i++) | 
|---|
| 1161 | { | 
|---|
| 1162 | uint32_t s = d.decode_huffman(dtab); | 
|---|
| 1163 | if (s != i) | 
|---|
| 1164 | { | 
|---|
| 1165 | assert(0); | 
|---|
| 1166 | printf( "Failure 5\n"); | 
|---|
| 1167 | return false; | 
|---|
| 1168 | } | 
|---|
| 1169 | } | 
|---|
| 1170 | } | 
|---|
| 1171 |  | 
|---|
| 1172 | basisu::rand r; | 
|---|
| 1173 | r.seed(rand_seed); | 
|---|
| 1174 |  | 
|---|
| 1175 | for (int iter = 0; iter < 500000; iter++) | 
|---|
| 1176 | { | 
|---|
| 1177 | printf( "%u\n", iter); | 
|---|
| 1178 |  | 
|---|
| 1179 | uint32_t max_sym = r.irand(0, 8193); | 
|---|
| 1180 | uint32_t num_codes = r.irand(1, 10000); | 
|---|
| 1181 | uint_vec syms(num_codes); | 
|---|
| 1182 |  | 
|---|
| 1183 | for (uint32_t i = 0; i < num_codes; i++) | 
|---|
| 1184 | { | 
|---|
| 1185 | if (r.bit()) | 
|---|
| 1186 | syms[i] = r.irand(0, max_sym); | 
|---|
| 1187 | else | 
|---|
| 1188 | { | 
|---|
| 1189 | int s = (int)(r.gaussian((float)max_sym / 2, (float)maximum<int>(1, max_sym / 2)) + .5f); | 
|---|
| 1190 | s = basisu::clamp<int>(s, 0, max_sym); | 
|---|
| 1191 |  | 
|---|
| 1192 | syms[i] = s; | 
|---|
| 1193 | } | 
|---|
| 1194 |  | 
|---|
| 1195 | } | 
|---|
| 1196 |  | 
|---|
| 1197 | histogram h1(max_sym + 1); | 
|---|
| 1198 | for (uint32_t i = 0; i < num_codes; i++) | 
|---|
| 1199 | h1[syms[i]]++; | 
|---|
| 1200 |  | 
|---|
| 1201 | huffman_encoding_table etab2; | 
|---|
| 1202 | if (!etab2.init(h1, 16)) | 
|---|
| 1203 | { | 
|---|
| 1204 | assert(0); | 
|---|
| 1205 | printf( "Failed 0\n"); | 
|---|
| 1206 | return false; | 
|---|
| 1207 | } | 
|---|
| 1208 |  | 
|---|
| 1209 | bitwise_coder c; | 
|---|
| 1210 | c.init(1024); | 
|---|
| 1211 |  | 
|---|
| 1212 | c.emit_huffman_table(etab2); | 
|---|
| 1213 |  | 
|---|
| 1214 | for (uint32_t i = 0; i < num_codes; i++) | 
|---|
| 1215 | c.put_code(syms[i], etab2); | 
|---|
| 1216 |  | 
|---|
| 1217 | c.flush(); | 
|---|
| 1218 |  | 
|---|
| 1219 | basist::bitwise_decoder d; | 
|---|
| 1220 | d.init(&c.get_bytes()[0], (uint32_t)c.get_bytes().size()); | 
|---|
| 1221 |  | 
|---|
| 1222 | basist::huffman_decoding_table dtab; | 
|---|
| 1223 | bool success = d.read_huffman_table(dtab); | 
|---|
| 1224 | if (!success) | 
|---|
| 1225 | { | 
|---|
| 1226 | assert(0); | 
|---|
| 1227 | printf( "Failed 2\n"); | 
|---|
| 1228 | return false; | 
|---|
| 1229 | } | 
|---|
| 1230 |  | 
|---|
| 1231 | for (uint32_t i = 0; i < num_codes; i++) | 
|---|
| 1232 | { | 
|---|
| 1233 | uint32_t s = d.decode_huffman(dtab); | 
|---|
| 1234 | if (s != syms[i]) | 
|---|
| 1235 | { | 
|---|
| 1236 | assert(0); | 
|---|
| 1237 | printf( "Failed 4\n"); | 
|---|
| 1238 | return false; | 
|---|
| 1239 | } | 
|---|
| 1240 | } | 
|---|
| 1241 |  | 
|---|
| 1242 | } | 
|---|
| 1243 | return true; | 
|---|
| 1244 | } | 
|---|
| 1245 |  | 
|---|
| 1246 | void palette_index_reorderer::init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight) | 
|---|
| 1247 | { | 
|---|
| 1248 | assert((num_syms > 0) && (num_indices > 0)); | 
|---|
| 1249 | assert((dist_func_weight >= 0.0f) && (dist_func_weight <= 1.0f)); | 
|---|
| 1250 |  | 
|---|
| 1251 | clear(); | 
|---|
| 1252 |  | 
|---|
| 1253 | m_remap_table.resize(num_syms); | 
|---|
| 1254 | m_entries_picked.reserve(num_syms); | 
|---|
| 1255 | m_total_count_to_picked.resize(num_syms); | 
|---|
| 1256 |  | 
|---|
| 1257 | if (num_indices <= 1) | 
|---|
| 1258 | return; | 
|---|
| 1259 |  | 
|---|
| 1260 | prepare_hist(num_syms, num_indices, pIndices); | 
|---|
| 1261 | find_initial(num_syms); | 
|---|
| 1262 |  | 
|---|
| 1263 | while (m_entries_to_do.size()) | 
|---|
| 1264 | { | 
|---|
| 1265 | // Find the best entry to move into the picked list. | 
|---|
| 1266 | uint32_t best_entry; | 
|---|
| 1267 | double best_count; | 
|---|
| 1268 | find_next_entry(best_entry, best_count, pDist_func, pCtx, dist_func_weight); | 
|---|
| 1269 |  | 
|---|
| 1270 | // We now have chosen an entry to place in the picked list, now determine which side it goes on. | 
|---|
| 1271 | const uint32_t entry_to_move = m_entries_to_do[best_entry]; | 
|---|
| 1272 |  | 
|---|
| 1273 | float side = pick_side(num_syms, entry_to_move, pDist_func, pCtx, dist_func_weight); | 
|---|
| 1274 |  | 
|---|
| 1275 | // Put entry_to_move either on the "left" or "right" side of the picked entries | 
|---|
| 1276 | if (side <= 0) | 
|---|
| 1277 | m_entries_picked.push_back(entry_to_move); | 
|---|
| 1278 | else | 
|---|
| 1279 | m_entries_picked.insert(m_entries_picked.begin(), entry_to_move); | 
|---|
| 1280 |  | 
|---|
| 1281 | // Erase best_entry from the todo list | 
|---|
| 1282 | m_entries_to_do.erase(m_entries_to_do.begin() + best_entry); | 
|---|
| 1283 |  | 
|---|
| 1284 | // We've just moved best_entry to the picked list, so now we need to update m_total_count_to_picked[] to factor the additional count to best_entry | 
|---|
| 1285 | for (uint32_t i = 0; i < m_entries_to_do.size(); i++) | 
|---|
| 1286 | m_total_count_to_picked[m_entries_to_do[i]] += get_hist(m_entries_to_do[i], entry_to_move, num_syms); | 
|---|
| 1287 | } | 
|---|
| 1288 |  | 
|---|
| 1289 | for (uint32_t i = 0; i < num_syms; i++) | 
|---|
| 1290 | m_remap_table[m_entries_picked[i]] = i; | 
|---|
| 1291 | } | 
|---|
| 1292 |  | 
|---|
| 1293 | void palette_index_reorderer::prepare_hist(uint32_t num_syms, uint32_t num_indices, const uint32_t *pIndices) | 
|---|
| 1294 | { | 
|---|
| 1295 | m_hist.resize(0); | 
|---|
| 1296 | m_hist.resize(num_syms * num_syms); | 
|---|
| 1297 |  | 
|---|
| 1298 | for (uint32_t i = 0; i < num_indices; i++) | 
|---|
| 1299 | { | 
|---|
| 1300 | const uint32_t idx = pIndices[i]; | 
|---|
| 1301 | inc_hist(idx, (i < (num_indices - 1)) ? pIndices[i + 1] : -1, num_syms); | 
|---|
| 1302 | inc_hist(idx, (i > 0) ? pIndices[i - 1] : -1, num_syms); | 
|---|
| 1303 | } | 
|---|
| 1304 | } | 
|---|
| 1305 |  | 
|---|
| 1306 | void palette_index_reorderer::find_initial(uint32_t num_syms) | 
|---|
| 1307 | { | 
|---|
| 1308 | uint32_t max_count = 0, max_index = 0; | 
|---|
| 1309 | for (uint32_t i = 0; i < num_syms * num_syms; i++) | 
|---|
| 1310 | if (m_hist[i] > max_count) | 
|---|
| 1311 | max_count = m_hist[i], max_index = i; | 
|---|
| 1312 |  | 
|---|
| 1313 | uint32_t a = max_index / num_syms, b = max_index % num_syms; | 
|---|
| 1314 |  | 
|---|
| 1315 | m_entries_picked.push_back(a); | 
|---|
| 1316 | m_entries_picked.push_back(b); | 
|---|
| 1317 |  | 
|---|
| 1318 | for (uint32_t i = 0; i < num_syms; i++) | 
|---|
| 1319 | if ((i != b) && (i != a)) | 
|---|
| 1320 | m_entries_to_do.push_back(i); | 
|---|
| 1321 |  | 
|---|
| 1322 | for (uint32_t i = 0; i < m_entries_to_do.size(); i++) | 
|---|
| 1323 | for (uint32_t j = 0; j < m_entries_picked.size(); j++) | 
|---|
| 1324 | m_total_count_to_picked[m_entries_to_do[i]] += get_hist(m_entries_to_do[i], m_entries_picked[j], num_syms); | 
|---|
| 1325 | } | 
|---|
| 1326 |  | 
|---|
| 1327 | void palette_index_reorderer::find_next_entry(uint32_t &best_entry, double &best_count, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight) | 
|---|
| 1328 | { | 
|---|
| 1329 | best_entry = 0; | 
|---|
| 1330 | best_count = 0; | 
|---|
| 1331 |  | 
|---|
| 1332 | for (uint32_t i = 0; i < m_entries_to_do.size(); i++) | 
|---|
| 1333 | { | 
|---|
| 1334 | const uint32_t u = m_entries_to_do[i]; | 
|---|
| 1335 | double total_count = m_total_count_to_picked[u]; | 
|---|
| 1336 |  | 
|---|
| 1337 | if (pDist_func) | 
|---|
| 1338 | { | 
|---|
| 1339 | float w = maximum<float>((*pDist_func)(u, m_entries_picked.front(), pCtx), (*pDist_func)(u, m_entries_picked.back(), pCtx)); | 
|---|
| 1340 | assert((w >= 0.0f) && (w <= 1.0f)); | 
|---|
| 1341 | total_count = (total_count + 1.0f) * lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, w); | 
|---|
| 1342 | } | 
|---|
| 1343 |  | 
|---|
| 1344 | if (total_count <= best_count) | 
|---|
| 1345 | continue; | 
|---|
| 1346 |  | 
|---|
| 1347 | best_entry = i; | 
|---|
| 1348 | best_count = total_count; | 
|---|
| 1349 | } | 
|---|
| 1350 | } | 
|---|
| 1351 |  | 
|---|
| 1352 | float palette_index_reorderer::pick_side(uint32_t num_syms, uint32_t entry_to_move, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight) | 
|---|
| 1353 | { | 
|---|
| 1354 | float which_side = 0; | 
|---|
| 1355 |  | 
|---|
| 1356 | int l_count = 0, r_count = 0; | 
|---|
| 1357 | for (uint32_t j = 0; j < m_entries_picked.size(); j++) | 
|---|
| 1358 | { | 
|---|
| 1359 | const int count = get_hist(entry_to_move, m_entries_picked[j], num_syms), r = ((int)m_entries_picked.size() + 1 - 2 * (j + 1)); | 
|---|
| 1360 | which_side += static_cast<float>(r * count); | 
|---|
| 1361 | if (r >= 0) | 
|---|
| 1362 | l_count += r * count; | 
|---|
| 1363 | else | 
|---|
| 1364 | r_count += -r * count; | 
|---|
| 1365 | } | 
|---|
| 1366 |  | 
|---|
| 1367 | if (pDist_func) | 
|---|
| 1368 | { | 
|---|
| 1369 | float w_left = lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, (*pDist_func)(entry_to_move, m_entries_picked.front(), pCtx)); | 
|---|
| 1370 | float w_right = lerp(1.0f - dist_func_weight, 1.0f + dist_func_weight, (*pDist_func)(entry_to_move, m_entries_picked.back(), pCtx)); | 
|---|
| 1371 | which_side = w_left * l_count - w_right * r_count; | 
|---|
| 1372 | } | 
|---|
| 1373 | return which_side; | 
|---|
| 1374 | } | 
|---|
| 1375 |  | 
|---|
| 1376 | void image_metrics::calc(const image &a, const image &b, uint32_t first_chan, uint32_t total_chans, bool avg_comp_error, bool use_601_luma) | 
|---|
| 1377 | { | 
|---|
| 1378 | assert((first_chan < 4U) && (first_chan + total_chans <= 4U)); | 
|---|
| 1379 |  | 
|---|
| 1380 | const uint32_t width = basisu::minimum(a.get_width(), b.get_width()); | 
|---|
| 1381 | const uint32_t height = basisu::minimum(a.get_height(), b.get_height()); | 
|---|
| 1382 |  | 
|---|
| 1383 | double hist[256]; | 
|---|
| 1384 | clear_obj(hist); | 
|---|
| 1385 |  | 
|---|
| 1386 | for (uint32_t y = 0; y < height; y++) | 
|---|
| 1387 | { | 
|---|
| 1388 | for (uint32_t x = 0; x < width; x++) | 
|---|
| 1389 | { | 
|---|
| 1390 | const color_rgba &ca = a(x, y), &cb = b(x, y); | 
|---|
| 1391 |  | 
|---|
| 1392 | if (total_chans) | 
|---|
| 1393 | { | 
|---|
| 1394 | for (uint32_t c = 0; c < total_chans; c++) | 
|---|
| 1395 | hist[iabs(ca[first_chan + c] - cb[first_chan + c])]++; | 
|---|
| 1396 | } | 
|---|
| 1397 | else | 
|---|
| 1398 | { | 
|---|
| 1399 | if (use_601_luma) | 
|---|
| 1400 | hist[iabs(ca.get_601_luma() - cb.get_601_luma())]++; | 
|---|
| 1401 | else | 
|---|
| 1402 | hist[iabs(ca.get_709_luma() - cb.get_709_luma())]++; | 
|---|
| 1403 | } | 
|---|
| 1404 | } | 
|---|
| 1405 | } | 
|---|
| 1406 |  | 
|---|
| 1407 | m_max = 0; | 
|---|
| 1408 | double sum = 0.0f, sum2 = 0.0f; | 
|---|
| 1409 | for (uint32_t i = 0; i < 256; i++) | 
|---|
| 1410 | { | 
|---|
| 1411 | if (hist[i]) | 
|---|
| 1412 | { | 
|---|
| 1413 | m_max = basisu::maximum<float>(m_max, (float)i); | 
|---|
| 1414 | double v = i * hist[i]; | 
|---|
| 1415 | sum += v; | 
|---|
| 1416 | sum2 += i * v; | 
|---|
| 1417 | } | 
|---|
| 1418 | } | 
|---|
| 1419 |  | 
|---|
| 1420 | double total_values = (double)width * (double)height; | 
|---|
| 1421 | if (avg_comp_error) | 
|---|
| 1422 | total_values *= (double)clamp<uint32_t>(total_chans, 1, 4); | 
|---|
| 1423 |  | 
|---|
| 1424 | m_mean = (float)clamp<double>(sum / total_values, 0.0f, 255.0); | 
|---|
| 1425 | m_mean_squared = (float)clamp<double>(sum2 / total_values, 0.0f, 255.0f * 255.0f); | 
|---|
| 1426 | m_rms = (float)sqrt(m_mean_squared); | 
|---|
| 1427 | m_psnr = m_rms ? (float)clamp<double>(log10(255.0 / m_rms) * 20.0f, 0.0f, 100.0f) : 100.0f; | 
|---|
| 1428 | } | 
|---|
| 1429 |  | 
|---|
| 1430 | void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed) | 
|---|
| 1431 | { | 
|---|
| 1432 | rand r(seed); | 
|---|
| 1433 |  | 
|---|
| 1434 | uint8_t *pDst = static_cast<uint8_t *>(pBuf); | 
|---|
| 1435 |  | 
|---|
| 1436 | while (size >= sizeof(uint32_t)) | 
|---|
| 1437 | { | 
|---|
| 1438 | *(uint32_t *)pDst = r.urand32(); | 
|---|
| 1439 | pDst += sizeof(uint32_t); | 
|---|
| 1440 | size -= sizeof(uint32_t); | 
|---|
| 1441 | } | 
|---|
| 1442 |  | 
|---|
| 1443 | while (size) | 
|---|
| 1444 | { | 
|---|
| 1445 | *pDst++ = r.byte(); | 
|---|
| 1446 | size--; | 
|---|
| 1447 | } | 
|---|
| 1448 | } | 
|---|
| 1449 |  | 
|---|
// Hashes len bytes at pBuf to a 32-bit value.
//
// The buffer is consumed four bytes at a time as two 16-bit words read in
// native byte order, then the 1-3 trailing bytes are mixed in, followed by a
// fixed sequence of shift/xor/add finalization steps.
// Returns 0 when pBuf is null or len is 0.
uint32_t hash_hsieh(const uint8_t *pBuf, size_t len)
{
	if ((pBuf == nullptr) || (len == 0))
		return 0;

	// The hash is seeded with the length.
	uint32_t h = static_cast<uint32_t>(len);

	const uint32_t tail_bytes = static_cast<uint32_t>(len & 3);

	// Main loop: one 32-bit block (two 16-bit words) per iteration.
	for (size_t blocks = len >> 2; blocks != 0; --blocks)
	{
		const uint16_t *pWords = reinterpret_cast<const uint16_t *>(pBuf);

		h += pWords[0];

		const uint32_t t = (pWords[1] << 11) ^ h;
		h = (h << 16) ^ t;

		pBuf += sizeof(uint32_t);

		h += h >> 11;
	}

	// Mix in the remaining bytes; single bytes are treated as signed.
	if (tail_bytes == 3)
	{
		h += *reinterpret_cast<const uint16_t *>(pBuf);
		h ^= h << 16;
		h ^= (static_cast<signed char>(pBuf[sizeof(uint16_t)])) << 18;
		h += h >> 11;
	}
	else if (tail_bytes == 2)
	{
		h += *reinterpret_cast<const uint16_t *>(pBuf);
		h ^= h << 11;
		h += h >> 17;
	}
	else if (tail_bytes == 1)
	{
		h += *reinterpret_cast<const signed char *>(pBuf);
		h ^= h << 10;
		h += h >> 1;
	}

	// Final avalanche.
	h ^= h << 3;
	h += h >> 5;
	h ^= h << 4;
	h += h >> 17;
	h ^= h << 25;
	h += h >> 6;

	return h;
}
|---|
| 1505 |  | 
|---|
| 1506 | job_pool::job_pool(uint32_t num_threads) : | 
|---|
| 1507 | m_num_active_jobs(0), | 
|---|
| 1508 | m_kill_flag(false) | 
|---|
| 1509 | { | 
|---|
| 1510 | assert(num_threads >= 1U); | 
|---|
| 1511 |  | 
|---|
| 1512 | debug_printf( "job_pool::job_pool: %u total threads\n", num_threads); | 
|---|
| 1513 |  | 
|---|
| 1514 | if (num_threads > 1) | 
|---|
| 1515 | { | 
|---|
| 1516 | m_threads.resize(num_threads - 1); | 
|---|
| 1517 |  | 
|---|
| 1518 | for (int i = 0; i < ((int)num_threads - 1); i++) | 
|---|
| 1519 | m_threads[i] = std::thread([this, i] { job_thread(i); }); | 
|---|
| 1520 | } | 
|---|
| 1521 | } | 
|---|
| 1522 |  | 
|---|
| 1523 | job_pool::~job_pool() | 
|---|
| 1524 | { | 
|---|
| 1525 | debug_printf( "job_pool::~job_pool\n"); | 
|---|
| 1526 |  | 
|---|
| 1527 | // Notify all workers that they need to die right now. | 
|---|
| 1528 | m_kill_flag = true; | 
|---|
| 1529 |  | 
|---|
| 1530 | m_has_work.notify_all(); | 
|---|
| 1531 |  | 
|---|
| 1532 | // Wait for all workers to die. | 
|---|
| 1533 | for (uint32_t i = 0; i < m_threads.size(); i++) | 
|---|
| 1534 | m_threads[i].join(); | 
|---|
| 1535 | } | 
|---|
| 1536 |  | 
|---|
| 1537 | void job_pool::add_job(const std::function<void()>& job) | 
|---|
| 1538 | { | 
|---|
| 1539 | std::unique_lock<std::mutex> lock(m_mutex); | 
|---|
| 1540 |  | 
|---|
| 1541 | m_queue.emplace_back(job); | 
|---|
| 1542 |  | 
|---|
| 1543 | const size_t queue_size = m_queue.size(); | 
|---|
| 1544 |  | 
|---|
| 1545 | lock.unlock(); | 
|---|
| 1546 |  | 
|---|
| 1547 | if (queue_size > 1) | 
|---|
| 1548 | m_has_work.notify_one(); | 
|---|
| 1549 | } | 
|---|
| 1550 |  | 
|---|
| 1551 | void job_pool::add_job(std::function<void()>&& job) | 
|---|
| 1552 | { | 
|---|
| 1553 | std::unique_lock<std::mutex> lock(m_mutex); | 
|---|
| 1554 |  | 
|---|
| 1555 | m_queue.emplace_back(std::move(job)); | 
|---|
| 1556 |  | 
|---|
| 1557 | const size_t queue_size = m_queue.size(); | 
|---|
| 1558 |  | 
|---|
| 1559 | lock.unlock(); | 
|---|
| 1560 |  | 
|---|
| 1561 | if (queue_size > 1) | 
|---|
| 1562 | { | 
|---|
| 1563 | m_has_work.notify_one(); | 
|---|
| 1564 | } | 
|---|
| 1565 | } | 
|---|
| 1566 |  | 
|---|
| 1567 | void job_pool::wait_for_all() | 
|---|
| 1568 | { | 
|---|
| 1569 | std::unique_lock<std::mutex> lock(m_mutex); | 
|---|
| 1570 |  | 
|---|
| 1571 | // Drain the job queue on the calling thread. | 
|---|
| 1572 | while (!m_queue.empty()) | 
|---|
| 1573 | { | 
|---|
| 1574 | std::function<void()> job(m_queue.back()); | 
|---|
| 1575 | m_queue.pop_back(); | 
|---|
| 1576 |  | 
|---|
| 1577 | lock.unlock(); | 
|---|
| 1578 |  | 
|---|
| 1579 | job(); | 
|---|
| 1580 |  | 
|---|
| 1581 | lock.lock(); | 
|---|
| 1582 | } | 
|---|
| 1583 |  | 
|---|
| 1584 | // The queue is empty, now wait for all active jobs to finish up. | 
|---|
| 1585 | m_no_more_jobs.wait(lock, [this]{ return !m_num_active_jobs; } ); | 
|---|
| 1586 | } | 
|---|
| 1587 |  | 
|---|
| 1588 | void job_pool::job_thread(uint32_t index) | 
|---|
| 1589 | { | 
|---|
| 1590 | BASISU_NOTE_UNUSED(index); | 
|---|
| 1591 | //debug_printf("job_pool::job_thread: starting %u\n", index); | 
|---|
| 1592 |  | 
|---|
| 1593 | while (true) | 
|---|
| 1594 | { | 
|---|
| 1595 | std::unique_lock<std::mutex> lock(m_mutex); | 
|---|
| 1596 |  | 
|---|
| 1597 | // Wait for any jobs to be issued. | 
|---|
| 1598 | m_has_work.wait(lock, [this] { return m_kill_flag || m_queue.size(); } ); | 
|---|
| 1599 |  | 
|---|
| 1600 | // Check to see if we're supposed to exit. | 
|---|
| 1601 | if (m_kill_flag) | 
|---|
| 1602 | break; | 
|---|
| 1603 |  | 
|---|
| 1604 | // Get the job and execute it. | 
|---|
| 1605 | std::function<void()> job(m_queue.back()); | 
|---|
| 1606 | m_queue.pop_back(); | 
|---|
| 1607 |  | 
|---|
| 1608 | ++m_num_active_jobs; | 
|---|
| 1609 |  | 
|---|
| 1610 | lock.unlock(); | 
|---|
| 1611 |  | 
|---|
| 1612 | job(); | 
|---|
| 1613 |  | 
|---|
| 1614 | lock.lock(); | 
|---|
| 1615 |  | 
|---|
| 1616 | --m_num_active_jobs; | 
|---|
| 1617 |  | 
|---|
| 1618 | // Now check if there are no more jobs remaining. | 
|---|
| 1619 | const bool all_done = m_queue.empty() && !m_num_active_jobs; | 
|---|
| 1620 |  | 
|---|
| 1621 | lock.unlock(); | 
|---|
| 1622 |  | 
|---|
| 1623 | if (all_done) | 
|---|
| 1624 | m_no_more_jobs.notify_all(); | 
|---|
| 1625 | } | 
|---|
| 1626 |  | 
|---|
| 1627 | //debug_printf("job_pool::job_thread: exiting\n"); | 
|---|
| 1628 | } | 
|---|
| 1629 |  | 
|---|
| 1630 | // .TGA image loading | 
|---|
| 1631 | #pragma pack(push) | 
|---|
| 1632 | #pragma pack(1) | 
|---|
| 1633 | struct | 
|---|
| 1634 | { | 
|---|
| 1635 | uint8_t			; | 
|---|
| 1636 | uint8_t			; | 
|---|
| 1637 | uint8_t			; | 
|---|
| 1638 | packed_uint<2>	; | 
|---|
| 1639 | packed_uint<2> ; | 
|---|
| 1640 | uint8_t			; | 
|---|
| 1641 | packed_uint<2> ; | 
|---|
| 1642 | packed_uint<2> ; | 
|---|
| 1643 | packed_uint<2> ; | 
|---|
| 1644 | packed_uint<2> ; | 
|---|
| 1645 | uint8_t			; | 
|---|
| 1646 | uint8_t			; | 
|---|
| 1647 | }; | 
|---|
| 1648 | #pragma pack(pop) | 
|---|
| 1649 |  | 
|---|
// Largest width or height (in pixels) that read_tga() accepts; larger images are rejected.
const uint32_t MAX_TGA_IMAGE_SIZE = 16 * 1024;
|---|
| 1651 |  | 
|---|
// Image types understood by read_tga(). The values equal the header's type field
// once the offset of 8 used by the run-length encoded variants has been subtracted.
enum tga_image_type
{
	cITPalettized	= 1,	// 8-bit indices into the color map
	cITRGB			= 2,	// direct color pixels
	cITGrayscale	= 3		// luminance (optionally with alpha) pixels
};
|---|
| 1658 |  | 
|---|
| 1659 | uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans) | 
|---|
| 1660 | { | 
|---|
| 1661 | width = 0; | 
|---|
| 1662 | height = 0; | 
|---|
| 1663 | n_chans = 0; | 
|---|
| 1664 |  | 
|---|
| 1665 | if (buf_size <= sizeof(tga_header)) | 
|---|
| 1666 | return nullptr; | 
|---|
| 1667 |  | 
|---|
| 1668 | const tga_header &hdr = *reinterpret_cast<const tga_header *>(pBuf); | 
|---|
| 1669 |  | 
|---|
| 1670 | if ((!hdr.m_width) || (!hdr.m_height) || (hdr.m_width > MAX_TGA_IMAGE_SIZE) || (hdr.m_height > MAX_TGA_IMAGE_SIZE)) | 
|---|
| 1671 | return nullptr; | 
|---|
| 1672 |  | 
|---|
| 1673 | if (hdr.m_desc >> 6) | 
|---|
| 1674 | return nullptr; | 
|---|
| 1675 |  | 
|---|
| 1676 | // Simple validation | 
|---|
| 1677 | if ((hdr.m_cmap != 0) && (hdr.m_cmap != 1)) | 
|---|
| 1678 | return nullptr; | 
|---|
| 1679 |  | 
|---|
| 1680 | if (hdr.m_cmap) | 
|---|
| 1681 | { | 
|---|
| 1682 | if ((hdr.m_cmap_bpp == 0) || (hdr.m_cmap_bpp > 32)) | 
|---|
| 1683 | return nullptr; | 
|---|
| 1684 |  | 
|---|
| 1685 | // Nobody implements CMapFirst correctly, so we're not supporting it. Never seen it used, either. | 
|---|
| 1686 | if (hdr.m_cmap_first != 0) | 
|---|
| 1687 | return nullptr; | 
|---|
| 1688 | } | 
|---|
| 1689 |  | 
|---|
| 1690 | const bool x_flipped = (hdr.m_desc & 0x10) != 0; | 
|---|
| 1691 | const bool y_flipped = (hdr.m_desc & 0x20) == 0; | 
|---|
| 1692 |  | 
|---|
| 1693 | bool rle_flag = false; | 
|---|
| 1694 | int file_image_type = hdr.m_type; | 
|---|
| 1695 | if (file_image_type > 8) | 
|---|
| 1696 | { | 
|---|
| 1697 | file_image_type -= 8; | 
|---|
| 1698 | rle_flag = true; | 
|---|
| 1699 | } | 
|---|
| 1700 |  | 
|---|
| 1701 | const tga_image_type image_type = static_cast<tga_image_type>(file_image_type); | 
|---|
| 1702 |  | 
|---|
| 1703 | switch (file_image_type) | 
|---|
| 1704 | { | 
|---|
| 1705 | case cITRGB: | 
|---|
| 1706 | if (hdr.m_depth == 8) | 
|---|
| 1707 | return nullptr; | 
|---|
| 1708 | break; | 
|---|
| 1709 | case cITPalettized: | 
|---|
| 1710 | if ((hdr.m_depth != 8) || (hdr.m_cmap != 1) || (hdr.m_cmap_len == 0)) | 
|---|
| 1711 | return nullptr; | 
|---|
| 1712 | break; | 
|---|
| 1713 | case cITGrayscale: | 
|---|
| 1714 | if ((hdr.m_cmap != 0) || (hdr.m_cmap_len != 0)) | 
|---|
| 1715 | return nullptr; | 
|---|
| 1716 | if ((hdr.m_depth != 8) && (hdr.m_depth != 16)) | 
|---|
| 1717 | return nullptr; | 
|---|
| 1718 | break; | 
|---|
| 1719 | default: | 
|---|
| 1720 | return nullptr; | 
|---|
| 1721 | } | 
|---|
| 1722 |  | 
|---|
| 1723 | uint32_t tga_bytes_per_pixel = 0; | 
|---|
| 1724 |  | 
|---|
| 1725 | switch (hdr.m_depth) | 
|---|
| 1726 | { | 
|---|
| 1727 | case 32: | 
|---|
| 1728 | tga_bytes_per_pixel = 4; | 
|---|
| 1729 | n_chans = 4; | 
|---|
| 1730 | break; | 
|---|
| 1731 | case 24: | 
|---|
| 1732 | tga_bytes_per_pixel = 3; | 
|---|
| 1733 | n_chans = 3; | 
|---|
| 1734 | break; | 
|---|
| 1735 | case 16: | 
|---|
| 1736 | case 15: | 
|---|
| 1737 | tga_bytes_per_pixel = 2; | 
|---|
| 1738 | // For compatibility with stb_image_write.h | 
|---|
| 1739 | n_chans = ((file_image_type == cITGrayscale) && (hdr.m_depth == 16)) ? 4 : 3; | 
|---|
| 1740 | break; | 
|---|
| 1741 | case 8: | 
|---|
| 1742 | tga_bytes_per_pixel = 1; | 
|---|
| 1743 | // For palettized RGBA support, which both FreeImage and stb_image support. | 
|---|
| 1744 | n_chans = ((file_image_type == cITPalettized) && (hdr.m_cmap_bpp == 32)) ? 4 : 3; | 
|---|
| 1745 | break; | 
|---|
| 1746 | default: | 
|---|
| 1747 | return nullptr; | 
|---|
| 1748 | } | 
|---|
| 1749 |  | 
|---|
| 1750 | //const uint32_t bytes_per_line = hdr.m_width * tga_bytes_per_pixel; | 
|---|
| 1751 |  | 
|---|
| 1752 | const uint8_t *pSrc = pBuf + sizeof(tga_header); | 
|---|
| 1753 | uint32_t bytes_remaining = buf_size - sizeof(tga_header); | 
|---|
| 1754 |  | 
|---|
| 1755 | if (hdr.m_id_len) | 
|---|
| 1756 | { | 
|---|
| 1757 | if (bytes_remaining < hdr.m_id_len) | 
|---|
| 1758 | return nullptr; | 
|---|
| 1759 | pSrc += hdr.m_id_len; | 
|---|
| 1760 | bytes_remaining += hdr.m_id_len; | 
|---|
| 1761 | } | 
|---|
| 1762 |  | 
|---|
| 1763 | color_rgba pal[256]; | 
|---|
| 1764 | for (uint32_t i = 0; i < 256; i++) | 
|---|
| 1765 | pal[i].set(0, 0, 0, 255); | 
|---|
| 1766 |  | 
|---|
| 1767 | if ((hdr.m_cmap) && (hdr.m_cmap_len)) | 
|---|
| 1768 | { | 
|---|
| 1769 | if (image_type == cITPalettized) | 
|---|
| 1770 | { | 
|---|
| 1771 | // Note I cannot find any files using 32bpp palettes in the wild (never seen any in ~30 years). | 
|---|
| 1772 | if ( ((hdr.m_cmap_bpp != 32) && (hdr.m_cmap_bpp != 24) && (hdr.m_cmap_bpp != 15) && (hdr.m_cmap_bpp != 16)) || (hdr.m_cmap_len > 256) ) | 
|---|
| 1773 | return nullptr; | 
|---|
| 1774 |  | 
|---|
| 1775 | if (hdr.m_cmap_bpp == 32) | 
|---|
| 1776 | { | 
|---|
| 1777 | const uint32_t pal_size = hdr.m_cmap_len * 4; | 
|---|
| 1778 | if (bytes_remaining < pal_size) | 
|---|
| 1779 | return nullptr; | 
|---|
| 1780 |  | 
|---|
| 1781 | for (uint32_t i = 0; i < hdr.m_cmap_len; i++) | 
|---|
| 1782 | { | 
|---|
| 1783 | pal[i].r = pSrc[i * 4 + 2]; | 
|---|
| 1784 | pal[i].g = pSrc[i * 4 + 1]; | 
|---|
| 1785 | pal[i].b = pSrc[i * 4 + 0]; | 
|---|
| 1786 | pal[i].a = pSrc[i * 4 + 3]; | 
|---|
| 1787 | } | 
|---|
| 1788 |  | 
|---|
| 1789 | bytes_remaining -= pal_size; | 
|---|
| 1790 | pSrc += pal_size; | 
|---|
| 1791 | } | 
|---|
| 1792 | else if (hdr.m_cmap_bpp == 24) | 
|---|
| 1793 | { | 
|---|
| 1794 | const uint32_t pal_size = hdr.m_cmap_len * 3; | 
|---|
| 1795 | if (bytes_remaining < pal_size) | 
|---|
| 1796 | return nullptr; | 
|---|
| 1797 |  | 
|---|
| 1798 | for (uint32_t i = 0; i < hdr.m_cmap_len; i++) | 
|---|
| 1799 | { | 
|---|
| 1800 | pal[i].r = pSrc[i * 3 + 2]; | 
|---|
| 1801 | pal[i].g = pSrc[i * 3 + 1]; | 
|---|
| 1802 | pal[i].b = pSrc[i * 3 + 0]; | 
|---|
| 1803 | pal[i].a = 255; | 
|---|
| 1804 | } | 
|---|
| 1805 |  | 
|---|
| 1806 | bytes_remaining -= pal_size; | 
|---|
| 1807 | pSrc += pal_size; | 
|---|
| 1808 | } | 
|---|
| 1809 | else | 
|---|
| 1810 | { | 
|---|
| 1811 | const uint32_t pal_size = hdr.m_cmap_len * 2; | 
|---|
| 1812 | if (bytes_remaining < pal_size) | 
|---|
| 1813 | return nullptr; | 
|---|
| 1814 |  | 
|---|
| 1815 | for (uint32_t i = 0; i < hdr.m_cmap_len; i++) | 
|---|
| 1816 | { | 
|---|
| 1817 | const uint32_t v = pSrc[i * 2 + 0] | (pSrc[i * 2 + 1] << 8); | 
|---|
| 1818 |  | 
|---|
| 1819 | pal[i].r = (((v >> 10) & 31) * 255 + 15) / 31; | 
|---|
| 1820 | pal[i].g = (((v >> 5) & 31) * 255 + 15) / 31; | 
|---|
| 1821 | pal[i].b = ((v & 31) * 255 + 15) / 31; | 
|---|
| 1822 | pal[i].a = 255; | 
|---|
| 1823 | } | 
|---|
| 1824 |  | 
|---|
| 1825 | bytes_remaining -= pal_size; | 
|---|
| 1826 | pSrc += pal_size; | 
|---|
| 1827 | } | 
|---|
| 1828 | } | 
|---|
| 1829 | else | 
|---|
| 1830 | { | 
|---|
| 1831 | const uint32_t bytes_to_skip = (hdr.m_cmap_bpp >> 3) * hdr.m_cmap_len; | 
|---|
| 1832 | if (bytes_remaining < bytes_to_skip) | 
|---|
| 1833 | return nullptr; | 
|---|
| 1834 | pSrc += bytes_to_skip; | 
|---|
| 1835 | bytes_remaining += bytes_to_skip; | 
|---|
| 1836 | } | 
|---|
| 1837 | } | 
|---|
| 1838 |  | 
|---|
| 1839 | width = hdr.m_width; | 
|---|
| 1840 | height = hdr.m_height; | 
|---|
| 1841 |  | 
|---|
| 1842 | const uint32_t source_pitch = width * tga_bytes_per_pixel; | 
|---|
| 1843 | const uint32_t dest_pitch = width * n_chans; | 
|---|
| 1844 |  | 
|---|
| 1845 | uint8_t *pImage = (uint8_t *)malloc(dest_pitch * height); | 
|---|
| 1846 | if (!pImage) | 
|---|
| 1847 | return nullptr; | 
|---|
| 1848 |  | 
|---|
| 1849 | std::vector<uint8_t> input_line_buf; | 
|---|
| 1850 | if (rle_flag) | 
|---|
| 1851 | input_line_buf.resize(source_pitch); | 
|---|
| 1852 |  | 
|---|
| 1853 | int run_type = 0, run_remaining = 0; | 
|---|
| 1854 | uint8_t run_pixel[4]; | 
|---|
| 1855 | memset(run_pixel, 0, sizeof(run_pixel)); | 
|---|
| 1856 |  | 
|---|
| 1857 | for (int y = 0; y < height; y++) | 
|---|
| 1858 | { | 
|---|
| 1859 | const uint8_t *pLine_data; | 
|---|
| 1860 |  | 
|---|
| 1861 | if (rle_flag) | 
|---|
| 1862 | { | 
|---|
| 1863 | int pixels_remaining = width; | 
|---|
| 1864 | uint8_t *pDst = &input_line_buf[0]; | 
|---|
| 1865 |  | 
|---|
| 1866 | do | 
|---|
| 1867 | { | 
|---|
| 1868 | if (!run_remaining) | 
|---|
| 1869 | { | 
|---|
| 1870 | if (bytes_remaining < 1) | 
|---|
| 1871 | { | 
|---|
| 1872 | free(pImage); | 
|---|
| 1873 | return nullptr; | 
|---|
| 1874 | } | 
|---|
| 1875 |  | 
|---|
| 1876 | int v = *pSrc++; | 
|---|
| 1877 | bytes_remaining--; | 
|---|
| 1878 |  | 
|---|
| 1879 | run_type = v & 0x80; | 
|---|
| 1880 | run_remaining = (v & 0x7F) + 1; | 
|---|
| 1881 |  | 
|---|
| 1882 | if (run_type) | 
|---|
| 1883 | { | 
|---|
| 1884 | if (bytes_remaining < tga_bytes_per_pixel) | 
|---|
| 1885 | { | 
|---|
| 1886 | free(pImage); | 
|---|
| 1887 | return nullptr; | 
|---|
| 1888 | } | 
|---|
| 1889 |  | 
|---|
| 1890 | memcpy(run_pixel, pSrc, tga_bytes_per_pixel); | 
|---|
| 1891 | pSrc += tga_bytes_per_pixel; | 
|---|
| 1892 | bytes_remaining -= tga_bytes_per_pixel; | 
|---|
| 1893 | } | 
|---|
| 1894 | } | 
|---|
| 1895 |  | 
|---|
| 1896 | const uint32_t n = basisu::minimum<uint32_t>(pixels_remaining, run_remaining); | 
|---|
| 1897 | pixels_remaining -= n; | 
|---|
| 1898 | run_remaining -= n; | 
|---|
| 1899 |  | 
|---|
| 1900 | if (run_type) | 
|---|
| 1901 | { | 
|---|
| 1902 | for (uint32_t i = 0; i < n; i++) | 
|---|
| 1903 | for (uint32_t j = 0; j < tga_bytes_per_pixel; j++) | 
|---|
| 1904 | *pDst++ = run_pixel[j]; | 
|---|
| 1905 | } | 
|---|
| 1906 | else | 
|---|
| 1907 | { | 
|---|
| 1908 | const uint32_t bytes_wanted = n * tga_bytes_per_pixel; | 
|---|
| 1909 |  | 
|---|
| 1910 | if (bytes_remaining < bytes_wanted) | 
|---|
| 1911 | { | 
|---|
| 1912 | free(pImage); | 
|---|
| 1913 | return nullptr; | 
|---|
| 1914 | } | 
|---|
| 1915 |  | 
|---|
| 1916 | memcpy(pDst, pSrc, bytes_wanted); | 
|---|
| 1917 | pDst += bytes_wanted; | 
|---|
| 1918 |  | 
|---|
| 1919 | pSrc += bytes_wanted; | 
|---|
| 1920 | bytes_remaining -= bytes_wanted; | 
|---|
| 1921 | } | 
|---|
| 1922 |  | 
|---|
| 1923 | } while (pixels_remaining); | 
|---|
| 1924 |  | 
|---|
| 1925 | assert((pDst - &input_line_buf[0]) == width * tga_bytes_per_pixel); | 
|---|
| 1926 |  | 
|---|
| 1927 | pLine_data = &input_line_buf[0]; | 
|---|
| 1928 | } | 
|---|
| 1929 | else | 
|---|
| 1930 | { | 
|---|
| 1931 | if (bytes_remaining < source_pitch) | 
|---|
| 1932 | { | 
|---|
| 1933 | free(pImage); | 
|---|
| 1934 | return nullptr; | 
|---|
| 1935 | } | 
|---|
| 1936 |  | 
|---|
| 1937 | pLine_data = pSrc; | 
|---|
| 1938 | bytes_remaining -= source_pitch; | 
|---|
| 1939 | pSrc += source_pitch; | 
|---|
| 1940 | } | 
|---|
| 1941 |  | 
|---|
| 1942 | // Convert to 24bpp RGB or 32bpp RGBA. | 
|---|
| 1943 | uint8_t *pDst = pImage + (y_flipped ? (height - 1 - y) : y) * dest_pitch + (x_flipped ? (width - 1) * n_chans : 0); | 
|---|
| 1944 | const int dst_stride = x_flipped ? -((int)n_chans) : n_chans; | 
|---|
| 1945 |  | 
|---|
| 1946 | switch (hdr.m_depth) | 
|---|
| 1947 | { | 
|---|
| 1948 | case 32: | 
|---|
| 1949 | assert(tga_bytes_per_pixel == 4 && n_chans == 4); | 
|---|
| 1950 | for (int i = 0; i < width; i++, pLine_data += 4, pDst += dst_stride) | 
|---|
| 1951 | { | 
|---|
| 1952 | pDst[0] = pLine_data[2]; | 
|---|
| 1953 | pDst[1] = pLine_data[1]; | 
|---|
| 1954 | pDst[2] = pLine_data[0]; | 
|---|
| 1955 | pDst[3] = pLine_data[3]; | 
|---|
| 1956 | } | 
|---|
| 1957 | break; | 
|---|
| 1958 | case 24: | 
|---|
| 1959 | assert(tga_bytes_per_pixel == 3 && n_chans == 3); | 
|---|
| 1960 | for (int i = 0; i < width; i++, pLine_data += 3, pDst += dst_stride) | 
|---|
| 1961 | { | 
|---|
| 1962 | pDst[0] = pLine_data[2]; | 
|---|
| 1963 | pDst[1] = pLine_data[1]; | 
|---|
| 1964 | pDst[2] = pLine_data[0]; | 
|---|
| 1965 | } | 
|---|
| 1966 | break; | 
|---|
| 1967 | case 16: | 
|---|
| 1968 | case 15: | 
|---|
| 1969 | if (image_type == cITRGB) | 
|---|
| 1970 | { | 
|---|
| 1971 | assert(tga_bytes_per_pixel == 2 && n_chans == 3); | 
|---|
| 1972 | for (int i = 0; i < width; i++, pLine_data += 2, pDst += dst_stride) | 
|---|
| 1973 | { | 
|---|
| 1974 | const uint32_t v = pLine_data[0] | (pLine_data[1] << 8); | 
|---|
| 1975 | pDst[0] = (((v >> 10) & 31) * 255 + 15) / 31; | 
|---|
| 1976 | pDst[1] = (((v >> 5) & 31) * 255 + 15) / 31; | 
|---|
| 1977 | pDst[2] = ((v & 31) * 255 + 15) / 31; | 
|---|
| 1978 | } | 
|---|
| 1979 | } | 
|---|
| 1980 | else | 
|---|
| 1981 | { | 
|---|
| 1982 | assert(image_type == cITGrayscale && tga_bytes_per_pixel == 2 && n_chans == 4); | 
|---|
| 1983 | for (int i = 0; i < width; i++, pLine_data += 2, pDst += dst_stride) | 
|---|
| 1984 | { | 
|---|
| 1985 | pDst[0] = pLine_data[0]; | 
|---|
| 1986 | pDst[1] = pLine_data[0]; | 
|---|
| 1987 | pDst[2] = pLine_data[0]; | 
|---|
| 1988 | pDst[3] = pLine_data[1]; | 
|---|
| 1989 | } | 
|---|
| 1990 | } | 
|---|
| 1991 | break; | 
|---|
| 1992 | case 8: | 
|---|
| 1993 | assert(tga_bytes_per_pixel == 1); | 
|---|
| 1994 | if (image_type == cITPalettized) | 
|---|
| 1995 | { | 
|---|
| 1996 | if (hdr.m_cmap_bpp == 32) | 
|---|
| 1997 | { | 
|---|
| 1998 | assert(n_chans == 4); | 
|---|
| 1999 | for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride) | 
|---|
| 2000 | { | 
|---|
| 2001 | const uint32_t c = *pLine_data; | 
|---|
| 2002 | pDst[0] = pal[c].r; | 
|---|
| 2003 | pDst[1] = pal[c].g; | 
|---|
| 2004 | pDst[2] = pal[c].b; | 
|---|
| 2005 | pDst[3] = pal[c].a; | 
|---|
| 2006 | } | 
|---|
| 2007 | } | 
|---|
| 2008 | else | 
|---|
| 2009 | { | 
|---|
| 2010 | assert(n_chans == 3); | 
|---|
| 2011 | for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride) | 
|---|
| 2012 | { | 
|---|
| 2013 | const uint32_t c = *pLine_data; | 
|---|
| 2014 | pDst[0] = pal[c].r; | 
|---|
| 2015 | pDst[1] = pal[c].g; | 
|---|
| 2016 | pDst[2] = pal[c].b; | 
|---|
| 2017 | } | 
|---|
| 2018 | } | 
|---|
| 2019 | } | 
|---|
| 2020 | else | 
|---|
| 2021 | { | 
|---|
| 2022 | assert(n_chans == 3); | 
|---|
| 2023 | for (int i = 0; i < width; i++, pLine_data++, pDst += dst_stride) | 
|---|
| 2024 | { | 
|---|
| 2025 | const uint8_t c = *pLine_data; | 
|---|
| 2026 | pDst[0] = c; | 
|---|
| 2027 | pDst[1] = c; | 
|---|
| 2028 | pDst[2] = c; | 
|---|
| 2029 | } | 
|---|
| 2030 | } | 
|---|
| 2031 | break; | 
|---|
| 2032 | default: | 
|---|
| 2033 | assert(0); | 
|---|
| 2034 | break; | 
|---|
| 2035 | } | 
|---|
| 2036 | } // y | 
|---|
| 2037 |  | 
|---|
| 2038 | return pImage; | 
|---|
| 2039 | } | 
|---|
| 2040 |  | 
|---|
| 2041 | uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans) | 
|---|
| 2042 | { | 
|---|
| 2043 | width = height = n_chans = 0; | 
|---|
| 2044 |  | 
|---|
| 2045 | uint8_vec filedata; | 
|---|
| 2046 | if (!read_file_to_vec(pFilename, filedata)) | 
|---|
| 2047 | return nullptr; | 
|---|
| 2048 |  | 
|---|
| 2049 | if (!filedata.size() || (filedata.size() > UINT32_MAX)) | 
|---|
| 2050 | return nullptr; | 
|---|
| 2051 |  | 
|---|
| 2052 | return read_tga(&filedata[0], (uint32_t)filedata.size(), width, height, n_chans); | 
|---|
| 2053 | } | 
|---|
| 2054 |  | 
|---|
| 2055 | void image::debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t scale_x, uint32_t scale_y, const color_rgba& fg, const color_rgba* pBG, bool alpha_only, const char* pFmt, ...) | 
|---|
| 2056 | { | 
|---|
| 2057 | char buf[2048]; | 
|---|
| 2058 |  | 
|---|
| 2059 | va_list args; | 
|---|
| 2060 | va_start(args, pFmt); | 
|---|
| 2061 | #ifdef _WIN32 | 
|---|
| 2062 | vsprintf_s(buf, sizeof(buf), pFmt, args); | 
|---|
| 2063 | #else | 
|---|
| 2064 | vsnprintf(buf, sizeof(buf), pFmt, args); | 
|---|
| 2065 | #endif | 
|---|
| 2066 | va_end(args); | 
|---|
| 2067 |  | 
|---|
| 2068 | const char* p = buf; | 
|---|
| 2069 |  | 
|---|
| 2070 | const uint32_t orig_x_ofs = x_ofs; | 
|---|
| 2071 |  | 
|---|
| 2072 | while (*p) | 
|---|
| 2073 | { | 
|---|
| 2074 | uint8_t c = *p++; | 
|---|
| 2075 | if ((c < 32) || (c > 127)) | 
|---|
| 2076 | c = '.'; | 
|---|
| 2077 |  | 
|---|
| 2078 | const uint8_t* pGlpyh = &g_debug_font8x8_basic[c - 32][0]; | 
|---|
| 2079 |  | 
|---|
| 2080 | for (uint32_t y = 0; y < 8; y++) | 
|---|
| 2081 | { | 
|---|
| 2082 | uint32_t row_bits = pGlpyh[y]; | 
|---|
| 2083 | for (uint32_t x = 0; x < 8; x++) | 
|---|
| 2084 | { | 
|---|
| 2085 | const uint32_t q = row_bits & (1 << x); | 
|---|
| 2086 |  | 
|---|
| 2087 | const color_rgba* pColor = q ? &fg : pBG; | 
|---|
| 2088 | if (!pColor) | 
|---|
| 2089 | continue; | 
|---|
| 2090 |  | 
|---|
| 2091 | if (alpha_only) | 
|---|
| 2092 | fill_box_alpha(x_ofs + x * scale_x, y_ofs + y * scale_y, scale_x, scale_y, *pColor); | 
|---|
| 2093 | else | 
|---|
| 2094 | fill_box(x_ofs + x * scale_x, y_ofs + y * scale_y, scale_x, scale_y, *pColor); | 
|---|
| 2095 | } | 
|---|
| 2096 | } | 
|---|
| 2097 |  | 
|---|
| 2098 | x_ofs += 8 * scale_x; | 
|---|
| 2099 | if ((x_ofs + 8 * scale_x) > m_width) | 
|---|
| 2100 | { | 
|---|
| 2101 | x_ofs = orig_x_ofs; | 
|---|
| 2102 | y_ofs += 8 * scale_y; | 
|---|
| 2103 | } | 
|---|
| 2104 | } | 
|---|
| 2105 | } | 
|---|
| 2106 |  | 
|---|
| 2107 | } // namespace basisu | 
|---|
| 2108 |  | 
|---|