#include "mtmd-audio.h"

#define _USE_MATH_DEFINES // for M_PI
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <functional>
#include <thread>
#include <vector>
| 11 | |
| 12 | // most of the code here is copied from whisper.cpp |
| 13 | |
// Round x up to the nearest multiple of n (integer arithmetic).
// n must be non-zero. n is expanded three times, so do not pass an
// expression with side effects.
#define _ALIGN(x, n) ((((x) + (n) - 1) / (n)) * (n))
| 16 | |
| 17 | namespace whisper_preprocessor { |
| 18 | |
| 19 | #define SIN_COS_N_COUNT WHISPER_N_FFT |
| 20 | namespace { |
| 21 | struct whisper_global_cache { |
| 22 | // In FFT, we frequently use sine and cosine operations with the same values. |
| 23 | // We can use precalculated values to speed up the process. |
| 24 | float sin_vals[SIN_COS_N_COUNT]; |
| 25 | float cos_vals[SIN_COS_N_COUNT]; |
| 26 | |
| 27 | // Hann window (Use cosf to eliminate difference) |
| 28 | // ref: https://pytorch.org/docs/stable/generated/torch.hann_window.html |
| 29 | // ref: https://github.com/openai/whisper/blob/main/whisper/audio.py#L147 |
| 30 | float hann_window[WHISPER_N_FFT]; |
| 31 | |
| 32 | whisper_global_cache() { |
| 33 | fill_sin_cos_table(); |
| 34 | fill_hann_window(length: sizeof(hann_window)/sizeof(hann_window[0]), periodic: true, output: hann_window); |
| 35 | } |
| 36 | |
| 37 | void fill_sin_cos_table() { |
| 38 | for (int i = 0; i < SIN_COS_N_COUNT; i++) { |
| 39 | double theta = (2 * M_PI * i) / SIN_COS_N_COUNT; |
| 40 | sin_vals[i] = sinf(x: theta); |
| 41 | cos_vals[i] = cosf(x: theta); |
| 42 | } |
| 43 | } |
| 44 | |
| 45 | void fill_hann_window(int length, bool periodic, float * output) { |
| 46 | int offset = -1; |
| 47 | if (periodic) { |
| 48 | offset = 0; |
| 49 | } |
| 50 | for (int i = 0; i < length; i++) { |
| 51 | output[i] = 0.5 * (1.0 - cosf(x: (2.0 * M_PI * i) / (length + offset))); |
| 52 | } |
| 53 | } |
| 54 | } global_cache; |
| 55 | } |
| 56 | |
| 57 | // naive Discrete Fourier Transform |
| 58 | // input is real-valued |
| 59 | // output is complex-valued |
| 60 | static void dft(const float* in, int N, float* out) { |
| 61 | const int sin_cos_step = SIN_COS_N_COUNT / N; |
| 62 | |
| 63 | for (int k = 0; k < N; k++) { |
| 64 | float re = 0; |
| 65 | float im = 0; |
| 66 | |
| 67 | for (int n = 0; n < N; n++) { |
| 68 | int idx = (k * n * sin_cos_step) % (SIN_COS_N_COUNT); // t = 2*M_PI*k*n/N |
| 69 | re += in[n]*global_cache.cos_vals[idx]; // cos(t) |
| 70 | im -= in[n]*global_cache.sin_vals[idx]; // sin(t) |
| 71 | } |
| 72 | |
| 73 | out[k*2 + 0] = re; |
| 74 | out[k*2 + 1] = im; |
| 75 | } |
| 76 | } |
| 77 | |
| 78 | // Cooley-Tukey FFT |
| 79 | // poor man's implementation - use something better |
| 80 | // input is real-valued |
| 81 | // output is complex-valued |
| 82 | static void fft(float* in, int N, float* out) { |
| 83 | if (N == 1) { |
| 84 | out[0] = in[0]; |
| 85 | out[1] = 0; |
| 86 | return; |
| 87 | } |
| 88 | |
| 89 | const int half_N = N / 2; |
| 90 | if (N - half_N*2 == 1) { |
| 91 | dft(in, N, out); |
| 92 | return; |
| 93 | } |
| 94 | |
| 95 | float* even = in + N; |
| 96 | for (int i = 0; i < half_N; ++i) { |
| 97 | even[i]= in[2*i]; |
| 98 | } |
| 99 | float* even_fft = out + 2 * N; |
| 100 | fft(in: even, N: half_N, out: even_fft); |
| 101 | |
| 102 | float* odd = even; |
| 103 | for (int i = 0; i < half_N; ++i) { |
| 104 | odd[i] = in[2*i + 1]; |
| 105 | } |
| 106 | float* odd_fft = even_fft + N; |
| 107 | fft(in: odd, N: half_N, out: odd_fft); |
| 108 | |
| 109 | const int sin_cos_step = SIN_COS_N_COUNT / N; |
| 110 | for (int k = 0; k < half_N; k++) { |
| 111 | int idx = k * sin_cos_step; // t = 2*M_PI*k/N |
| 112 | float re = global_cache.cos_vals[idx]; // cos(t) |
| 113 | float im = -global_cache.sin_vals[idx]; // sin(t) |
| 114 | |
| 115 | float re_odd = odd_fft[2*k + 0]; |
| 116 | float im_odd = odd_fft[2*k + 1]; |
| 117 | |
| 118 | out[2*k + 0] = even_fft[2*k + 0] + re*re_odd - im*im_odd; |
| 119 | out[2*k + 1] = even_fft[2*k + 1] + re*im_odd + im*re_odd; |
| 120 | |
| 121 | out[2*(k + half_N) + 0] = even_fft[2*k + 0] - re*re_odd + im*im_odd; |
| 122 | out[2*(k + half_N) + 1] = even_fft[2*k + 1] - re*im_odd - im*re_odd; |
| 123 | } |
| 124 | } |
| 125 | |
| 126 | static void log_mel_spectrogram_worker_thread(int ith, const float * hann, const std::vector<float> & samples, |
| 127 | int n_samples, int frame_size, int frame_step, int n_threads, |
| 128 | const whisper_filters & filters, whisper_mel & mel) { |
| 129 | std::vector<float> fft_in(frame_size * 2, 0.0); |
| 130 | std::vector<float> fft_out(frame_size * 2 * 2 * 2); |
| 131 | |
| 132 | int n_fft = filters.n_fft; |
| 133 | int i = ith; |
| 134 | |
| 135 | // make sure n_fft == 1 + (WHISPER_N_FFT / 2), bin_0 to bin_nyquist |
| 136 | WHISPER_ASSERT(n_fft == 1 + (frame_size / 2)); |
| 137 | |
| 138 | // calculate FFT only when fft_in are not all zero |
| 139 | for (; i < std::min(a: n_samples / frame_step + 1, b: mel.n_len); i += n_threads) { |
| 140 | const int offset = i * frame_step; |
| 141 | |
| 142 | // apply Hann window (~10% faster) |
| 143 | for (int j = 0; j < std::min(a: frame_size, b: n_samples - offset); j++) { |
| 144 | fft_in[j] = hann[j] * samples[offset + j]; |
| 145 | } |
| 146 | |
| 147 | // fill the rest with zeros |
| 148 | if (n_samples - offset < frame_size) { |
| 149 | std::fill(first: fft_in.begin() + (n_samples - offset), last: fft_in.end(), value: 0.0); |
| 150 | } |
| 151 | |
| 152 | // FFT |
| 153 | fft(in: fft_in.data(), N: frame_size, out: fft_out.data()); |
| 154 | |
| 155 | // Calculate modulus^2 of complex numbers |
| 156 | // Use pow(fft_out[2 * j + 0], 2) + pow(fft_out[2 * j + 1], 2) causes inference quality problem? Interesting. |
| 157 | for (int j = 0; j < n_fft; j++) { |
| 158 | fft_out[j] = (fft_out[2 * j + 0] * fft_out[2 * j + 0] + fft_out[2 * j + 1] * fft_out[2 * j + 1]); |
| 159 | } |
| 160 | |
| 161 | // mel spectrogram |
| 162 | for (int j = 0; j < mel.n_mel; j++) { |
| 163 | double sum = 0.0; |
| 164 | // unroll loop (suggested by GH user @lunixbochs) |
| 165 | int k = 0; |
| 166 | for (k = 0; k < n_fft - 3; k += 4) { |
| 167 | sum += |
| 168 | fft_out[k + 0] * filters.data[j * n_fft + k + 0] + |
| 169 | fft_out[k + 1] * filters.data[j * n_fft + k + 1] + |
| 170 | fft_out[k + 2] * filters.data[j * n_fft + k + 2] + |
| 171 | fft_out[k + 3] * filters.data[j * n_fft + k + 3]; |
| 172 | } |
| 173 | // handle n_fft remainder |
| 174 | for (; k < n_fft; k++) { |
| 175 | sum += fft_out[k] * filters.data[j * n_fft + k]; |
| 176 | } |
| 177 | sum = log10(x: std::max(a: sum, b: 1e-10)); |
| 178 | mel.data[j * mel.n_len + i] = sum; |
| 179 | } |
| 180 | } |
| 181 | |
| 182 | // Otherwise fft_out are all zero |
| 183 | double sum = log10(x: 1e-10); |
| 184 | for (; i < mel.n_len; i += n_threads) { |
| 185 | for (int j = 0; j < mel.n_mel; j++) { |
| 186 | mel.data[j * mel.n_len + i] = sum; |
| 187 | } |
| 188 | } |
| 189 | } |
| 190 | |
| 191 | // ref: https://github.com/openai/whisper/blob/main/whisper/audio.py#L110-L157 |
| 192 | static bool log_mel_spectrogram( |
| 193 | const float * samples, |
| 194 | const int n_samples, |
| 195 | const int /*sample_rate*/, |
| 196 | const int frame_size, |
| 197 | const int frame_step, |
| 198 | const int n_mel, |
| 199 | const int n_threads, |
| 200 | const whisper_filters & filters, |
| 201 | const bool debug, |
| 202 | whisper_mel & mel) { |
| 203 | //const int64_t t_start_us = ggml_time_us(); |
| 204 | |
| 205 | // Hann window |
| 206 | WHISPER_ASSERT(frame_size == WHISPER_N_FFT && "Unsupported frame_size" ); |
| 207 | const float * hann = global_cache.hann_window; |
| 208 | |
| 209 | // Calculate the length of padding |
| 210 | int64_t stage_1_pad = WHISPER_SAMPLE_RATE * 30; |
| 211 | int64_t stage_2_pad = frame_size / 2; |
| 212 | |
| 213 | // Initialize a vector and copy data from C array to it. |
| 214 | std::vector<float> samples_padded; |
| 215 | samples_padded.resize(new_size: n_samples + stage_1_pad + stage_2_pad * 2); |
| 216 | std::copy(first: samples, last: samples + n_samples, result: samples_padded.begin() + stage_2_pad); |
| 217 | |
| 218 | // pad 30 seconds of zeros at the end of audio (480,000 samples) + reflective pad 200 samples at the end of audio |
| 219 | std::fill(first: samples_padded.begin() + n_samples + stage_2_pad, last: samples_padded.begin() + n_samples + stage_1_pad + 2 * stage_2_pad, value: 0); |
| 220 | |
| 221 | // reflective pad 200 samples at the beginning of audio |
| 222 | std::reverse_copy(first: samples + 1, last: samples + 1 + stage_2_pad, result: samples_padded.begin()); |
| 223 | |
| 224 | mel.n_mel = n_mel; |
| 225 | // https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/native/SpectralOps.cpp#L936 |
| 226 | // Calculate number of frames + remove the last frame |
| 227 | mel.n_len = (samples_padded.size() - frame_size) / frame_step; |
| 228 | // Calculate semi-padded sample length to ensure compatibility |
| 229 | mel.n_len_org = 1 + (n_samples + stage_2_pad - frame_size) / frame_step; |
| 230 | mel.data.resize(new_size: mel.n_mel * mel.n_len); |
| 231 | |
| 232 | { |
| 233 | std::vector<std::thread> workers(n_threads - 1); |
| 234 | for (int iw = 0; iw < n_threads - 1; ++iw) { |
| 235 | workers[iw] = std::thread( |
| 236 | log_mel_spectrogram_worker_thread, iw + 1, hann, std::cref(t: samples_padded), |
| 237 | n_samples + stage_2_pad, frame_size, frame_step, n_threads, |
| 238 | std::cref(t: filters), std::ref(t&: mel)); |
| 239 | } |
| 240 | |
| 241 | // main thread |
| 242 | log_mel_spectrogram_worker_thread(ith: 0, hann, samples: samples_padded, n_samples: n_samples + stage_2_pad, frame_size, frame_step, n_threads, filters, mel); |
| 243 | |
| 244 | for (int iw = 0; iw < n_threads - 1; ++iw) { |
| 245 | workers[iw].join(); |
| 246 | } |
| 247 | } |
| 248 | |
| 249 | // clamping and normalization |
| 250 | double mmax = -1e20; |
| 251 | for (int i = 0; i < mel.n_mel*mel.n_len; i++) { |
| 252 | if (mel.data[i] > mmax) { |
| 253 | mmax = mel.data[i]; |
| 254 | } |
| 255 | } |
| 256 | |
| 257 | mmax -= 8.0; |
| 258 | |
| 259 | for (int i = 0; i < mel.n_mel*mel.n_len; i++) { |
| 260 | if (mel.data[i] < mmax) { |
| 261 | mel.data[i] = mmax; |
| 262 | } |
| 263 | |
| 264 | mel.data[i] = (mel.data[i] + 4.0)/4.0; |
| 265 | } |
| 266 | |
| 267 | // Dump log_mel_spectrogram |
| 268 | if (debug) { |
| 269 | std::ofstream outFile("log_mel_spectrogram.json" ); |
| 270 | outFile << "[" ; |
| 271 | for (uint64_t i = 0; i < mel.data.size() - 1; i++) { |
| 272 | outFile << mel.data[i] << ", " ; |
| 273 | } |
| 274 | outFile << mel.data[mel.data.size() - 1] << "]" ; |
| 275 | outFile.close(); |
| 276 | } |
| 277 | |
| 278 | return true; |
| 279 | } |
| 280 | |
| 281 | bool preprocess_audio( |
| 282 | const float * samples, |
| 283 | size_t n_samples, |
| 284 | const whisper_filters & filters, |
| 285 | std::vector<whisper_mel> & output) { |
| 286 | |
| 287 | if (n_samples == 0) { |
| 288 | // empty audio |
| 289 | return false; |
| 290 | } |
| 291 | |
| 292 | whisper_mel out_full; |
| 293 | bool ok = log_mel_spectrogram( |
| 294 | samples, |
| 295 | n_samples, |
| 296 | COMMON_SAMPLE_RATE, |
| 297 | WHISPER_N_FFT, |
| 298 | WHISPER_HOP_LENGTH, |
| 299 | n_mel: filters.n_mel, |
| 300 | n_threads: 4, // n_threads |
| 301 | filters, |
| 302 | debug: false, // debug |
| 303 | mel&: out_full); |
| 304 | if (!ok) { |
| 305 | return false; |
| 306 | } |
| 307 | |
| 308 | // because the cgraph in clip.cpp only accepts 3000 frames each, we need to split the mel |
| 309 | // we always expect the mel to have 3000 silent frames at the end |
| 310 | // printf("n_len %d\n", out_full.n_len); |
| 311 | const size_t frames_per_chunk = 3000; |
| 312 | GGML_ASSERT((size_t)out_full.n_len > frames_per_chunk); |
| 313 | for (size_t off = 0; off < (size_t)out_full.n_len; off += frames_per_chunk) { |
| 314 | int n_len = std::min(a: frames_per_chunk, b: (size_t)out_full.n_len - off); |
| 315 | if ((size_t)n_len < frames_per_chunk) { |
| 316 | break; // last uncomplete chunk will always be a padded chunk, safe to ignore |
| 317 | } |
| 318 | |
| 319 | whisper_mel out_chunk; |
| 320 | out_chunk.n_len = n_len; |
| 321 | out_chunk.n_mel = out_full.n_mel; |
| 322 | out_chunk.n_len_org = out_full.n_mel; // unused |
| 323 | out_chunk.data.reserve(n: out_chunk.n_mel * out_chunk.n_len); |
| 324 | |
| 325 | for (int i = 0; i < out_full.n_mel; i++) { |
| 326 | auto src = out_full.data.begin() + i*out_full.n_len + off; |
| 327 | out_chunk.data.insert(position: out_chunk.data.end(), first: src, last: src + frames_per_chunk); |
| 328 | } |
| 329 | |
| 330 | output.push_back(x: std::move(out_chunk)); |
| 331 | } |
| 332 | |
| 333 | return true; |
| 334 | } |
| 335 | |
| 336 | } // namespace whisper_preprocessor |
| 337 | |
| 338 | |
// precalculated mel filter banks
// values are multiplied by 1000.0 to save space, and are divided by 1000.0 at the end of the function
//
// generated from python code:
//
// from numpy import load
// data = load('mel_filters.npz')
// lst = data.files
// for item in lst:
//   print(item)
//   print(data[item].shape)
//   n_mel = data[item].shape[0]
//   n_fft = data[item].shape[1]
//   for i, row in enumerate(data[item]):
//     for j, val in enumerate(row):
//       val = val * 1000.0
//       if val != 0:
//         print(f"data[{i*n_fft + j}] = {val:.6f};")
| 357 | |
| 358 | namespace whisper_precalc_filters { |
| 359 | |
| 360 | whisper_preprocessor::whisper_filters get_128_bins() { |
| 361 | whisper_preprocessor::whisper_filters filters; |
| 362 | filters.n_mel = 128; |
| 363 | filters.n_fft = 201; |
| 364 | std::vector data(filters.n_mel * filters.n_fft, 0.0f); |
| 365 | |
| 366 | data[1] = 12.37398665; |
| 367 | data[202] = 30.39256483; |
| 368 | data[404] = 24.74797331; |
| 369 | data[605] = 18.01857911; |
| 370 | data[807] = 37.12195903; |
| 371 | data[1008] = 5.64459199; |
| 372 | data[1009] = 6.72939420; |
| 373 | data[1210] = 36.03715822; |
| 374 | data[1412] = 19.10337992; |
| 375 | data[1613] = 23.66316877; |
| 376 | data[1815] = 31.47736564; |
| 377 | data[2016] = 11.28918398; |
| 378 | data[2017] = 1.08480197; |
| 379 | data[2218] = 41.68175161; |
| 380 | data[2420] = 13.45878839; |
| 381 | data[2621] = 29.30776216; |
| 382 | data[2823] = 25.83277412; |
| 383 | data[3024] = 16.93377644; |
| 384 | data[3226] = 38.20675984; |
| 385 | data[3427] = 4.55979025; |
| 386 | data[3428] = 7.81419594; |
| 387 | data[3629] = 34.95235741; |
| 388 | data[3831] = 20.18818259; |
| 389 | data[4032] = 22.57836796; |
| 390 | data[4234] = 32.56217018; |
| 391 | data[4435] = 10.20438317; |
| 392 | data[4436] = 2.16960395; |
| 393 | data[4637] = 40.59694707; |
| 394 | data[4839] = 14.54358920; |
| 395 | data[5040] = 28.22295949; |
| 396 | data[5242] = 26.91757679; |
| 397 | data[5443] = 15.84897563; |
| 398 | data[5645] = 39.29156065; |
| 399 | data[5846] = 3.47498828; |
| 400 | data[5847] = 8.89899861; |
| 401 | data[6048] = 33.86755288; |
| 402 | data[6250] = 21.27298526; |
| 403 | data[6451] = 21.49356715; |
| 404 | data[6653] = 33.64697099; |
| 405 | data[6854] = 9.11958050; |
| 406 | data[6855] = 3.25440569; |
| 407 | data[7056] = 39.51214626; |
| 408 | data[7258] = 15.62839188; |
| 409 | data[7459] = 27.13815868; |
| 410 | data[7661] = 28.00237760; |
| 411 | data[7862] = 14.76417296; |
| 412 | data[8064] = 40.37636518; |
| 413 | data[8265] = 2.38068704; |
| 414 | data[8266] = 10.20263787; |
| 415 | data[8467] = 31.61146119; |
| 416 | data[8669] = 24.54700135; |
| 417 | data[8870] = 15.32919332; |
| 418 | data[8871] = 1.66583748; |
| 419 | data[9072] = 36.72905266; |
| 420 | data[9274] = 20.09709924; |
| 421 | data[9475] = 16.93102531; |
| 422 | data[9476] = 2.90265540; |
| 423 | data[9677] = 32.84499049; |
| 424 | data[9879] = 23.52004871; |
| 425 | data[10080] = 11.03894413; |
| 426 | data[10081] = 10.72582975; |
| 427 | data[10282] = 22.71829173; |
| 428 | data[10484] = 32.27872774; |
| 429 | data[10685] = 0.11626833; |
| 430 | data[10686] = 22.85348251; |
| 431 | data[10887] = 8.56344029; |
| 432 | data[10888] = 14.97978810; |
| 433 | data[11089] = 15.51398356; |
| 434 | data[11090] = 8.51490628; |
| 435 | data[11291] = 21.10680379; |
| 436 | data[11292] = 3.32652032; |
| 437 | data[11493] = 25.47064796; |
| 438 | data[11695] = 27.35907957; |
| 439 | data[11896] = 0.65853616; |
| 440 | data[11897] = 23.83812517; |
| 441 | data[12098] = 3.44359246; |
| 442 | data[12099] = 21.22455277; |
| 443 | data[12300] = 5.35842171; |
| 444 | data[12301] = 19.42555793; |
| 445 | data[12502] = 6.49324711; |
| 446 | data[12503] = 18.35542172; |
| 447 | data[12704] = 6.93138083; |
| 448 | data[12705] = 17.93504693; |
| 449 | data[12906] = 6.74968259; |
| 450 | data[12907] = 18.09151843; |
| 451 | data[13108] = 6.01899112; |
| 452 | data[13109] = 18.75767298; |
| 453 | data[13310] = 4.80452832; |
| 454 | data[13311] = 19.87172849; |
| 455 | data[13512] = 3.16627859; |
| 456 | data[13513] = 21.37690969; |
| 457 | data[13514] = 1.25317345; |
| 458 | data[13714] = 1.15934468; |
| 459 | data[13715] = 20.80361731; |
| 460 | data[13716] = 4.04486805; |
| 461 | data[13917] = 17.55363122; |
| 462 | data[13918] = 7.08320038; |
| 463 | data[14119] = 14.07538634; |
| 464 | data[14120] = 10.32655034; |
| 465 | data[14321] = 10.40921453; |
| 466 | data[14322] = 13.73696327; |
| 467 | data[14523] = 6.59187697; |
| 468 | data[14524] = 17.27988198; |
| 469 | data[14525] = 1.46804214; |
| 470 | data[14725] = 2.65681883; |
| 471 | data[14726] = 18.09193194; |
| 472 | data[14727] = 5.85655728; |
| 473 | data[14928] = 13.34277913; |
| 474 | data[14929] = 10.28267574; |
| 475 | data[15130] = 8.56800377; |
| 476 | data[15131] = 14.72230814; |
| 477 | data[15132] = 1.04039861; |
| 478 | data[15332] = 3.79085587; |
| 479 | data[15333] = 17.14678481; |
| 480 | data[15334] = 6.11609267; |
| 481 | data[15535] = 11.75929047; |
| 482 | data[15536] = 11.13393717; |
| 483 | data[15737] = 6.43857848; |
| 484 | data[15738] = 16.07806236; |
| 485 | data[15739] = 4.23917221; |
| 486 | data[15939] = 1.19989377; |
| 487 | data[15940] = 12.75671553; |
| 488 | data[15941] = 9.65298992; |
| 489 | data[16142] = 7.06935255; |
| 490 | data[16143] = 14.94054683; |
| 491 | data[16144] = 4.19024844; |
| 492 | data[16344] = 1.51483389; |
| 493 | data[16345] = 12.00899947; |
| 494 | data[16346] = 9.84823331; |
| 495 | data[16547] = 6.10224018; |
| 496 | data[16548] = 15.33857174; |
| 497 | data[16549] = 5.57676842; |
| 498 | data[16749] = 0.36827257; |
| 499 | data[16750] = 9.89749376; |
| 500 | data[16751] = 11.35340426; |
| 501 | data[16752] = 2.05122307; |
| 502 | data[16952] = 3.89297144; |
| 503 | data[16953] = 12.97352277; |
| 504 | data[16954] = 8.06631614; |
| 505 | data[17155] = 6.74493238; |
| 506 | data[17156] = 13.85874674; |
| 507 | data[17157] = 5.41190524; |
| 508 | data[17357] = 0.74220158; |
| 509 | data[17358] = 8.98779090; |
| 510 | data[17359] = 11.37871388; |
| 511 | data[17360] = 3.32958088; |
| 512 | data[17560] = 2.82313535; |
| 513 | data[17561] = 10.68049297; |
| 514 | data[17562] = 9.43340641; |
| 515 | data[17563] = 1.76325557; |
| 516 | data[17763] = 4.39018616; |
| 517 | data[17764] = 11.87758986; |
| 518 | data[17765] = 7.97005836; |
| 519 | data[17766] = 0.66104700; |
| 520 | data[17966] = 5.49466675; |
| 521 | data[17967] = 12.62953598; |
| 522 | data[17968] = 6.93987962; |
| 523 | data[18169] = 6.18401915; |
| 524 | data[18170] = 12.93473132; |
| 525 | data[18171] = 6.29778765; |
| 526 | data[18371] = 0.02325210; |
| 527 | data[18372] = 6.50206627; |
| 528 | data[18373] = 12.32661773; |
| 529 | data[18374] = 6.00216538; |
| 530 | data[18574] = 0.31548753; |
| 531 | data[18575] = 6.48925547; |
| 532 | data[18576] = 12.04130240; |
| 533 | data[18577] = 6.01462880; |
| 534 | data[18777] = 0.29979556; |
| 535 | data[18778] = 6.18288014; |
| 536 | data[18779] = 12.04272825; |
| 537 | data[18780] = 6.29981188; |
| 538 | data[18781] = 0.55689598; |
| 539 | data[18980] = 0.01120471; |
| 540 | data[18981] = 5.61729167; |
| 541 | data[18982] = 11.22337859; |
| 542 | data[18983] = 6.82516303; |
| 543 | data[18984] = 1.35264499; |
| 544 | data[19184] = 4.82410006; |
| 545 | data[19185] = 10.16623247; |
| 546 | data[19186] = 7.56075513; |
| 547 | data[19187] = 2.34590308; |
| 548 | data[19387] = 3.83235747; |
| 549 | data[19388] = 8.92296247; |
| 550 | data[19389] = 8.47910438; |
| 551 | data[19390] = 3.50978645; |
| 552 | data[19590] = 2.66873185; |
| 553 | data[19591] = 7.51965167; |
| 554 | data[19592] = 9.55500547; |
| 555 | data[19593] = 4.81966138; |
| 556 | data[19594] = 0.08431751; |
| 557 | data[19793] = 1.35767367; |
| 558 | data[19794] = 5.98019501; |
| 559 | data[19795] = 10.60271543; |
| 560 | data[19796] = 6.25298498; |
| 561 | data[19797] = 1.74059917; |
| 562 | data[19997] = 4.32644226; |
| 563 | data[19998] = 8.73131864; |
| 564 | data[19999] = 7.78916525; |
| 565 | data[20000] = 3.48923868; |
| 566 | data[20200] = 2.57835095; |
| 567 | data[20201] = 6.77582854; |
| 568 | data[20202] = 9.40941647; |
| 569 | data[20203] = 5.31194592; |
| 570 | data[20204] = 1.21447595; |
| 571 | data[20403] = 0.75411191; |
| 572 | data[20404] = 4.75395704; |
| 573 | data[20405] = 8.75380263; |
| 574 | data[20406] = 7.19209015; |
| 575 | data[20407] = 3.28754401; |
| 576 | data[20607] = 2.68179690; |
| 577 | data[20608] = 6.49331464; |
| 578 | data[20609] = 9.11457930; |
| 579 | data[20610] = 5.39387390; |
| 580 | data[20611] = 1.67316827; |
| 581 | data[20810] = 0.57394296; |
| 582 | data[20811] = 4.20600036; |
| 583 | data[20812] = 7.83805829; |
| 584 | data[20813] = 7.52023002; |
| 585 | data[20814] = 3.97470826; |
| 586 | data[20815] = 0.42918732; |
| 587 | data[21014] = 1.90464477; |
| 588 | data[21015] = 5.36569161; |
| 589 | data[21016] = 8.82673822; |
| 590 | data[21017] = 6.27609482; |
| 591 | data[21018] = 2.89750961; |
| 592 | data[21218] = 2.89885257; |
| 593 | data[21219] = 6.19694078; |
| 594 | data[21220] = 8.56699049; |
| 595 | data[21221] = 5.34748193; |
| 596 | data[21222] = 2.12797290; |
| 597 | data[21421] = 0.44750227; |
| 598 | data[21422] = 3.59030394; |
| 599 | data[21423] = 6.73310598; |
| 600 | data[21424] = 7.77023612; |
| 601 | data[21425] = 4.70231380; |
| 602 | data[21426] = 1.63439126; |
| 603 | data[21625] = 1.01536023; |
| 604 | data[21626] = 4.01018746; |
| 605 | data[21627] = 7.00501446; |
| 606 | data[21628] = 7.23442994; |
| 607 | data[21629] = 4.31095669; |
| 608 | data[21630] = 1.38748321; |
| 609 | data[21829] = 1.33348850; |
| 610 | data[21830] = 4.18730825; |
| 611 | data[21831] = 7.04112789; |
| 612 | data[21832] = 6.93188375; |
| 613 | data[21833] = 4.14605811; |
| 614 | data[21834] = 1.36023236; |
| 615 | data[22033] = 1.42879714; |
| 616 | data[22034] = 4.14824858; |
| 617 | data[22035] = 6.86769979; |
| 618 | data[22036] = 6.83705276; |
| 619 | data[22037] = 4.18239459; |
| 620 | data[22038] = 1.52773573; |
| 621 | data[22237] = 1.32610439; |
| 622 | data[22238] = 3.91751388; |
| 623 | data[22239] = 6.50892360; |
| 624 | data[22240] = 6.92639686; |
| 625 | data[22241] = 4.39672917; |
| 626 | data[22242] = 1.86706171; |
| 627 | data[22441] = 1.04827771; |
| 628 | data[22442] = 3.51767405; |
| 629 | data[22443] = 5.98707050; |
| 630 | data[22444] = 7.17824046; |
| 631 | data[22445] = 4.76767914; |
| 632 | data[22446] = 2.35711760; |
| 633 | data[22645] = 0.61636406; |
| 634 | data[22646] = 2.96949223; |
| 635 | data[22647] = 5.32262027; |
| 636 | data[22648] = 7.57265091; |
| 637 | data[22649] = 5.27558755; |
| 638 | data[22650] = 2.97852419; |
| 639 | data[22651] = 0.68146095; |
| 640 | data[22849] = 0.04971400; |
| 641 | data[22850] = 2.29204819; |
| 642 | data[22851] = 4.53438237; |
| 643 | data[22852] = 6.77671656; |
| 644 | data[22853] = 5.90240723; |
| 645 | data[22854] = 3.71349836; |
| 646 | data[22855] = 1.52458926; |
| 647 | data[23054] = 1.50285335; |
| 648 | data[23055] = 3.63961048; |
| 649 | data[23056] = 5.77636715; |
| 650 | data[23057] = 6.63159089; |
| 651 | data[23058] = 4.54574358; |
| 652 | data[23059] = 2.45989650; |
| 653 | data[23060] = 0.37404924; |
| 654 | data[23258] = 0.61795861; |
| 655 | data[23259] = 2.65410915; |
| 656 | data[23260] = 4.69025923; |
| 657 | data[23261] = 6.72641024; |
| 658 | data[23262] = 5.46034705; |
| 659 | data[23263] = 3.47270933; |
| 660 | data[23264] = 1.48507138; |
| 661 | data[23463] = 1.59233576; |
| 662 | data[23464] = 3.53261665; |
| 663 | data[23465] = 5.47289755; |
| 664 | data[23466] = 6.44368259; |
| 665 | data[23467] = 4.54962999; |
| 666 | data[23468] = 2.65557761; |
| 667 | data[23469] = 0.76152512; |
| 668 | data[23667] = 0.46749352; |
| 669 | data[23668] = 2.31641904; |
| 670 | data[23669] = 4.16534441; |
| 671 | data[23670] = 6.01426978; |
| 672 | data[23671] = 5.67844696; |
| 673 | data[23672] = 3.87357362; |
| 674 | data[23673] = 2.06870004; |
| 675 | data[23674] = 0.26382666; |
| 676 | data[23872] = 1.05349103; |
| 677 | data[23873] = 2.81536230; |
| 678 | data[23874] = 4.57723346; |
| 679 | data[23875] = 6.33910485; |
| 680 | data[23876] = 5.12815686; |
| 681 | data[23877] = 3.40826320; |
| 682 | data[23878] = 1.68837002; |
| 683 | data[24077] = 1.43350090; |
| 684 | data[24078] = 3.11241671; |
| 685 | data[24079] = 4.79133241; |
| 686 | data[24080] = 6.40943693; |
| 687 | data[24081] = 4.77052201; |
| 688 | data[24082] = 3.13160778; |
| 689 | data[24083] = 1.49269309; |
| 690 | data[24281] = 0.02932359; |
| 691 | data[24282] = 1.62918994; |
| 692 | data[24283] = 3.22905602; |
| 693 | data[24284] = 4.82892245; |
| 694 | data[24285] = 6.14671456; |
| 695 | data[24286] = 4.58496623; |
| 696 | data[24287] = 3.02321767; |
| 697 | data[24288] = 1.46146910; |
| 698 | data[24486] = 0.13601698; |
| 699 | data[24487] = 1.66055572; |
| 700 | data[24488] = 3.18509457; |
| 701 | data[24489] = 4.70963307; |
| 702 | data[24490] = 6.04072399; |
| 703 | data[24491] = 4.55250870; |
| 704 | data[24492] = 3.06429295; |
| 705 | data[24493] = 1.57607743; |
| 706 | data[24494] = 0.08786193; |
| 707 | data[24691] = 0.09328097; |
| 708 | data[24692] = 1.54603878; |
| 709 | data[24693] = 2.99879676; |
| 710 | data[24694] = 4.45155473; |
| 711 | data[24695] = 5.90431225; |
| 712 | data[24696] = 4.65566106; |
| 713 | data[24697] = 3.23751615; |
| 714 | data[24698] = 1.81937125; |
| 715 | data[24699] = 0.40122634; |
| 716 | data[24897] = 1.30262633; |
| 717 | data[24898] = 2.68698297; |
| 718 | data[24899] = 4.07133950; |
| 719 | data[24900] = 5.45569602; |
| 720 | data[24901] = 4.87832492; |
| 721 | data[24902] = 3.52695142; |
| 722 | data[24903] = 2.17557792; |
| 723 | data[24904] = 0.82420459; |
| 724 | data[25102] = 0.94595028; |
| 725 | data[25103] = 2.26512621; |
| 726 | data[25104] = 3.58430226; |
| 727 | data[25105] = 4.90347855; |
| 728 | data[25106] = 5.20569785; |
| 729 | data[25107] = 3.91795207; |
| 730 | data[25108] = 2.63020652; |
| 731 | data[25109] = 1.34246063; |
| 732 | data[25110] = 0.05471494; |
| 733 | data[25307] = 0.49037894; |
| 734 | data[25308] = 1.74744334; |
| 735 | data[25309] = 3.00450763; |
| 736 | data[25310] = 4.26157191; |
| 737 | data[25311] = 5.51863620; |
| 738 | data[25312] = 4.39707236; |
| 739 | data[25313] = 3.16995848; |
| 740 | data[25314] = 1.94284460; |
| 741 | data[25315] = 0.71573065; |
| 742 | data[25513] = 1.14698056; |
| 743 | data[25514] = 2.34485767; |
| 744 | data[25515] = 3.54273478; |
| 745 | data[25516] = 4.74061165; |
| 746 | data[25517] = 4.95198462; |
| 747 | data[25518] = 3.78264743; |
| 748 | data[25519] = 2.61331047; |
| 749 | data[25520] = 1.44397374; |
| 750 | data[25521] = 0.27463681; |
| 751 | data[25718] = 0.47569509; |
| 752 | data[25719] = 1.61717169; |
| 753 | data[25720] = 2.75864848; |
| 754 | data[25721] = 3.90012516; |
| 755 | data[25722] = 5.04160160; |
| 756 | data[25723] = 4.45712078; |
| 757 | data[25724] = 3.34284059; |
| 758 | data[25725] = 2.22856039; |
| 759 | data[25726] = 1.11428020; |
| 760 | |
| 761 | for (auto & val : data) { |
| 762 | val /= 1000.0f; |
| 763 | } |
| 764 | |
| 765 | filters.data = std::move(data); |
| 766 | return filters; |
| 767 | } |
| 768 | |
| 769 | } // namespace whisper_precalc_filters |
| 770 | |