1#include "mtmd-audio.h"
2
3#define _USE_MATH_DEFINES // for M_PI
4#include <cmath>
5#include <cstdint>
6#include <cstring>
7#include <thread>
8#include <vector>
9#include <fstream>
10#include <algorithm>
11
12// most of the code here is copied from whisper.cpp
13
// Round x up to the nearest multiple of n (for integer operands, e.g. _ALIGN(5, 4) == 8
// and _ALIGN(8, 4) == 8). n is expanded three times, so do not pass an expression with
// side effects for it.
#define _ALIGN(x, n) ((((x) + (n) - 1) / (n)) * (n))
16
17namespace whisper_preprocessor {
18
19#define SIN_COS_N_COUNT WHISPER_N_FFT
20namespace {
21struct whisper_global_cache {
22 // In FFT, we frequently use sine and cosine operations with the same values.
23 // We can use precalculated values to speed up the process.
24 float sin_vals[SIN_COS_N_COUNT];
25 float cos_vals[SIN_COS_N_COUNT];
26
27 // Hann window (Use cosf to eliminate difference)
28 // ref: https://pytorch.org/docs/stable/generated/torch.hann_window.html
29 // ref: https://github.com/openai/whisper/blob/main/whisper/audio.py#L147
30 float hann_window[WHISPER_N_FFT];
31
32 whisper_global_cache() {
33 fill_sin_cos_table();
34 fill_hann_window(length: sizeof(hann_window)/sizeof(hann_window[0]), periodic: true, output: hann_window);
35 }
36
37 void fill_sin_cos_table() {
38 for (int i = 0; i < SIN_COS_N_COUNT; i++) {
39 double theta = (2 * M_PI * i) / SIN_COS_N_COUNT;
40 sin_vals[i] = sinf(x: theta);
41 cos_vals[i] = cosf(x: theta);
42 }
43 }
44
45 void fill_hann_window(int length, bool periodic, float * output) {
46 int offset = -1;
47 if (periodic) {
48 offset = 0;
49 }
50 for (int i = 0; i < length; i++) {
51 output[i] = 0.5 * (1.0 - cosf(x: (2.0 * M_PI * i) / (length + offset)));
52 }
53 }
54} global_cache;
55}
56
57// naive Discrete Fourier Transform
58// input is real-valued
59// output is complex-valued
60static void dft(const float* in, int N, float* out) {
61 const int sin_cos_step = SIN_COS_N_COUNT / N;
62
63 for (int k = 0; k < N; k++) {
64 float re = 0;
65 float im = 0;
66
67 for (int n = 0; n < N; n++) {
68 int idx = (k * n * sin_cos_step) % (SIN_COS_N_COUNT); // t = 2*M_PI*k*n/N
69 re += in[n]*global_cache.cos_vals[idx]; // cos(t)
70 im -= in[n]*global_cache.sin_vals[idx]; // sin(t)
71 }
72
73 out[k*2 + 0] = re;
74 out[k*2 + 1] = im;
75 }
76}
77
78// Cooley-Tukey FFT
79// poor man's implementation - use something better
80// input is real-valued
81// output is complex-valued
82static void fft(float* in, int N, float* out) {
83 if (N == 1) {
84 out[0] = in[0];
85 out[1] = 0;
86 return;
87 }
88
89 const int half_N = N / 2;
90 if (N - half_N*2 == 1) {
91 dft(in, N, out);
92 return;
93 }
94
95 float* even = in + N;
96 for (int i = 0; i < half_N; ++i) {
97 even[i]= in[2*i];
98 }
99 float* even_fft = out + 2 * N;
100 fft(in: even, N: half_N, out: even_fft);
101
102 float* odd = even;
103 for (int i = 0; i < half_N; ++i) {
104 odd[i] = in[2*i + 1];
105 }
106 float* odd_fft = even_fft + N;
107 fft(in: odd, N: half_N, out: odd_fft);
108
109 const int sin_cos_step = SIN_COS_N_COUNT / N;
110 for (int k = 0; k < half_N; k++) {
111 int idx = k * sin_cos_step; // t = 2*M_PI*k/N
112 float re = global_cache.cos_vals[idx]; // cos(t)
113 float im = -global_cache.sin_vals[idx]; // sin(t)
114
115 float re_odd = odd_fft[2*k + 0];
116 float im_odd = odd_fft[2*k + 1];
117
118 out[2*k + 0] = even_fft[2*k + 0] + re*re_odd - im*im_odd;
119 out[2*k + 1] = even_fft[2*k + 1] + re*im_odd + im*re_odd;
120
121 out[2*(k + half_N) + 0] = even_fft[2*k + 0] - re*re_odd + im*im_odd;
122 out[2*(k + half_N) + 1] = even_fft[2*k + 1] - re*im_odd - im*re_odd;
123 }
124}
125
126static void log_mel_spectrogram_worker_thread(int ith, const float * hann, const std::vector<float> & samples,
127 int n_samples, int frame_size, int frame_step, int n_threads,
128 const whisper_filters & filters, whisper_mel & mel) {
129 std::vector<float> fft_in(frame_size * 2, 0.0);
130 std::vector<float> fft_out(frame_size * 2 * 2 * 2);
131
132 int n_fft = filters.n_fft;
133 int i = ith;
134
135 // make sure n_fft == 1 + (WHISPER_N_FFT / 2), bin_0 to bin_nyquist
136 WHISPER_ASSERT(n_fft == 1 + (frame_size / 2));
137
138 // calculate FFT only when fft_in are not all zero
139 for (; i < std::min(a: n_samples / frame_step + 1, b: mel.n_len); i += n_threads) {
140 const int offset = i * frame_step;
141
142 // apply Hann window (~10% faster)
143 for (int j = 0; j < std::min(a: frame_size, b: n_samples - offset); j++) {
144 fft_in[j] = hann[j] * samples[offset + j];
145 }
146
147 // fill the rest with zeros
148 if (n_samples - offset < frame_size) {
149 std::fill(first: fft_in.begin() + (n_samples - offset), last: fft_in.end(), value: 0.0);
150 }
151
152 // FFT
153 fft(in: fft_in.data(), N: frame_size, out: fft_out.data());
154
155 // Calculate modulus^2 of complex numbers
156 // Use pow(fft_out[2 * j + 0], 2) + pow(fft_out[2 * j + 1], 2) causes inference quality problem? Interesting.
157 for (int j = 0; j < n_fft; j++) {
158 fft_out[j] = (fft_out[2 * j + 0] * fft_out[2 * j + 0] + fft_out[2 * j + 1] * fft_out[2 * j + 1]);
159 }
160
161 // mel spectrogram
162 for (int j = 0; j < mel.n_mel; j++) {
163 double sum = 0.0;
164 // unroll loop (suggested by GH user @lunixbochs)
165 int k = 0;
166 for (k = 0; k < n_fft - 3; k += 4) {
167 sum +=
168 fft_out[k + 0] * filters.data[j * n_fft + k + 0] +
169 fft_out[k + 1] * filters.data[j * n_fft + k + 1] +
170 fft_out[k + 2] * filters.data[j * n_fft + k + 2] +
171 fft_out[k + 3] * filters.data[j * n_fft + k + 3];
172 }
173 // handle n_fft remainder
174 for (; k < n_fft; k++) {
175 sum += fft_out[k] * filters.data[j * n_fft + k];
176 }
177 sum = log10(x: std::max(a: sum, b: 1e-10));
178 mel.data[j * mel.n_len + i] = sum;
179 }
180 }
181
182 // Otherwise fft_out are all zero
183 double sum = log10(x: 1e-10);
184 for (; i < mel.n_len; i += n_threads) {
185 for (int j = 0; j < mel.n_mel; j++) {
186 mel.data[j * mel.n_len + i] = sum;
187 }
188 }
189}
190
191// ref: https://github.com/openai/whisper/blob/main/whisper/audio.py#L110-L157
192static bool log_mel_spectrogram(
193 const float * samples,
194 const int n_samples,
195 const int /*sample_rate*/,
196 const int frame_size,
197 const int frame_step,
198 const int n_mel,
199 const int n_threads,
200 const whisper_filters & filters,
201 const bool debug,
202 whisper_mel & mel) {
203 //const int64_t t_start_us = ggml_time_us();
204
205 // Hann window
206 WHISPER_ASSERT(frame_size == WHISPER_N_FFT && "Unsupported frame_size");
207 const float * hann = global_cache.hann_window;
208
209 // Calculate the length of padding
210 int64_t stage_1_pad = WHISPER_SAMPLE_RATE * 30;
211 int64_t stage_2_pad = frame_size / 2;
212
213 // Initialize a vector and copy data from C array to it.
214 std::vector<float> samples_padded;
215 samples_padded.resize(new_size: n_samples + stage_1_pad + stage_2_pad * 2);
216 std::copy(first: samples, last: samples + n_samples, result: samples_padded.begin() + stage_2_pad);
217
218 // pad 30 seconds of zeros at the end of audio (480,000 samples) + reflective pad 200 samples at the end of audio
219 std::fill(first: samples_padded.begin() + n_samples + stage_2_pad, last: samples_padded.begin() + n_samples + stage_1_pad + 2 * stage_2_pad, value: 0);
220
221 // reflective pad 200 samples at the beginning of audio
222 std::reverse_copy(first: samples + 1, last: samples + 1 + stage_2_pad, result: samples_padded.begin());
223
224 mel.n_mel = n_mel;
225 // https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/native/SpectralOps.cpp#L936
226 // Calculate number of frames + remove the last frame
227 mel.n_len = (samples_padded.size() - frame_size) / frame_step;
228 // Calculate semi-padded sample length to ensure compatibility
229 mel.n_len_org = 1 + (n_samples + stage_2_pad - frame_size) / frame_step;
230 mel.data.resize(new_size: mel.n_mel * mel.n_len);
231
232 {
233 std::vector<std::thread> workers(n_threads - 1);
234 for (int iw = 0; iw < n_threads - 1; ++iw) {
235 workers[iw] = std::thread(
236 log_mel_spectrogram_worker_thread, iw + 1, hann, std::cref(t: samples_padded),
237 n_samples + stage_2_pad, frame_size, frame_step, n_threads,
238 std::cref(t: filters), std::ref(t&: mel));
239 }
240
241 // main thread
242 log_mel_spectrogram_worker_thread(ith: 0, hann, samples: samples_padded, n_samples: n_samples + stage_2_pad, frame_size, frame_step, n_threads, filters, mel);
243
244 for (int iw = 0; iw < n_threads - 1; ++iw) {
245 workers[iw].join();
246 }
247 }
248
249 // clamping and normalization
250 double mmax = -1e20;
251 for (int i = 0; i < mel.n_mel*mel.n_len; i++) {
252 if (mel.data[i] > mmax) {
253 mmax = mel.data[i];
254 }
255 }
256
257 mmax -= 8.0;
258
259 for (int i = 0; i < mel.n_mel*mel.n_len; i++) {
260 if (mel.data[i] < mmax) {
261 mel.data[i] = mmax;
262 }
263
264 mel.data[i] = (mel.data[i] + 4.0)/4.0;
265 }
266
267 // Dump log_mel_spectrogram
268 if (debug) {
269 std::ofstream outFile("log_mel_spectrogram.json");
270 outFile << "[";
271 for (uint64_t i = 0; i < mel.data.size() - 1; i++) {
272 outFile << mel.data[i] << ", ";
273 }
274 outFile << mel.data[mel.data.size() - 1] << "]";
275 outFile.close();
276 }
277
278 return true;
279}
280
281bool preprocess_audio(
282 const float * samples,
283 size_t n_samples,
284 const whisper_filters & filters,
285 std::vector<whisper_mel> & output) {
286
287 if (n_samples == 0) {
288 // empty audio
289 return false;
290 }
291
292 whisper_mel out_full;
293 bool ok = log_mel_spectrogram(
294 samples,
295 n_samples,
296 COMMON_SAMPLE_RATE,
297 WHISPER_N_FFT,
298 WHISPER_HOP_LENGTH,
299 n_mel: filters.n_mel,
300 n_threads: 4, // n_threads
301 filters,
302 debug: false, // debug
303 mel&: out_full);
304 if (!ok) {
305 return false;
306 }
307
308 // because the cgraph in clip.cpp only accepts 3000 frames each, we need to split the mel
309 // we always expect the mel to have 3000 silent frames at the end
310 // printf("n_len %d\n", out_full.n_len);
311 const size_t frames_per_chunk = 3000;
312 GGML_ASSERT((size_t)out_full.n_len > frames_per_chunk);
313 for (size_t off = 0; off < (size_t)out_full.n_len; off += frames_per_chunk) {
314 int n_len = std::min(a: frames_per_chunk, b: (size_t)out_full.n_len - off);
315 if ((size_t)n_len < frames_per_chunk) {
316 break; // last uncomplete chunk will always be a padded chunk, safe to ignore
317 }
318
319 whisper_mel out_chunk;
320 out_chunk.n_len = n_len;
321 out_chunk.n_mel = out_full.n_mel;
322 out_chunk.n_len_org = out_full.n_mel; // unused
323 out_chunk.data.reserve(n: out_chunk.n_mel * out_chunk.n_len);
324
325 for (int i = 0; i < out_full.n_mel; i++) {
326 auto src = out_full.data.begin() + i*out_full.n_len + off;
327 out_chunk.data.insert(position: out_chunk.data.end(), first: src, last: src + frames_per_chunk);
328 }
329
330 output.push_back(x: std::move(out_chunk));
331 }
332
333 return true;
334}
335
336} // namespace whisper_preprocessor
337
338
// precalculated mel filter banks
// values are multiplied by 1000.0 to save space, and are divided by 1000.0 at the end of the function
341//
342// generated from python code:
343//
344// from numpy import load
345// data = load('mel_filters.npz')
346// lst = data.files
347// for item in lst:
348// print(item)
349// print(data[item].shape)
350// n_mel = data[item].shape[0]
351// n_fft = data[item].shape[1]
352// for i, row in enumerate(data[item]):
353// for j, val in enumerate(row):
354// val = val * 1000.0
355// if val != 0:
356// print(f"data[{i*n_fft + j}] = {val:.6f};")
357
358namespace whisper_precalc_filters {
359
360whisper_preprocessor::whisper_filters get_128_bins() {
361 whisper_preprocessor::whisper_filters filters;
362 filters.n_mel = 128;
363 filters.n_fft = 201;
364 std::vector data(filters.n_mel * filters.n_fft, 0.0f);
365
366 data[1] = 12.37398665;
367 data[202] = 30.39256483;
368 data[404] = 24.74797331;
369 data[605] = 18.01857911;
370 data[807] = 37.12195903;
371 data[1008] = 5.64459199;
372 data[1009] = 6.72939420;
373 data[1210] = 36.03715822;
374 data[1412] = 19.10337992;
375 data[1613] = 23.66316877;
376 data[1815] = 31.47736564;
377 data[2016] = 11.28918398;
378 data[2017] = 1.08480197;
379 data[2218] = 41.68175161;
380 data[2420] = 13.45878839;
381 data[2621] = 29.30776216;
382 data[2823] = 25.83277412;
383 data[3024] = 16.93377644;
384 data[3226] = 38.20675984;
385 data[3427] = 4.55979025;
386 data[3428] = 7.81419594;
387 data[3629] = 34.95235741;
388 data[3831] = 20.18818259;
389 data[4032] = 22.57836796;
390 data[4234] = 32.56217018;
391 data[4435] = 10.20438317;
392 data[4436] = 2.16960395;
393 data[4637] = 40.59694707;
394 data[4839] = 14.54358920;
395 data[5040] = 28.22295949;
396 data[5242] = 26.91757679;
397 data[5443] = 15.84897563;
398 data[5645] = 39.29156065;
399 data[5846] = 3.47498828;
400 data[5847] = 8.89899861;
401 data[6048] = 33.86755288;
402 data[6250] = 21.27298526;
403 data[6451] = 21.49356715;
404 data[6653] = 33.64697099;
405 data[6854] = 9.11958050;
406 data[6855] = 3.25440569;
407 data[7056] = 39.51214626;
408 data[7258] = 15.62839188;
409 data[7459] = 27.13815868;
410 data[7661] = 28.00237760;
411 data[7862] = 14.76417296;
412 data[8064] = 40.37636518;
413 data[8265] = 2.38068704;
414 data[8266] = 10.20263787;
415 data[8467] = 31.61146119;
416 data[8669] = 24.54700135;
417 data[8870] = 15.32919332;
418 data[8871] = 1.66583748;
419 data[9072] = 36.72905266;
420 data[9274] = 20.09709924;
421 data[9475] = 16.93102531;
422 data[9476] = 2.90265540;
423 data[9677] = 32.84499049;
424 data[9879] = 23.52004871;
425 data[10080] = 11.03894413;
426 data[10081] = 10.72582975;
427 data[10282] = 22.71829173;
428 data[10484] = 32.27872774;
429 data[10685] = 0.11626833;
430 data[10686] = 22.85348251;
431 data[10887] = 8.56344029;
432 data[10888] = 14.97978810;
433 data[11089] = 15.51398356;
434 data[11090] = 8.51490628;
435 data[11291] = 21.10680379;
436 data[11292] = 3.32652032;
437 data[11493] = 25.47064796;
438 data[11695] = 27.35907957;
439 data[11896] = 0.65853616;
440 data[11897] = 23.83812517;
441 data[12098] = 3.44359246;
442 data[12099] = 21.22455277;
443 data[12300] = 5.35842171;
444 data[12301] = 19.42555793;
445 data[12502] = 6.49324711;
446 data[12503] = 18.35542172;
447 data[12704] = 6.93138083;
448 data[12705] = 17.93504693;
449 data[12906] = 6.74968259;
450 data[12907] = 18.09151843;
451 data[13108] = 6.01899112;
452 data[13109] = 18.75767298;
453 data[13310] = 4.80452832;
454 data[13311] = 19.87172849;
455 data[13512] = 3.16627859;
456 data[13513] = 21.37690969;
457 data[13514] = 1.25317345;
458 data[13714] = 1.15934468;
459 data[13715] = 20.80361731;
460 data[13716] = 4.04486805;
461 data[13917] = 17.55363122;
462 data[13918] = 7.08320038;
463 data[14119] = 14.07538634;
464 data[14120] = 10.32655034;
465 data[14321] = 10.40921453;
466 data[14322] = 13.73696327;
467 data[14523] = 6.59187697;
468 data[14524] = 17.27988198;
469 data[14525] = 1.46804214;
470 data[14725] = 2.65681883;
471 data[14726] = 18.09193194;
472 data[14727] = 5.85655728;
473 data[14928] = 13.34277913;
474 data[14929] = 10.28267574;
475 data[15130] = 8.56800377;
476 data[15131] = 14.72230814;
477 data[15132] = 1.04039861;
478 data[15332] = 3.79085587;
479 data[15333] = 17.14678481;
480 data[15334] = 6.11609267;
481 data[15535] = 11.75929047;
482 data[15536] = 11.13393717;
483 data[15737] = 6.43857848;
484 data[15738] = 16.07806236;
485 data[15739] = 4.23917221;
486 data[15939] = 1.19989377;
487 data[15940] = 12.75671553;
488 data[15941] = 9.65298992;
489 data[16142] = 7.06935255;
490 data[16143] = 14.94054683;
491 data[16144] = 4.19024844;
492 data[16344] = 1.51483389;
493 data[16345] = 12.00899947;
494 data[16346] = 9.84823331;
495 data[16547] = 6.10224018;
496 data[16548] = 15.33857174;
497 data[16549] = 5.57676842;
498 data[16749] = 0.36827257;
499 data[16750] = 9.89749376;
500 data[16751] = 11.35340426;
501 data[16752] = 2.05122307;
502 data[16952] = 3.89297144;
503 data[16953] = 12.97352277;
504 data[16954] = 8.06631614;
505 data[17155] = 6.74493238;
506 data[17156] = 13.85874674;
507 data[17157] = 5.41190524;
508 data[17357] = 0.74220158;
509 data[17358] = 8.98779090;
510 data[17359] = 11.37871388;
511 data[17360] = 3.32958088;
512 data[17560] = 2.82313535;
513 data[17561] = 10.68049297;
514 data[17562] = 9.43340641;
515 data[17563] = 1.76325557;
516 data[17763] = 4.39018616;
517 data[17764] = 11.87758986;
518 data[17765] = 7.97005836;
519 data[17766] = 0.66104700;
520 data[17966] = 5.49466675;
521 data[17967] = 12.62953598;
522 data[17968] = 6.93987962;
523 data[18169] = 6.18401915;
524 data[18170] = 12.93473132;
525 data[18171] = 6.29778765;
526 data[18371] = 0.02325210;
527 data[18372] = 6.50206627;
528 data[18373] = 12.32661773;
529 data[18374] = 6.00216538;
530 data[18574] = 0.31548753;
531 data[18575] = 6.48925547;
532 data[18576] = 12.04130240;
533 data[18577] = 6.01462880;
534 data[18777] = 0.29979556;
535 data[18778] = 6.18288014;
536 data[18779] = 12.04272825;
537 data[18780] = 6.29981188;
538 data[18781] = 0.55689598;
539 data[18980] = 0.01120471;
540 data[18981] = 5.61729167;
541 data[18982] = 11.22337859;
542 data[18983] = 6.82516303;
543 data[18984] = 1.35264499;
544 data[19184] = 4.82410006;
545 data[19185] = 10.16623247;
546 data[19186] = 7.56075513;
547 data[19187] = 2.34590308;
548 data[19387] = 3.83235747;
549 data[19388] = 8.92296247;
550 data[19389] = 8.47910438;
551 data[19390] = 3.50978645;
552 data[19590] = 2.66873185;
553 data[19591] = 7.51965167;
554 data[19592] = 9.55500547;
555 data[19593] = 4.81966138;
556 data[19594] = 0.08431751;
557 data[19793] = 1.35767367;
558 data[19794] = 5.98019501;
559 data[19795] = 10.60271543;
560 data[19796] = 6.25298498;
561 data[19797] = 1.74059917;
562 data[19997] = 4.32644226;
563 data[19998] = 8.73131864;
564 data[19999] = 7.78916525;
565 data[20000] = 3.48923868;
566 data[20200] = 2.57835095;
567 data[20201] = 6.77582854;
568 data[20202] = 9.40941647;
569 data[20203] = 5.31194592;
570 data[20204] = 1.21447595;
571 data[20403] = 0.75411191;
572 data[20404] = 4.75395704;
573 data[20405] = 8.75380263;
574 data[20406] = 7.19209015;
575 data[20407] = 3.28754401;
576 data[20607] = 2.68179690;
577 data[20608] = 6.49331464;
578 data[20609] = 9.11457930;
579 data[20610] = 5.39387390;
580 data[20611] = 1.67316827;
581 data[20810] = 0.57394296;
582 data[20811] = 4.20600036;
583 data[20812] = 7.83805829;
584 data[20813] = 7.52023002;
585 data[20814] = 3.97470826;
586 data[20815] = 0.42918732;
587 data[21014] = 1.90464477;
588 data[21015] = 5.36569161;
589 data[21016] = 8.82673822;
590 data[21017] = 6.27609482;
591 data[21018] = 2.89750961;
592 data[21218] = 2.89885257;
593 data[21219] = 6.19694078;
594 data[21220] = 8.56699049;
595 data[21221] = 5.34748193;
596 data[21222] = 2.12797290;
597 data[21421] = 0.44750227;
598 data[21422] = 3.59030394;
599 data[21423] = 6.73310598;
600 data[21424] = 7.77023612;
601 data[21425] = 4.70231380;
602 data[21426] = 1.63439126;
603 data[21625] = 1.01536023;
604 data[21626] = 4.01018746;
605 data[21627] = 7.00501446;
606 data[21628] = 7.23442994;
607 data[21629] = 4.31095669;
608 data[21630] = 1.38748321;
609 data[21829] = 1.33348850;
610 data[21830] = 4.18730825;
611 data[21831] = 7.04112789;
612 data[21832] = 6.93188375;
613 data[21833] = 4.14605811;
614 data[21834] = 1.36023236;
615 data[22033] = 1.42879714;
616 data[22034] = 4.14824858;
617 data[22035] = 6.86769979;
618 data[22036] = 6.83705276;
619 data[22037] = 4.18239459;
620 data[22038] = 1.52773573;
621 data[22237] = 1.32610439;
622 data[22238] = 3.91751388;
623 data[22239] = 6.50892360;
624 data[22240] = 6.92639686;
625 data[22241] = 4.39672917;
626 data[22242] = 1.86706171;
627 data[22441] = 1.04827771;
628 data[22442] = 3.51767405;
629 data[22443] = 5.98707050;
630 data[22444] = 7.17824046;
631 data[22445] = 4.76767914;
632 data[22446] = 2.35711760;
633 data[22645] = 0.61636406;
634 data[22646] = 2.96949223;
635 data[22647] = 5.32262027;
636 data[22648] = 7.57265091;
637 data[22649] = 5.27558755;
638 data[22650] = 2.97852419;
639 data[22651] = 0.68146095;
640 data[22849] = 0.04971400;
641 data[22850] = 2.29204819;
642 data[22851] = 4.53438237;
643 data[22852] = 6.77671656;
644 data[22853] = 5.90240723;
645 data[22854] = 3.71349836;
646 data[22855] = 1.52458926;
647 data[23054] = 1.50285335;
648 data[23055] = 3.63961048;
649 data[23056] = 5.77636715;
650 data[23057] = 6.63159089;
651 data[23058] = 4.54574358;
652 data[23059] = 2.45989650;
653 data[23060] = 0.37404924;
654 data[23258] = 0.61795861;
655 data[23259] = 2.65410915;
656 data[23260] = 4.69025923;
657 data[23261] = 6.72641024;
658 data[23262] = 5.46034705;
659 data[23263] = 3.47270933;
660 data[23264] = 1.48507138;
661 data[23463] = 1.59233576;
662 data[23464] = 3.53261665;
663 data[23465] = 5.47289755;
664 data[23466] = 6.44368259;
665 data[23467] = 4.54962999;
666 data[23468] = 2.65557761;
667 data[23469] = 0.76152512;
668 data[23667] = 0.46749352;
669 data[23668] = 2.31641904;
670 data[23669] = 4.16534441;
671 data[23670] = 6.01426978;
672 data[23671] = 5.67844696;
673 data[23672] = 3.87357362;
674 data[23673] = 2.06870004;
675 data[23674] = 0.26382666;
676 data[23872] = 1.05349103;
677 data[23873] = 2.81536230;
678 data[23874] = 4.57723346;
679 data[23875] = 6.33910485;
680 data[23876] = 5.12815686;
681 data[23877] = 3.40826320;
682 data[23878] = 1.68837002;
683 data[24077] = 1.43350090;
684 data[24078] = 3.11241671;
685 data[24079] = 4.79133241;
686 data[24080] = 6.40943693;
687 data[24081] = 4.77052201;
688 data[24082] = 3.13160778;
689 data[24083] = 1.49269309;
690 data[24281] = 0.02932359;
691 data[24282] = 1.62918994;
692 data[24283] = 3.22905602;
693 data[24284] = 4.82892245;
694 data[24285] = 6.14671456;
695 data[24286] = 4.58496623;
696 data[24287] = 3.02321767;
697 data[24288] = 1.46146910;
698 data[24486] = 0.13601698;
699 data[24487] = 1.66055572;
700 data[24488] = 3.18509457;
701 data[24489] = 4.70963307;
702 data[24490] = 6.04072399;
703 data[24491] = 4.55250870;
704 data[24492] = 3.06429295;
705 data[24493] = 1.57607743;
706 data[24494] = 0.08786193;
707 data[24691] = 0.09328097;
708 data[24692] = 1.54603878;
709 data[24693] = 2.99879676;
710 data[24694] = 4.45155473;
711 data[24695] = 5.90431225;
712 data[24696] = 4.65566106;
713 data[24697] = 3.23751615;
714 data[24698] = 1.81937125;
715 data[24699] = 0.40122634;
716 data[24897] = 1.30262633;
717 data[24898] = 2.68698297;
718 data[24899] = 4.07133950;
719 data[24900] = 5.45569602;
720 data[24901] = 4.87832492;
721 data[24902] = 3.52695142;
722 data[24903] = 2.17557792;
723 data[24904] = 0.82420459;
724 data[25102] = 0.94595028;
725 data[25103] = 2.26512621;
726 data[25104] = 3.58430226;
727 data[25105] = 4.90347855;
728 data[25106] = 5.20569785;
729 data[25107] = 3.91795207;
730 data[25108] = 2.63020652;
731 data[25109] = 1.34246063;
732 data[25110] = 0.05471494;
733 data[25307] = 0.49037894;
734 data[25308] = 1.74744334;
735 data[25309] = 3.00450763;
736 data[25310] = 4.26157191;
737 data[25311] = 5.51863620;
738 data[25312] = 4.39707236;
739 data[25313] = 3.16995848;
740 data[25314] = 1.94284460;
741 data[25315] = 0.71573065;
742 data[25513] = 1.14698056;
743 data[25514] = 2.34485767;
744 data[25515] = 3.54273478;
745 data[25516] = 4.74061165;
746 data[25517] = 4.95198462;
747 data[25518] = 3.78264743;
748 data[25519] = 2.61331047;
749 data[25520] = 1.44397374;
750 data[25521] = 0.27463681;
751 data[25718] = 0.47569509;
752 data[25719] = 1.61717169;
753 data[25720] = 2.75864848;
754 data[25721] = 3.90012516;
755 data[25722] = 5.04160160;
756 data[25723] = 4.45712078;
757 data[25724] = 3.34284059;
758 data[25725] = 2.22856039;
759 data[25726] = 1.11428020;
760
761 for (auto & val : data) {
762 val /= 1000.0f;
763 }
764
765 filters.data = std::move(data);
766 return filters;
767}
768
769} // namespace whisper_precalc_filters
770