1 | /* |
2 | * Copyright 2011 The Android Open Source Project |
3 | * |
4 | * Use of this source code is governed by a BSD-style license that can be |
5 | * found in the LICENSE file. |
6 | */ |
7 | |
8 | #include "include/effects/SkBlurImageFilter.h" |
9 | |
10 | #include <algorithm> |
11 | |
12 | #include "include/core/SkBitmap.h" |
13 | #include "include/core/SkTileMode.h" |
14 | #include "include/private/SkColorData.h" |
15 | #include "include/private/SkNx.h" |
16 | #include "include/private/SkTFitsIn.h" |
17 | #include "src/core/SkArenaAlloc.h" |
18 | #include "src/core/SkAutoPixmapStorage.h" |
19 | #include "src/core/SkGpuBlurUtils.h" |
20 | #include "src/core/SkImageFilter_Base.h" |
21 | #include "src/core/SkOpts.h" |
22 | #include "src/core/SkReadBuffer.h" |
23 | #include "src/core/SkSpecialImage.h" |
24 | #include "src/core/SkWriteBuffer.h" |
25 | |
26 | #if SK_SUPPORT_GPU |
27 | #include "include/gpu/GrContext.h" |
28 | #include "src/gpu/GrTextureProxy.h" |
29 | #include "src/gpu/SkGr.h" |
30 | #endif |
31 | |
32 | namespace { |
33 | |
34 | class SkBlurImageFilterImpl final : public SkImageFilter_Base { |
35 | public: |
36 | SkBlurImageFilterImpl(SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode, |
37 | sk_sp<SkImageFilter> input, const CropRect* cropRect) |
38 | : INHERITED(&input, 1, cropRect) |
39 | , fSigma{sigmaX, sigmaY} |
40 | , fTileMode(tileMode) {} |
41 | |
42 | SkRect computeFastBounds(const SkRect&) const override; |
43 | |
44 | protected: |
45 | void flatten(SkWriteBuffer&) const override; |
46 | sk_sp<SkSpecialImage> onFilterImage(const Context&, SkIPoint* offset) const override; |
47 | SkIRect onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm, |
48 | MapDirection, const SkIRect* inputRect) const override; |
49 | |
50 | private: |
51 | friend void SkBlurImageFilter::RegisterFlattenables(); |
52 | SK_FLATTENABLE_HOOKS(SkBlurImageFilterImpl) |
53 | |
54 | #if SK_SUPPORT_GPU |
55 | sk_sp<SkSpecialImage> gpuFilter( |
56 | const Context& ctx, SkVector sigma, |
57 | const sk_sp<SkSpecialImage> &input, |
58 | SkIRect inputBounds, SkIRect dstBounds, SkIPoint inputOffset, SkIPoint* offset) const; |
59 | #endif |
60 | |
61 | SkSize fSigma; |
62 | SkTileMode fTileMode; |
63 | |
64 | typedef SkImageFilter_Base INHERITED; |
65 | }; |
66 | |
67 | } // end namespace |
68 | |
69 | static SkTileMode to_sktilemode(SkBlurImageFilter::TileMode tileMode) { |
70 | switch(tileMode) { |
71 | case SkBlurImageFilter::kClamp_TileMode: |
72 | return SkTileMode::kClamp; |
73 | case SkBlurImageFilter::kRepeat_TileMode: |
74 | return SkTileMode::kRepeat; |
75 | case SkBlurImageFilter::kClampToBlack_TileMode: |
76 | // Fall through |
77 | default: |
78 | return SkTileMode::kDecal; |
79 | } |
80 | } |
81 | |
82 | sk_sp<SkImageFilter> SkBlurImageFilter::Make(SkScalar sigmaX, SkScalar sigmaY, |
83 | sk_sp<SkImageFilter> input, |
84 | const SkImageFilter::CropRect* cropRect, |
85 | TileMode tileMode) { |
86 | return Make(sigmaX, sigmaY, to_sktilemode(tileMode), std::move(input), cropRect); |
87 | } |
88 | |
89 | sk_sp<SkImageFilter> SkBlurImageFilter::Make(SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode, |
90 | sk_sp<SkImageFilter> input, |
91 | const SkImageFilter::CropRect* cropRect) { |
92 | if (sigmaX < SK_ScalarNearlyZero && sigmaY < SK_ScalarNearlyZero && !cropRect) { |
93 | return input; |
94 | } |
95 | return sk_sp<SkImageFilter>( |
96 | new SkBlurImageFilterImpl(sigmaX, sigmaY, tileMode, input, cropRect)); |
97 | } |
98 | |
99 | void SkBlurImageFilter::RegisterFlattenables() { SK_REGISTER_FLATTENABLE(SkBlurImageFilterImpl); } |
100 | |
101 | /////////////////////////////////////////////////////////////////////////////// |
102 | |
103 | sk_sp<SkFlattenable> SkBlurImageFilterImpl::CreateProc(SkReadBuffer& buffer) { |
104 | SK_IMAGEFILTER_UNFLATTEN_COMMON(common, 1); |
105 | SkScalar sigmaX = buffer.readScalar(); |
106 | SkScalar sigmaY = buffer.readScalar(); |
107 | SkTileMode tileMode; |
108 | if (buffer.isVersionLT(SkPicturePriv::kTileModeInBlurImageFilter_Version)) { |
109 | tileMode = SkTileMode::kDecal; |
110 | } else if (buffer.isVersionLT(SkPicturePriv::kCleanupImageFilterEnums_Version)) { |
111 | tileMode = to_sktilemode(buffer.read32LE(SkBlurImageFilter::kLast_TileMode)); |
112 | } else { |
113 | tileMode = buffer.read32LE(SkTileMode::kLastTileMode); |
114 | } |
115 | |
116 | static_assert(SkBlurImageFilter::kLast_TileMode == 2, "CreateProc" ); |
117 | |
118 | return SkBlurImageFilter::Make( |
119 | sigmaX, sigmaY, tileMode, common.getInput(0), &common.cropRect()); |
120 | } |
121 | |
122 | void SkBlurImageFilterImpl::flatten(SkWriteBuffer& buffer) const { |
123 | this->INHERITED::flatten(buffer); |
124 | buffer.writeScalar(fSigma.fWidth); |
125 | buffer.writeScalar(fSigma.fHeight); |
126 | |
127 | // Fuzzer sanity checks |
128 | static_assert((int) SkTileMode::kLastTileMode == 3 && SkBlurImageFilter::kLast_TileMode == 2, |
129 | "SkBlurImageFilterImpl::flatten" ); |
130 | SkASSERT(fTileMode <= SkTileMode::kLastTileMode); |
131 | buffer.writeInt(static_cast<int>(fTileMode)); |
132 | } |
133 | |
134 | // This is defined by the SVG spec: |
135 | // https://drafts.fxtf.org/filter-effects/#feGaussianBlurElement |
136 | static int calculate_window(double sigma) { |
137 | // NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow |
138 | // using the Gauss filter. It also limits the size of buffers used hold intermediate values. |
139 | // Explanation of maximums: |
140 | // sum0 = window * 255 |
141 | // sum1 = window * sum0 -> window * window * 255 |
142 | // sum2 = window * sum1 -> window * window * window * 255 -> window^3 * 255 |
143 | // |
144 | // The value window^3 * 255 must fit in a uint32_t. So, |
145 | // window^3 < 2^32. window = 255. |
146 | // |
147 | // window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5) |
148 | // For window <= 255, the largest value for sigma is 136. |
149 | sigma = SkTPin(sigma, 0.0, 136.0); |
150 | auto possibleWindow = static_cast<int>(floor(sigma * 3 * sqrt(2 * SK_DoublePI) / 4 + 0.5)); |
151 | return std::max(1, possibleWindow); |
152 | } |
153 | |
154 | // Calculating the border is tricky. The border is the distance in pixels between the first dst |
155 | // pixel and the first src pixel (or the last src pixel and the last dst pixel). |
156 | // I will go through the odd case which is simpler, and then through the even case. Given a |
157 | // stack of filters seven wide for the odd case of three passes. |
158 | // |
//        S
//     aaaAaaa
//     bbbBbbb
//     cccCccc
//        D
164 | // |
165 | // The furthest changed pixel is when the filters are in the following configuration. |
166 | // |
//                 S
//           aaaAaaa
//        bbbBbbb
//     cccCccc
//        D
172 | // |
173 | // The A pixel is calculated using the value S, the B uses A, and the C uses B, and |
174 | // finally D is C. So, with a window size of seven the border is nine. In the odd case, the |
175 | // border is 3*((window - 1)/2). |
176 | // |
177 | // For even cases the filter stack is more complicated. The spec specifies two passes |
178 | // of even filters and a final pass of odd filters. A stack for a width of six looks like |
179 | // this. |
180 | // |
//       S
//    aaaAaa
//     bbBbbb
//    cccCccc
//       D
186 | // |
187 | // The furthest pixel looks like this. |
188 | // |
//               S
//          aaaAaa
//        bbBbbb
//    cccCccc
//       D
194 | // |
195 | // For a window of six, the border value is eight. In the even case the border is 3 * |
196 | // (window/2) - 1. |
// Distance in pixels between the first dst pixel and the first src pixel for a three-pass blur
// of the given window: 3 * ((window - 1) / 2) for odd windows, 3 * (window / 2) - 1 for even.
static int calculate_border(int window) {
    const bool oddWindow = (window & 1) == 1;
    if (oddWindow) {
        const int halfWidth = (window - 1) / 2;
        return 3 * halfWidth;
    }
    const int halfWidth = window / 2;
    return 3 * halfWidth - 1;
}
200 | |
// Total number of circular-buffer entries needed for the three passes: each pass needs
// window - 1 entries, and an even window needs one extra entry.
static int calculate_buffer(int window) {
    const int perPass = window - 1;
    int total = 3 * perPass;
    if ((window & 1) != 1) {
        total += 1;
    }
    return total;
}
205 | |
206 | // blur_one_direction implements the common three pass box filter approximation of Gaussian blur, |
207 | // but combines all three passes into a single pass. This approach is facilitated by three circular |
208 | // buffers the width of the window which track values for trailing edges of each of the three |
209 | // passes. This allows the algorithm to use more precision in the calculation because the values |
210 | // are not rounded each pass. And this implementation also avoids a trap that's easy to fall |
211 | // into resulting in blending in too many zeroes near the edge. |
212 | // |
213 | // In general, a window sum has the form: |
214 | // sum_n+1 = sum_n + leading_edge - trailing_edge. |
215 | // If instead we do the subtraction at the end of the previous iteration, we can just |
216 | // calculate the sums instead of having to do the subtractions too. |
217 | // |
218 | // In previous iteration: |
219 | // sum_n+1 = sum_n - trailing_edge. |
220 | // |
221 | // In this iteration: |
222 | // sum_n+1 = sum_n + leading_edge. |
223 | // |
224 | // Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the |
225 | // actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the |
226 | // three passes at the same time has the form: |
227 | // |
228 | // sum0_n+1 = sum0_n + leading edge |
229 | // sum1_n+1 = sum1_n + sum0_n+1 |
230 | // sum2_n+1 = sum2_n + sum1_n+1 |
231 | // |
232 | // sum2_n+1 / window^3 is the new value of the destination pixel. |
233 | // |
234 | // Reduce the sums by the trailing edges which were stored in the circular buffers, |
// for the next go around. This is the case for odd sized windows; for even windows the third
// circular buffer is one larger than the first two circular buffers.
237 | // |
238 | // sum2_n+2 = sum2_n+1 - buffer2[i]; |
239 | // buffer2[i] = sum1; |
240 | // sum1_n+2 = sum1_n+1 - buffer1[i]; |
241 | // buffer1[i] = sum0; |
242 | // sum0_n+2 = sum0_n+1 - buffer0[i]; |
243 | // buffer0[i] = leading edge |
244 | // |
245 | // This is all encapsulated in the processValue function below. |
246 | // |
// One slot of the combined pass 0 / pass 1 circular buffer: element [0] holds the pass 0
// trailing value and element [1] the pass 1 trailing value, so one cursor advances both.
using Pass0And1 = Sk4u[2];

// Blurs srcH lines of pixels along a single direction, reading from src and writing to dst.
//
//   buffer               scratch space for the three circular buffers; it must hold at least
//                        (window - 1) + (window - 1) + (window - 1, or window when window is
//                        even) Sk4u entries. It is zeroed at the start of every line.
//   window               width of the box filter. With window == 1 the circular buffers would
//                        be empty while processValue still dereferences them; cpu_blur only
//                        calls this function with window > 1.
//   srcLeft, srcRight    extent of the source along the blur direction, relative to dst start.
//   dstRight             end of the destination along the blur direction.
//   src, dst             first pixel of the first line to read / write.
//   srcXStride,          step between consecutive pixels along the blur direction, in
//   dstXStride           uint32_t units.
//   srcYStride,          step between consecutive lines, in uint32_t units.
//   dstYStride
//   srcH                 number of lines to process.
//
// The would be dLeft parameter is assumed to be 0.
static void blur_one_direction(Sk4u* buffer, int window,
                               int srcLeft, int srcRight, int dstRight,
                               const uint32_t* src, int srcXStride, int srcYStride, int srcH,
                               uint32_t* dst, int dstXStride, int dstYStride) {

    // The circular buffers are one less than the window.
    auto pass0Count = window - 1,
         pass1Count = window - 1,
         pass2Count = (window & 1) == 1 ? window - 1 : window;

    // Layout of 'buffer': pass 0 and pass 1 entries interleaved as Pass0And1 pairs, followed by
    // the pass 2 entries.
    Pass0And1* buffer01Start = (Pass0And1*)buffer;
    Sk4u* buffer2Start = buffer + pass0Count + pass1Count;
    Pass0And1* buffer01End = (Pass0And1*)buffer2Start;
    Sk4u* buffer2End = buffer2Start + pass2Count;

    // If the window is odd then the divisor is just window ^ 3 otherwise,
    // it is window * window * (window + 1) = window ^ 3 + window ^ 2;
    auto window2 = window * window;
    auto window3 = window2 * window;
    auto divisor = (window & 1) == 1 ? window3 : window3 + window2;

    // NB the sums in the blur code use the following technique to avoid
    // adding 1/2 to round the divide.
    //
    // Sum/d + 1/2 == (Sum + h) / d
    // Sum + d(1/2) == Sum + h
    // h == (1/2)d
    //
    // But the d/2 itself should be rounded.
    // h == d/2 + 1/2 == (d + 1) / 2
    //
    // weight = 1 / d * 2 ^ 32
    auto weight = static_cast<uint32_t>(round(1.0 / divisor * (1ull << 32)));
    auto half = static_cast<uint32_t>((divisor + 1) / 2);

    auto border = calculate_border(window);

    // Calculate the start and end of the source pixels with respect to the destination start.
    auto srcStart = srcLeft - border,
         srcEnd = srcRight - border,
         dstEnd = dstRight;

    for (auto y = 0; y < srcH; y++) {
        // Every line starts with empty circular buffers and fresh sums.
        auto buffer01Cursor = buffer01Start;
        auto buffer2Cursor = buffer2Start;

        Sk4u sum0{0u};
        Sk4u sum1{0u};
        // Seeding the final sum with 'half' makes the mulHi below round instead of truncate.
        Sk4u sum2{half};

        sk_bzero(buffer01Start, (buffer2End - (Sk4u *) (buffer01Start)) * sizeof(*buffer2Start));

        // Given an expanded input pixel, move the window ahead using the leadingEdge value.
        // Returns the blurred value for the current position. The order of the statements
        // matters: each sum is reduced by its trailing edge only after it has been used.
        auto processValue = [&](const Sk4u& leadingEdge) -> Sk4u {
            sum0 += leadingEdge;
            sum1 += sum0;
            sum2 += sum1;

            // High 32 bits of sum2 * weight, i.e. sum2 / divisor.
            Sk4u value = sum2.mulHi(weight);

            sum2 -= *buffer2Cursor;
            *buffer2Cursor = sum1;
            buffer2Cursor = (buffer2Cursor + 1) < buffer2End ? buffer2Cursor + 1 : buffer2Start;

            sum1 -= (*buffer01Cursor)[1];
            (*buffer01Cursor)[1] = sum0;
            sum0 -= (*buffer01Cursor)[0];
            (*buffer01Cursor)[0] = leadingEdge;
            buffer01Cursor =
                    (buffer01Cursor + 1) < buffer01End ? buffer01Cursor + 1 : buffer01Start;

            return value;
        };

        auto srcIdx = srcStart;
        auto dstIdx = 0;
        const uint32_t* srcCursor = src;
        uint32_t* dstCursor = dst;

        // The destination pixels are not affected by the src pixels,
        // change to zero as per the spec.
        // https://drafts.fxtf.org/filter-effects/#FilterPrimitivesOverviewIntro
        while (dstIdx < srcIdx) {
            *dstCursor = 0;
            dstCursor += dstXStride;
            SK_PREFETCH(dstCursor);
            dstIdx++;
        }

        // The edge of the source is before the edge of the destination. Calculate the sums for
        // the pixels before the start of the destination. Once srcIdx reaches srcEnd zeros are
        // fed in and the source is no longer read.
        while (dstIdx > srcIdx) {
            Sk4u leadingEdge = srcIdx < srcEnd ? SkNx_cast<uint32_t>(Sk4b::Load(srcCursor)) : 0;
            (void) processValue(leadingEdge);
            srcCursor += srcXStride;
            srcIdx++;
        }

        // The dstIdx and srcIdx are in sync now; the code just uses the dstIdx for both now.
        // Consume the source generating pixels to dst.
        auto loopEnd = std::min(dstEnd, srcEnd);
        while (dstIdx < loopEnd) {
            Sk4u leadingEdge = SkNx_cast<uint32_t>(Sk4b::Load(srcCursor));
            SkNx_cast<uint8_t>(processValue(leadingEdge)).store(dstCursor);
            srcCursor += srcXStride;
            dstCursor += dstXStride;
            SK_PREFETCH(dstCursor);
            dstIdx++;
        }

        // The leading edge is beyond the end of the source. Assume that the pixels
        // are now 0x0000 until the end of the destination.
        loopEnd = dstEnd;
        while (dstIdx < loopEnd) {
            SkNx_cast<uint8_t>(processValue(0u)).store(dstCursor);
            dstCursor += dstXStride;
            SK_PREFETCH(dstCursor);
            dstIdx++;
        }

        // Advance to the next line.
        src += srcYStride;
        dst += dstYStride;
    }
}
373 | |
374 | static sk_sp<SkSpecialImage> copy_image_with_bounds( |
375 | const SkImageFilter_Base::Context& ctx, const sk_sp<SkSpecialImage> &input, |
376 | SkIRect srcBounds, SkIRect dstBounds) { |
377 | SkBitmap inputBM; |
378 | if (!input->getROPixels(&inputBM)) { |
379 | return nullptr; |
380 | } |
381 | |
382 | if (inputBM.colorType() != kN32_SkColorType) { |
383 | return nullptr; |
384 | } |
385 | |
386 | SkBitmap src; |
387 | inputBM.extractSubset(&src, srcBounds); |
388 | |
389 | // Make everything relative to the destination bounds. |
390 | srcBounds.offset(-dstBounds.x(), -dstBounds.y()); |
391 | dstBounds.offset(-dstBounds.x(), -dstBounds.y()); |
392 | |
393 | auto srcW = srcBounds.width(), |
394 | dstW = dstBounds.width(), |
395 | dstH = dstBounds.height(); |
396 | |
397 | SkImageInfo dstInfo = SkImageInfo::Make(dstW, dstH, inputBM.colorType(), inputBM.alphaType()); |
398 | |
399 | SkBitmap dst; |
400 | if (!dst.tryAllocPixels(dstInfo)) { |
401 | return nullptr; |
402 | } |
403 | |
404 | // There is no blurring to do, but we still need to copy the source while accounting for the |
405 | // dstBounds. Remember that the src was intersected with the dst. |
406 | int y = 0; |
407 | size_t dstWBytes = dstW * sizeof(uint32_t); |
408 | for (;y < srcBounds.top(); y++) { |
409 | sk_bzero(dst.getAddr32(0, y), dstWBytes); |
410 | } |
411 | |
412 | for (;y < srcBounds.bottom(); y++) { |
413 | int x = 0; |
414 | uint32_t* dstPtr = dst.getAddr32(0, y); |
415 | for (;x < srcBounds.left(); x++) { |
416 | *dstPtr++ = 0; |
417 | } |
418 | |
419 | memcpy(dstPtr, src.getAddr32(x - srcBounds.left(), y - srcBounds.top()), |
420 | srcW * sizeof(uint32_t)); |
421 | |
422 | dstPtr += srcW; |
423 | x += srcW; |
424 | |
425 | for (;x < dstBounds.right(); x++) { |
426 | *dstPtr++ = 0; |
427 | } |
428 | } |
429 | |
430 | for (;y < dstBounds.bottom(); y++) { |
431 | sk_bzero(dst.getAddr32(0, y), dstWBytes); |
432 | } |
433 | |
434 | return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(), |
435 | dstBounds.height()), |
436 | dst, ctx.surfaceProps()); |
437 | } |
438 | |
439 | // TODO: Implement CPU backend for different fTileMode. |
440 | static sk_sp<SkSpecialImage> cpu_blur( |
441 | const SkImageFilter_Base::Context& ctx, |
442 | SkVector sigma, const sk_sp<SkSpecialImage> &input, |
443 | SkIRect srcBounds, SkIRect dstBounds) { |
444 | auto windowW = calculate_window(sigma.x()), |
445 | windowH = calculate_window(sigma.y()); |
446 | |
447 | if (windowW <= 1 && windowH <= 1) { |
448 | return copy_image_with_bounds(ctx, input, srcBounds, dstBounds); |
449 | } |
450 | |
451 | SkBitmap inputBM; |
452 | |
453 | if (!input->getROPixels(&inputBM)) { |
454 | return nullptr; |
455 | } |
456 | |
457 | if (inputBM.colorType() != kN32_SkColorType) { |
458 | return nullptr; |
459 | } |
460 | |
461 | SkBitmap src; |
462 | inputBM.extractSubset(&src, srcBounds); |
463 | |
464 | // Make everything relative to the destination bounds. |
465 | srcBounds.offset(-dstBounds.x(), -dstBounds.y()); |
466 | dstBounds.offset(-dstBounds.x(), -dstBounds.y()); |
467 | |
468 | auto srcW = srcBounds.width(), |
469 | srcH = srcBounds.height(), |
470 | dstW = dstBounds.width(), |
471 | dstH = dstBounds.height(); |
472 | |
473 | SkImageInfo dstInfo = inputBM.info().makeWH(dstW, dstH); |
474 | |
475 | SkBitmap dst; |
476 | if (!dst.tryAllocPixels(dstInfo)) { |
477 | return nullptr; |
478 | } |
479 | |
480 | auto bufferSizeW = calculate_buffer(windowW), |
481 | bufferSizeH = calculate_buffer(windowH); |
482 | |
483 | // The amount 1024 is enough for buffers up to 10 sigma. The tmp bitmap will be |
484 | // allocated on the heap. |
485 | SkSTArenaAlloc<1024> alloc; |
486 | Sk4u* buffer = alloc.makeArrayDefault<Sk4u>(std::max(bufferSizeW, bufferSizeH)); |
487 | |
488 | // Basic Plan: The three cases to handle |
489 | // * Horizontal and Vertical - blur horizontally while copying values from the source to |
490 | // the destination. Then, do an in-place vertical blur. |
491 | // * Horizontal only - blur horizontally copying values from the source to the destination. |
492 | // * Vertical only - blur vertically copying values from the source to the destination. |
493 | |
494 | // Default to vertical only blur case. If a horizontal blur is needed, then these values |
495 | // will be adjusted while doing the horizontal blur. |
496 | auto intermediateSrc = static_cast<uint32_t *>(src.getPixels()); |
497 | auto intermediateRowBytesAsPixels = src.rowBytesAsPixels(); |
498 | auto intermediateWidth = srcW; |
499 | |
500 | // Because the border is calculated before the fork of the GPU/CPU path. The border is |
501 | // the maximum of the two rendering methods. In the case where sigma is zero, then the |
502 | // src and dst left values are the same. If sigma is small resulting in a window size of |
503 | // 1, then border calculations add some pixels which will always be zero. Inset the |
504 | // destination by those zero pixels. This case is very rare. |
505 | auto intermediateDst = dst.getAddr32(srcBounds.left(), 0); |
506 | |
507 | // The following code is executed very rarely, I have never seen it in a real web |
508 | // page. If sigma is small but not zero then shared GPU/CPU border calculation |
509 | // code adds extra pixels for the border. Just clear everything to clear those pixels. |
510 | // This solution is overkill, but very simple. |
511 | if (windowW == 1 || windowH == 1) { |
512 | dst.eraseColor(0); |
513 | } |
514 | |
515 | if (windowW > 1) { |
516 | // Make int64 to avoid overflow in multiplication below. |
517 | int64_t shift = srcBounds.top() - dstBounds.top(); |
518 | |
519 | // For the horizontal blur, starts part way down in anticipation of the vertical blur. |
520 | // For a vertical sigma of zero shift should be zero. But, for small sigma, |
521 | // shift may be > 0 but the vertical window could be 1. |
522 | intermediateSrc = static_cast<uint32_t *>(dst.getPixels()) |
523 | + (shift > 0 ? shift * dst.rowBytesAsPixels() : 0); |
524 | intermediateRowBytesAsPixels = dst.rowBytesAsPixels(); |
525 | intermediateWidth = dstW; |
526 | intermediateDst = static_cast<uint32_t *>(dst.getPixels()); |
527 | |
528 | blur_one_direction( |
529 | buffer, windowW, |
530 | srcBounds.left(), srcBounds.right(), dstBounds.right(), |
531 | static_cast<uint32_t *>(src.getPixels()), 1, src.rowBytesAsPixels(), srcH, |
532 | intermediateSrc, 1, intermediateRowBytesAsPixels); |
533 | } |
534 | |
535 | if (windowH > 1) { |
536 | blur_one_direction( |
537 | buffer, windowH, |
538 | srcBounds.top(), srcBounds.bottom(), dstBounds.bottom(), |
539 | intermediateSrc, intermediateRowBytesAsPixels, 1, intermediateWidth, |
540 | intermediateDst, dst.rowBytesAsPixels(), 1); |
541 | } |
542 | |
543 | return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(), |
544 | dstBounds.height()), |
545 | dst, ctx.surfaceProps()); |
546 | } |
547 | |
548 | // This rather arbitrary-looking value results in a maximum box blur kernel size |
549 | // of 1000 pixels on the raster path, which matches the WebKit and Firefox |
550 | // implementations. Since the GPU path does not compute a box blur, putting |
551 | // the limit on sigma ensures consistent behaviour between the GPU and |
552 | // raster paths. |
553 | #define MAX_SIGMA SkIntToScalar(532) |
554 | |
555 | static SkVector map_sigma(const SkSize& localSigma, const SkMatrix& ctm) { |
556 | SkVector sigma = SkVector::Make(localSigma.width(), localSigma.height()); |
557 | ctm.mapVectors(&sigma, 1); |
558 | sigma.fX = std::min(SkScalarAbs(sigma.fX), MAX_SIGMA); |
559 | sigma.fY = std::min(SkScalarAbs(sigma.fY), MAX_SIGMA); |
560 | return sigma; |
561 | } |
562 | |
563 | sk_sp<SkSpecialImage> SkBlurImageFilterImpl::onFilterImage(const Context& ctx, |
564 | SkIPoint* offset) const { |
565 | SkIPoint inputOffset = SkIPoint::Make(0, 0); |
566 | |
567 | sk_sp<SkSpecialImage> input(this->filterInput(0, ctx, &inputOffset)); |
568 | if (!input) { |
569 | return nullptr; |
570 | } |
571 | |
572 | SkIRect inputBounds = SkIRect::MakeXYWH(inputOffset.fX, inputOffset.fY, |
573 | input->width(), input->height()); |
574 | |
575 | // Calculate the destination bounds. |
576 | SkIRect dstBounds; |
577 | if (!this->applyCropRect(this->mapContext(ctx), inputBounds, &dstBounds)) { |
578 | return nullptr; |
579 | } |
580 | if (!inputBounds.intersect(dstBounds)) { |
581 | return nullptr; |
582 | } |
583 | |
584 | // Save the offset in preparation to make all rectangles relative to the inputOffset. |
585 | SkIPoint resultOffset = SkIPoint::Make(dstBounds.fLeft, dstBounds.fTop); |
586 | |
587 | // Make all bounds relative to the inputOffset. |
588 | inputBounds.offset(-inputOffset); |
589 | dstBounds.offset(-inputOffset); |
590 | |
591 | SkVector sigma = map_sigma(fSigma, ctx.ctm()); |
592 | if (sigma.x() < 0 || sigma.y() < 0) { |
593 | return nullptr; |
594 | } |
595 | |
596 | sk_sp<SkSpecialImage> result; |
597 | #if SK_SUPPORT_GPU |
598 | if (ctx.gpuBacked()) { |
599 | // Ensure the input is in the destination's gamut. This saves us from having to do the |
600 | // xform during the filter itself. |
601 | input = ImageToColorSpace(input.get(), ctx.colorType(), ctx.colorSpace()); |
602 | result = this->gpuFilter(ctx, sigma, input, inputBounds, dstBounds, inputOffset, |
603 | &resultOffset); |
604 | } else |
605 | #endif |
606 | { |
607 | // NB 135 is the largest sigma that will not cause a buffer full of 255 mask values to overflow |
608 | // using the Gauss filter. It also limits the size of buffers used hold intermediate values. The |
609 | // additional + 1 added to window represents adding one more leading element before subtracting the |
610 | // trailing element. |
611 | // Explanation of maximums: |
612 | // sum0 = (window + 1) * 255 |
613 | // sum1 = (window + 1) * sum0 -> (window + 1) * (window + 1) * 255 |
614 | // sum2 = (window + 1) * sum1 -> (window + 1) * (window + 1) * (window + 1) * 255 -> window^3 * 255 |
615 | // |
616 | // The value (window + 1)^3 * 255 must fit in a uint32_t. So, |
617 | // (window + 1)^3 * 255 < 2^32. window = 255. |
618 | // |
619 | // window = floor(sigma * 3 * sqrt(2 * kPi) / 4) |
620 | // For window <= 255, the largest value for sigma is 135. |
621 | sigma.fX = SkTPin(sigma.fX, 0.0f, 135.0f); |
622 | sigma.fY = SkTPin(sigma.fY, 0.0f, 135.0f); |
623 | |
624 | result = cpu_blur(ctx, sigma, input, inputBounds, dstBounds); |
625 | } |
626 | |
627 | // Return the resultOffset if the blur succeeded. |
628 | if (result != nullptr) { |
629 | *offset = resultOffset; |
630 | } |
631 | return result; |
632 | } |
633 | |
634 | #if SK_SUPPORT_GPU |
635 | sk_sp<SkSpecialImage> SkBlurImageFilterImpl::gpuFilter( |
636 | const Context& ctx, SkVector sigma, const sk_sp<SkSpecialImage> &input, SkIRect inputBounds, |
637 | SkIRect dstBounds, SkIPoint inputOffset, SkIPoint* offset) const { |
638 | if (0 == sigma.x() && 0 == sigma.y()) { |
639 | offset->fX = inputBounds.x() + inputOffset.fX; |
640 | offset->fY = inputBounds.y() + inputOffset.fY; |
641 | return input->makeSubset(inputBounds); |
642 | } |
643 | |
644 | auto context = ctx.getContext(); |
645 | |
646 | GrSurfaceProxyView inputView = input->view(context); |
647 | if (!inputView.proxy()) { |
648 | return nullptr; |
649 | } |
650 | SkASSERT(inputView.asTextureProxy()); |
651 | |
652 | // TODO (michaelludwig) - The color space choice is odd, should it just be ctx.refColorSpace()? |
653 | dstBounds.offset(input->subset().topLeft()); |
654 | inputBounds.offset(input->subset().topLeft()); |
655 | auto renderTargetContext = SkGpuBlurUtils::GaussianBlur( |
656 | context, |
657 | std::move(inputView), |
658 | SkColorTypeToGrColorType(input->colorType()), |
659 | input->alphaType(), |
660 | ctx.colorSpace() ? sk_ref_sp(input->getColorSpace()) : nullptr, |
661 | dstBounds, |
662 | inputBounds, |
663 | sigma.x(), |
664 | sigma.y(), |
665 | fTileMode); |
666 | if (!renderTargetContext) { |
667 | return nullptr; |
668 | } |
669 | |
670 | return SkSpecialImage::MakeDeferredFromGpu(context, |
671 | SkIRect::MakeSize(dstBounds.size()), |
672 | kNeedNewImageUniqueID_SpecialImage, |
673 | renderTargetContext->readSurfaceView(), |
674 | renderTargetContext->colorInfo().colorType(), |
675 | sk_ref_sp(input->getColorSpace()), |
676 | ctx.surfaceProps()); |
677 | } |
678 | #endif |
679 | |
680 | SkRect SkBlurImageFilterImpl::computeFastBounds(const SkRect& src) const { |
681 | SkRect bounds = this->getInput(0) ? this->getInput(0)->computeFastBounds(src) : src; |
682 | bounds.outset(fSigma.width() * 3, fSigma.height() * 3); |
683 | return bounds; |
684 | } |
685 | |
686 | SkIRect SkBlurImageFilterImpl::onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm, |
687 | MapDirection, const SkIRect* inputRect) const { |
688 | SkVector sigma = map_sigma(fSigma, ctm); |
689 | return src.makeOutset(SkScalarCeilToInt(sigma.x() * 3), SkScalarCeilToInt(sigma.y() * 3)); |
690 | } |
691 | |