| 1 | /* |
| 2 | * Copyright 2011 The Android Open Source Project |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #include "include/effects/SkBlurImageFilter.h" |
| 9 | |
| 10 | #include <algorithm> |
| 11 | |
| 12 | #include "include/core/SkBitmap.h" |
| 13 | #include "include/core/SkTileMode.h" |
| 14 | #include "include/private/SkColorData.h" |
| 15 | #include "include/private/SkNx.h" |
| 16 | #include "include/private/SkTFitsIn.h" |
| 17 | #include "src/core/SkArenaAlloc.h" |
| 18 | #include "src/core/SkAutoPixmapStorage.h" |
| 19 | #include "src/core/SkGpuBlurUtils.h" |
| 20 | #include "src/core/SkImageFilter_Base.h" |
| 21 | #include "src/core/SkOpts.h" |
| 22 | #include "src/core/SkReadBuffer.h" |
| 23 | #include "src/core/SkSpecialImage.h" |
| 24 | #include "src/core/SkWriteBuffer.h" |
| 25 | |
| 26 | #if SK_SUPPORT_GPU |
| 27 | #include "include/gpu/GrContext.h" |
| 28 | #include "src/gpu/GrTextureProxy.h" |
| 29 | #include "src/gpu/SkGr.h" |
| 30 | #endif |
| 31 | |
| 32 | namespace { |
| 33 | |
| 34 | class SkBlurImageFilterImpl final : public SkImageFilter_Base { |
| 35 | public: |
| 36 | SkBlurImageFilterImpl(SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode, |
| 37 | sk_sp<SkImageFilter> input, const CropRect* cropRect) |
| 38 | : INHERITED(&input, 1, cropRect) |
| 39 | , fSigma{sigmaX, sigmaY} |
| 40 | , fTileMode(tileMode) {} |
| 41 | |
| 42 | SkRect computeFastBounds(const SkRect&) const override; |
| 43 | |
| 44 | protected: |
| 45 | void flatten(SkWriteBuffer&) const override; |
| 46 | sk_sp<SkSpecialImage> onFilterImage(const Context&, SkIPoint* offset) const override; |
| 47 | SkIRect onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm, |
| 48 | MapDirection, const SkIRect* inputRect) const override; |
| 49 | |
| 50 | private: |
| 51 | friend void SkBlurImageFilter::RegisterFlattenables(); |
| 52 | SK_FLATTENABLE_HOOKS(SkBlurImageFilterImpl) |
| 53 | |
| 54 | #if SK_SUPPORT_GPU |
| 55 | sk_sp<SkSpecialImage> gpuFilter( |
| 56 | const Context& ctx, SkVector sigma, |
| 57 | const sk_sp<SkSpecialImage> &input, |
| 58 | SkIRect inputBounds, SkIRect dstBounds, SkIPoint inputOffset, SkIPoint* offset) const; |
| 59 | #endif |
| 60 | |
| 61 | SkSize fSigma; |
| 62 | SkTileMode fTileMode; |
| 63 | |
| 64 | typedef SkImageFilter_Base INHERITED; |
| 65 | }; |
| 66 | |
| 67 | } // end namespace |
| 68 | |
| 69 | static SkTileMode to_sktilemode(SkBlurImageFilter::TileMode tileMode) { |
| 70 | switch(tileMode) { |
| 71 | case SkBlurImageFilter::kClamp_TileMode: |
| 72 | return SkTileMode::kClamp; |
| 73 | case SkBlurImageFilter::kRepeat_TileMode: |
| 74 | return SkTileMode::kRepeat; |
| 75 | case SkBlurImageFilter::kClampToBlack_TileMode: |
| 76 | // Fall through |
| 77 | default: |
| 78 | return SkTileMode::kDecal; |
| 79 | } |
| 80 | } |
| 81 | |
| 82 | sk_sp<SkImageFilter> SkBlurImageFilter::Make(SkScalar sigmaX, SkScalar sigmaY, |
| 83 | sk_sp<SkImageFilter> input, |
| 84 | const SkImageFilter::CropRect* cropRect, |
| 85 | TileMode tileMode) { |
| 86 | return Make(sigmaX, sigmaY, to_sktilemode(tileMode), std::move(input), cropRect); |
| 87 | } |
| 88 | |
| 89 | sk_sp<SkImageFilter> SkBlurImageFilter::Make(SkScalar sigmaX, SkScalar sigmaY, SkTileMode tileMode, |
| 90 | sk_sp<SkImageFilter> input, |
| 91 | const SkImageFilter::CropRect* cropRect) { |
| 92 | if (sigmaX < SK_ScalarNearlyZero && sigmaY < SK_ScalarNearlyZero && !cropRect) { |
| 93 | return input; |
| 94 | } |
| 95 | return sk_sp<SkImageFilter>( |
| 96 | new SkBlurImageFilterImpl(sigmaX, sigmaY, tileMode, input, cropRect)); |
| 97 | } |
| 98 | |
| 99 | void SkBlurImageFilter::RegisterFlattenables() { SK_REGISTER_FLATTENABLE(SkBlurImageFilterImpl); } |
| 100 | |
| 101 | /////////////////////////////////////////////////////////////////////////////// |
| 102 | |
| 103 | sk_sp<SkFlattenable> SkBlurImageFilterImpl::CreateProc(SkReadBuffer& buffer) { |
| 104 | SK_IMAGEFILTER_UNFLATTEN_COMMON(common, 1); |
| 105 | SkScalar sigmaX = buffer.readScalar(); |
| 106 | SkScalar sigmaY = buffer.readScalar(); |
| 107 | SkTileMode tileMode; |
| 108 | if (buffer.isVersionLT(SkPicturePriv::kTileModeInBlurImageFilter_Version)) { |
| 109 | tileMode = SkTileMode::kDecal; |
| 110 | } else if (buffer.isVersionLT(SkPicturePriv::kCleanupImageFilterEnums_Version)) { |
| 111 | tileMode = to_sktilemode(buffer.read32LE(SkBlurImageFilter::kLast_TileMode)); |
| 112 | } else { |
| 113 | tileMode = buffer.read32LE(SkTileMode::kLastTileMode); |
| 114 | } |
| 115 | |
| 116 | static_assert(SkBlurImageFilter::kLast_TileMode == 2, "CreateProc" ); |
| 117 | |
| 118 | return SkBlurImageFilter::Make( |
| 119 | sigmaX, sigmaY, tileMode, common.getInput(0), &common.cropRect()); |
| 120 | } |
| 121 | |
| 122 | void SkBlurImageFilterImpl::flatten(SkWriteBuffer& buffer) const { |
| 123 | this->INHERITED::flatten(buffer); |
| 124 | buffer.writeScalar(fSigma.fWidth); |
| 125 | buffer.writeScalar(fSigma.fHeight); |
| 126 | |
| 127 | // Fuzzer sanity checks |
| 128 | static_assert((int) SkTileMode::kLastTileMode == 3 && SkBlurImageFilter::kLast_TileMode == 2, |
| 129 | "SkBlurImageFilterImpl::flatten" ); |
| 130 | SkASSERT(fTileMode <= SkTileMode::kLastTileMode); |
| 131 | buffer.writeInt(static_cast<int>(fTileMode)); |
| 132 | } |
| 133 | |
| 134 | // This is defined by the SVG spec: |
| 135 | // https://drafts.fxtf.org/filter-effects/#feGaussianBlurElement |
| 136 | static int calculate_window(double sigma) { |
| 137 | // NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow |
| 138 | // using the Gauss filter. It also limits the size of buffers used hold intermediate values. |
| 139 | // Explanation of maximums: |
| 140 | // sum0 = window * 255 |
| 141 | // sum1 = window * sum0 -> window * window * 255 |
| 142 | // sum2 = window * sum1 -> window * window * window * 255 -> window^3 * 255 |
| 143 | // |
| 144 | // The value window^3 * 255 must fit in a uint32_t. So, |
| 145 | // window^3 < 2^32. window = 255. |
| 146 | // |
| 147 | // window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5) |
| 148 | // For window <= 255, the largest value for sigma is 136. |
| 149 | sigma = SkTPin(sigma, 0.0, 136.0); |
| 150 | auto possibleWindow = static_cast<int>(floor(sigma * 3 * sqrt(2 * SK_DoublePI) / 4 + 0.5)); |
| 151 | return std::max(1, possibleWindow); |
| 152 | } |
| 153 | |
| 154 | // Calculating the border is tricky. The border is the distance in pixels between the first dst |
| 155 | // pixel and the first src pixel (or the last src pixel and the last dst pixel). |
| 156 | // I will go through the odd case which is simpler, and then through the even case. Given a |
| 157 | // stack of filters seven wide for the odd case of three passes. |
| 158 | // |
//                                     S
//                                  aaaAaaa
//                                  bbbBbbb
//                                  cccCccc
//                                     D
| 164 | // |
| 165 | // The furthest changed pixel is when the filters are in the following configuration. |
| 166 | // |
//                 S
//           aaaAaaa
//        bbbBbbb
//     cccCccc
//        D
| 172 | // |
| 173 | // The A pixel is calculated using the value S, the B uses A, and the C uses B, and |
| 174 | // finally D is C. So, with a window size of seven the border is nine. In the odd case, the |
| 175 | // border is 3*((window - 1)/2). |
| 176 | // |
| 177 | // For even cases the filter stack is more complicated. The spec specifies two passes |
| 178 | // of even filters and a final pass of odd filters. A stack for a width of six looks like |
| 179 | // this. |
| 180 | // |
//                       S
//                    aaaAaa
//                     bbBbbb
//                    cccCccc
//                       D
| 186 | // |
| 187 | // The furthest pixel looks like this. |
| 188 | // |
//               S
//          aaaAaa
//        bbBbbb
//    cccCccc
//       D
| 194 | // |
| 195 | // For a window of six, the border value is eight. In the even case the border is 3 * |
| 196 | // (window/2) - 1. |
static int calculate_border(int window) {
    const bool isOddWindow = (window & 1) != 0;
    if (isOddWindow) {
        // Three passes, each reaching (window - 1) / 2 pixels past the center.
        return ((window - 1) / 2) * 3;
    }
    // Two even passes plus one odd pass of window + 1: one pixel less than 3 * (window / 2).
    return (window / 2) * 3 - 1;
}
| 200 | |
// Returns the number of scratch entries needed by blur_one_direction for the given window: three
// circular buffers of window - 1 entries each, plus one extra entry when the window is even.
static int calculate_buffer(int window) {
    const int threePasses = 3 * (window - 1);
    if ((window & 1) != 0) {
        return threePasses;
    }
    return threePasses + 1;
}
| 205 | |
| 206 | // blur_one_direction implements the common three pass box filter approximation of Gaussian blur, |
| 207 | // but combines all three passes into a single pass. This approach is facilitated by three circular |
| 208 | // buffers the width of the window which track values for trailing edges of each of the three |
| 209 | // passes. This allows the algorithm to use more precision in the calculation because the values |
| 210 | // are not rounded each pass. And this implementation also avoids a trap that's easy to fall |
| 211 | // into resulting in blending in too many zeroes near the edge. |
| 212 | // |
| 213 | // In general, a window sum has the form: |
| 214 | // sum_n+1 = sum_n + leading_edge - trailing_edge. |
| 215 | // If instead we do the subtraction at the end of the previous iteration, we can just |
| 216 | // calculate the sums instead of having to do the subtractions too. |
| 217 | // |
| 218 | // In previous iteration: |
| 219 | // sum_n+1 = sum_n - trailing_edge. |
| 220 | // |
| 221 | // In this iteration: |
| 222 | // sum_n+1 = sum_n + leading_edge. |
| 223 | // |
| 224 | // Now we can stack all three sums and do them at once. Sum0 gets its leading edge from the |
| 225 | // actual data. Sum1's leading edge is just Sum0, and Sum2's leading edge is Sum1. So, doing the |
| 226 | // three passes at the same time has the form: |
| 227 | // |
| 228 | // sum0_n+1 = sum0_n + leading edge |
| 229 | // sum1_n+1 = sum1_n + sum0_n+1 |
| 230 | // sum2_n+1 = sum2_n + sum1_n+1 |
| 231 | // |
| 232 | // sum2_n+1 / window^3 is the new value of the destination pixel. |
| 233 | // |
| 234 | // Reduce the sums by the trailing edges which were stored in the circular buffers, |
// for the next go around. This is the case for odd sized windows; for even windows the third
// circular buffer is one larger than the first two circular buffers.
| 237 | // |
| 238 | // sum2_n+2 = sum2_n+1 - buffer2[i]; |
| 239 | // buffer2[i] = sum1; |
| 240 | // sum1_n+2 = sum1_n+1 - buffer1[i]; |
| 241 | // buffer1[i] = sum0; |
| 242 | // sum0_n+2 = sum0_n+1 - buffer0[i]; |
| 243 | // buffer0[i] = leading edge |
| 244 | // |
| 245 | // This is all encapsulated in the processValue function below. |
| 246 | // |
| 247 | using Pass0And1 = Sk4u[2]; |
| 248 | // The would be dLeft parameter is assumed to be 0. |
| 249 | static void blur_one_direction(Sk4u* buffer, int window, |
| 250 | int srcLeft, int srcRight, int dstRight, |
| 251 | const uint32_t* src, int srcXStride, int srcYStride, int srcH, |
| 252 | uint32_t* dst, int dstXStride, int dstYStride) { |
| 253 | |
| 254 | // The circular buffers are one less than the window. |
| 255 | auto pass0Count = window - 1, |
| 256 | pass1Count = window - 1, |
| 257 | pass2Count = (window & 1) == 1 ? window - 1 : window; |
| 258 | |
| 259 | Pass0And1* buffer01Start = (Pass0And1*)buffer; |
| 260 | Sk4u* buffer2Start = buffer + pass0Count + pass1Count; |
| 261 | Pass0And1* buffer01End = (Pass0And1*)buffer2Start; |
| 262 | Sk4u* buffer2End = buffer2Start + pass2Count; |
| 263 | |
| 264 | // If the window is odd then the divisor is just window ^ 3 otherwise, |
| 265 | // it is window * window * (window + 1) = window ^ 3 + window ^ 2; |
| 266 | auto window2 = window * window; |
| 267 | auto window3 = window2 * window; |
| 268 | auto divisor = (window & 1) == 1 ? window3 : window3 + window2; |
| 269 | |
| 270 | // NB the sums in the blur code use the following technique to avoid |
| 271 | // adding 1/2 to round the divide. |
| 272 | // |
| 273 | // Sum/d + 1/2 == (Sum + h) / d |
| 274 | // Sum + d(1/2) == Sum + h |
| 275 | // h == (1/2)d |
| 276 | // |
| 277 | // But the d/2 it self should be rounded. |
| 278 | // h == d/2 + 1/2 == (d + 1) / 2 |
| 279 | // |
| 280 | // weight = 1 / d * 2 ^ 32 |
| 281 | auto weight = static_cast<uint32_t>(round(1.0 / divisor * (1ull << 32))); |
| 282 | auto half = static_cast<uint32_t>((divisor + 1) / 2); |
| 283 | |
| 284 | auto border = calculate_border(window); |
| 285 | |
| 286 | // Calculate the start and end of the source pixels with respect to the destination start. |
| 287 | auto srcStart = srcLeft - border, |
| 288 | srcEnd = srcRight - border, |
| 289 | dstEnd = dstRight; |
| 290 | |
| 291 | for (auto y = 0; y < srcH; y++) { |
| 292 | auto buffer01Cursor = buffer01Start; |
| 293 | auto buffer2Cursor = buffer2Start; |
| 294 | |
| 295 | Sk4u sum0{0u}; |
| 296 | Sk4u sum1{0u}; |
| 297 | Sk4u sum2{half}; |
| 298 | |
| 299 | sk_bzero(buffer01Start, (buffer2End - (Sk4u *) (buffer01Start)) * sizeof(*buffer2Start)); |
| 300 | |
| 301 | // Given an expanded input pixel, move the window ahead using the leadingEdge value. |
| 302 | auto processValue = [&](const Sk4u& leadingEdge) -> Sk4u { |
| 303 | sum0 += leadingEdge; |
| 304 | sum1 += sum0; |
| 305 | sum2 += sum1; |
| 306 | |
| 307 | Sk4u value = sum2.mulHi(weight); |
| 308 | |
| 309 | sum2 -= *buffer2Cursor; |
| 310 | *buffer2Cursor = sum1; |
| 311 | buffer2Cursor = (buffer2Cursor + 1) < buffer2End ? buffer2Cursor + 1 : buffer2Start; |
| 312 | |
| 313 | sum1 -= (*buffer01Cursor)[1]; |
| 314 | (*buffer01Cursor)[1] = sum0; |
| 315 | sum0 -= (*buffer01Cursor)[0]; |
| 316 | (*buffer01Cursor)[0] = leadingEdge; |
| 317 | buffer01Cursor = |
| 318 | (buffer01Cursor + 1) < buffer01End ? buffer01Cursor + 1 : buffer01Start; |
| 319 | |
| 320 | return value; |
| 321 | }; |
| 322 | |
| 323 | auto srcIdx = srcStart; |
| 324 | auto dstIdx = 0; |
| 325 | const uint32_t* srcCursor = src; |
| 326 | uint32_t* dstCursor = dst; |
| 327 | |
| 328 | // The destination pixels are not effected by the src pixels, |
| 329 | // change to zero as per the spec. |
| 330 | // https://drafts.fxtf.org/filter-effects/#FilterPrimitivesOverviewIntro |
| 331 | while (dstIdx < srcIdx) { |
| 332 | *dstCursor = 0; |
| 333 | dstCursor += dstXStride; |
| 334 | SK_PREFETCH(dstCursor); |
| 335 | dstIdx++; |
| 336 | } |
| 337 | |
| 338 | // The edge of the source is before the edge of the destination. Calculate the sums for |
| 339 | // the pixels before the start of the destination. |
| 340 | while (dstIdx > srcIdx) { |
| 341 | Sk4u leadingEdge = srcIdx < srcEnd ? SkNx_cast<uint32_t>(Sk4b::Load(srcCursor)) : 0; |
| 342 | (void) processValue(leadingEdge); |
| 343 | srcCursor += srcXStride; |
| 344 | srcIdx++; |
| 345 | } |
| 346 | |
| 347 | // The dstIdx and srcIdx are in sync now; the code just uses the dstIdx for both now. |
| 348 | // Consume the source generating pixels to dst. |
| 349 | auto loopEnd = std::min(dstEnd, srcEnd); |
| 350 | while (dstIdx < loopEnd) { |
| 351 | Sk4u leadingEdge = SkNx_cast<uint32_t>(Sk4b::Load(srcCursor)); |
| 352 | SkNx_cast<uint8_t>(processValue(leadingEdge)).store(dstCursor); |
| 353 | srcCursor += srcXStride; |
| 354 | dstCursor += dstXStride; |
| 355 | SK_PREFETCH(dstCursor); |
| 356 | dstIdx++; |
| 357 | } |
| 358 | |
| 359 | // The leading edge is beyond the end of the source. Assume that the pixels |
| 360 | // are now 0x0000 until the end of the destination. |
| 361 | loopEnd = dstEnd; |
| 362 | while (dstIdx < loopEnd) { |
| 363 | SkNx_cast<uint8_t>(processValue(0u)).store(dstCursor); |
| 364 | dstCursor += dstXStride; |
| 365 | SK_PREFETCH(dstCursor); |
| 366 | dstIdx++; |
| 367 | } |
| 368 | |
| 369 | src += srcYStride; |
| 370 | dst += dstYStride; |
| 371 | } |
| 372 | } |
| 373 | |
| 374 | static sk_sp<SkSpecialImage> copy_image_with_bounds( |
| 375 | const SkImageFilter_Base::Context& ctx, const sk_sp<SkSpecialImage> &input, |
| 376 | SkIRect srcBounds, SkIRect dstBounds) { |
| 377 | SkBitmap inputBM; |
| 378 | if (!input->getROPixels(&inputBM)) { |
| 379 | return nullptr; |
| 380 | } |
| 381 | |
| 382 | if (inputBM.colorType() != kN32_SkColorType) { |
| 383 | return nullptr; |
| 384 | } |
| 385 | |
| 386 | SkBitmap src; |
| 387 | inputBM.extractSubset(&src, srcBounds); |
| 388 | |
| 389 | // Make everything relative to the destination bounds. |
| 390 | srcBounds.offset(-dstBounds.x(), -dstBounds.y()); |
| 391 | dstBounds.offset(-dstBounds.x(), -dstBounds.y()); |
| 392 | |
| 393 | auto srcW = srcBounds.width(), |
| 394 | dstW = dstBounds.width(), |
| 395 | dstH = dstBounds.height(); |
| 396 | |
| 397 | SkImageInfo dstInfo = SkImageInfo::Make(dstW, dstH, inputBM.colorType(), inputBM.alphaType()); |
| 398 | |
| 399 | SkBitmap dst; |
| 400 | if (!dst.tryAllocPixels(dstInfo)) { |
| 401 | return nullptr; |
| 402 | } |
| 403 | |
| 404 | // There is no blurring to do, but we still need to copy the source while accounting for the |
| 405 | // dstBounds. Remember that the src was intersected with the dst. |
| 406 | int y = 0; |
| 407 | size_t dstWBytes = dstW * sizeof(uint32_t); |
| 408 | for (;y < srcBounds.top(); y++) { |
| 409 | sk_bzero(dst.getAddr32(0, y), dstWBytes); |
| 410 | } |
| 411 | |
| 412 | for (;y < srcBounds.bottom(); y++) { |
| 413 | int x = 0; |
| 414 | uint32_t* dstPtr = dst.getAddr32(0, y); |
| 415 | for (;x < srcBounds.left(); x++) { |
| 416 | *dstPtr++ = 0; |
| 417 | } |
| 418 | |
| 419 | memcpy(dstPtr, src.getAddr32(x - srcBounds.left(), y - srcBounds.top()), |
| 420 | srcW * sizeof(uint32_t)); |
| 421 | |
| 422 | dstPtr += srcW; |
| 423 | x += srcW; |
| 424 | |
| 425 | for (;x < dstBounds.right(); x++) { |
| 426 | *dstPtr++ = 0; |
| 427 | } |
| 428 | } |
| 429 | |
| 430 | for (;y < dstBounds.bottom(); y++) { |
| 431 | sk_bzero(dst.getAddr32(0, y), dstWBytes); |
| 432 | } |
| 433 | |
| 434 | return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(), |
| 435 | dstBounds.height()), |
| 436 | dst, ctx.surfaceProps()); |
| 437 | } |
| 438 | |
| 439 | // TODO: Implement CPU backend for different fTileMode. |
| 440 | static sk_sp<SkSpecialImage> cpu_blur( |
| 441 | const SkImageFilter_Base::Context& ctx, |
| 442 | SkVector sigma, const sk_sp<SkSpecialImage> &input, |
| 443 | SkIRect srcBounds, SkIRect dstBounds) { |
| 444 | auto windowW = calculate_window(sigma.x()), |
| 445 | windowH = calculate_window(sigma.y()); |
| 446 | |
| 447 | if (windowW <= 1 && windowH <= 1) { |
| 448 | return copy_image_with_bounds(ctx, input, srcBounds, dstBounds); |
| 449 | } |
| 450 | |
| 451 | SkBitmap inputBM; |
| 452 | |
| 453 | if (!input->getROPixels(&inputBM)) { |
| 454 | return nullptr; |
| 455 | } |
| 456 | |
| 457 | if (inputBM.colorType() != kN32_SkColorType) { |
| 458 | return nullptr; |
| 459 | } |
| 460 | |
| 461 | SkBitmap src; |
| 462 | inputBM.extractSubset(&src, srcBounds); |
| 463 | |
| 464 | // Make everything relative to the destination bounds. |
| 465 | srcBounds.offset(-dstBounds.x(), -dstBounds.y()); |
| 466 | dstBounds.offset(-dstBounds.x(), -dstBounds.y()); |
| 467 | |
| 468 | auto srcW = srcBounds.width(), |
| 469 | srcH = srcBounds.height(), |
| 470 | dstW = dstBounds.width(), |
| 471 | dstH = dstBounds.height(); |
| 472 | |
| 473 | SkImageInfo dstInfo = inputBM.info().makeWH(dstW, dstH); |
| 474 | |
| 475 | SkBitmap dst; |
| 476 | if (!dst.tryAllocPixels(dstInfo)) { |
| 477 | return nullptr; |
| 478 | } |
| 479 | |
| 480 | auto bufferSizeW = calculate_buffer(windowW), |
| 481 | bufferSizeH = calculate_buffer(windowH); |
| 482 | |
| 483 | // The amount 1024 is enough for buffers up to 10 sigma. The tmp bitmap will be |
| 484 | // allocated on the heap. |
| 485 | SkSTArenaAlloc<1024> alloc; |
| 486 | Sk4u* buffer = alloc.makeArrayDefault<Sk4u>(std::max(bufferSizeW, bufferSizeH)); |
| 487 | |
| 488 | // Basic Plan: The three cases to handle |
| 489 | // * Horizontal and Vertical - blur horizontally while copying values from the source to |
| 490 | // the destination. Then, do an in-place vertical blur. |
| 491 | // * Horizontal only - blur horizontally copying values from the source to the destination. |
| 492 | // * Vertical only - blur vertically copying values from the source to the destination. |
| 493 | |
| 494 | // Default to vertical only blur case. If a horizontal blur is needed, then these values |
| 495 | // will be adjusted while doing the horizontal blur. |
| 496 | auto intermediateSrc = static_cast<uint32_t *>(src.getPixels()); |
| 497 | auto intermediateRowBytesAsPixels = src.rowBytesAsPixels(); |
| 498 | auto intermediateWidth = srcW; |
| 499 | |
| 500 | // Because the border is calculated before the fork of the GPU/CPU path. The border is |
| 501 | // the maximum of the two rendering methods. In the case where sigma is zero, then the |
| 502 | // src and dst left values are the same. If sigma is small resulting in a window size of |
| 503 | // 1, then border calculations add some pixels which will always be zero. Inset the |
| 504 | // destination by those zero pixels. This case is very rare. |
| 505 | auto intermediateDst = dst.getAddr32(srcBounds.left(), 0); |
| 506 | |
| 507 | // The following code is executed very rarely, I have never seen it in a real web |
| 508 | // page. If sigma is small but not zero then shared GPU/CPU border calculation |
| 509 | // code adds extra pixels for the border. Just clear everything to clear those pixels. |
| 510 | // This solution is overkill, but very simple. |
| 511 | if (windowW == 1 || windowH == 1) { |
| 512 | dst.eraseColor(0); |
| 513 | } |
| 514 | |
| 515 | if (windowW > 1) { |
| 516 | // Make int64 to avoid overflow in multiplication below. |
| 517 | int64_t shift = srcBounds.top() - dstBounds.top(); |
| 518 | |
| 519 | // For the horizontal blur, starts part way down in anticipation of the vertical blur. |
| 520 | // For a vertical sigma of zero shift should be zero. But, for small sigma, |
| 521 | // shift may be > 0 but the vertical window could be 1. |
| 522 | intermediateSrc = static_cast<uint32_t *>(dst.getPixels()) |
| 523 | + (shift > 0 ? shift * dst.rowBytesAsPixels() : 0); |
| 524 | intermediateRowBytesAsPixels = dst.rowBytesAsPixels(); |
| 525 | intermediateWidth = dstW; |
| 526 | intermediateDst = static_cast<uint32_t *>(dst.getPixels()); |
| 527 | |
| 528 | blur_one_direction( |
| 529 | buffer, windowW, |
| 530 | srcBounds.left(), srcBounds.right(), dstBounds.right(), |
| 531 | static_cast<uint32_t *>(src.getPixels()), 1, src.rowBytesAsPixels(), srcH, |
| 532 | intermediateSrc, 1, intermediateRowBytesAsPixels); |
| 533 | } |
| 534 | |
| 535 | if (windowH > 1) { |
| 536 | blur_one_direction( |
| 537 | buffer, windowH, |
| 538 | srcBounds.top(), srcBounds.bottom(), dstBounds.bottom(), |
| 539 | intermediateSrc, intermediateRowBytesAsPixels, 1, intermediateWidth, |
| 540 | intermediateDst, dst.rowBytesAsPixels(), 1); |
| 541 | } |
| 542 | |
| 543 | return SkSpecialImage::MakeFromRaster(SkIRect::MakeWH(dstBounds.width(), |
| 544 | dstBounds.height()), |
| 545 | dst, ctx.surfaceProps()); |
| 546 | } |
| 547 | |
| 548 | // This rather arbitrary-looking value results in a maximum box blur kernel size |
| 549 | // of 1000 pixels on the raster path, which matches the WebKit and Firefox |
| 550 | // implementations. Since the GPU path does not compute a box blur, putting |
| 551 | // the limit on sigma ensures consistent behaviour between the GPU and |
| 552 | // raster paths. |
| 553 | #define MAX_SIGMA SkIntToScalar(532) |
| 554 | |
| 555 | static SkVector map_sigma(const SkSize& localSigma, const SkMatrix& ctm) { |
| 556 | SkVector sigma = SkVector::Make(localSigma.width(), localSigma.height()); |
| 557 | ctm.mapVectors(&sigma, 1); |
| 558 | sigma.fX = std::min(SkScalarAbs(sigma.fX), MAX_SIGMA); |
| 559 | sigma.fY = std::min(SkScalarAbs(sigma.fY), MAX_SIGMA); |
| 560 | return sigma; |
| 561 | } |
| 562 | |
| 563 | sk_sp<SkSpecialImage> SkBlurImageFilterImpl::onFilterImage(const Context& ctx, |
| 564 | SkIPoint* offset) const { |
| 565 | SkIPoint inputOffset = SkIPoint::Make(0, 0); |
| 566 | |
| 567 | sk_sp<SkSpecialImage> input(this->filterInput(0, ctx, &inputOffset)); |
| 568 | if (!input) { |
| 569 | return nullptr; |
| 570 | } |
| 571 | |
| 572 | SkIRect inputBounds = SkIRect::MakeXYWH(inputOffset.fX, inputOffset.fY, |
| 573 | input->width(), input->height()); |
| 574 | |
| 575 | // Calculate the destination bounds. |
| 576 | SkIRect dstBounds; |
| 577 | if (!this->applyCropRect(this->mapContext(ctx), inputBounds, &dstBounds)) { |
| 578 | return nullptr; |
| 579 | } |
| 580 | if (!inputBounds.intersect(dstBounds)) { |
| 581 | return nullptr; |
| 582 | } |
| 583 | |
| 584 | // Save the offset in preparation to make all rectangles relative to the inputOffset. |
| 585 | SkIPoint resultOffset = SkIPoint::Make(dstBounds.fLeft, dstBounds.fTop); |
| 586 | |
| 587 | // Make all bounds relative to the inputOffset. |
| 588 | inputBounds.offset(-inputOffset); |
| 589 | dstBounds.offset(-inputOffset); |
| 590 | |
| 591 | SkVector sigma = map_sigma(fSigma, ctx.ctm()); |
| 592 | if (sigma.x() < 0 || sigma.y() < 0) { |
| 593 | return nullptr; |
| 594 | } |
| 595 | |
| 596 | sk_sp<SkSpecialImage> result; |
| 597 | #if SK_SUPPORT_GPU |
| 598 | if (ctx.gpuBacked()) { |
| 599 | // Ensure the input is in the destination's gamut. This saves us from having to do the |
| 600 | // xform during the filter itself. |
| 601 | input = ImageToColorSpace(input.get(), ctx.colorType(), ctx.colorSpace()); |
| 602 | result = this->gpuFilter(ctx, sigma, input, inputBounds, dstBounds, inputOffset, |
| 603 | &resultOffset); |
| 604 | } else |
| 605 | #endif |
| 606 | { |
| 607 | // NB 135 is the largest sigma that will not cause a buffer full of 255 mask values to overflow |
| 608 | // using the Gauss filter. It also limits the size of buffers used hold intermediate values. The |
| 609 | // additional + 1 added to window represents adding one more leading element before subtracting the |
| 610 | // trailing element. |
| 611 | // Explanation of maximums: |
| 612 | // sum0 = (window + 1) * 255 |
| 613 | // sum1 = (window + 1) * sum0 -> (window + 1) * (window + 1) * 255 |
| 614 | // sum2 = (window + 1) * sum1 -> (window + 1) * (window + 1) * (window + 1) * 255 -> window^3 * 255 |
| 615 | // |
| 616 | // The value (window + 1)^3 * 255 must fit in a uint32_t. So, |
| 617 | // (window + 1)^3 * 255 < 2^32. window = 255. |
| 618 | // |
| 619 | // window = floor(sigma * 3 * sqrt(2 * kPi) / 4) |
| 620 | // For window <= 255, the largest value for sigma is 135. |
| 621 | sigma.fX = SkTPin(sigma.fX, 0.0f, 135.0f); |
| 622 | sigma.fY = SkTPin(sigma.fY, 0.0f, 135.0f); |
| 623 | |
| 624 | result = cpu_blur(ctx, sigma, input, inputBounds, dstBounds); |
| 625 | } |
| 626 | |
| 627 | // Return the resultOffset if the blur succeeded. |
| 628 | if (result != nullptr) { |
| 629 | *offset = resultOffset; |
| 630 | } |
| 631 | return result; |
| 632 | } |
| 633 | |
#if SK_SUPPORT_GPU
// GPU implementation of the blur. With both sigmas equal to zero the input subset is returned
// as-is and *offset is updated to its position; otherwise the blur is delegated to
// SkGpuBlurUtils::GaussianBlur. Returns nullptr if the input has no proxy or the blur fails.
sk_sp<SkSpecialImage> SkBlurImageFilterImpl::gpuFilter(
        const Context& ctx, SkVector sigma, const sk_sp<SkSpecialImage> &input, SkIRect inputBounds,
        SkIRect dstBounds, SkIPoint inputOffset, SkIPoint* offset) const {
    const bool nothingToBlur = (0 == sigma.x()) && (0 == sigma.y());
    if (nothingToBlur) {
        offset->fX = inputBounds.x() + inputOffset.fX;
        offset->fY = inputBounds.y() + inputOffset.fY;
        return input->makeSubset(inputBounds);
    }

    auto context = ctx.getContext();

    GrSurfaceProxyView inputView = input->view(context);
    if (!inputView.proxy()) {
        return nullptr;
    }
    SkASSERT(inputView.asTextureProxy());

    // Shift both rectangles by the top-left of the input's subset.
    const SkIPoint subsetOrigin = input->subset().topLeft();
    dstBounds.offset(subsetOrigin);
    inputBounds.offset(subsetOrigin);

    // TODO (michaelludwig) - The color space choice is odd, should it just be ctx.refColorSpace()?
    auto blurColorSpace = ctx.colorSpace() ? sk_ref_sp(input->getColorSpace()) : nullptr;

    auto renderTargetContext = SkGpuBlurUtils::GaussianBlur(
            context,
            std::move(inputView),
            SkColorTypeToGrColorType(input->colorType()),
            input->alphaType(),
            std::move(blurColorSpace),
            dstBounds,
            inputBounds,
            sigma.x(),
            sigma.y(),
            fTileMode);
    if (!renderTargetContext) {
        return nullptr;
    }

    const SkIRect resultSubset = SkIRect::MakeSize(dstBounds.size());
    return SkSpecialImage::MakeDeferredFromGpu(context,
                                               resultSubset,
                                               kNeedNewImageUniqueID_SpecialImage,
                                               renderTargetContext->readSurfaceView(),
                                               renderTargetContext->colorInfo().colorType(),
                                               sk_ref_sp(input->getColorSpace()),
                                               ctx.surfaceProps());
}
#endif
| 679 | |
| 680 | SkRect SkBlurImageFilterImpl::computeFastBounds(const SkRect& src) const { |
| 681 | SkRect bounds = this->getInput(0) ? this->getInput(0)->computeFastBounds(src) : src; |
| 682 | bounds.outset(fSigma.width() * 3, fSigma.height() * 3); |
| 683 | return bounds; |
| 684 | } |
| 685 | |
| 686 | SkIRect SkBlurImageFilterImpl::onFilterNodeBounds(const SkIRect& src, const SkMatrix& ctm, |
| 687 | MapDirection, const SkIRect* inputRect) const { |
| 688 | SkVector sigma = map_sigma(fSigma, ctm); |
| 689 | return src.makeOutset(SkScalarCeilToInt(sigma.x() * 3), SkScalarCeilToInt(sigma.y() * 3)); |
| 690 | } |
| 691 | |