1 | /* |
2 | * Copyright 2018 Google Inc. |
3 | * |
4 | * Use of this source code is governed by a BSD-style license that can be |
5 | * found in the LICENSE file. |
6 | */ |
7 | |
8 | #include "src/gpu/ccpr/GrCCStroker.h" |
9 | |
10 | #include "include/core/SkStrokeRec.h" |
11 | #include "src/core/SkPathPriv.h" |
12 | #include "src/gpu/GrOnFlushResourceProvider.h" |
13 | #include "src/gpu/GrOpsRenderPass.h" |
14 | #include "src/gpu/GrProgramInfo.h" |
15 | #include "src/gpu/ccpr/GrAutoMapVertexBuffer.h" |
16 | #include "src/gpu/ccpr/GrCCCoverageProcessor.h" |
17 | #include "src/gpu/glsl/GrGLSLFragmentShaderBuilder.h" |
18 | #include "src/gpu/glsl/GrGLSLVertexGeoBuilder.h" |
19 | |
20 | static constexpr int kMaxNumLinearSegmentsLog2 = GrCCStrokeGeometry::kMaxNumLinearSegmentsLog2; |
21 | using TriangleInstance = GrCCCoverageProcessor::TriPointInstance; |
22 | using ConicInstance = GrCCCoverageProcessor::QuadPointInstance; |
23 | |
24 | namespace { |
25 | |
26 | struct LinearStrokeInstance { |
27 | float fEndpoints[4]; |
28 | float fStrokeRadius; |
29 | |
30 | inline void set(const SkPoint[2], float dx, float dy, float strokeRadius); |
31 | }; |
32 | |
33 | inline void LinearStrokeInstance::set(const SkPoint P[2], float dx, float dy, float strokeRadius) { |
34 | Sk2f X, Y; |
35 | Sk2f::Load2(P, &X, &Y); |
36 | Sk2f::Store2(fEndpoints, X + dx, Y + dy); |
37 | fStrokeRadius = strokeRadius; |
38 | } |
39 | |
40 | struct CubicStrokeInstance { |
41 | float fX[4]; |
42 | float fY[4]; |
43 | float fStrokeRadius; |
44 | float fNumSegments; |
45 | |
46 | inline void set(const SkPoint[4], float dx, float dy, float strokeRadius, int numSegments); |
47 | inline void set(const Sk4f& X, const Sk4f& Y, float dx, float dy, float strokeRadius, |
48 | int numSegments); |
49 | }; |
50 | |
51 | inline void CubicStrokeInstance::set(const SkPoint P[4], float dx, float dy, float strokeRadius, |
52 | int numSegments) { |
53 | Sk4f X, Y; |
54 | Sk4f::Load2(P, &X, &Y); |
55 | this->set(X, Y, dx, dy, strokeRadius, numSegments); |
56 | } |
57 | |
58 | inline void CubicStrokeInstance::set(const Sk4f& X, const Sk4f& Y, float dx, float dy, |
59 | float strokeRadius, int numSegments) { |
60 | (X + dx).store(&fX); |
61 | (Y + dy).store(&fY); |
62 | fStrokeRadius = strokeRadius; |
63 | fNumSegments = static_cast<float>(numSegments); |
64 | } |
65 | |
66 | // This class draws stroked lines in post-transform device space (a.k.a. rectangles). Rigid-body |
67 | // transforms can be achieved by transforming the line ahead of time and adjusting the stroke |
68 | // width. Skews of the stroke itself are not yet supported. |
69 | // |
70 | // Corner coverage is AA-correct, meaning, n^2 attenuation along the diagonals. This is important |
71 | // for seamless integration with the connecting geometry. |
72 | class LinearStrokeProcessor : public GrGeometryProcessor { |
73 | public: |
74 | LinearStrokeProcessor() : INHERITED(kLinearStrokeProcessor_ClassID) { |
75 | this->setInstanceAttributes(kInstanceAttribs, 2); |
76 | #ifdef SK_DEBUG |
77 | using Instance = LinearStrokeInstance; |
78 | SkASSERT(this->instanceStride() == sizeof(Instance)); |
79 | #endif |
80 | } |
81 | |
82 | private: |
83 | const char* name() const override { return "LinearStrokeProcessor" ; } |
84 | void getGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder*) const override {} |
85 | |
86 | static constexpr Attribute kInstanceAttribs[2] = { |
87 | {"endpts" , kFloat4_GrVertexAttribType, kFloat4_GrSLType}, |
88 | {"stroke_radius" , kFloat_GrVertexAttribType, kFloat_GrSLType} |
89 | }; |
90 | |
91 | class Impl : public GrGLSLGeometryProcessor { |
92 | void setData(const GrGLSLProgramDataManager&, const GrPrimitiveProcessor&, |
93 | const CoordTransformRange&) override {} |
94 | void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override; |
95 | }; |
96 | |
97 | GrGLSLPrimitiveProcessor* createGLSLInstance(const GrShaderCaps&) const override { |
98 | return new Impl(); |
99 | } |
100 | |
101 | typedef GrGeometryProcessor INHERITED; |
102 | }; |
103 | |
104 | void LinearStrokeProcessor::Impl::onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) { |
105 | GrGLSLVaryingHandler* varyingHandler = args.fVaryingHandler; |
106 | GrGLSLUniformHandler* uniHandler = args.fUniformHandler; |
107 | |
108 | varyingHandler->emitAttributes(args.fGP.cast<LinearStrokeProcessor>()); |
109 | |
110 | GrGLSLVertexBuilder* v = args.fVertBuilder; |
111 | v->codeAppend ("float2 tan = normalize(endpts.zw - endpts.xy);" ); |
112 | v->codeAppend ("float2 n = float2(tan.y, -tan.x);" ); |
113 | v->codeAppend ("float nwidth = abs(n.x) + abs(n.y);" ); |
114 | |
115 | // Outset the vertex position for AA butt caps. |
116 | v->codeAppend ("float2 outset = tan*nwidth/2;" ); |
117 | v->codeAppend ("float2 position = (sk_VertexID < 2) " |
118 | "? endpts.xy - outset : endpts.zw + outset;" ); |
119 | |
120 | // Calculate Manhattan distance from both butt caps, where distance=0 on the actual endpoint and |
121 | // distance=-.5 on the outset edge. |
122 | GrGLSLVarying edgeDistances(kFloat4_GrSLType); |
123 | varyingHandler->addVarying("edge_distances" , &edgeDistances); |
124 | v->codeAppendf("%s.xz = float2(-.5, dot(endpts.zw - endpts.xy, tan) / nwidth + .5);" , |
125 | edgeDistances.vsOut()); |
126 | v->codeAppendf("%s.xz = (sk_VertexID < 2) ? %s.xz : %s.zx;" , |
127 | edgeDistances.vsOut(), edgeDistances.vsOut(), edgeDistances.vsOut()); |
128 | |
129 | // Outset the vertex position for stroke radius plus edge AA. |
130 | v->codeAppend ("outset = n * (stroke_radius + nwidth/2);" ); |
131 | v->codeAppend ("position += (0 == (sk_VertexID & 1)) ? +outset : -outset;" ); |
132 | |
133 | // Calculate Manhattan distance from both edges, where distance=0 on the actual edge and |
134 | // distance=-.5 on the outset. |
135 | v->codeAppendf("%s.yw = float2(-.5, 2*stroke_radius / nwidth + .5);" , edgeDistances.vsOut()); |
136 | v->codeAppendf("%s.yw = (0 == (sk_VertexID & 1)) ? %s.yw : %s.wy;" , |
137 | edgeDistances.vsOut(), edgeDistances.vsOut(), edgeDistances.vsOut()); |
138 | |
139 | gpArgs->fPositionVar.set(kFloat2_GrSLType, "position" ); |
140 | this->emitTransforms(v, varyingHandler, uniHandler, GrShaderVar("position" , kFloat2_GrSLType), |
141 | SkMatrix::I(), args.fFPCoordTransformHandler); |
142 | |
143 | // Use the 4 edge distances to calculate coverage in the fragment shader. |
144 | GrGLSLFPFragmentBuilder* f = args.fFragBuilder; |
145 | f->codeAppendf("half2 coverages = half2(min(%s.xy, .5) + min(%s.zw, .5));" , |
146 | edgeDistances.fsIn(), edgeDistances.fsIn()); |
147 | f->codeAppendf("%s = half4(coverages.x * coverages.y);" , args.fOutputColor); |
148 | |
149 | // This shader doesn't use the built-in Ganesh coverage. |
150 | f->codeAppendf("%s = half4(1);" , args.fOutputCoverage); |
151 | } |
152 | |
// Out-of-class definition of the static constexpr attribute table declared (with its initializer)
// inside LinearStrokeProcessor.
constexpr GrPrimitiveProcessor::Attribute LinearStrokeProcessor::kInstanceAttribs[];
154 | |
155 | // This class draws stroked cubics in post-transform device space. Rigid-body transforms can be |
156 | // achieved by transforming the curve ahead of time and adjusting the stroke width. Skews of the |
157 | // stroke itself are not yet supported. Quadratics can be drawn by converting them to cubics. |
158 | // |
159 | // This class works by finding stroke-width line segments orthogonal to the curve at a |
160 | // pre-determined number of evenly spaced points along the curve (evenly spaced in the parametric |
161 | // sense). It then connects the segments with a triangle strip. As for common in CCPR, clockwise- |
162 | // winding triangles from the strip emit positive coverage, counter-clockwise triangles emit |
163 | // negative, and we use SkBlendMode::kPlus. |
164 | class CubicStrokeProcessor : public GrGeometryProcessor { |
165 | public: |
166 | CubicStrokeProcessor() : GrGeometryProcessor(kCubicStrokeProcessor_ClassID) { |
167 | this->setInstanceAttributes(kInstanceAttribs, 3); |
168 | #ifdef SK_DEBUG |
169 | using Instance = CubicStrokeInstance; |
170 | SkASSERT(this->instanceStride() == sizeof(Instance)); |
171 | #endif |
172 | } |
173 | |
174 | private: |
175 | const char* name() const override { return "CubicStrokeProcessor" ; } |
176 | void getGLSLProcessorKey(const GrShaderCaps&, GrProcessorKeyBuilder*) const override {} |
177 | |
178 | static constexpr Attribute kInstanceAttribs[3] = { |
179 | {"X" , kFloat4_GrVertexAttribType, kFloat4_GrSLType}, |
180 | {"Y" , kFloat4_GrVertexAttribType, kFloat4_GrSLType}, |
181 | {"stroke_info" , kFloat2_GrVertexAttribType, kFloat2_GrSLType} |
182 | }; |
183 | |
184 | class Impl : public GrGLSLGeometryProcessor { |
185 | void setData(const GrGLSLProgramDataManager&, const GrPrimitiveProcessor&, |
186 | const CoordTransformRange&) override {} |
187 | void onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) override; |
188 | }; |
189 | |
190 | GrGLSLPrimitiveProcessor* createGLSLInstance(const GrShaderCaps&) const override { |
191 | return new Impl(); |
192 | } |
193 | }; |
194 | |
195 | void CubicStrokeProcessor::Impl::onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) { |
196 | GrGLSLVaryingHandler* varyingHandler = args.fVaryingHandler; |
197 | GrGLSLUniformHandler* uniHandler = args.fUniformHandler; |
198 | |
199 | varyingHandler->emitAttributes(args.fGP.cast<CubicStrokeProcessor>()); |
200 | |
201 | GrGLSLVertexBuilder* v = args.fVertBuilder; |
202 | v->codeAppend ("float4x2 P = transpose(float2x4(X, Y));" ); |
203 | v->codeAppend ("float stroke_radius = stroke_info[0];" ); |
204 | v->codeAppend ("float num_segments = stroke_info[1];" ); |
205 | |
206 | // Find the parametric T value at which we will emit our orthogonal line segment. We emit two |
207 | // line segments at T=0 and double at T=1 as well for AA butt caps. |
208 | v->codeAppend ("float point_id = float(sk_VertexID/2);" ); |
209 | v->codeAppend ("float T = max((point_id - 1) / num_segments, 0);" ); |
210 | v->codeAppend ("T = (point_id >= num_segments + 1) ? 1 : T;" ); // In case x/x !== 1. |
211 | |
212 | // Use De Casteljau's algorithm to find the position and tangent for our orthogonal line |
213 | // segment. De Casteljau's is more numerically stable than evaluating the curve and derivative |
214 | // directly. |
215 | v->codeAppend ("float2 ab = mix(P[0], P[1], T);" ); |
216 | v->codeAppend ("float2 bc = mix(P[1], P[2], T);" ); |
217 | v->codeAppend ("float2 cd = mix(P[2], P[3], T);" ); |
218 | v->codeAppend ("float2 abc = mix(ab, bc, T);" ); |
219 | v->codeAppend ("float2 bcd = mix(bc, cd, T);" ); |
220 | v->codeAppend ("float2 position = mix(abc, bcd, T);" ); |
221 | v->codeAppend ("float2 tan = bcd - abc;" ); |
222 | |
223 | // Find actual tangents for the corner cases when De Casteljau's yields tan=0. (We shouldn't |
224 | // encounter other numerically unstable cases where tan ~= 0, because GrCCStrokeGeometry snaps |
225 | // control points to endpoints in curves where they are almost equal.) |
226 | v->codeAppend ("if (0 == T && P[0] == P[1]) {" ); |
227 | v->codeAppend ( "tan = P[2] - P[0];" ); |
228 | v->codeAppend ("}" ); |
229 | v->codeAppend ("if (1 == T && P[2] == P[3]) {" ); |
230 | v->codeAppend ( "tan = P[3] - P[1];" ); |
231 | v->codeAppend ("}" ); |
232 | v->codeAppend ("tan = normalize(tan);" ); |
233 | v->codeAppend ("float2 n = float2(tan.y, -tan.x);" ); |
234 | v->codeAppend ("float nwidth = abs(n.x) + abs(n.y);" ); |
235 | |
236 | // Outset the vertex position for stroke radius plus edge AA. |
237 | v->codeAppend ("float2 outset = n * (stroke_radius + nwidth/2);" ); |
238 | v->codeAppend ("position += (0 == (sk_VertexID & 1)) ? -outset : +outset;" ); |
239 | |
240 | // Calculate the Manhattan distance from both edges, where distance=0 on the actual edge and |
241 | // distance=-.5 on the outset. |
242 | GrGLSLVarying coverages(kFloat3_GrSLType); |
243 | varyingHandler->addVarying("coverages" , &coverages); |
244 | v->codeAppendf("%s.xy = float2(-.5, 2*stroke_radius / nwidth + .5);" , coverages.vsOut()); |
245 | v->codeAppendf("%s.xy = (0 == (sk_VertexID & 1)) ? %s.xy : %s.yx;" , |
246 | coverages.vsOut(), coverages.vsOut(), coverages.vsOut()); |
247 | |
248 | // Adjust the orthogonal line segments on the endpoints so they straddle the actual endpoint |
249 | // at a Manhattan distance of .5 on either side. |
250 | v->codeAppend ("if (0 == point_id || num_segments+1 == point_id) {" ); |
251 | v->codeAppend ( "position -= tan*nwidth/2;" ); |
252 | v->codeAppend ("}" ); |
253 | v->codeAppend ("if (1 == point_id || num_segments+2 == point_id) {" ); |
254 | v->codeAppend ( "position += tan*nwidth/2;" ); |
255 | v->codeAppend ("}" ); |
256 | |
257 | // Interpolate coverage for butt cap AA from 0 on the outer segment to 1 on the inner. |
258 | v->codeAppendf("%s.z = (0 == point_id || num_segments+2 == point_id) ? 0 : 1;" , |
259 | coverages.vsOut()); |
260 | |
261 | gpArgs->fPositionVar.set(kFloat2_GrSLType, "position" ); |
262 | this->emitTransforms(v, varyingHandler, uniHandler, GrShaderVar("position" , kFloat2_GrSLType), |
263 | SkMatrix::I(), args.fFPCoordTransformHandler); |
264 | |
265 | // Use the 2 edge distances and interpolated butt cap AA to calculate fragment coverage. |
266 | GrGLSLFPFragmentBuilder* f = args.fFragBuilder; |
267 | f->codeAppendf("half2 edge_coverages = min(half2(%s.xy), .5);" , coverages.fsIn()); |
268 | f->codeAppend ("half coverage = edge_coverages.x + edge_coverages.y;" ); |
269 | f->codeAppendf("coverage *= half(%s.z);" , coverages.fsIn()); // Butt cap AA. |
270 | |
271 | // As is common for CCPR, clockwise-winding triangles from the strip emit positive coverage, and |
272 | // counter-clockwise triangles emit negative. |
273 | f->codeAppendf("%s = half4(sk_Clockwise ? +coverage : -coverage);" , args.fOutputColor); |
274 | |
275 | // This shader doesn't use the built-in Ganesh coverage. |
276 | f->codeAppendf("%s = half4(1);" , args.fOutputCoverage); |
277 | } |
278 | |
// Out-of-class definition of the static constexpr attribute table declared (with its initializer)
// inside CubicStrokeProcessor.
constexpr GrPrimitiveProcessor::Attribute CubicStrokeProcessor::kInstanceAttribs[];
280 | |
281 | } // anonymous namespace |
282 | |
283 | void GrCCStroker::parseDeviceSpaceStroke(const SkPath& path, const SkPoint* deviceSpacePts, |
284 | const SkStrokeRec& stroke, float strokeDevWidth, |
285 | GrScissorTest scissorTest, |
286 | const SkIRect& clippedDevIBounds, |
287 | const SkIVector& devToAtlasOffset) { |
288 | SkASSERT(SkStrokeRec::kStroke_Style == stroke.getStyle() || |
289 | SkStrokeRec::kHairline_Style == stroke.getStyle()); |
290 | SkASSERT(!fInstanceBuffer); |
291 | SkASSERT(!path.isEmpty()); |
292 | |
293 | if (!fHasOpenBatch) { |
294 | fBatches.emplace_back(&fTalliesAllocator, *fInstanceCounts[(int)GrScissorTest::kDisabled], |
295 | fScissorSubBatches.count()); |
296 | fInstanceCounts[(int)GrScissorTest::kDisabled] = fBatches.back().fNonScissorEndInstances; |
297 | fHasOpenBatch = true; |
298 | } |
299 | |
300 | InstanceTallies* currStrokeEndIndices; |
301 | if (GrScissorTest::kEnabled == scissorTest) { |
302 | SkASSERT(fBatches.back().fEndScissorSubBatch == fScissorSubBatches.count()); |
303 | fScissorSubBatches.emplace_back(&fTalliesAllocator, |
304 | *fInstanceCounts[(int)GrScissorTest::kEnabled], |
305 | clippedDevIBounds.makeOffset(devToAtlasOffset)); |
306 | fBatches.back().fEndScissorSubBatch = fScissorSubBatches.count(); |
307 | fInstanceCounts[(int)GrScissorTest::kEnabled] = |
308 | currStrokeEndIndices = fScissorSubBatches.back().fEndInstances; |
309 | } else { |
310 | currStrokeEndIndices = fBatches.back().fNonScissorEndInstances; |
311 | } |
312 | |
313 | fGeometry.beginPath(stroke, strokeDevWidth, currStrokeEndIndices); |
314 | |
315 | fPathInfos.push_back() = {devToAtlasOffset, strokeDevWidth/2, scissorTest}; |
316 | |
317 | int devPtsIdx = 0; |
318 | SkPath::Verb previousVerb = SkPath::kClose_Verb; |
319 | |
320 | for (SkPath::Verb verb : SkPathPriv::Verbs(path)) { |
321 | SkASSERT(SkPath::kDone_Verb != previousVerb); |
322 | const SkPoint* P = &deviceSpacePts[devPtsIdx - 1]; |
323 | switch (verb) { |
324 | case SkPath::kMove_Verb: |
325 | if (devPtsIdx > 0 && SkPath::kClose_Verb != previousVerb) { |
326 | fGeometry.capContourAndExit(); |
327 | } |
328 | fGeometry.moveTo(deviceSpacePts[devPtsIdx]); |
329 | ++devPtsIdx; |
330 | break; |
331 | case SkPath::kClose_Verb: |
332 | SkASSERT(SkPath::kClose_Verb != previousVerb); |
333 | fGeometry.closeContour(); |
334 | break; |
335 | case SkPath::kLine_Verb: |
336 | SkASSERT(SkPath::kClose_Verb != previousVerb); |
337 | fGeometry.lineTo(P[1]); |
338 | ++devPtsIdx; |
339 | break; |
340 | case SkPath::kQuad_Verb: |
341 | SkASSERT(SkPath::kClose_Verb != previousVerb); |
342 | fGeometry.quadraticTo(P); |
343 | devPtsIdx += 2; |
344 | break; |
345 | case SkPath::kCubic_Verb: { |
346 | SkASSERT(SkPath::kClose_Verb != previousVerb); |
347 | fGeometry.cubicTo(P); |
348 | devPtsIdx += 3; |
349 | break; |
350 | } |
351 | case SkPath::kConic_Verb: |
352 | SkASSERT(SkPath::kClose_Verb != previousVerb); |
353 | SK_ABORT("Stroked conics not supported." ); |
354 | break; |
355 | case SkPath::kDone_Verb: |
356 | break; |
357 | } |
358 | previousVerb = verb; |
359 | } |
360 | |
361 | if (devPtsIdx > 0 && SkPath::kClose_Verb != previousVerb) { |
362 | fGeometry.capContourAndExit(); |
363 | } |
364 | } |
365 | |
366 | // This class encapsulates the process of expanding ready-to-draw geometry from GrCCStrokeGeometry |
367 | // directly into GPU instance buffers. |
368 | class GrCCStroker::InstanceBufferBuilder { |
369 | public: |
370 | InstanceBufferBuilder(GrOnFlushResourceProvider* onFlushRP, GrCCStroker* stroker) { |
371 | memcpy(fNextInstances, stroker->fBaseInstances, sizeof(fNextInstances)); |
372 | #ifdef SK_DEBUG |
373 | fEndInstances[0] = stroker->fBaseInstances[0] + *stroker->fInstanceCounts[0]; |
374 | fEndInstances[1] = stroker->fBaseInstances[1] + *stroker->fInstanceCounts[1]; |
375 | #endif |
376 | |
377 | int endConicsIdx = stroker->fBaseInstances[1].fConics + |
378 | stroker->fInstanceCounts[1]->fConics; |
379 | fInstanceBuffer.resetAndMapBuffer(onFlushRP, endConicsIdx * sizeof(ConicInstance)); |
380 | if (!fInstanceBuffer.gpuBuffer()) { |
381 | SkDebugf("WARNING: failed to allocate CCPR stroke instance buffer.\n" ); |
382 | return; |
383 | } |
384 | } |
385 | |
386 | bool isMapped() const { return fInstanceBuffer.isMapped(); } |
387 | |
388 | void updateCurrentInfo(const PathInfo& pathInfo) { |
389 | SkASSERT(this->isMapped()); |
390 | fCurrDX = static_cast<float>(pathInfo.fDevToAtlasOffset.x()); |
391 | fCurrDY = static_cast<float>(pathInfo.fDevToAtlasOffset.y()); |
392 | fCurrStrokeRadius = pathInfo.fStrokeRadius; |
393 | fCurrNextInstances = &fNextInstances[(int)pathInfo.fScissorTest]; |
394 | SkDEBUGCODE(fCurrEndInstances = &fEndInstances[(int)pathInfo.fScissorTest]); |
395 | } |
396 | |
397 | void appendLinearStroke(const SkPoint endpts[2]) { |
398 | SkASSERT(this->isMapped()); |
399 | this->appendLinearStrokeInstance().set(endpts, fCurrDX, fCurrDY, fCurrStrokeRadius); |
400 | } |
401 | |
402 | void appendQuadraticStroke(const SkPoint P[3], int numLinearSegmentsLog2) { |
403 | SkASSERT(this->isMapped()); |
404 | SkASSERT(numLinearSegmentsLog2 > 0); |
405 | |
406 | Sk4f ptsT[2]; |
407 | Sk2f p0 = Sk2f::Load(P); |
408 | Sk2f p1 = Sk2f::Load(P+1); |
409 | Sk2f p2 = Sk2f::Load(P+2); |
410 | |
411 | // Convert the quadratic to cubic. |
412 | Sk2f c1 = SkNx_fma(Sk2f(2/3.f), p1 - p0, p0); |
413 | Sk2f c2 = SkNx_fma(Sk2f(1/3.f), p2 - p1, p1); |
414 | Sk2f::Store4(ptsT, p0, c1, c2, p2); |
415 | |
416 | this->appendCubicStrokeInstance(numLinearSegmentsLog2).set( |
417 | ptsT[0], ptsT[1], fCurrDX, fCurrDY, fCurrStrokeRadius, 1 << numLinearSegmentsLog2); |
418 | } |
419 | |
420 | void appendCubicStroke(const SkPoint P[3], int numLinearSegmentsLog2) { |
421 | SkASSERT(this->isMapped()); |
422 | SkASSERT(numLinearSegmentsLog2 > 0); |
423 | this->appendCubicStrokeInstance(numLinearSegmentsLog2).set( |
424 | P, fCurrDX, fCurrDY, fCurrStrokeRadius, 1 << numLinearSegmentsLog2); |
425 | } |
426 | |
427 | void appendJoin(Verb joinVerb, const SkPoint& center, const SkVector& leftNorm, |
428 | const SkVector& rightNorm, float miterCapHeightOverWidth, float conicWeight) { |
429 | SkASSERT(this->isMapped()); |
430 | |
431 | Sk2f offset = Sk2f::Load(¢er) + Sk2f(fCurrDX, fCurrDY); |
432 | Sk2f n0 = Sk2f::Load(&leftNorm); |
433 | Sk2f n1 = Sk2f::Load(&rightNorm); |
434 | |
435 | // Identify the outer edge. |
436 | Sk2f cross = n0 * SkNx_shuffle<1,0>(n1); |
437 | if (cross[0] < cross[1]) { |
438 | Sk2f tmp = n0; |
439 | n0 = -n1; |
440 | n1 = -tmp; |
441 | } |
442 | |
443 | if (!GrCCStrokeGeometry::IsInternalJoinVerb(joinVerb)) { |
444 | // Normal joins are a triangle that connects the outer corners of two adjoining strokes. |
445 | this->appendTriangleInstance().set( |
446 | n1 * fCurrStrokeRadius, Sk2f(0, 0), n0 * fCurrStrokeRadius, offset, |
447 | TriangleInstance::Ordering::kXYTransposed); |
448 | if (Verb::kBevelJoin == joinVerb) { |
449 | return; |
450 | } |
451 | } else { |
452 | // Internal joins are coverage-counted, self-intersecting quadrilaterals that tie the |
453 | // four corners of two adjoining strokes together a like a shoelace. Coverage is |
454 | // negative on the inside half. We implement this geometry with a pair of triangles. |
455 | this->appendTriangleInstance().set( |
456 | -n0 * fCurrStrokeRadius, n0 * fCurrStrokeRadius, n1 * fCurrStrokeRadius, |
457 | offset, TriangleInstance::Ordering::kXYTransposed); |
458 | if (Verb::kBevelJoin == joinVerb) { |
459 | return; |
460 | } |
461 | this->appendTriangleInstance().set( |
462 | -n0 * fCurrStrokeRadius, n1 * fCurrStrokeRadius, -n1 * fCurrStrokeRadius, |
463 | offset, TriangleInstance::Ordering::kXYTransposed); |
464 | if (Verb::kBevelJoin == joinVerb) { |
465 | return; |
466 | } |
467 | if (Verb::kInternalBevelJoin == joinVerb) { |
468 | return; |
469 | } |
470 | } |
471 | |
472 | // For miter and round joins, we place an additional triangle cap on top of the bevel. This |
473 | // triangle is literal for miters and is conic control points for round joins. |
474 | SkASSERT(miterCapHeightOverWidth >= 0 || SkScalarIsNaN(miterCapHeightOverWidth)); |
475 | Sk2f base = n1 - n0; |
476 | Sk2f baseNorm = Sk2f(base[1], -base[0]); |
477 | Sk2f c = (n0 + n1) * .5f + baseNorm * miterCapHeightOverWidth; |
478 | |
479 | if (Verb::kMiterJoin == joinVerb) { |
480 | this->appendTriangleInstance().set( |
481 | n0 * fCurrStrokeRadius, c * fCurrStrokeRadius, n1 * fCurrStrokeRadius, offset, |
482 | TriangleInstance::Ordering::kXYTransposed); |
483 | } else { |
484 | SkASSERT(Verb::kRoundJoin == joinVerb || Verb::kInternalRoundJoin == joinVerb); |
485 | this->appendConicInstance().setW(n0 * fCurrStrokeRadius, c * fCurrStrokeRadius, |
486 | n1 * fCurrStrokeRadius, offset, conicWeight); |
487 | if (Verb::kInternalRoundJoin == joinVerb) { |
488 | this->appendConicInstance().setW(-n1 * fCurrStrokeRadius, c * -fCurrStrokeRadius, |
489 | -n0 * fCurrStrokeRadius, offset, conicWeight); |
490 | } |
491 | } |
492 | } |
493 | |
494 | void appendCap(Verb capType, const SkPoint& pt, const SkVector& norm) { |
495 | SkASSERT(this->isMapped()); |
496 | |
497 | Sk2f n = Sk2f::Load(&norm) * fCurrStrokeRadius; |
498 | Sk2f v = Sk2f(-n[1], n[0]); |
499 | Sk2f offset = Sk2f::Load(&pt) + Sk2f(fCurrDX, fCurrDY); |
500 | |
501 | if (Verb::kSquareCap == capType) { |
502 | SkPoint endPts[2] = {{0, 0}, {v[0], v[1]}}; |
503 | this->appendLinearStrokeInstance().set(endPts, offset[0], offset[1], fCurrStrokeRadius); |
504 | } else { |
505 | SkASSERT(Verb::kRoundCap == capType); |
506 | this->appendTriangleInstance().set( |
507 | n, v, -n, offset, TriangleInstance::Ordering::kXYTransposed); |
508 | this->appendConicInstance().setW(n, n + v, v, offset, SK_ScalarRoot2Over2); |
509 | this->appendConicInstance().setW(v, v - n, -n, offset, SK_ScalarRoot2Over2); |
510 | } |
511 | } |
512 | |
513 | sk_sp<GrGpuBuffer> finish() { |
514 | SkASSERT(this->isMapped()); |
515 | SkASSERT(!memcmp(fNextInstances, fEndInstances, sizeof(fNextInstances))); |
516 | fInstanceBuffer.unmapBuffer(); |
517 | SkASSERT(!this->isMapped()); |
518 | return sk_ref_sp(fInstanceBuffer.gpuBuffer()); |
519 | } |
520 | |
521 | private: |
522 | LinearStrokeInstance& appendLinearStrokeInstance() { |
523 | int instanceIdx = fCurrNextInstances->fStrokes[0]++; |
524 | SkASSERT(instanceIdx < fCurrEndInstances->fStrokes[0]); |
525 | |
526 | return reinterpret_cast<LinearStrokeInstance*>(fInstanceBuffer.data())[instanceIdx]; |
527 | } |
528 | |
529 | CubicStrokeInstance& appendCubicStrokeInstance(int numLinearSegmentsLog2) { |
530 | SkASSERT(numLinearSegmentsLog2 > 0); |
531 | SkASSERT(numLinearSegmentsLog2 <= kMaxNumLinearSegmentsLog2); |
532 | |
533 | int instanceIdx = fCurrNextInstances->fStrokes[numLinearSegmentsLog2]++; |
534 | SkASSERT(instanceIdx < fCurrEndInstances->fStrokes[numLinearSegmentsLog2]); |
535 | |
536 | return reinterpret_cast<CubicStrokeInstance*>(fInstanceBuffer.data())[instanceIdx]; |
537 | } |
538 | |
539 | TriangleInstance& appendTriangleInstance() { |
540 | int instanceIdx = fCurrNextInstances->fTriangles++; |
541 | SkASSERT(instanceIdx < fCurrEndInstances->fTriangles); |
542 | |
543 | return reinterpret_cast<TriangleInstance*>(fInstanceBuffer.data())[instanceIdx]; |
544 | } |
545 | |
546 | ConicInstance& appendConicInstance() { |
547 | int instanceIdx = fCurrNextInstances->fConics++; |
548 | SkASSERT(instanceIdx < fCurrEndInstances->fConics); |
549 | |
550 | return reinterpret_cast<ConicInstance*>(fInstanceBuffer.data())[instanceIdx]; |
551 | } |
552 | |
553 | float fCurrDX, fCurrDY; |
554 | float fCurrStrokeRadius; |
555 | InstanceTallies* fCurrNextInstances; |
556 | SkDEBUGCODE(const InstanceTallies* fCurrEndInstances); |
557 | |
558 | GrAutoMapVertexBuffer fInstanceBuffer; |
559 | InstanceTallies fNextInstances[2]; |
560 | SkDEBUGCODE(InstanceTallies fEndInstances[2]); |
561 | }; |
562 | |
563 | GrCCStroker::BatchID GrCCStroker::closeCurrentBatch() { |
564 | if (!fHasOpenBatch) { |
565 | return kEmptyBatchID; |
566 | } |
567 | int start = (fBatches.count() < 2) ? 0 : fBatches[fBatches.count() - 2].fEndScissorSubBatch; |
568 | int end = fBatches.back().fEndScissorSubBatch; |
569 | fMaxNumScissorSubBatches = std::max(fMaxNumScissorSubBatches, end - start); |
570 | fHasOpenBatch = false; |
571 | return fBatches.count() - 1; |
572 | } |
573 | |
574 | bool GrCCStroker::prepareToDraw(GrOnFlushResourceProvider* onFlushRP) { |
575 | SkASSERT(!fInstanceBuffer); |
576 | SkASSERT(!fHasOpenBatch); // Call closeCurrentBatch() first. |
577 | |
578 | // Here we layout a single instance buffer to share with every internal batch. |
579 | // |
580 | // Rather than place each instance array in its own GPU buffer, we allocate a single |
581 | // megabuffer and lay them all out side-by-side. We can offset the "baseInstance" parameter in |
582 | // our draw calls to direct the GPU to the applicable elements within a given array. |
583 | fBaseInstances[0].fStrokes[0] = 0; |
584 | fBaseInstances[1].fStrokes[0] = fInstanceCounts[0]->fStrokes[0]; |
585 | int endLinearStrokesIdx = fBaseInstances[1].fStrokes[0] + fInstanceCounts[1]->fStrokes[0]; |
586 | |
587 | int cubicStrokesIdx = GrSizeDivRoundUp(endLinearStrokesIdx * sizeof(LinearStrokeInstance), |
588 | sizeof(CubicStrokeInstance)); |
589 | for (int i = 1; i <= kMaxNumLinearSegmentsLog2; ++i) { |
590 | for (int j = 0; j < kNumScissorModes; ++j) { |
591 | fBaseInstances[j].fStrokes[i] = cubicStrokesIdx; |
592 | cubicStrokesIdx += fInstanceCounts[j]->fStrokes[i]; |
593 | } |
594 | } |
595 | |
596 | int trianglesIdx = GrSizeDivRoundUp(cubicStrokesIdx * sizeof(CubicStrokeInstance), |
597 | sizeof(TriangleInstance)); |
598 | fBaseInstances[0].fTriangles = trianglesIdx; |
599 | fBaseInstances[1].fTriangles = |
600 | fBaseInstances[0].fTriangles + fInstanceCounts[0]->fTriangles; |
601 | int endTrianglesIdx = |
602 | fBaseInstances[1].fTriangles + fInstanceCounts[1]->fTriangles; |
603 | |
604 | int conicsIdx = |
605 | GrSizeDivRoundUp(endTrianglesIdx * sizeof(TriangleInstance), sizeof(ConicInstance)); |
606 | fBaseInstances[0].fConics = conicsIdx; |
607 | fBaseInstances[1].fConics = fBaseInstances[0].fConics + fInstanceCounts[0]->fConics; |
608 | |
609 | InstanceBufferBuilder builder(onFlushRP, this); |
610 | if (!builder.isMapped()) { |
611 | return false; // Buffer allocation failed. |
612 | } |
613 | |
614 | // Now parse the GrCCStrokeGeometry and expand it into the instance buffer. |
615 | int pathIdx = 0; |
616 | int ptsIdx = 0; |
617 | int paramsIdx = 0; |
618 | int normalsIdx = 0; |
619 | |
620 | const SkTArray<GrCCStrokeGeometry::Parameter, true>& params = fGeometry.params(); |
621 | const SkTArray<SkPoint, true>& pts = fGeometry.points(); |
622 | const SkTArray<SkVector, true>& normals = fGeometry.normals(); |
623 | |
624 | float miterCapHeightOverWidth=0, conicWeight=0; |
625 | |
626 | for (Verb verb : fGeometry.verbs()) { |
627 | switch (verb) { |
628 | case Verb::kBeginPath: |
629 | builder.updateCurrentInfo(fPathInfos[pathIdx]); |
630 | ++pathIdx; |
631 | continue; |
632 | |
633 | case Verb::kLinearStroke: |
634 | builder.appendLinearStroke(&pts[ptsIdx]); |
635 | ++ptsIdx; |
636 | continue; |
637 | case Verb::kQuadraticStroke: |
638 | builder.appendQuadraticStroke(&pts[ptsIdx], |
639 | params[paramsIdx++].fNumLinearSegmentsLog2); |
640 | ptsIdx += 2; |
641 | ++normalsIdx; |
642 | continue; |
643 | case Verb::kCubicStroke: |
644 | builder.appendCubicStroke(&pts[ptsIdx], params[paramsIdx++].fNumLinearSegmentsLog2); |
645 | ptsIdx += 3; |
646 | ++normalsIdx; |
647 | continue; |
648 | |
649 | case Verb::kRoundJoin: |
650 | case Verb::kInternalRoundJoin: |
651 | conicWeight = params[paramsIdx++].fConicWeight; |
652 | // fallthru |
653 | case Verb::kMiterJoin: |
654 | miterCapHeightOverWidth = params[paramsIdx++].fMiterCapHeightOverWidth; |
655 | // fallthru |
656 | case Verb::kBevelJoin: |
657 | case Verb::kInternalBevelJoin: |
658 | builder.appendJoin(verb, pts[ptsIdx], normals[normalsIdx], normals[normalsIdx + 1], |
659 | miterCapHeightOverWidth, conicWeight); |
660 | ++normalsIdx; |
661 | continue; |
662 | |
663 | case Verb::kSquareCap: |
664 | case Verb::kRoundCap: |
665 | builder.appendCap(verb, pts[ptsIdx], normals[normalsIdx]); |
666 | continue; |
667 | |
668 | case Verb::kEndContour: |
669 | ++ptsIdx; |
670 | ++normalsIdx; |
671 | continue; |
672 | } |
673 | SK_ABORT("Invalid CCPR stroke element." ); |
674 | } |
675 | |
676 | fInstanceBuffer = builder.finish(); |
677 | SkASSERT(fPathInfos.count() == pathIdx); |
678 | SkASSERT(pts.count() == ptsIdx); |
679 | SkASSERT(normals.count() == normalsIdx); |
680 | return true; |
681 | } |
682 | |
683 | void GrCCStroker::drawStrokes(GrOpFlushState* flushState, GrCCCoverageProcessor* proc, |
684 | BatchID batchID, const SkIRect& drawBounds) const { |
685 | using PrimitiveType = GrCCCoverageProcessor::PrimitiveType; |
686 | SkASSERT(fInstanceBuffer); |
687 | |
688 | if (kEmptyBatchID == batchID) { |
689 | return; |
690 | } |
691 | const Batch& batch = fBatches[batchID]; |
692 | int startScissorSubBatch = (!batchID) ? 0 : fBatches[batchID - 1].fEndScissorSubBatch; |
693 | |
694 | const InstanceTallies* startIndices[2]; |
695 | startIndices[(int)GrScissorTest::kDisabled] = (!batchID) |
696 | ? &fZeroTallies : fBatches[batchID - 1].fNonScissorEndInstances; |
697 | startIndices[(int)GrScissorTest::kEnabled] = (!startScissorSubBatch) |
698 | ? &fZeroTallies : fScissorSubBatches[startScissorSubBatch - 1].fEndInstances; |
699 | |
700 | GrPipeline pipeline(GrScissorTest::kEnabled, SkBlendMode::kPlus, |
701 | flushState->drawOpArgs().writeSwizzle()); |
702 | |
703 | // Draw linear strokes. |
704 | this->drawLog2Strokes(0, flushState, LinearStrokeProcessor(), pipeline, batch, startIndices, |
705 | startScissorSubBatch, drawBounds); |
706 | |
707 | // Draw cubic strokes. (Quadratics were converted to cubics for GPU processing.) |
708 | CubicStrokeProcessor cubicProc; |
709 | for (int i = 1; i <= kMaxNumLinearSegmentsLog2; ++i) { |
710 | this->drawLog2Strokes(i, flushState, cubicProc, pipeline, batch, startIndices, |
711 | startScissorSubBatch, drawBounds); |
712 | } |
713 | |
714 | int numConnectingGeometrySubpasses = proc->numSubpasses(); |
715 | |
716 | // Draw triangles. |
717 | for (int i = 0; i < numConnectingGeometrySubpasses; ++i) { |
718 | proc->reset(PrimitiveType::kTriangles, i, flushState->resourceProvider()); |
719 | this->drawConnectingGeometry<&InstanceTallies::fTriangles>( |
720 | flushState, pipeline, *proc, batch, startIndices, startScissorSubBatch, drawBounds); |
721 | } |
722 | |
723 | // Draw conics. |
724 | for (int i = 0; i < numConnectingGeometrySubpasses; ++i) { |
725 | proc->reset(PrimitiveType::kConics, i, flushState->resourceProvider()); |
726 | this->drawConnectingGeometry<&InstanceTallies::fConics>( |
727 | flushState, pipeline, *proc, batch, startIndices, startScissorSubBatch, drawBounds); |
728 | } |
729 | } |
730 | |
731 | void GrCCStroker::drawLog2Strokes(int numSegmentsLog2, GrOpFlushState* flushState, |
732 | const GrPrimitiveProcessor& processor, const GrPipeline& pipeline, |
733 | const Batch& batch, const InstanceTallies* startIndices[2], |
734 | int startScissorSubBatch, const SkIRect& drawBounds) const { |
735 | GrProgramInfo programInfo(flushState->proxy()->numSamples(), |
736 | flushState->proxy()->numStencilSamples(), |
737 | flushState->proxy()->backendFormat(), |
738 | flushState->writeView()->origin(), &pipeline, &processor, |
739 | GrPrimitiveType::kTriangleStrip); |
740 | |
741 | flushState->bindPipeline(programInfo, SkRect::Make(drawBounds)); |
742 | flushState->bindBuffers(nullptr, fInstanceBuffer.get(), nullptr); |
743 | |
744 | // Linear strokes draw a quad. Cubic strokes emit a strip with normals at "numSegments" |
745 | // evenly-spaced points along the curve, plus one more for the final endpoint, plus two more for |
746 | // AA butt caps. (i.e., 2 vertices * (numSegments + 3).) |
747 | int numStripVertices = (0 == numSegmentsLog2) ? 4 : ((1 << numSegmentsLog2) + 3) * 2; |
748 | |
749 | // Draw non-scissored strokes. |
750 | int baseInstance = fBaseInstances[(int)GrScissorTest::kDisabled].fStrokes[numSegmentsLog2]; |
751 | int startIdx = startIndices[(int)GrScissorTest::kDisabled]->fStrokes[numSegmentsLog2]; |
752 | int endIdx = batch.fNonScissorEndInstances->fStrokes[numSegmentsLog2]; |
753 | SkASSERT(endIdx >= startIdx); |
754 | if (int instanceCount = endIdx - startIdx) { |
755 | flushState->setScissorRect(drawBounds); |
756 | flushState->drawInstanced(instanceCount, baseInstance + startIdx, numStripVertices, 0); |
757 | } |
758 | |
759 | // Draw scissored strokes. |
760 | baseInstance = fBaseInstances[(int)GrScissorTest::kEnabled].fStrokes[numSegmentsLog2]; |
761 | startIdx = startIndices[(int)GrScissorTest::kEnabled]->fStrokes[numSegmentsLog2]; |
762 | for (int i = startScissorSubBatch; i < batch.fEndScissorSubBatch; ++i) { |
763 | const ScissorSubBatch& subBatch = fScissorSubBatches[i]; |
764 | endIdx = subBatch.fEndInstances->fStrokes[numSegmentsLog2]; |
765 | SkASSERT(endIdx >= startIdx); |
766 | if (int instanceCount = endIdx - startIdx) { |
767 | flushState->setScissorRect(subBatch.fScissor); |
768 | flushState->drawInstanced(instanceCount, baseInstance + startIdx, numStripVertices, 0); |
769 | startIdx = endIdx; |
770 | } |
771 | } |
772 | } |
773 | |
774 | template<int GrCCStrokeGeometry::InstanceTallies::* InstanceType> |
775 | void GrCCStroker::drawConnectingGeometry(GrOpFlushState* flushState, const GrPipeline& pipeline, |
776 | const GrCCCoverageProcessor& processor, |
777 | const Batch& batch, const InstanceTallies* startIndices[2], |
778 | int startScissorSubBatch, |
779 | const SkIRect& drawBounds) const { |
780 | processor.bindPipeline(flushState, pipeline, SkRect::Make(drawBounds)); |
781 | processor.bindBuffers(flushState->opsRenderPass(), fInstanceBuffer.get()); |
782 | |
783 | // Append non-scissored meshes. |
784 | int baseInstance = fBaseInstances[(int)GrScissorTest::kDisabled].*InstanceType; |
785 | int startIdx = startIndices[(int)GrScissorTest::kDisabled]->*InstanceType; |
786 | int endIdx = batch.fNonScissorEndInstances->*InstanceType; |
787 | SkASSERT(endIdx >= startIdx); |
788 | if (int instanceCount = endIdx - startIdx) { |
789 | flushState->setScissorRect(drawBounds); |
790 | processor.drawInstances(flushState->opsRenderPass(), instanceCount, |
791 | baseInstance + startIdx); |
792 | } |
793 | |
794 | // Append scissored meshes. |
795 | baseInstance = fBaseInstances[(int)GrScissorTest::kEnabled].*InstanceType; |
796 | startIdx = startIndices[(int)GrScissorTest::kEnabled]->*InstanceType; |
797 | for (int i = startScissorSubBatch; i < batch.fEndScissorSubBatch; ++i) { |
798 | const ScissorSubBatch& subBatch = fScissorSubBatches[i]; |
799 | endIdx = subBatch.fEndInstances->*InstanceType; |
800 | SkASSERT(endIdx >= startIdx); |
801 | if (int instanceCount = endIdx - startIdx) { |
802 | flushState->setScissorRect(subBatch.fScissor); |
803 | processor.drawInstances(flushState->opsRenderPass(), instanceCount, |
804 | baseInstance + startIdx); |
805 | startIdx = endIdx; |
806 | } |
807 | } |
808 | } |
809 | |