1/*-------------------------------------------------------------------------
2 *
3 * jsonb.h
4 * Declarations for jsonb data type support.
5 *
6 * Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 *
8 * src/include/utils/jsonb.h
9 *
10 *-------------------------------------------------------------------------
11 */
12#ifndef __JSONB_H__
13#define __JSONB_H__
14
15#include "lib/stringinfo.h"
16#include "utils/array.h"
17#include "utils/numeric.h"
18
19/* Tokens used when sequentially processing a jsonb value */
20typedef enum
21{
22 WJB_DONE,
23 WJB_KEY,
24 WJB_VALUE,
25 WJB_ELEM,
26 WJB_BEGIN_ARRAY,
27 WJB_END_ARRAY,
28 WJB_BEGIN_OBJECT,
29 WJB_END_OBJECT
30} JsonbIteratorToken;
31
32/* Strategy numbers for GIN index opclasses */
33#define JsonbContainsStrategyNumber 7
34#define JsonbExistsStrategyNumber 9
35#define JsonbExistsAnyStrategyNumber 10
36#define JsonbExistsAllStrategyNumber 11
37#define JsonbJsonpathExistsStrategyNumber 15
38#define JsonbJsonpathPredicateStrategyNumber 16
39
40
41/*
42 * In the standard jsonb_ops GIN opclass for jsonb, we choose to index both
43 * keys and values. The storage format is text. The first byte of the text
44 * string distinguishes whether this is a key (always a string), null value,
45 * boolean value, numeric value, or string value. However, array elements
46 * that are strings are marked as though they were keys; this imprecision
47 * supports the definition of the "exists" operator, which treats array
48 * elements like keys. The remainder of the text string is empty for a null
49 * value, "t" or "f" for a boolean value, a normalized print representation of
50 * a numeric value, or the text of a string value. However, if the length of
51 * this text representation would exceed JGIN_MAXLENGTH bytes, we instead hash
52 * the text representation and store an 8-hex-digit representation of the
53 * uint32 hash value, marking the prefix byte with an additional bit to
54 * distinguish that this has happened. Hashing long strings saves space and
55 * ensures that we won't overrun the maximum entry length for a GIN index.
56 * (But JGIN_MAXLENGTH is quite a bit shorter than GIN's limit. It's chosen
57 * to ensure that the on-disk text datum will have a short varlena header.)
58 * Note that when any hashed item appears in a query, we must recheck index
59 * matches against the heap tuple; currently, this costs nothing because we
60 * must always recheck for other reasons.
61 */
62#define JGINFLAG_KEY 0x01 /* key (or string array element) */
63#define JGINFLAG_NULL 0x02 /* null value */
64#define JGINFLAG_BOOL 0x03 /* boolean value */
65#define JGINFLAG_NUM 0x04 /* numeric value */
66#define JGINFLAG_STR 0x05 /* string value (if not an array element) */
67#define JGINFLAG_HASHED 0x10 /* OR'd into flag if value was hashed */
68#define JGIN_MAXLENGTH 125 /* max length of text part before hashing */
69
70/* Convenience macros */
71#define DatumGetJsonbP(d) ((Jsonb *) PG_DETOAST_DATUM(d))
72#define DatumGetJsonbPCopy(d) ((Jsonb *) PG_DETOAST_DATUM_COPY(d))
73#define JsonbPGetDatum(p) PointerGetDatum(p)
74#define PG_GETARG_JSONB_P(x) DatumGetJsonbP(PG_GETARG_DATUM(x))
75#define PG_GETARG_JSONB_P_COPY(x) DatumGetJsonbPCopy(PG_GETARG_DATUM(x))
76#define PG_RETURN_JSONB_P(x) PG_RETURN_POINTER(x)
77
78typedef struct JsonbPair JsonbPair;
79typedef struct JsonbValue JsonbValue;
80
81/*
82 * Jsonbs are varlena objects, so must meet the varlena convention that the
83 * first int32 of the object contains the total object size in bytes. Be sure
84 * to use VARSIZE() and SET_VARSIZE() to access it, though!
85 *
86 * Jsonb is the on-disk representation, in contrast to the in-memory JsonbValue
87 * representation. Often, JsonbValues are just shims through which a Jsonb
88 * buffer is accessed, but they can also be deep copied and passed around.
89 *
90 * Jsonb is a tree structure. Each node in the tree consists of a JEntry
91 * header and a variable-length content (possibly of zero size). The JEntry
92 * header indicates what kind of a node it is, e.g. a string or an array,
93 * and provides the length of its variable-length portion.
94 *
95 * The JEntry and the content of a node are not stored physically together.
96 * Instead, the container array or object has an array that holds the JEntrys
97 * of all the child nodes, followed by their variable-length portions.
98 *
99 * The root node is an exception; it has no parent array or object that could
100 * hold its JEntry. Hence, no JEntry header is stored for the root node. It
101 * is implicitly known that the root node must be an array or an object,
102 * so we can get away without the type indicator as long as we can distinguish
103 * the two. For that purpose, both an array and an object begin with a uint32
104 * header field, which contains an JB_FOBJECT or JB_FARRAY flag. When a naked
105 * scalar value needs to be stored as a Jsonb value, what we actually store is
106 * an array with one element, with the flags in the array's header field set
107 * to JB_FSCALAR | JB_FARRAY.
108 *
109 * Overall, the Jsonb struct requires 4-bytes alignment. Within the struct,
110 * the variable-length portion of some node types is aligned to a 4-byte
111 * boundary, while others are not. When alignment is needed, the padding is
112 * in the beginning of the node that requires it. For example, if a numeric
113 * node is stored after a string node, so that the numeric node begins at
114 * offset 3, the variable-length portion of the numeric node will begin with
115 * one padding byte so that the actual numeric data is 4-byte aligned.
116 */
117
118/*
119 * JEntry format.
120 *
121 * The least significant 28 bits store either the data length of the entry,
122 * or its end+1 offset from the start of the variable-length portion of the
123 * containing object. The next three bits store the type of the entry, and
124 * the high-order bit tells whether the least significant bits store a length
125 * or an offset.
126 *
127 * The reason for the offset-or-length complication is to compromise between
128 * access speed and data compressibility. In the initial design each JEntry
129 * always stored an offset, but this resulted in JEntry arrays with horrible
130 * compressibility properties, so that TOAST compression of a JSONB did not
131 * work well. Storing only lengths would greatly improve compressibility,
132 * but it makes random access into large arrays expensive (O(N) not O(1)).
133 * So what we do is store an offset in every JB_OFFSET_STRIDE'th JEntry and
134 * a length in the rest. This results in reasonably compressible data (as
135 * long as the stride isn't too small). We may have to examine as many as
136 * JB_OFFSET_STRIDE JEntrys in order to find out the offset or length of any
137 * given item, but that's still O(1) no matter how large the container is.
138 *
139 * We could avoid eating a flag bit for this purpose if we were to store
140 * the stride in the container header, or if we were willing to treat the
141 * stride as an unchangeable constant. Neither of those options is very
142 * attractive though.
143 */
144typedef uint32 JEntry;
145
146#define JENTRY_OFFLENMASK 0x0FFFFFFF
147#define JENTRY_TYPEMASK 0x70000000
148#define JENTRY_HAS_OFF 0x80000000
149
150/* values stored in the type bits */
151#define JENTRY_ISSTRING 0x00000000
152#define JENTRY_ISNUMERIC 0x10000000
153#define JENTRY_ISBOOL_FALSE 0x20000000
154#define JENTRY_ISBOOL_TRUE 0x30000000
155#define JENTRY_ISNULL 0x40000000
156#define JENTRY_ISCONTAINER 0x50000000 /* array or object */
157
158/* Access macros. Note possible multiple evaluations */
159#define JBE_OFFLENFLD(je_) ((je_) & JENTRY_OFFLENMASK)
160#define JBE_HAS_OFF(je_) (((je_) & JENTRY_HAS_OFF) != 0)
161#define JBE_ISSTRING(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING)
162#define JBE_ISNUMERIC(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC)
163#define JBE_ISCONTAINER(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER)
164#define JBE_ISNULL(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNULL)
165#define JBE_ISBOOL_TRUE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_TRUE)
166#define JBE_ISBOOL_FALSE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE)
167#define JBE_ISBOOL(je_) (JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_))
168
169/* Macro for advancing an offset variable to the next JEntry */
170#define JBE_ADVANCE_OFFSET(offset, je) \
171 do { \
172 JEntry je_ = (je); \
173 if (JBE_HAS_OFF(je_)) \
174 (offset) = JBE_OFFLENFLD(je_); \
175 else \
176 (offset) += JBE_OFFLENFLD(je_); \
177 } while(0)
178
179/*
180 * We store an offset, not a length, every JB_OFFSET_STRIDE children.
181 * Caution: this macro should only be referenced when creating a JSONB
182 * value. When examining an existing value, pay attention to the HAS_OFF
183 * bits instead. This allows changes in the offset-placement heuristic
184 * without breaking on-disk compatibility.
185 */
186#define JB_OFFSET_STRIDE 32
187
188/*
189 * A jsonb array or object node, within a Jsonb Datum.
190 *
191 * An array has one child for each element, stored in array order.
192 *
193 * An object has two children for each key/value pair. The keys all appear
194 * first, in key sort order; then the values appear, in an order matching the
195 * key order. This arrangement keeps the keys compact in memory, making a
196 * search for a particular key more cache-friendly.
197 */
198typedef struct JsonbContainer
199{
200 uint32 header; /* number of elements or key/value pairs, and
201 * flags */
202 JEntry children[FLEXIBLE_ARRAY_MEMBER];
203
204 /* the data for each child node follows. */
205} JsonbContainer;
206
207/* flags for the header-field in JsonbContainer */
208#define JB_CMASK 0x0FFFFFFF /* mask for count field */
209#define JB_FSCALAR 0x10000000 /* flag bits */
210#define JB_FOBJECT 0x20000000
211#define JB_FARRAY 0x40000000
212
213/* convenience macros for accessing a JsonbContainer struct */
214#define JsonContainerSize(jc) ((jc)->header & JB_CMASK)
215#define JsonContainerIsScalar(jc) (((jc)->header & JB_FSCALAR) != 0)
216#define JsonContainerIsObject(jc) (((jc)->header & JB_FOBJECT) != 0)
217#define JsonContainerIsArray(jc) (((jc)->header & JB_FARRAY) != 0)
218
219/* The top-level on-disk format for a jsonb datum. */
220typedef struct
221{
222 int32 vl_len_; /* varlena header (do not touch directly!) */
223 JsonbContainer root;
224} Jsonb;
225
226/* convenience macros for accessing the root container in a Jsonb datum */
227#define JB_ROOT_COUNT(jbp_) (*(uint32 *) VARDATA(jbp_) & JB_CMASK)
228#define JB_ROOT_IS_SCALAR(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FSCALAR) != 0)
229#define JB_ROOT_IS_OBJECT(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FOBJECT) != 0)
230#define JB_ROOT_IS_ARRAY(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FARRAY) != 0)
231
232
233enum jbvType
234{
235 /* Scalar types */
236 jbvNull = 0x0,
237 jbvString,
238 jbvNumeric,
239 jbvBool,
240 /* Composite types */
241 jbvArray = 0x10,
242 jbvObject,
243 /* Binary (i.e. struct Jsonb) jbvArray/jbvObject */
244 jbvBinary
245};
246
247/*
248 * JsonbValue: In-memory representation of Jsonb. This is a convenient
249 * deserialized representation, that can easily support using the "val"
250 * union across underlying types during manipulation. The Jsonb on-disk
251 * representation has various alignment considerations.
252 */
253struct JsonbValue
254{
255 enum jbvType type; /* Influences sort order */
256
257 union
258 {
259 Numeric numeric;
260 bool boolean;
261 struct
262 {
263 int len;
264 char *val; /* Not necessarily null-terminated */
265 } string; /* String primitive type */
266
267 struct
268 {
269 int nElems;
270 JsonbValue *elems;
271 bool rawScalar; /* Top-level "raw scalar" array? */
272 } array; /* Array container type */
273
274 struct
275 {
276 int nPairs; /* 1 pair, 2 elements */
277 JsonbPair *pairs;
278 } object; /* Associative container type */
279
280 struct
281 {
282 int len;
283 JsonbContainer *data;
284 } binary; /* Array or object, in on-disk format */
285 } val;
286};
287
288#define IsAJsonbScalar(jsonbval) ((jsonbval)->type >= jbvNull && \
289 (jsonbval)->type <= jbvBool)
290
291/*
292 * Key/value pair within an Object.
293 *
294 * This struct type is only used briefly while constructing a Jsonb; it is
295 * *not* the on-disk representation.
296 *
297 * Pairs with duplicate keys are de-duplicated. We store the originally
298 * observed pair ordering for the purpose of removing duplicates in a
299 * well-defined way (which is "last observed wins").
300 */
301struct JsonbPair
302{
303 JsonbValue key; /* Must be a jbvString */
304 JsonbValue value; /* May be of any type */
305 uint32 order; /* Pair's index in original sequence */
306};
307
308/* Conversion state used when parsing Jsonb from text, or for type coercion */
309typedef struct JsonbParseState
310{
311 JsonbValue contVal;
312 Size size;
313 struct JsonbParseState *next;
314} JsonbParseState;
315
316/*
317 * JsonbIterator holds details of the type for each iteration. It also stores a
318 * Jsonb varlena buffer, which can be directly accessed in some contexts.
319 */
320typedef enum
321{
322 JBI_ARRAY_START,
323 JBI_ARRAY_ELEM,
324 JBI_OBJECT_START,
325 JBI_OBJECT_KEY,
326 JBI_OBJECT_VALUE
327} JsonbIterState;
328
329typedef struct JsonbIterator
330{
331 /* Container being iterated */
332 JsonbContainer *container;
333 uint32 nElems; /* Number of elements in children array (will
334 * be nPairs for objects) */
335 bool isScalar; /* Pseudo-array scalar value? */
336 JEntry *children; /* JEntrys for child nodes */
337 /* Data proper. This points to the beginning of the variable-length data */
338 char *dataProper;
339
340 /* Current item in buffer (up to nElems) */
341 int curIndex;
342
343 /* Data offset corresponding to current item */
344 uint32 curDataOffset;
345
346 /*
347 * If the container is an object, we want to return keys and values
348 * alternately; so curDataOffset points to the current key, and
349 * curValueOffset points to the current value.
350 */
351 uint32 curValueOffset;
352
353 /* Private state */
354 JsonbIterState state;
355
356 struct JsonbIterator *parent;
357} JsonbIterator;
358
359
360/* Support functions */
361extern uint32 getJsonbOffset(const JsonbContainer *jc, int index);
362extern uint32 getJsonbLength(const JsonbContainer *jc, int index);
363extern int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b);
364extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *sheader,
365 uint32 flags,
366 JsonbValue *key);
367extern JsonbValue *getIthJsonbValueFromContainer(JsonbContainer *sheader,
368 uint32 i);
369extern JsonbValue *pushJsonbValue(JsonbParseState **pstate,
370 JsonbIteratorToken seq, JsonbValue *jbVal);
371extern JsonbIterator *JsonbIteratorInit(JsonbContainer *container);
372extern JsonbIteratorToken JsonbIteratorNext(JsonbIterator **it, JsonbValue *val,
373 bool skipNested);
374extern Jsonb *JsonbValueToJsonb(JsonbValue *val);
375extern bool JsonbDeepContains(JsonbIterator **val,
376 JsonbIterator **mContained);
377extern void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash);
378extern void JsonbHashScalarValueExtended(const JsonbValue *scalarVal,
379 uint64 *hash, uint64 seed);
380
381/* jsonb.c support functions */
382extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
383 int estimated_len);
384extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in,
385 int estimated_len);
386extern bool JsonbExtractScalar(JsonbContainer *jbc, JsonbValue *res);
387extern const char *JsonbTypeName(JsonbValue *jb);
388
389
390#endif /* __JSONB_H__ */
391