json.c source code [PostgreSQL/src/backend/utils/adt/json.c]

/*-------------------------------------------------------------------------
 *
 * json.c
 *		JSON data type support.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/utils/adt/json.c
 *
 *-------------------------------------------------------------------------
 */
14	#include "postgres.h"
15
16	#include "access/htup_details.h"
17	#include "access/transam.h"
18	#include "catalog/pg_type.h"
19	#include "executor/spi.h"
20	#include "funcapi.h"
21	#include "lib/stringinfo.h"
22	#include "libpq/pqformat.h"
23	#include "mb/pg_wchar.h"
24	#include "miscadmin.h"
25	#include "parser/parse_coerce.h"
26	#include "utils/array.h"
27	#include "utils/builtins.h"
28	#include "utils/date.h"
29	#include "utils/datetime.h"
30	#include "utils/lsyscache.h"
31	#include "utils/json.h"
32	#include "utils/jsonapi.h"
33	#include "utils/typcache.h"
34	#include "utils/syscache.h"
35
/*
 * The context of the parser is maintained by the recursive descent
 * mechanism, but is passed explicitly to the error reporting routine
 * for better diagnostics.
 *
 * Each value names what the parser was expecting at the point where an
 * unexpected token was seen.
 */
typedef enum					/* contexts of JSON parser */
{
	JSON_PARSE_VALUE,			/* expecting a value */
	JSON_PARSE_STRING,			/* expecting a string (for a field name) */
	JSON_PARSE_ARRAY_START,		/* saw '[', expecting value or ']' */
	JSON_PARSE_ARRAY_NEXT,		/* saw array element, expecting ',' or ']' */
	JSON_PARSE_OBJECT_START,	/* saw '{', expecting label or '}' */
	JSON_PARSE_OBJECT_LABEL,	/* saw object label, expecting ':' */
	JSON_PARSE_OBJECT_NEXT,		/* saw object value, expecting ',' or '}' */
	JSON_PARSE_OBJECT_COMMA,	/* saw object ',', expecting next label */
	JSON_PARSE_END				/* saw the end of a document, expect nothing */
} JsonParseContext;
53
/*
 * Classification of a datum's type, used to pick how datum_to_json
 * renders it.
 */
typedef enum					/* type categories for datum_to_json */
{
	JSONTYPE_NULL,				/* null, so we didn't bother to identify */
	JSONTYPE_BOOL,				/* boolean (built-in types only) */
	JSONTYPE_NUMERIC,			/* numeric (ditto) */
	JSONTYPE_DATE,				/* we use special formatting for datetimes */
	JSONTYPE_TIMESTAMP,
	JSONTYPE_TIMESTAMPTZ,
	JSONTYPE_JSON,				/* JSON itself (and JSONB) */
	JSONTYPE_ARRAY,				/* array */
	JSONTYPE_COMPOSITE,			/* composite */
	JSONTYPE_CAST,				/* something with an explicit cast to JSON */
	JSONTYPE_OTHER				/* all else */
} JsonTypeCategory;
68
69	typedef struct JsonAggState
70	{
71	StringInfo str;
72	JsonTypeCategory key_category;
73	Oid key_output_func;
74	JsonTypeCategory val_category;
75	Oid val_output_func;
76	} JsonAggState;
77
78	static inline void json_lex(JsonLexContext *lex);
79	static inline void json_lex_string(JsonLexContext *lex);
80	static inline void json_lex_number(JsonLexContext lex, char* *s,
81	bool num_err, int* *total_len);
82	static inline void parse_scalar(JsonLexContext lex, JsonSemAction sem);
83	static void parse_object_field(JsonLexContext lex, JsonSemAction sem);
84	static void parse_object(JsonLexContext lex, JsonSemAction sem);
85	static void parse_array_element(JsonLexContext lex, JsonSemAction sem);
86	static void parse_array(JsonLexContext lex, JsonSemAction sem);
87	static void report_parse_error(JsonParseContext ctx, JsonLexContext *lex) pg_attribute_noreturn();
88	static void report_invalid_token(JsonLexContext *lex) pg_attribute_noreturn();
89	static int report_json_context(JsonLexContext *lex);
90	static char extract_mb_char(char* *s);
91	static void composite_to_json(Datum composite, StringInfo result,
92	bool use_line_feeds);
93	static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
94	Datum vals, bool nulls, int *valcount,
95	JsonTypeCategory tcategory, Oid outfuncoid,
96	bool use_line_feeds);
97	static void array_to_json_internal(Datum array, StringInfo result,
98	bool use_line_feeds);
99	static void json_categorize_type(Oid typoid,
100	JsonTypeCategory *tcategory,
101	Oid *outfuncoid);
102	static void datum_to_json(Datum val, bool is_null, StringInfo result,
103	JsonTypeCategory tcategory, Oid outfuncoid,
104	bool key_scalar);
105	static void add_json(Datum val, bool is_null, StringInfo result,
106	Oid val_type, bool key_scalar);
107	static text catenate_stringinfo_string(StringInfo buffer, const* char *addon);
108
109	/ the null action object used for pure validation /
110	static JsonSemAction nullSemAction =
111	{
112	NULL, NULL, NULL, NULL, NULL,
113	NULL, NULL, NULL, NULL, NULL
114	};
115
/* Recursive Descent parser support routines */
117
118	/*
119	* lex_peek
120	*
121	* what is the current look_ahead token?
122	*/
123	static inline JsonTokenType
124	lex_peek(JsonLexContext *lex)
125	{
126	return lex->token_type;
127	}
128
129	/*
130	* lex_accept
131	*
132	* accept the look_ahead token and move the lexer to the next token if the
133	* look_ahead token matches the token parameter. In that case, and if required,
134	* also hand back the de-escaped lexeme.
135	*
136	* returns true if the token matched, false otherwise.
137	*/
138	static inline bool
139	lex_accept(JsonLexContext lex, JsonTokenType token, char* **lexeme)
140	{
141	if (lex->token_type == token)
142	{
143	if (lexeme != NULL)
144	{
145	if (lex->token_type == JSON_TOKEN_STRING)
146	{
147	if (lex->strval != NULL)
148	*lexeme = pstrdup(lex->strval->data);
149	}
150	else
151	{
152	int len = (lex->token_terminator - lex->token_start);
153	char *tokstr = palloc(len + `1`);
154
155	memcpy(tokstr, lex->token_start, len);
156	tokstr[len] = `'\0'`;
157	*lexeme = tokstr;
158	}
159	}
160	json_lex(lex);
161	return true;
162	}
163	return false;
164	}
165
166	/*
167	* lex_accept
168	*
169	* move the lexer to the next token if the current look_ahead token matches
170	* the parameter token. Otherwise, report an error.
171	*/
172	static inline void
173	lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
174	{
175	if (!lex_accept(lex, token, NULL))
176	report_parse_error(ctx, lex);
177	}
178
/*
 * chars to consider as part of an alphanumeric token
 *
 * True for ASCII letters, ASCII digits, underscore, and any byte for which
 * IS_HIGHBIT_SET() is true.  Note that the argument is evaluated several
 * times, so it must not have side effects.
 */
#define JSON_ALPHANUMERIC_CHAR(c)  \
	(((c) >= 'a' && (c) <= 'z') || \
	 ((c) >= 'A' && (c) <= 'Z') || \
	 ((c) >= '0' && (c) <= '9') || \
	 (c) == '_' || \
	 IS_HIGHBIT_SET(c))
186
187	/*
188	* Utility function to check if a string is a valid JSON number.
189	*
190	* str is of length len, and need not be null-terminated.
191	*/
192	bool
193	IsValidJsonNumber(const char str, int* len)
194	{
195	bool numeric_error;
196	int total_len;
197	JsonLexContext dummy_lex;
198
199	if (len <= `0`)
200	return false;
201
202	/*
203	* json_lex_number expects a leading '-' to have been eaten already.
204	*
205	* having to cast away the constness of str is ugly, but there's not much
206	* easy alternative.
207	*/
208	if (*str == `'-'`)
209	{
210	dummy_lex.input = unconstify(char *, str) +`1`;
211	dummy_lex.input_length = len - `1`;
212	}
213	else
214	{
215	dummy_lex.input = unconstify(char *, str);
216	dummy_lex.input_length = len;
217	}
218
219	json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len);
220
221	return (!numeric_error) && (total_len == dummy_lex.input_length);
222	}
223
224	/*
225	* Input.
226	*/
227	Datum
228	json_in(PG_FUNCTION_ARGS)
229	{
230	char *json = PG_GETARG_CSTRING(`0`);
231	text *result = cstring_to_text(json);
232	JsonLexContext *lex;
233
234	/ validate it /
235	lex = makeJsonLexContext(result, false);
236	pg_parse_json(lex, &nullSemAction);
237
238	/ Internal representation is the same as text, for now /
239	PG_RETURN_TEXT_P(result);
240	}
241
242	/*
243	* Output.
244	*/
245	Datum
246	json_out(PG_FUNCTION_ARGS)
247	{
248	/ we needn't detoast because text_to_cstring will handle that /
249	Datum txt = PG_GETARG_DATUM(`0`);
250
251	PG_RETURN_CSTRING(TextDatumGetCString(txt));
252	}
253
254	/*
255	* Binary send.
256	*/
257	Datum
258	json_send(PG_FUNCTION_ARGS)
259	{
260	text *t = PG_GETARG_TEXT_PP(`0`);
261	StringInfoData buf;
262
263	pq_begintypsend(&buf);
264	pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
265	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
266	}
267
268	/*
269	* Binary receive.
270	*/
271	Datum
272	json_recv(PG_FUNCTION_ARGS)
273	{
274	StringInfo buf = (StringInfo) PG_GETARG_POINTER(`0`);
275	char *str;
276	int nbytes;
277	JsonLexContext *lex;
278
279	str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
280
281	/ Validate it. /
282	lex = makeJsonLexContextCstringLen(str, nbytes, false);
283	pg_parse_json(lex, &nullSemAction);
284
285	PG_RETURN_TEXT_P(cstring_to_text_with_len(str, nbytes));
286	}
287
288	/*
289	* makeJsonLexContext
290	*
291	* lex constructor, with or without StringInfo object
292	* for de-escaped lexemes.
293	*
294	* Without is better as it makes the processing faster, so only make one
295	* if really required.
296	*
297	* If you already have the json as a text* value, use the first of these
298	* functions, otherwise use makeJsonLexContextCstringLen().
299	*/
300	JsonLexContext *
301	makeJsonLexContext(text *json, bool need_escapes)
302	{
303	return makeJsonLexContextCstringLen(VARDATA_ANY(json),
304	VARSIZE_ANY_EXHDR(json),
305	need_escapes);
306	}
307
308	JsonLexContext *
309	makeJsonLexContextCstringLen(char json, int* len, bool need_escapes)
310	{
311	JsonLexContext lex = palloc0(sizeof*(JsonLexContext));
312
313	lex->input = lex->token_terminator = lex->line_start = json;
314	lex->line_number = `1`;
315	lex->input_length = len;
316	if (need_escapes)
317	lex->strval = makeStringInfo();
318	return lex;
319	}
320
321	/*
322	* pg_parse_json
323	*
324	* Publicly visible entry point for the JSON parser.
325	*
326	* lex is a lexing context, set up for the json to be processed by calling
327	* makeJsonLexContext(). sem is a structure of function pointers to semantic
328	* action routines to be called at appropriate spots during parsing, and a
329	* pointer to a state object to be passed to those routines.
330	*/
331	void
332	pg_parse_json(JsonLexContext lex, JsonSemAction sem)
333	{
334	JsonTokenType tok;
335
336	/ get the initial token /
337	json_lex(lex);
338
339	tok = lex_peek(lex);
340
341	/ parse by recursive descent /
342	switch (tok)
343	{
344	case JSON_TOKEN_OBJECT_START:
345	parse_object(lex, sem);
346	break;
347	case JSON_TOKEN_ARRAY_START:
348	parse_array(lex, sem);
349	break;
350	default:
351	parse_scalar(lex, sem); / json can be a bare scalar /
352	}
353
354	lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
355
356	}
357
358	/*
359	* json_count_array_elements
360	*
361	* Returns number of array elements in lex context at start of array token
362	* until end of array token at same nesting level.
363	*
364	* Designed to be called from array_start routines.
365	*/
366	int
367	json_count_array_elements(JsonLexContext *lex)
368	{
369	JsonLexContext copylex;
370	int count;
371
372	/*
373	* It's safe to do this with a shallow copy because the lexical routines
374	* don't scribble on the input. They do scribble on the other pointers
375	* etc, so doing this with a copy makes that safe.
376	*/
377	memcpy(&copylex, lex, sizeof(JsonLexContext));
378	copylex.strval = NULL; / not interested in values here /
379	copylex.lex_level++;
380
381	count = `0`;
382	lex_expect(JSON_PARSE_ARRAY_START, &copylex, JSON_TOKEN_ARRAY_START);
383	if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
384	{
385	do
386	{
387	count++;
388	parse_array_element(&copylex, &nullSemAction);
389	}
390	while (lex_accept(&copylex, JSON_TOKEN_COMMA, NULL));
391	}
392	lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex, JSON_TOKEN_ARRAY_END);
393
394	return count;
395	}
396
397	/*
398	* Recursive Descent parse routines. There is one for each structural
399	* element in a json document:
400	* - scalar (string, number, true, false, null)
401	* - array ( [ ] )
402	* - array element
403	* - object ( { } )
404	* - object field
405	*/
406	static inline void
407	parse_scalar(JsonLexContext lex, JsonSemAction sem)
408	{
409	char *val = NULL;
410	json_scalar_action sfunc = sem->scalar;
411	char **valaddr;
412	JsonTokenType tok = lex_peek(lex);
413
414	valaddr = sfunc == NULL ? NULL : &val;
415
416	/ a scalar must be a string, a number, true, false, or null /
417	switch (tok)
418	{
419	case JSON_TOKEN_TRUE:
420	lex_accept(lex, JSON_TOKEN_TRUE, valaddr);
421	break;
422	case JSON_TOKEN_FALSE:
423	lex_accept(lex, JSON_TOKEN_FALSE, valaddr);
424	break;
425	case JSON_TOKEN_NULL:
426	lex_accept(lex, JSON_TOKEN_NULL, valaddr);
427	break;
428	case JSON_TOKEN_NUMBER:
429	lex_accept(lex, JSON_TOKEN_NUMBER, valaddr);
430	break;
431	case JSON_TOKEN_STRING:
432	lex_accept(lex, JSON_TOKEN_STRING, valaddr);
433	break;
434	default:
435	report_parse_error(JSON_PARSE_VALUE, lex);
436	}
437
438	if (sfunc != NULL)
439	(*sfunc) (sem->semstate, val, tok);
440	}
441
442	static void
443	parse_object_field(JsonLexContext lex, JsonSemAction sem)
444	{
445	/*
446	* An object field is "fieldname" : value where value can be a scalar,
447	* object or array. Note: in user-facing docs and error messages, we
448	* generally call a field name a "key".
449	*/
450
451	char fname = NULL; /* keep compiler quiet /
452	json_ofield_action ostart = sem->object_field_start;
453	json_ofield_action oend = sem->object_field_end;
454	bool isnull;
455	char **fnameaddr = NULL;
456	JsonTokenType tok;
457
458	if (ostart != NULL \|\| oend != NULL)
459	fnameaddr = &fname;
460
461	if (!lex_accept(lex, JSON_TOKEN_STRING, fnameaddr))
462	report_parse_error(JSON_PARSE_STRING, lex);
463
464	lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
465
466	tok = lex_peek(lex);
467	isnull = tok == JSON_TOKEN_NULL;
468
469	if (ostart != NULL)
470	(*ostart) (sem->semstate, fname, isnull);
471
472	switch (tok)
473	{
474	case JSON_TOKEN_OBJECT_START:
475	parse_object(lex, sem);
476	break;
477	case JSON_TOKEN_ARRAY_START:
478	parse_array(lex, sem);
479	break;
480	default:
481	parse_scalar(lex, sem);
482	}
483
484	if (oend != NULL)
485	(*oend) (sem->semstate, fname, isnull);
486	}
487
488	static void
489	parse_object(JsonLexContext lex, JsonSemAction sem)
490	{
491	/*
492	* an object is a possibly empty sequence of object fields, separated by
493	* commas and surrounded by curly braces.
494	*/
495	json_struct_action ostart = sem->object_start;
496	json_struct_action oend = sem->object_end;
497	JsonTokenType tok;
498
499	check_stack_depth();
500
501	if (ostart != NULL)
502	(*ostart) (sem->semstate);
503
504	/*
505	* Data inside an object is at a higher nesting level than the object
506	* itself. Note that we increment this after we call the semantic routine
507	* for the object start and restore it before we call the routine for the
508	* object end.
509	*/
510	lex->lex_level++;
511
512	/ we know this will succeed, just clearing the token /
513	lex_expect(JSON_PARSE_OBJECT_START, lex, JSON_TOKEN_OBJECT_START);
514
515	tok = lex_peek(lex);
516	switch (tok)
517	{
518	case JSON_TOKEN_STRING:
519	parse_object_field(lex, sem);
520	while (lex_accept(lex, JSON_TOKEN_COMMA, NULL))
521	parse_object_field(lex, sem);
522	break;
523	case JSON_TOKEN_OBJECT_END:
524	break;
525	default:
526	/ case of an invalid initial token inside the object /
527	report_parse_error(JSON_PARSE_OBJECT_START, lex);
528	}
529
530	lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);
531
532	lex->lex_level--;
533
534	if (oend != NULL)
535	(*oend) (sem->semstate);
536	}
537
538	static void
539	parse_array_element(JsonLexContext lex, JsonSemAction sem)
540	{
541	json_aelem_action astart = sem->array_element_start;
542	json_aelem_action aend = sem->array_element_end;
543	JsonTokenType tok = lex_peek(lex);
544
545	bool isnull;
546
547	isnull = tok == JSON_TOKEN_NULL;
548
549	if (astart != NULL)
550	(*astart) (sem->semstate, isnull);
551
552	/ an array element is any object, array or scalar /
553	switch (tok)
554	{
555	case JSON_TOKEN_OBJECT_START:
556	parse_object(lex, sem);
557	break;
558	case JSON_TOKEN_ARRAY_START:
559	parse_array(lex, sem);
560	break;
561	default:
562	parse_scalar(lex, sem);
563	}
564
565	if (aend != NULL)
566	(*aend) (sem->semstate, isnull);
567	}
568
569	static void
570	parse_array(JsonLexContext lex, JsonSemAction sem)
571	{
572	/*
573	* an array is a possibly empty sequence of array elements, separated by
574	* commas and surrounded by square brackets.
575	*/
576	json_struct_action astart = sem->array_start;
577	json_struct_action aend = sem->array_end;
578
579	check_stack_depth();
580
581	if (astart != NULL)
582	(*astart) (sem->semstate);
583
584	/*
585	* Data inside an array is at a higher nesting level than the array
586	* itself. Note that we increment this after we call the semantic routine
587	* for the array start and restore it before we call the routine for the
588	* array end.
589	*/
590	lex->lex_level++;
591
592	lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
593	if (lex_peek(lex) != JSON_TOKEN_ARRAY_END)
594	{
595
596	parse_array_element(lex, sem);
597
598	while (lex_accept(lex, JSON_TOKEN_COMMA, NULL))
599	parse_array_element(lex, sem);
600	}
601
602	lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);
603
604	lex->lex_level--;
605
606	if (aend != NULL)
607	(*aend) (sem->semstate);
608	}
609
610	/*
611	* Lex one token from the input stream.
612	*/
613	static inline void
614	json_lex(JsonLexContext *lex)
615	{
616	char *s;
617	int len;
618
619	/ Skip leading whitespace. /
620	s = lex->token_terminator;
621	len = s - lex->input;
622	while (len < lex->input_length &&
623	(s == `' '` \|\| s == `'\t'` \|\| s == `'\n'` \|\| s == `'\r'`))
624	{
625	if (*s == `'\n'`)
626	++lex->line_number;
627	++s;
628	++len;
629	}
630	lex->token_start = s;
631
632	/ Determine token type. /
633	if (len >= lex->input_length)
634	{
635	lex->token_start = NULL;
636	lex->prev_token_terminator = lex->token_terminator;
637	lex->token_terminator = s;
638	lex->token_type = JSON_TOKEN_END;
639	}
640	else
641	switch (*s)
642	{
643	/ Single-character token, some kind of punctuation mark. /
644	case `'{'`:
645	lex->prev_token_terminator = lex->token_terminator;
646	lex->token_terminator = s + `1`;
647	lex->token_type = JSON_TOKEN_OBJECT_START;
648	break;
649	case `'}'`:
650	lex->prev_token_terminator = lex->token_terminator;
651	lex->token_terminator = s + `1`;
652	lex->token_type = JSON_TOKEN_OBJECT_END;
653	break;
654	case `'['`:
655	lex->prev_token_terminator = lex->token_terminator;
656	lex->token_terminator = s + `1`;
657	lex->token_type = JSON_TOKEN_ARRAY_START;
658	break;
659	case `']'`:
660	lex->prev_token_terminator = lex->token_terminator;
661	lex->token_terminator = s + `1`;
662	lex->token_type = JSON_TOKEN_ARRAY_END;
663	break;
664	case `','`:
665	lex->prev_token_terminator = lex->token_terminator;
666	lex->token_terminator = s + `1`;
667	lex->token_type = JSON_TOKEN_COMMA;
668	break;
669	case `':'`:
670	lex->prev_token_terminator = lex->token_terminator;
671	lex->token_terminator = s + `1`;
672	lex->token_type = JSON_TOKEN_COLON;
673	break;
674	case `'"'`:
675	/ string /
676	json_lex_string(lex);
677	lex->token_type = JSON_TOKEN_STRING;
678	break;
679	case `'-'`:
680	/ Negative number. /
681	json_lex_number(lex, s + `1`, NULL, NULL);
682	lex->token_type = JSON_TOKEN_NUMBER;
683	break;
684	case `'0'`:
685	case `'1'`:
686	case `'2'`:
687	case `'3'`:
688	case `'4'`:
689	case `'5'`:
690	case `'6'`:
691	case `'7'`:
692	case `'8'`:
693	case `'9'`:
694	/ Positive number. /
695	json_lex_number(lex, s, NULL, NULL);
696	lex->token_type = JSON_TOKEN_NUMBER;
697	break;
698	default:
699	{
700	char *p;
701
702	/*
703	* We're not dealing with a string, number, legal
704	* punctuation mark, or end of string. The only legal
705	* tokens we might find here are true, false, and null,
706	* but for error reporting purposes we scan until we see a
707	* non-alphanumeric character. That way, we can report
708	* the whole word as an unexpected token, rather than just
709	* some unintuitive prefix thereof.
710	*/
711	for (p = s; p - s < lex->input_length - len && JSON_ALPHANUMERIC_CHAR(*p); p++)
712	/ skip / ;
713
714	/*
715	* We got some sort of unexpected punctuation or an
716	* otherwise unexpected character, so just complain about
717	* that one character.
718	*/
719	if (p == s)
720	{
721	lex->prev_token_terminator = lex->token_terminator;
722	lex->token_terminator = s + `1`;
723	report_invalid_token(lex);
724	}
725
726	/*
727	* We've got a real alphanumeric token here. If it
728	* happens to be true, false, or null, all is well. If
729	* not, error out.
730	*/
731	lex->prev_token_terminator = lex->token_terminator;
732	lex->token_terminator = p;
733	if (p - s == `4`)
734	{
735	if (memcmp(s, "true", `4`) == `0`)
736	lex->token_type = JSON_TOKEN_TRUE;
737	else if (memcmp(s, "null", `4`) == `0`)
738	lex->token_type = JSON_TOKEN_NULL;
739	else
740	report_invalid_token(lex);
741	}
742	else if (p - s == `5` && memcmp(s, "false", `5`) == `0`)
743	lex->token_type = JSON_TOKEN_FALSE;
744	else
745	report_invalid_token(lex);
746
747	}
748	} / end of switch /
749	}
750
751	/*
752	* The next token in the input stream is known to be a string; lex it.
753	*/
754	static inline void
755	json_lex_string(JsonLexContext *lex)
756	{
757	char *s;
758	int len;
759	int hi_surrogate = -`1`;
760
761	if (lex->strval != NULL)
762	resetStringInfo(lex->strval);
763
764	Assert(lex->input_length > `0`);
765	s = lex->token_start;
766	len = lex->token_start - lex->input;
767	for (;;)
768	{
769	s++;
770	len++;
771	/ Premature end of the string. /
772	if (len >= lex->input_length)
773	{
774	lex->token_terminator = s;
775	report_invalid_token(lex);
776	}
777	else if (*s == `'"'`)
778	break;
779	else if ((unsigned char) *s < `32`)
780	{
781	/ Per RFC4627, these characters MUST be escaped. /
782	/ Since s isn't printable, exclude it from the context string /*
783	lex->token_terminator = s;
784	ereport(ERROR,
785	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
786	errmsg("invalid input syntax for type %s", "json"),
787	errdetail("Character with value 0x%02x must be escaped.",
788	(unsigned char) *s),
789	report_json_context(lex)));
790	}
791	else if (*s == `'\\'`)
792	{
793	/ OK, we have an escape character. /
794	s++;
795	len++;
796	if (len >= lex->input_length)
797	{
798	lex->token_terminator = s;
799	report_invalid_token(lex);
800	}
801	else if (*s == `'u'`)
802	{
803	int i;
804	int ch = `0`;
805
806	for (i = `1`; i <= `4`; i++)
807	{
808	s++;
809	len++;
810	if (len >= lex->input_length)
811	{
812	lex->token_terminator = s;
813	report_invalid_token(lex);
814	}
815	else if (s >= `'0'` && s <= `'9'`)
816	ch = (ch * `16`) + (*s - `'0'`);
817	else if (s >= `'a'` && s <= `'f'`)
818	ch = (ch * `16`) + (*s - `'a'`) + `10`;
819	else if (s >= `'A'` && s <= `'F'`)
820	ch = (ch * `16`) + (*s - `'A'`) + `10`;
821	else
822	{
823	lex->token_terminator = s + pg_mblen(s);
824	ereport(ERROR,
825	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
826	errmsg("invalid input syntax for type %s",
827	"json"),
828	errdetail("\"\\u\" must be followed by four hexadecimal digits."),
829	report_json_context(lex)));
830	}
831	}
832	if (lex->strval != NULL)
833	{
834	char utf8str[`5`];
835	int utf8len;
836
837	if (ch >= `0xd800` && ch <= `0xdbff`)
838	{
839	if (hi_surrogate != -`1`)
840	ereport(ERROR,
841	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
842	errmsg("invalid input syntax for type %s",
843	"json"),
844	errdetail("Unicode high surrogate must not follow a high surrogate."),
845	report_json_context(lex)));
846	hi_surrogate = (ch & `0x3ff`) << `10`;
847	continue;
848	}
849	else if (ch >= `0xdc00` && ch <= `0xdfff`)
850	{
851	if (hi_surrogate == -`1`)
852	ereport(ERROR,
853	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
854	errmsg("invalid input syntax for type %s", "json"),
855	errdetail("Unicode low surrogate must follow a high surrogate."),
856	report_json_context(lex)));
857	ch = `0x10000` + hi_surrogate + (ch & `0x3ff`);
858	hi_surrogate = -`1`;
859	}
860
861	if (hi_surrogate != -`1`)
862	ereport(ERROR,
863	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
864	errmsg("invalid input syntax for type %s", "json"),
865	errdetail("Unicode low surrogate must follow a high surrogate."),
866	report_json_context(lex)));
867
868	/*
869	* For UTF8, replace the escape sequence by the actual
870	* utf8 character in lex->strval. Do this also for other
871	* encodings if the escape designates an ASCII character,
872	* otherwise raise an error.
873	*/
874
875	if (ch == `0`)
876	{
877	/ We can't allow this, since our TEXT type doesn't /
878	ereport(ERROR,
879	(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
880	errmsg("unsupported Unicode escape sequence"),
881	errdetail("\\u0000 cannot be converted to text."),
882	report_json_context(lex)));
883	}
884	else if (GetDatabaseEncoding() == PG_UTF8)
885	{
886	unicode_to_utf8(ch, (unsigned char *) utf8str);
887	utf8len = pg_utf_mblen((unsigned char *) utf8str);
888	appendBinaryStringInfo(lex->strval, utf8str, utf8len);
889	}
890	else if (ch <= `0x007f`)
891	{
892	/*
893	* This is the only way to designate things like a
894	* form feed character in JSON, so it's useful in all
895	* encodings.
896	*/
897	appendStringInfoChar(lex->strval, (char) ch);
898	}
899	else
900	{
901	ereport(ERROR,
902	(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
903	errmsg("unsupported Unicode escape sequence"),
904	errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."),
905	report_json_context(lex)));
906	}
907
908	}
909	}
910	else if (lex->strval != NULL)
911	{
912	if (hi_surrogate != -`1`)
913	ereport(ERROR,
914	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
915	errmsg("invalid input syntax for type %s",
916	"json"),
917	errdetail("Unicode low surrogate must follow a high surrogate."),
918	report_json_context(lex)));
919
920	switch (*s)
921	{
922	case `'"'`:
923	case `'\\'`:
924	case `'/'`:
925	appendStringInfoChar(lex->strval, *s);
926	break;
927	case `'b'`:
928	appendStringInfoChar(lex->strval, `'\b'`);
929	break;
930	case `'f'`:
931	appendStringInfoChar(lex->strval, `'\f'`);
932	break;
933	case `'n'`:
934	appendStringInfoChar(lex->strval, `'\n'`);
935	break;
936	case `'r'`:
937	appendStringInfoChar(lex->strval, `'\r'`);
938	break;
939	case `'t'`:
940	appendStringInfoChar(lex->strval, `'\t'`);
941	break;
942	default:
943	/ Not a valid string escape, so error out. /
944	lex->token_terminator = s + pg_mblen(s);
945	ereport(ERROR,
946	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
947	errmsg("invalid input syntax for type %s",
948	"json"),
949	errdetail("Escape sequence \"\\%s\" is invalid.",
950	extract_mb_char(s)),
951	report_json_context(lex)));
952	}
953	}
954	else if (strchr("\"\\/bfnrt", *s) == NULL)
955	{
956	/*
957	* Simpler processing if we're not bothered about de-escaping
958	*
959	* It's very tempting to remove the strchr() call here and
960	* replace it with a switch statement, but testing so far has
961	* shown it's not a performance win.
962	*/
963	lex->token_terminator = s + pg_mblen(s);
964	ereport(ERROR,
965	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
966	errmsg("invalid input syntax for type %s", "json"),
967	errdetail("Escape sequence \"\\%s\" is invalid.",
968	extract_mb_char(s)),
969	report_json_context(lex)));
970	}
971
972	}
973	else if (lex->strval != NULL)
974	{
975	if (hi_surrogate != -`1`)
976	ereport(ERROR,
977	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
978	errmsg("invalid input syntax for type %s", "json"),
979	errdetail("Unicode low surrogate must follow a high surrogate."),
980	report_json_context(lex)));
981
982	appendStringInfoChar(lex->strval, *s);
983	}
984
985	}
986
987	if (hi_surrogate != -`1`)
988	ereport(ERROR,
989	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
990	errmsg("invalid input syntax for type %s", "json"),
991	errdetail("Unicode low surrogate must follow a high surrogate."),
992	report_json_context(lex)));
993
994	/ Hooray, we found the end of the string! /
995	lex->prev_token_terminator = lex->token_terminator;
996	lex->token_terminator = s + `1`;
997	}
998
999	/*
1000	* The next token in the input stream is known to be a number; lex it.
1001	*
1002	* In JSON, a number consists of four parts:
1003	*
1004	* (1) An optional minus sign ('-').
1005	*
1006	* (2) Either a single '0', or a string of one or more digits that does not
1007	* begin with a '0'.
1008	*
1009	* (3) An optional decimal part, consisting of a period ('.') followed by
1010	* one or more digits. (Note: While this part can be omitted
1011	* completely, it's not OK to have only the decimal point without
1012	* any digits afterwards.)
1013	*
1014	* (4) An optional exponent part, consisting of 'e' or 'E', optionally
1015	* followed by '+' or '-', followed by one or more digits. (Note:
1016	* As with the decimal part, if 'e' or 'E' is present, it must be
1017	* followed by at least one digit.)
1018	*
1019	* The 's' argument to this function points to the ostensible beginning
1020	* of part 2 - i.e. the character after any optional minus sign, or the
1021	* first character of the string if there is none.
1022	*
1023	* If num_err is not NULL, we return an error flag to *num_err rather than
1024	* raising an error for a badly-formed number. Also, if total_len is not NULL
1025	* the distance from lex->input to the token end+1 is returned to *total_len.
1026	*/
1027	static inline void
1028	json_lex_number(JsonLexContext lex, char* *s,
1029	bool num_err, int* *total_len)
1030	{
1031	bool error = false;
1032	int len = s - lex->input;
1033
1034	/ Part (1): leading sign indicator. /
1035	/ Caller already did this for us; so do nothing. /
1036
1037	/ Part (2): parse main digit string. /
1038	if (len < lex->input_length && *s == `'0'`)
1039	{
1040	s++;
1041	len++;
1042	}
1043	else if (len < lex->input_length && s >= `'1'` && s <= `'9'`)
1044	{
1045	do
1046	{
1047	s++;
1048	len++;
1049	} while (len < lex->input_length && s >= `'0'` && s <= `'9'`);
1050	}
1051	else
1052	error = true;
1053
1054	/ Part (3): parse optional decimal portion. /
1055	if (len < lex->input_length && *s == `'.'`)
1056	{
1057	s++;
1058	len++;
1059	if (len == lex->input_length \|\| s < `'0'` \|\| s > `'9'`)
1060	error = true;
1061	else
1062	{
1063	do
1064	{
1065	s++;
1066	len++;
1067	} while (len < lex->input_length && s >= `'0'` && s <= `'9'`);
1068	}
1069	}
1070
1071	/ Part (4): parse optional exponent. /
1072	if (len < lex->input_length && (s == `'e'` \|\| s == `'E'`))
1073	{
1074	s++;
1075	len++;
1076	if (len < lex->input_length && (s == `'+'` \|\| s == `'-'`))
1077	{
1078	s++;
1079	len++;
1080	}
1081	if (len == lex->input_length \|\| s < `'0'` \|\| s > `'9'`)
1082	error = true;
1083	else
1084	{
1085	do
1086	{
1087	s++;
1088	len++;
1089	} while (len < lex->input_length && s >= `'0'` && s <= `'9'`);
1090	}
1091	}
1092
1093	/*
1094	* Check for trailing garbage. As in json_lex(), any alphanumeric stuff
1095	* here should be considered part of the token for error-reporting
1096	* purposes.
1097	*/
1098	for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++)
1099	error = true;
1100
1101	if (total_len != NULL)
1102	*total_len = len;
1103
1104	if (num_err != NULL)
1105	{
1106	/ let the caller handle any error /
1107	*num_err = error;
1108	}
1109	else
1110	{
1111	/ return token endpoint /
1112	lex->prev_token_terminator = lex->token_terminator;
1113	lex->token_terminator = s;
1114	/ handle error if any /
1115	if (error)
1116	report_invalid_token(lex);
1117	}
1118	}
1119
1120	/*
1121	* Report a parse error.
1122	*
1123	* lex->token_start and lex->token_terminator must identify the current token.
1124	*/
1125	static void
1126	report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
1127	{
1128	char *token;
1129	int toklen;
1130
1131	/ Handle case where the input ended prematurely. /
1132	if (lex->token_start == NULL \|\| lex->token_type == JSON_TOKEN_END)
1133	ereport(ERROR,
1134	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1135	errmsg("invalid input syntax for type %s", "json"),
1136	errdetail("The input string ended unexpectedly."),
1137	report_json_context(lex)));
1138
1139	/ Separate out the current token. /
1140	toklen = lex->token_terminator - lex->token_start;
1141	token = palloc(toklen + `1`);
1142	memcpy(token, lex->token_start, toklen);
1143	token[toklen] = `'\0'`;
1144
1145	/ Complain, with the appropriate detail message. /
1146	if (ctx == JSON_PARSE_END)
1147	ereport(ERROR,
1148	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1149	errmsg("invalid input syntax for type %s", "json"),
1150	errdetail("Expected end of input, but found \"%s\".",
1151	token),
1152	report_json_context(lex)));
1153	else
1154	{
1155	switch (ctx)
1156	{
1157	case JSON_PARSE_VALUE:
1158	ereport(ERROR,
1159	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1160	errmsg("invalid input syntax for type %s", "json"),
1161	errdetail("Expected JSON value, but found \"%s\".",
1162	token),
1163	report_json_context(lex)));
1164	break;
1165	case JSON_PARSE_STRING:
1166	ereport(ERROR,
1167	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1168	errmsg("invalid input syntax for type %s", "json"),
1169	errdetail("Expected string, but found \"%s\".",
1170	token),
1171	report_json_context(lex)));
1172	break;
1173	case JSON_PARSE_ARRAY_START:
1174	ereport(ERROR,
1175	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1176	errmsg("invalid input syntax for type %s", "json"),
1177	errdetail("Expected array element or \"]\", but found \"%s\".",
1178	token),
1179	report_json_context(lex)));
1180	break;
1181	case JSON_PARSE_ARRAY_NEXT:
1182	ereport(ERROR,
1183	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1184	errmsg("invalid input syntax for type %s", "json"),
1185	errdetail("Expected \",\" or \"]\", but found \"%s\".",
1186	token),
1187	report_json_context(lex)));
1188	break;
1189	case JSON_PARSE_OBJECT_START:
1190	ereport(ERROR,
1191	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1192	errmsg("invalid input syntax for type %s", "json"),
1193	errdetail("Expected string or \"}\", but found \"%s\".",
1194	token),
1195	report_json_context(lex)));
1196	break;
1197	case JSON_PARSE_OBJECT_LABEL:
1198	ereport(ERROR,
1199	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1200	errmsg("invalid input syntax for type %s", "json"),
1201	errdetail("Expected \":\", but found \"%s\".",
1202	token),
1203	report_json_context(lex)));
1204	break;
1205	case JSON_PARSE_OBJECT_NEXT:
1206	ereport(ERROR,
1207	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1208	errmsg("invalid input syntax for type %s", "json"),
1209	errdetail("Expected \",\" or \"}\", but found \"%s\".",
1210	token),
1211	report_json_context(lex)));
1212	break;
1213	case JSON_PARSE_OBJECT_COMMA:
1214	ereport(ERROR,
1215	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1216	errmsg("invalid input syntax for type %s", "json"),
1217	errdetail("Expected string, but found \"%s\".",
1218	token),
1219	report_json_context(lex)));
1220	break;
1221	default:
1222	elog(ERROR, "unexpected json parse state: %d", ctx);
1223	}
1224	}
1225	}
1226
1227	/*
1228	* Report an invalid input token.
1229	*
1230	* lex->token_start and lex->token_terminator must identify the token.
1231	*/
1232	static void
1233	report_invalid_token(JsonLexContext *lex)
1234	{
1235	char *token;
1236	int toklen;
1237
1238	/ Separate out the offending token. /
1239	toklen = lex->token_terminator - lex->token_start;
1240	token = palloc(toklen + `1`);
1241	memcpy(token, lex->token_start, toklen);
1242	token[toklen] = `'\0'`;
1243
1244	ereport(ERROR,
1245	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1246	errmsg("invalid input syntax for type %s", "json"),
1247	errdetail("Token \"%s\" is invalid.", token),
1248	report_json_context(lex)));
1249	}
1250
1251	/*
1252	* Report a CONTEXT line for bogus JSON input.
1253	*
1254	* lex->token_terminator must be set to identify the spot where we detected
1255	* the error. Note that lex->token_start might be NULL, in case we recognized
1256	* error at EOF.
1257	*
1258	* The return value isn't meaningful, but we make it non-void so that this
1259	* can be invoked inside ereport().
1260	*/
1261	static int
1262	report_json_context(JsonLexContext *lex)
1263	{
1264	const char *context_start;
1265	const char *context_end;
1266	const char *line_start;
1267	int line_number;
1268	char *ctxt;
1269	int ctxtlen;
1270	const char *prefix;
1271	const char *suffix;
1272
1273	/ Choose boundaries for the part of the input we will display /
1274	context_start = lex->input;
1275	context_end = lex->token_terminator;
1276	line_start = context_start;
1277	line_number = `1`;
1278	for (;;)
1279	{
1280	/ Always advance over newlines /
1281	if (context_start < context_end && *context_start == `'\n'`)
1282	{
1283	context_start++;
1284	line_start = context_start;
1285	line_number++;
1286	continue;
1287	}
1288	/ Otherwise, done as soon as we are close enough to context_end /
1289	if (context_end - context_start < `50`)
1290	break;
1291	/ Advance to next multibyte character /
1292	if (IS_HIGHBIT_SET(*context_start))
1293	context_start += pg_mblen(context_start);
1294	else
1295	context_start++;
1296	}
1297
1298	/*
1299	* We add "..." to indicate that the excerpt doesn't start at the
1300	* beginning of the line ... but if we're within 3 characters of the
1301	* beginning of the line, we might as well just show the whole line.
1302	*/
1303	if (context_start - line_start <= `3`)
1304	context_start = line_start;
1305
1306	/ Get a null-terminated copy of the data to present /
1307	ctxtlen = context_end - context_start;
1308	ctxt = palloc(ctxtlen + `1`);
1309	memcpy(ctxt, context_start, ctxtlen);
1310	ctxt[ctxtlen] = `'\0'`;
1311
1312	/*
1313	* Show the context, prefixing "..." if not starting at start of line, and
1314	* suffixing "..." if not ending at end of line.
1315	*/
1316	prefix = (context_start > line_start) ? "..." : "";
1317	suffix = (lex->token_type != JSON_TOKEN_END && context_end - lex->input < lex->input_length && context_end != `'\n'` && context_end != `'\r'`) ? "..." : "";
1318
1319	return errcontext("JSON data, line %d: %s%s%s",
1320	line_number, prefix, ctxt, suffix);
1321	}
1322
/*
 * Extract a single, possibly multi-byte char from the input string.
 *
 * Returns a freshly palloc'd, null-terminated copy of the first character
 * of "s", whose byte length is given by pg_mblen().
 */
static char *
extract_mb_char(char *s)
{
	int			nbytes = pg_mblen(s);
	char	   *copy = palloc(nbytes + 1);

	memcpy(copy, s, nbytes);
	copy[nbytes] = '\0';

	return copy;
}
1339
1340	/*
1341	* Determine how we want to print values of a given type in datum_to_json.
1342	*
1343	* Given the datatype OID, return its JsonTypeCategory, as well as the type's
1344	* output function OID. If the returned category is JSONTYPE_CAST, we
1345	* return the OID of the type->JSON cast function instead.
1346	*/
1347	static void
1348	json_categorize_type(Oid typoid,
1349	JsonTypeCategory *tcategory,
1350	Oid *outfuncoid)
1351	{
1352	bool typisvarlena;
1353
1354	/ Look through any domain /
1355	typoid = getBaseType(typoid);
1356
1357	*outfuncoid = InvalidOid;
1358
1359	/*
1360	* We need to get the output function for everything except date and
1361	* timestamp types, array and composite types, booleans, and non-builtin
1362	* types where there's a cast to json.
1363	*/
1364
1365	switch (typoid)
1366	{
1367	case BOOLOID:
1368	*tcategory = JSONTYPE_BOOL;
1369	break;
1370
1371	case INT2OID:
1372	case INT4OID:
1373	case INT8OID:
1374	case FLOAT4OID:
1375	case FLOAT8OID:
1376	case NUMERICOID:
1377	getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
1378	*tcategory = JSONTYPE_NUMERIC;
1379	break;
1380
1381	case DATEOID:
1382	*tcategory = JSONTYPE_DATE;
1383	break;
1384
1385	case TIMESTAMPOID:
1386	*tcategory = JSONTYPE_TIMESTAMP;
1387	break;
1388
1389	case TIMESTAMPTZOID:
1390	*tcategory = JSONTYPE_TIMESTAMPTZ;
1391	break;
1392
1393	case JSONOID:
1394	case JSONBOID:
1395	getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
1396	*tcategory = JSONTYPE_JSON;
1397	break;
1398
1399	default:
1400	/ Check for arrays and composites /
1401	if (OidIsValid(get_element_type(typoid)) \|\| typoid == ANYARRAYOID
1402	\|\| typoid == RECORDARRAYOID)
1403	*tcategory = JSONTYPE_ARRAY;
1404	else if (type_is_rowtype(typoid)) / includes RECORDOID /
1405	*tcategory = JSONTYPE_COMPOSITE;
1406	else
1407	{
1408	/ It's probably the general case ... /
1409	*tcategory = JSONTYPE_OTHER;
1410	/ but let's look for a cast to json, if it's not built-in /
1411	if (typoid >= FirstNormalObjectId)
1412	{
1413	Oid castfunc;
1414	CoercionPathType ctype;
1415
1416	ctype = find_coercion_pathway(JSONOID, typoid,
1417	COERCION_EXPLICIT,
1418	&castfunc);
1419	if (ctype == COERCION_PATH_FUNC && OidIsValid(castfunc))
1420	{
1421	*tcategory = JSONTYPE_CAST;
1422	*outfuncoid = castfunc;
1423	}
1424	else
1425	{
1426	/ non builtin type with no cast /
1427	getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
1428	}
1429	}
1430	else
1431	{
1432	/ any other builtin type /
1433	getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
1434	}
1435	}
1436	break;
1437	}
1438	}
1439
1440	/*
1441	* Turn a Datum into JSON text, appending the string to "result".
1442	*
1443	* tcategory and outfuncoid are from a previous call to json_categorize_type,
1444	* except that if is_null is true then they can be invalid.
1445	*
1446	* If key_scalar is true, the value is being printed as a key, so insist
1447	* it's of an acceptable type, and force it to be quoted.
1448	*/
1449	static void
1450	datum_to_json(Datum val, bool is_null, StringInfo result,
1451	JsonTypeCategory tcategory, Oid outfuncoid,
1452	bool key_scalar)
1453	{
1454	char *outputstr;
1455	text *jsontext;
1456
1457	check_stack_depth();
1458
1459	/ callers are expected to ensure that null keys are not passed in /
1460	Assert(!(key_scalar && is_null));
1461
1462	if (is_null)
1463	{
1464	appendStringInfoString(result, "null");
1465	return;
1466	}
1467
1468	if (key_scalar &&
1469	(tcategory == JSONTYPE_ARRAY \|\|
1470	tcategory == JSONTYPE_COMPOSITE \|\|
1471	tcategory == JSONTYPE_JSON \|\|
1472	tcategory == JSONTYPE_CAST))
1473	ereport(ERROR,
1474	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1475	errmsg("key value must be scalar, not array, composite, or json")));
1476
1477	switch (tcategory)
1478	{
1479	case JSONTYPE_ARRAY:
1480	array_to_json_internal(val, result, false);
1481	break;
1482	case JSONTYPE_COMPOSITE:
1483	composite_to_json(val, result, false);
1484	break;
1485	case JSONTYPE_BOOL:
1486	outputstr = DatumGetBool(val) ? "true" : "false";
1487	if (key_scalar)
1488	escape_json(result, outputstr);
1489	else
1490	appendStringInfoString(result, outputstr);
1491	break;
1492	case JSONTYPE_NUMERIC:
1493	outputstr = OidOutputFunctionCall(outfuncoid, val);
1494
1495	/*
1496	* Don't call escape_json for a non-key if it's a valid JSON
1497	* number.
1498	*/
1499	if (!key_scalar && IsValidJsonNumber(outputstr, strlen(outputstr)))
1500	appendStringInfoString(result, outputstr);
1501	else
1502	escape_json(result, outputstr);
1503	pfree(outputstr);
1504	break;
1505	case JSONTYPE_DATE:
1506	{
1507	char buf[MAXDATELEN + `1`];
1508
1509	JsonEncodeDateTime(buf, val, DATEOID);
1510	appendStringInfo(result, "\"%s\"", buf);
1511	}
1512	break;
1513	case JSONTYPE_TIMESTAMP:
1514	{
1515	char buf[MAXDATELEN + `1`];
1516
1517	JsonEncodeDateTime(buf, val, TIMESTAMPOID);
1518	appendStringInfo(result, "\"%s\"", buf);
1519	}
1520	break;
1521	case JSONTYPE_TIMESTAMPTZ:
1522	{
1523	char buf[MAXDATELEN + `1`];
1524
1525	JsonEncodeDateTime(buf, val, TIMESTAMPTZOID);
1526	appendStringInfo(result, "\"%s\"", buf);
1527	}
1528	break;
1529	case JSONTYPE_JSON:
1530	/ JSON and JSONB output will already be escaped /
1531	outputstr = OidOutputFunctionCall(outfuncoid, val);
1532	appendStringInfoString(result, outputstr);
1533	pfree(outputstr);
1534	break;
1535	case JSONTYPE_CAST:
1536	/ outfuncoid refers to a cast function, not an output function /
1537	jsontext = DatumGetTextPP(OidFunctionCall1(outfuncoid, val));
1538	outputstr = text_to_cstring(jsontext);
1539	appendStringInfoString(result, outputstr);
1540	pfree(outputstr);
1541	pfree(jsontext);
1542	break;
1543	default:
1544	outputstr = OidOutputFunctionCall(outfuncoid, val);
1545	escape_json(result, outputstr);
1546	pfree(outputstr);
1547	break;
1548	}
1549	}
1550
1551	/*
1552	* Encode 'value' of datetime type 'typid' into JSON string in ISO format using
1553	* optionally preallocated buffer 'buf'.
1554	*/
1555	char *
1556	JsonEncodeDateTime(char *buf, Datum value, Oid typid)
1557	{
1558	if (!buf)
1559	buf = palloc(MAXDATELEN + `1`);
1560
1561	switch (typid)
1562	{
1563	case DATEOID:
1564	{
1565	DateADT date;
1566	struct pg_tm tm;
1567
1568	date = DatumGetDateADT(value);
1569
1570	/ Same as date_out(), but forcing DateStyle /
1571	if (DATE_NOT_FINITE(date))
1572	EncodeSpecialDate(date, buf);
1573	else
1574	{
1575	j2date(date + POSTGRES_EPOCH_JDATE,
1576	&(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
1577	EncodeDateOnly(&tm, USE_XSD_DATES, buf);
1578	}
1579	}
1580	break;
1581	case TIMEOID:
1582	{
1583	TimeADT time = DatumGetTimeADT(value);
1584	struct pg_tm tt,
1585	*tm = &tt;
1586	fsec_t fsec;
1587
1588	/ Same as time_out(), but forcing DateStyle /
1589	time2tm(time, tm, &fsec);
1590	EncodeTimeOnly(tm, fsec, false, `0`, USE_XSD_DATES, buf);
1591	}
1592	break;
1593	case TIMETZOID:
1594	{
1595	TimeTzADT *time = DatumGetTimeTzADTP(value);
1596	struct pg_tm tt,
1597	*tm = &tt;
1598	fsec_t fsec;
1599	int tz;
1600
1601	/ Same as timetz_out(), but forcing DateStyle /
1602	timetz2tm(time, tm, &fsec, &tz);
1603	EncodeTimeOnly(tm, fsec, true, tz, USE_XSD_DATES, buf);
1604	}
1605	break;
1606	case TIMESTAMPOID:
1607	{
1608	Timestamp timestamp;
1609	struct pg_tm tm;
1610	fsec_t fsec;
1611
1612	timestamp = DatumGetTimestamp(value);
1613	/ Same as timestamp_out(), but forcing DateStyle /
1614	if (TIMESTAMP_NOT_FINITE(timestamp))
1615	EncodeSpecialTimestamp(timestamp, buf);
1616	else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == `0`)
1617	EncodeDateTime(&tm, fsec, false, `0`, NULL, USE_XSD_DATES, buf);
1618	else
1619	ereport(ERROR,
1620	(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
1621	errmsg("timestamp out of range")));
1622	}
1623	break;
1624	case TIMESTAMPTZOID:
1625	{
1626	TimestampTz timestamp;
1627	struct pg_tm tm;
1628	int tz;
1629	fsec_t fsec;
1630	const char *tzn = NULL;
1631
1632	timestamp = DatumGetTimestampTz(value);
1633	/ Same as timestamptz_out(), but forcing DateStyle /
1634	if (TIMESTAMP_NOT_FINITE(timestamp))
1635	EncodeSpecialTimestamp(timestamp, buf);
1636	else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == `0`)
1637	EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
1638	else
1639	ereport(ERROR,
1640	(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
1641	errmsg("timestamp out of range")));
1642	}
1643	break;
1644	default:
1645	elog(ERROR, "unknown jsonb value datetime type oid %d", typid);
1646	return NULL;
1647	}
1648
1649	return buf;
1650	}
1651
1652	/*
1653	* Process a single dimension of an array.
1654	* If it's the innermost dimension, output the values, otherwise call
1655	* ourselves recursively to process the next dimension.
1656	*/
1657	static void
1658	array_dim_to_json(StringInfo result, int dim, int ndims, int dims, Datum vals,
1659	bool nulls, int* *valcount, JsonTypeCategory tcategory,
1660	Oid outfuncoid, bool use_line_feeds)
1661	{
1662	int i;
1663	const char *sep;
1664
1665	Assert(dim < ndims);
1666
1667	sep = use_line_feeds ? ",\n " : ",";
1668
1669	appendStringInfoChar(result, `'['`);
1670
1671	for (i = `1`; i <= dims[dim]; i++)
1672	{
1673	if (i > `1`)
1674	appendStringInfoString(result, sep);
1675
1676	if (dim + `1` == ndims)
1677	{
1678	datum_to_json(vals[valcount], nulls[valcount], result, tcategory,
1679	outfuncoid, false);
1680	(*valcount)++;
1681	}
1682	else
1683	{
1684	/*
1685	* Do we want line feeds on inner dimensions of arrays? For now
1686	* we'll say no.
1687	*/
1688	array_dim_to_json(result, dim + `1`, ndims, dims, vals, nulls,
1689	valcount, tcategory, outfuncoid, false);
1690	}
1691	}
1692
1693	appendStringInfoChar(result, `']'`);
1694	}
1695
1696	/*
1697	* Turn an array into JSON.
1698	*/
1699	static void
1700	array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
1701	{
1702	ArrayType *v = DatumGetArrayTypeP(array);
1703	Oid element_type = ARR_ELEMTYPE(v);
1704	int *dim;
1705	int ndim;
1706	int nitems;
1707	int count = `0`;
1708	Datum *elements;
1709	bool *nulls;
1710	int16 typlen;
1711	bool typbyval;
1712	char typalign;
1713	JsonTypeCategory tcategory;
1714	Oid outfuncoid;
1715
1716	ndim = ARR_NDIM(v);
1717	dim = ARR_DIMS(v);
1718	nitems = ArrayGetNItems(ndim, dim);
1719
1720	if (nitems <= `0`)
1721	{
1722	appendStringInfoString(result, "[]");
1723	return;
1724	}
1725
1726	get_typlenbyvalalign(element_type,
1727	&typlen, &typbyval, &typalign);
1728
1729	json_categorize_type(element_type,
1730	&tcategory, &outfuncoid);
1731
1732	deconstruct_array(v, element_type, typlen, typbyval,
1733	typalign, &elements, &nulls,
1734	&nitems);
1735
1736	array_dim_to_json(result, `0`, ndim, dim, elements, nulls, &count, tcategory,
1737	outfuncoid, use_line_feeds);
1738
1739	pfree(elements);
1740	pfree(nulls);
1741	}
1742
1743	/*
1744	* Turn a composite / record into JSON.
1745	*/
1746	static void
1747	composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
1748	{
1749	HeapTupleHeader td;
1750	Oid tupType;
1751	int32 tupTypmod;
1752	TupleDesc tupdesc;
1753	HeapTupleData tmptup,
1754	*tuple;
1755	int i;
1756	bool needsep = false;
1757	const char *sep;
1758
1759	sep = use_line_feeds ? ",\n " : ",";
1760
1761	td = DatumGetHeapTupleHeader(composite);
1762
1763	/ Extract rowtype info and find a tupdesc /
1764	tupType = HeapTupleHeaderGetTypeId(td);
1765	tupTypmod = HeapTupleHeaderGetTypMod(td);
1766	tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
1767
1768	/ Build a temporary HeapTuple control structure /
1769	tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
1770	tmptup.t_data = td;
1771	tuple = &tmptup;
1772
1773	appendStringInfoChar(result, `'{'`);
1774
1775	for (i = `0`; i < tupdesc->natts; i++)
1776	{
1777	Datum val;
1778	bool isnull;
1779	char *attname;
1780	JsonTypeCategory tcategory;
1781	Oid outfuncoid;
1782	Form_pg_attribute att = TupleDescAttr(tupdesc, i);
1783
1784	if (att->attisdropped)
1785	continue;
1786
1787	if (needsep)
1788	appendStringInfoString(result, sep);
1789	needsep = true;
1790
1791	attname = NameStr(att->attname);
1792	escape_json(result, attname);
1793	appendStringInfoChar(result, `':'`);
1794
1795	val = heap_getattr(tuple, i + `1`, tupdesc, &isnull);
1796
1797	if (isnull)
1798	{
1799	tcategory = JSONTYPE_NULL;
1800	outfuncoid = InvalidOid;
1801	}
1802	else
1803	json_categorize_type(att->atttypid, &tcategory, &outfuncoid);
1804
1805	datum_to_json(val, isnull, result, tcategory, outfuncoid, false);
1806	}
1807
1808	appendStringInfoChar(result, `'}'`);
1809	ReleaseTupleDesc(tupdesc);
1810	}
1811
1812	/*
1813	* Append JSON text for "val" to "result".
1814	*
1815	* This is just a thin wrapper around datum_to_json. If the same type will be
1816	* printed many times, avoid using this; better to do the json_categorize_type
1817	* lookups only once.
1818	*/
1819	static void
1820	add_json(Datum val, bool is_null, StringInfo result,
1821	Oid val_type, bool key_scalar)
1822	{
1823	JsonTypeCategory tcategory;
1824	Oid outfuncoid;
1825
1826	if (val_type == InvalidOid)
1827	ereport(ERROR,
1828	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1829	errmsg("could not determine input data type")));
1830
1831	if (is_null)
1832	{
1833	tcategory = JSONTYPE_NULL;
1834	outfuncoid = InvalidOid;
1835	}
1836	else
1837	json_categorize_type(val_type,
1838	&tcategory, &outfuncoid);
1839
1840	datum_to_json(val, is_null, result, tcategory, outfuncoid, key_scalar);
1841	}
1842
1843	/*
1844	* SQL function array_to_json(row)
1845	*/
1846	Datum
1847	array_to_json(PG_FUNCTION_ARGS)
1848	{
1849	Datum array = PG_GETARG_DATUM(`0`);
1850	StringInfo result;
1851
1852	result = makeStringInfo();
1853
1854	array_to_json_internal(array, result, false);
1855
1856	PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
1857	}
1858
1859	/*
1860	* SQL function array_to_json(row, prettybool)
1861	*/
1862	Datum
1863	array_to_json_pretty(PG_FUNCTION_ARGS)
1864	{
1865	Datum array = PG_GETARG_DATUM(`0`);
1866	bool use_line_feeds = PG_GETARG_BOOL(`1`);
1867	StringInfo result;
1868
1869	result = makeStringInfo();
1870
1871	array_to_json_internal(array, result, use_line_feeds);
1872
1873	PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
1874	}
1875
1876	/*
1877	* SQL function row_to_json(row)
1878	*/
1879	Datum
1880	row_to_json(PG_FUNCTION_ARGS)
1881	{
1882	Datum array = PG_GETARG_DATUM(`0`);
1883	StringInfo result;
1884
1885	result = makeStringInfo();
1886
1887	composite_to_json(array, result, false);
1888
1889	PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
1890	}
1891
1892	/*
1893	* SQL function row_to_json(row, prettybool)
1894	*/
1895	Datum
1896	row_to_json_pretty(PG_FUNCTION_ARGS)
1897	{
1898	Datum array = PG_GETARG_DATUM(`0`);
1899	bool use_line_feeds = PG_GETARG_BOOL(`1`);
1900	StringInfo result;
1901
1902	result = makeStringInfo();
1903
1904	composite_to_json(array, result, use_line_feeds);
1905
1906	PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
1907	}
1908
1909	/*
1910	* SQL function to_json(anyvalue)
1911	*/
1912	Datum
1913	to_json(PG_FUNCTION_ARGS)
1914	{
1915	Datum val = PG_GETARG_DATUM(`0`);
1916	Oid val_type = get_fn_expr_argtype(fcinfo->flinfo, `0`);
1917	StringInfo result;
1918	JsonTypeCategory tcategory;
1919	Oid outfuncoid;
1920
1921	if (val_type == InvalidOid)
1922	ereport(ERROR,
1923	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1924	errmsg("could not determine input data type")));
1925
1926	json_categorize_type(val_type,
1927	&tcategory, &outfuncoid);
1928
1929	result = makeStringInfo();
1930
1931	datum_to_json(val, false, result, tcategory, outfuncoid, false);
1932
1933	PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
1934	}
1935
1936	/*
1937	* json_agg transition function
1938	*
1939	* aggregate input column as a json array value.
1940	*/
1941	Datum
1942	json_agg_transfn(PG_FUNCTION_ARGS)
1943	{
1944	MemoryContext aggcontext,
1945	oldcontext;
1946	JsonAggState *state;
1947	Datum val;
1948
1949	if (!AggCheckCallContext(fcinfo, &aggcontext))
1950	{
1951	/ cannot be called directly because of internal-type argument /
1952	elog(ERROR, "json_agg_transfn called in non-aggregate context");
1953	}
1954
1955	if (PG_ARGISNULL(`0`))
1956	{
1957	Oid arg_type = get_fn_expr_argtype(fcinfo->flinfo, `1`);
1958
1959	if (arg_type == InvalidOid)
1960	ereport(ERROR,
1961	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1962	errmsg("could not determine input data type")));
1963
1964	/*
1965	* Make this state object in a context where it will persist for the
1966	* duration of the aggregate call. MemoryContextSwitchTo is only
1967	* needed the first time, as the StringInfo routines make sure they
1968	* use the right context to enlarge the object if necessary.
1969	*/
1970	oldcontext = MemoryContextSwitchTo(aggcontext);
1971	state = (JsonAggState ) palloc(sizeof*(JsonAggState));
1972	state->str = makeStringInfo();
1973	MemoryContextSwitchTo(oldcontext);
1974
1975	appendStringInfoChar(state->str, `'['`);
1976	json_categorize_type(arg_type, &state->val_category,
1977	&state->val_output_func);
1978	}
1979	else
1980	{
1981	state = (JsonAggState *) PG_GETARG_POINTER(`0`);
1982	appendStringInfoString(state->str, ", ");
1983	}
1984
1985	/ fast path for NULLs /
1986	if (PG_ARGISNULL(`1`))
1987	{
1988	datum_to_json((Datum) `0`, true, state->str, JSONTYPE_NULL,
1989	InvalidOid, false);
1990	PG_RETURN_POINTER(state);
1991	}
1992
1993	val = PG_GETARG_DATUM(`1`);
1994
1995	/ add some whitespace if structured type and not first item /
1996	if (!PG_ARGISNULL(`0`) &&
1997	(state->val_category == JSONTYPE_ARRAY \|\|
1998	state->val_category == JSONTYPE_COMPOSITE))
1999	{
2000	appendStringInfoString(state->str, "\n ");
2001	}
2002
2003	datum_to_json(val, false, state->str, state->val_category,
2004	state->val_output_func, false);
2005
2006	/*
2007	* The transition type for json_agg() is declared to be "internal", which
2008	* is a pass-by-value type the same size as a pointer. So we can safely
2009	* pass the JsonAggState pointer through nodeAgg.c's machinations.
2010	*/
2011	PG_RETURN_POINTER(state);
2012	}
2013
2014	/*
2015	* json_agg final function
2016	*/
2017	Datum
2018	json_agg_finalfn(PG_FUNCTION_ARGS)
2019	{
2020	JsonAggState *state;
2021
2022	/ cannot be called directly because of internal-type argument /
2023	Assert(AggCheckCallContext(fcinfo, NULL));
2024
2025	state = PG_ARGISNULL(`0`) ?
2026	NULL :
2027	(JsonAggState *) PG_GETARG_POINTER(`0`);
2028
2029	/ NULL result for no rows in, as is standard with aggregates /
2030	if (state == NULL)
2031	PG_RETURN_NULL();
2032
2033	/ Else return state with appropriate array terminator added /
2034	PG_RETURN_TEXT_P(catenate_stringinfo_string(state->str, "]"));
2035	}
2036
2037	/*
2038	* json_object_agg transition function.
2039	*
2040	* aggregate two input columns as a single json object value.
2041	*/
2042	Datum
2043	json_object_agg_transfn(PG_FUNCTION_ARGS)
2044	{
2045	MemoryContext aggcontext,
2046	oldcontext;
2047	JsonAggState *state;
2048	Datum arg;
2049
2050	if (!AggCheckCallContext(fcinfo, &aggcontext))
2051	{
2052	/ cannot be called directly because of internal-type argument /
2053	elog(ERROR, "json_object_agg_transfn called in non-aggregate context");
2054	}
2055
2056	if (PG_ARGISNULL(`0`))
2057	{
2058	Oid arg_type;
2059
2060	/*
2061	* Make the StringInfo in a context where it will persist for the
2062	* duration of the aggregate call. Switching context is only needed
2063	* for this initial step, as the StringInfo routines make sure they
2064	* use the right context to enlarge the object if necessary.
2065	*/
2066	oldcontext = MemoryContextSwitchTo(aggcontext);
2067	state = (JsonAggState ) palloc(sizeof*(JsonAggState));
2068	state->str = makeStringInfo();
2069	MemoryContextSwitchTo(oldcontext);
2070
2071	arg_type = get_fn_expr_argtype(fcinfo->flinfo, `1`);
2072
2073	if (arg_type == InvalidOid)
2074	ereport(ERROR,
2075	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2076	errmsg("could not determine data type for argument %d", `1`)));
2077
2078	json_categorize_type(arg_type, &state->key_category,
2079	&state->key_output_func);
2080
2081	arg_type = get_fn_expr_argtype(fcinfo->flinfo, `2`);
2082
2083	if (arg_type == InvalidOid)
2084	ereport(ERROR,
2085	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2086	errmsg("could not determine data type for argument %d", `2`)));
2087
2088	json_categorize_type(arg_type, &state->val_category,
2089	&state->val_output_func);
2090
2091	appendStringInfoString(state->str, "{ ");
2092	}
2093	else
2094	{
2095	state = (JsonAggState *) PG_GETARG_POINTER(`0`);
2096	appendStringInfoString(state->str, ", ");
2097	}
2098
2099	/*
2100	* Note: since json_object_agg() is declared as taking type "any", the
2101	* parser will not do any type conversion on unknown-type literals (that
2102	* is, undecorated strings or NULLs). Such values will arrive here as
2103	* type UNKNOWN, which fortunately does not matter to us, since
2104	* unknownout() works fine.
2105	*/
2106
2107	if (PG_ARGISNULL(`1`))
2108	ereport(ERROR,
2109	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2110	errmsg("field name must not be null")));
2111
2112	arg = PG_GETARG_DATUM(`1`);
2113
2114	datum_to_json(arg, false, state->str, state->key_category,
2115	state->key_output_func, true);
2116
2117	appendStringInfoString(state->str, " : ");
2118
2119	if (PG_ARGISNULL(`2`))
2120	arg = (Datum) `0`;
2121	else
2122	arg = PG_GETARG_DATUM(`2`);
2123
2124	datum_to_json(arg, PG_ARGISNULL(`2`), state->str, state->val_category,
2125	state->val_output_func, false);
2126
2127	PG_RETURN_POINTER(state);
2128	}
2129
2130	/*
2131	* json_object_agg final function.
2132	*/
2133	Datum
2134	json_object_agg_finalfn(PG_FUNCTION_ARGS)
2135	{
2136	JsonAggState *state;
2137
2138	/ cannot be called directly because of internal-type argument /
2139	Assert(AggCheckCallContext(fcinfo, NULL));
2140
2141	state = PG_ARGISNULL(`0`) ? NULL : (JsonAggState *) PG_GETARG_POINTER(`0`);
2142
2143	/ NULL result for no rows in, as is standard with aggregates /
2144	if (state == NULL)
2145	PG_RETURN_NULL();
2146
2147	/ Else return state with appropriate object terminator added /
2148	PG_RETURN_TEXT_P(catenate_stringinfo_string(state->str, " }"));
2149	}
2150
2151	/*
2152	* Helper function for aggregates: return given StringInfo's contents plus
2153	* specified trailing string, as a text datum. We need this because aggregate
2154	* final functions are not allowed to modify the aggregate state.
2155	*/
2156	static text *
2157	catenate_stringinfo_string(StringInfo buffer, const char *addon)
2158	{
2159	/ custom version of cstring_to_text_with_len /
2160	int buflen = buffer->len;
2161	int addlen = strlen(addon);
2162	text result = (text ) palloc(buflen + addlen + VARHDRSZ);
2163
2164	SET_VARSIZE(result, buflen + addlen + VARHDRSZ);
2165	memcpy(VARDATA(result), buffer->data, buflen);
2166	memcpy(VARDATA(result) + buflen, addon, addlen);
2167
2168	return result;
2169	}
2170
2171	/*
2172	* SQL function json_build_object(variadic "any")
2173	*/
2174	Datum
2175	json_build_object(PG_FUNCTION_ARGS)
2176	{
2177	int nargs = PG_NARGS();
2178	int i;
2179	const char *sep = "";
2180	StringInfo result;
2181	Datum *args;
2182	bool *nulls;
2183	Oid *types;
2184
2185	/ fetch argument values to build the object /
2186	nargs = extract_variadic_args(fcinfo, `0`, false, &args, &types, &nulls);
2187
2188	if (nargs < `0`)
2189	PG_RETURN_NULL();
2190
2191	if (nargs % `2` != `0`)
2192	ereport(ERROR,
2193	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2194	errmsg("argument list must have even number of elements"),
2195	/ translator: %s is a SQL function name /
2196	errhint("The arguments of %s must consist of alternating keys and values.",
2197	"json_build_object()")));
2198
2199	result = makeStringInfo();
2200
2201	appendStringInfoChar(result, `'{'`);
2202
2203	for (i = `0`; i < nargs; i += `2`)
2204	{
2205	appendStringInfoString(result, sep);
2206	sep = ", ";
2207
2208	/ process key /
2209	if (nulls[i])
2210	ereport(ERROR,
2211	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2212	errmsg("argument %d cannot be null", i + `1`),
2213	errhint("Object keys should be text.")));
2214
2215	add_json(args[i], false, result, types[i], true);
2216
2217	appendStringInfoString(result, " : ");
2218
2219	/ process value /
2220	add_json(args[i + `1`], nulls[i + `1`], result, types[i + `1`], false);
2221	}
2222
2223	appendStringInfoChar(result, `'}'`);
2224
2225	PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
2226	}
2227
2228	/*
2229	* degenerate case of json_build_object where it gets 0 arguments.
2230	*/
2231	Datum
2232	json_build_object_noargs(PG_FUNCTION_ARGS)
2233	{
2234	PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", `2`));
2235	}
2236
2237	/*
2238	* SQL function json_build_array(variadic "any")
2239	*/
2240	Datum
2241	json_build_array(PG_FUNCTION_ARGS)
2242	{
2243	int nargs;
2244	int i;
2245	const char *sep = "";
2246	StringInfo result;
2247	Datum *args;
2248	bool *nulls;
2249	Oid *types;
2250
2251	/ fetch argument values to build the array /
2252	nargs = extract_variadic_args(fcinfo, `0`, false, &args, &types, &nulls);
2253
2254	if (nargs < `0`)
2255	PG_RETURN_NULL();
2256
2257	result = makeStringInfo();
2258
2259	appendStringInfoChar(result, `'['`);
2260
2261	for (i = `0`; i < nargs; i++)
2262	{
2263	appendStringInfoString(result, sep);
2264	sep = ", ";
2265	add_json(args[i], nulls[i], result, types[i], false);
2266	}
2267
2268	appendStringInfoChar(result, `']'`);
2269
2270	PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
2271	}
2272
2273	/*
2274	* degenerate case of json_build_array where it gets 0 arguments.
2275	*/
2276	Datum
2277	json_build_array_noargs(PG_FUNCTION_ARGS)
2278	{
2279	PG_RETURN_TEXT_P(cstring_to_text_with_len("[]", `2`));
2280	}
2281
2282	/*
2283	* SQL function json_object(text[])
2284	*
2285	* take a one or two dimensional array of text as key/value pairs
2286	* for a json object.
2287	*/
2288	Datum
2289	json_object(PG_FUNCTION_ARGS)
2290	{
2291	ArrayType *in_array = PG_GETARG_ARRAYTYPE_P(`0`);
2292	int ndims = ARR_NDIM(in_array);
2293	StringInfoData result;
2294	Datum *in_datums;
2295	bool *in_nulls;
2296	int in_count,
2297	count,
2298	i;
2299	text *rval;
2300	char *v;
2301
2302	switch (ndims)
2303	{
2304	case `0`:
2305	PG_RETURN_DATUM(CStringGetTextDatum("{}"));
2306	break;
2307
2308	case `1`:
2309	if ((ARR_DIMS(in_array)[`0`]) % `2`)
2310	ereport(ERROR,
2311	(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2312	errmsg("array must have even number of elements")));
2313	break;
2314
2315	case `2`:
2316	if ((ARR_DIMS(in_array)[`1`]) != `2`)
2317	ereport(ERROR,
2318	(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2319	errmsg("array must have two columns")));
2320	break;
2321
2322	default:
2323	ereport(ERROR,
2324	(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2325	errmsg("wrong number of array subscripts")));
2326	}
2327
2328	deconstruct_array(in_array,
2329	TEXTOID, -`1`, false, `'i'`,
2330	&in_datums, &in_nulls, &in_count);
2331
2332	count = in_count / `2`;
2333
2334	initStringInfo(&result);
2335
2336	appendStringInfoChar(&result, `'{'`);
2337
2338	for (i = `0`; i < count; ++i)
2339	{
2340	if (in_nulls[i * `2`])
2341	ereport(ERROR,
2342	(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
2343	errmsg("null value not allowed for object key")));
2344
2345	v = TextDatumGetCString(in_datums[i * `2`]);
2346	if (i > `0`)
2347	appendStringInfoString(&result, ", ");
2348	escape_json(&result, v);
2349	appendStringInfoString(&result, " : ");
2350	pfree(v);
2351	if (in_nulls[i * `2` + `1`])
2352	appendStringInfoString(&result, "null");
2353	else
2354	{
2355	v = TextDatumGetCString(in_datums[i * `2` + `1`]);
2356	escape_json(&result, v);
2357	pfree(v);
2358	}
2359	}
2360
2361	appendStringInfoChar(&result, `'}'`);
2362
2363	pfree(in_datums);
2364	pfree(in_nulls);
2365
2366	rval = cstring_to_text_with_len(result.data, result.len);
2367	pfree(result.data);
2368
2369	PG_RETURN_TEXT_P(rval);
2370
2371	}
2372
2373	/*
2374	* SQL function json_object(text[], text[])
2375	*
2376	* take separate key and value arrays of text to construct a json object
2377	* pairwise.
2378	*/
2379	Datum
2380	json_object_two_arg(PG_FUNCTION_ARGS)
2381	{
2382	ArrayType *key_array = PG_GETARG_ARRAYTYPE_P(`0`);
2383	ArrayType *val_array = PG_GETARG_ARRAYTYPE_P(`1`);
2384	int nkdims = ARR_NDIM(key_array);
2385	int nvdims = ARR_NDIM(val_array);
2386	StringInfoData result;
2387	Datum *key_datums,
2388	*val_datums;
2389	bool *key_nulls,
2390	*val_nulls;
2391	int key_count,
2392	val_count,
2393	i;
2394	text *rval;
2395	char *v;
2396
2397	if (nkdims > `1` \|\| nkdims != nvdims)
2398	ereport(ERROR,
2399	(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2400	errmsg("wrong number of array subscripts")));
2401
2402	if (nkdims == `0`)
2403	PG_RETURN_DATUM(CStringGetTextDatum("{}"));
2404
2405	deconstruct_array(key_array,
2406	TEXTOID, -`1`, false, `'i'`,
2407	&key_datums, &key_nulls, &key_count);
2408
2409	deconstruct_array(val_array,
2410	TEXTOID, -`1`, false, `'i'`,
2411	&val_datums, &val_nulls, &val_count);
2412
2413	if (key_count != val_count)
2414	ereport(ERROR,
2415	(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2416	errmsg("mismatched array dimensions")));
2417
2418	initStringInfo(&result);
2419
2420	appendStringInfoChar(&result, `'{'`);
2421
2422	for (i = `0`; i < key_count; ++i)
2423	{
2424	if (key_nulls[i])
2425	ereport(ERROR,
2426	(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
2427	errmsg("null value not allowed for object key")));
2428
2429	v = TextDatumGetCString(key_datums[i]);
2430	if (i > `0`)
2431	appendStringInfoString(&result, ", ");
2432	escape_json(&result, v);
2433	appendStringInfoString(&result, " : ");
2434	pfree(v);
2435	if (val_nulls[i])
2436	appendStringInfoString(&result, "null");
2437	else
2438	{
2439	v = TextDatumGetCString(val_datums[i]);
2440	escape_json(&result, v);
2441	pfree(v);
2442	}
2443	}
2444
2445	appendStringInfoChar(&result, `'}'`);
2446
2447	pfree(key_datums);
2448	pfree(key_nulls);
2449	pfree(val_datums);
2450	pfree(val_nulls);
2451
2452	rval = cstring_to_text_with_len(result.data, result.len);
2453	pfree(result.data);
2454
2455	PG_RETURN_TEXT_P(rval);
2456	}
2457
2458
2459	/*
2460	* Produce a JSON string literal, properly escaping characters in the text.
2461	*/
2462	void
2463	escape_json(StringInfo buf, const char *str)
2464	{
2465	const char *p;
2466
2467	appendStringInfoCharMacro(buf, `'"'`);
2468	for (p = str; *p; p++)
2469	{
2470	switch (*p)
2471	{
2472	case `'\b'`:
2473	appendStringInfoString(buf, "\\b");
2474	break;
2475	case `'\f'`:
2476	appendStringInfoString(buf, "\\f");
2477	break;
2478	case `'\n'`:
2479	appendStringInfoString(buf, "\\n");
2480	break;
2481	case `'\r'`:
2482	appendStringInfoString(buf, "\\r");
2483	break;
2484	case `'\t'`:
2485	appendStringInfoString(buf, "\\t");
2486	break;
2487	case `'"'`:
2488	appendStringInfoString(buf, "\\\"");
2489	break;
2490	case `'\\'`:
2491	appendStringInfoString(buf, "\\\\");
2492	break;
2493	default:
2494	if ((unsigned char) *p < `' '`)
2495	appendStringInfo(buf, "\\u%04x", (int) *p);
2496	else
2497	appendStringInfoCharMacro(buf, *p);
2498	break;
2499	}
2500	}
2501	appendStringInfoCharMacro(buf, `'"'`);
2502	}
2503
2504	/*
2505	* SQL function json_typeof(json) -> text
2506	*
2507	* Returns the type of the outermost JSON value as TEXT. Possible types are
2508	* "object", "array", "string", "number", "boolean", and "null".
2509	*
2510	* Performs a single call to json_lex() to get the first token of the supplied
2511	* value. This initial token uniquely determines the value's type. As our
2512	* input must already have been validated by json_in() or json_recv(), the
2513	* initial token should never be JSON_TOKEN_OBJECT_END, JSON_TOKEN_ARRAY_END,
2514	* JSON_TOKEN_COLON, JSON_TOKEN_COMMA, or JSON_TOKEN_END.
2515	*/
2516	Datum
2517	json_typeof(PG_FUNCTION_ARGS)
2518	{
2519	text *json;
2520
2521	JsonLexContext *lex;
2522	JsonTokenType tok;
2523	char *type;
2524
2525	json = PG_GETARG_TEXT_PP(`0`);
2526	lex = makeJsonLexContext(json, false);
2527
2528	/ Lex exactly one token from the input and check its type. /
2529	json_lex(lex);
2530	tok = lex_peek(lex);
2531	switch (tok)
2532	{
2533	case JSON_TOKEN_OBJECT_START:
2534	type = "object";
2535	break;
2536	case JSON_TOKEN_ARRAY_START:
2537	type = "array";
2538	break;
2539	case JSON_TOKEN_STRING:
2540	type = "string";
2541	break;
2542	case JSON_TOKEN_NUMBER:
2543	type = "number";
2544	break;
2545	case JSON_TOKEN_TRUE:
2546	case JSON_TOKEN_FALSE:
2547	type = "boolean";
2548	break;
2549	case JSON_TOKEN_NULL:
2550	type = "null";
2551	break;
2552	default:
2553	elog(ERROR, "unexpected json token: %d", tok);
2554	}
2555
2556	PG_RETURN_TEXT_P(cstring_to_text(type));
2557	}
2558

Browse the source code of PostgreSQL/src/backend/utils/adt/json.c