| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * datum.c |
| 4 | * POSTGRES Datum (abstract data type) manipulation routines. |
| 5 | * |
| 6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 7 | * Portions Copyright (c) 1994, Regents of the University of California |
| 8 | * |
| 9 | * |
| 10 | * IDENTIFICATION |
| 11 | * src/backend/utils/adt/datum.c |
| 12 | * |
| 13 | *------------------------------------------------------------------------- |
| 14 | */ |
| 15 | |
| 16 | /* |
| 17 | * In the implementation of these routines we assume the following: |
| 18 | * |
| 19 | * A) if a type is "byVal" then all the information is stored in the |
| 20 | * Datum itself (i.e. no pointers involved!). In this case the |
| 21 | * length of the type is always greater than zero and not more than |
| 22 | * "sizeof(Datum)" |
| 23 | * |
| 24 | * B) if a type is not "byVal" and it has a fixed length (typlen > 0), |
| 25 | * then the "Datum" always contains a pointer to a stream of bytes. |
| 26 | * The number of significant bytes is always equal to the typlen. |
| 27 | * |
| 28 | * C) if a type is not "byVal" and has typlen == -1, |
| 29 | * then the "Datum" always points to a "struct varlena". |
| 30 | * This varlena structure has information about the actual length of this |
| 31 | * particular instance of the type and about its value. |
| 32 | * |
| 33 | * D) if a type is not "byVal" and has typlen == -2, |
| 34 | * then the "Datum" always points to a null-terminated C string. |
| 35 | * |
| 36 | * Note that we do not treat "toasted" datums specially; therefore what |
| 37 | * will be copied or compared is the compressed data or toast reference. |
| 38 | * An exception is made for datumCopy() of an expanded object, however, |
| 39 | * because most callers expect to get a simple contiguous (and pfree'able) |
| 40 | * result from datumCopy(). See also datumTransfer(). |
| 41 | */ |
| 42 | |
| 43 | #include "postgres.h" |
| 44 | |
| 45 | #include "access/tuptoaster.h" |
| 46 | #include "fmgr.h" |
| 47 | #include "utils/datum.h" |
| 48 | #include "utils/expandeddatum.h" |
| 49 | |
| 50 | |
| 51 | /*------------------------------------------------------------------------- |
| 52 | * datumGetSize |
| 53 | * |
| 54 | * Find the "real" size of a datum, given the datum value, |
| 55 | * whether it is a "by value", and the declared type length. |
| 56 | * (For TOAST pointer datums, this is the size of the pointer datum.) |
| 57 | * |
| 58 | * This is essentially an out-of-line version of the att_addlength_datum() |
| 59 | * macro in access/tupmacs.h. We do a tad more error checking though. |
| 60 | *------------------------------------------------------------------------- |
| 61 | */ |
| 62 | Size |
| 63 | datumGetSize(Datum value, bool typByVal, int typLen) |
| 64 | { |
| 65 | Size size; |
| 66 | |
| 67 | if (typByVal) |
| 68 | { |
| 69 | /* Pass-by-value types are always fixed-length */ |
| 70 | Assert(typLen > 0 && typLen <= sizeof(Datum)); |
| 71 | size = (Size) typLen; |
| 72 | } |
| 73 | else |
| 74 | { |
| 75 | if (typLen > 0) |
| 76 | { |
| 77 | /* Fixed-length pass-by-ref type */ |
| 78 | size = (Size) typLen; |
| 79 | } |
| 80 | else if (typLen == -1) |
| 81 | { |
| 82 | /* It is a varlena datatype */ |
| 83 | struct varlena *s = (struct varlena *) DatumGetPointer(value); |
| 84 | |
| 85 | if (!PointerIsValid(s)) |
| 86 | ereport(ERROR, |
| 87 | (errcode(ERRCODE_DATA_EXCEPTION), |
| 88 | errmsg("invalid Datum pointer" ))); |
| 89 | |
| 90 | size = (Size) VARSIZE_ANY(s); |
| 91 | } |
| 92 | else if (typLen == -2) |
| 93 | { |
| 94 | /* It is a cstring datatype */ |
| 95 | char *s = (char *) DatumGetPointer(value); |
| 96 | |
| 97 | if (!PointerIsValid(s)) |
| 98 | ereport(ERROR, |
| 99 | (errcode(ERRCODE_DATA_EXCEPTION), |
| 100 | errmsg("invalid Datum pointer" ))); |
| 101 | |
| 102 | size = (Size) (strlen(s) + 1); |
| 103 | } |
| 104 | else |
| 105 | { |
| 106 | elog(ERROR, "invalid typLen: %d" , typLen); |
| 107 | size = 0; /* keep compiler quiet */ |
| 108 | } |
| 109 | } |
| 110 | |
| 111 | return size; |
| 112 | } |
| 113 | |
| 114 | /*------------------------------------------------------------------------- |
| 115 | * datumCopy |
| 116 | * |
| 117 | * Make a copy of a non-NULL datum. |
| 118 | * |
| 119 | * If the datatype is pass-by-reference, memory is obtained with palloc(). |
| 120 | * |
| 121 | * If the value is a reference to an expanded object, we flatten into memory |
| 122 | * obtained with palloc(). We need to copy because one of the main uses of |
| 123 | * this function is to copy a datum out of a transient memory context that's |
| 124 | * about to be destroyed, and the expanded object is probably in a child |
| 125 | * context that will also go away. Moreover, many callers assume that the |
| 126 | * result is a single pfree-able chunk. |
| 127 | *------------------------------------------------------------------------- |
| 128 | */ |
| 129 | Datum |
| 130 | datumCopy(Datum value, bool typByVal, int typLen) |
| 131 | { |
| 132 | Datum res; |
| 133 | |
| 134 | if (typByVal) |
| 135 | res = value; |
| 136 | else if (typLen == -1) |
| 137 | { |
| 138 | /* It is a varlena datatype */ |
| 139 | struct varlena *vl = (struct varlena *) DatumGetPointer(value); |
| 140 | |
| 141 | if (VARATT_IS_EXTERNAL_EXPANDED(vl)) |
| 142 | { |
| 143 | /* Flatten into the caller's memory context */ |
| 144 | ExpandedObjectHeader *eoh = DatumGetEOHP(value); |
| 145 | Size resultsize; |
| 146 | char *resultptr; |
| 147 | |
| 148 | resultsize = EOH_get_flat_size(eoh); |
| 149 | resultptr = (char *) palloc(resultsize); |
| 150 | EOH_flatten_into(eoh, (void *) resultptr, resultsize); |
| 151 | res = PointerGetDatum(resultptr); |
| 152 | } |
| 153 | else |
| 154 | { |
| 155 | /* Otherwise, just copy the varlena datum verbatim */ |
| 156 | Size realSize; |
| 157 | char *resultptr; |
| 158 | |
| 159 | realSize = (Size) VARSIZE_ANY(vl); |
| 160 | resultptr = (char *) palloc(realSize); |
| 161 | memcpy(resultptr, vl, realSize); |
| 162 | res = PointerGetDatum(resultptr); |
| 163 | } |
| 164 | } |
| 165 | else |
| 166 | { |
| 167 | /* Pass by reference, but not varlena, so not toasted */ |
| 168 | Size realSize; |
| 169 | char *resultptr; |
| 170 | |
| 171 | realSize = datumGetSize(value, typByVal, typLen); |
| 172 | |
| 173 | resultptr = (char *) palloc(realSize); |
| 174 | memcpy(resultptr, DatumGetPointer(value), realSize); |
| 175 | res = PointerGetDatum(resultptr); |
| 176 | } |
| 177 | return res; |
| 178 | } |
| 179 | |
| 180 | /*------------------------------------------------------------------------- |
| 181 | * datumTransfer |
| 182 | * |
| 183 | * Transfer a non-NULL datum into the current memory context. |
| 184 | * |
| 185 | * This is equivalent to datumCopy() except when the datum is a read-write |
| 186 | * pointer to an expanded object. In that case we merely reparent the object |
| 187 | * into the current context, and return its standard R/W pointer (in case the |
| 188 | * given one is a transient pointer of shorter lifespan). |
| 189 | *------------------------------------------------------------------------- |
| 190 | */ |
| 191 | Datum |
| 192 | datumTransfer(Datum value, bool typByVal, int typLen) |
| 193 | { |
| 194 | if (!typByVal && typLen == -1 && |
| 195 | VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(value))) |
| 196 | value = TransferExpandedObject(value, CurrentMemoryContext); |
| 197 | else |
| 198 | value = datumCopy(value, typByVal, typLen); |
| 199 | return value; |
| 200 | } |
| 201 | |
| 202 | /*------------------------------------------------------------------------- |
| 203 | * datumIsEqual |
| 204 | * |
| 205 | * Return true if two datums are equal, false otherwise |
| 206 | * |
| 207 | * NOTE: XXX! |
| 208 | * We just compare the bytes of the two values, one by one. |
| 209 | * This routine will return false if there are 2 different |
| 210 | * representations of the same value (something along the lines |
| 211 | * of say the representation of zero in one's complement arithmetic). |
| 212 | * Also, it will probably not give the answer you want if either |
| 213 | * datum has been "toasted". |
| 214 | * |
| 215 | * Do not try to make this any smarter than it currently is with respect |
| 216 | * to "toasted" datums, because some of the callers could be working in the |
| 217 | * context of an aborted transaction. |
| 218 | *------------------------------------------------------------------------- |
| 219 | */ |
| 220 | bool |
| 221 | datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen) |
| 222 | { |
| 223 | bool res; |
| 224 | |
| 225 | if (typByVal) |
| 226 | { |
| 227 | /* |
| 228 | * just compare the two datums. NOTE: just comparing "len" bytes will |
| 229 | * not do the work, because we do not know how these bytes are aligned |
| 230 | * inside the "Datum". We assume instead that any given datatype is |
| 231 | * consistent about how it fills extraneous bits in the Datum. |
| 232 | */ |
| 233 | res = (value1 == value2); |
| 234 | } |
| 235 | else |
| 236 | { |
| 237 | Size size1, |
| 238 | size2; |
| 239 | char *s1, |
| 240 | *s2; |
| 241 | |
| 242 | /* |
| 243 | * Compare the bytes pointed by the pointers stored in the datums. |
| 244 | */ |
| 245 | size1 = datumGetSize(value1, typByVal, typLen); |
| 246 | size2 = datumGetSize(value2, typByVal, typLen); |
| 247 | if (size1 != size2) |
| 248 | return false; |
| 249 | s1 = (char *) DatumGetPointer(value1); |
| 250 | s2 = (char *) DatumGetPointer(value2); |
| 251 | res = (memcmp(s1, s2, size1) == 0); |
| 252 | } |
| 253 | return res; |
| 254 | } |
| 255 | |
| 256 | /*------------------------------------------------------------------------- |
| 257 | * datum_image_eq |
| 258 | * |
| 259 | * Compares two datums for identical contents, based on byte images. Return |
| 260 | * true if the two datums are equal, false otherwise. |
| 261 | *------------------------------------------------------------------------- |
| 262 | */ |
| 263 | bool |
| 264 | datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen) |
| 265 | { |
| 266 | bool result = true; |
| 267 | |
| 268 | if (typLen == -1) |
| 269 | { |
| 270 | Size len1, |
| 271 | len2; |
| 272 | |
| 273 | len1 = toast_raw_datum_size(value1); |
| 274 | len2 = toast_raw_datum_size(value2); |
| 275 | /* No need to de-toast if lengths don't match. */ |
| 276 | if (len1 != len2) |
| 277 | result = false; |
| 278 | else |
| 279 | { |
| 280 | struct varlena *arg1val; |
| 281 | struct varlena *arg2val; |
| 282 | |
| 283 | arg1val = PG_DETOAST_DATUM_PACKED(value1); |
| 284 | arg2val = PG_DETOAST_DATUM_PACKED(value2); |
| 285 | |
| 286 | result = (memcmp(VARDATA_ANY(arg1val), |
| 287 | VARDATA_ANY(arg2val), |
| 288 | len1 - VARHDRSZ) == 0); |
| 289 | |
| 290 | /* Only free memory if it's a copy made here. */ |
| 291 | if ((Pointer) arg1val != (Pointer) value1) |
| 292 | pfree(arg1val); |
| 293 | if ((Pointer) arg2val != (Pointer) value2) |
| 294 | pfree(arg2val); |
| 295 | } |
| 296 | } |
| 297 | else if (typByVal) |
| 298 | { |
| 299 | result = (value1 == value2); |
| 300 | } |
| 301 | else |
| 302 | { |
| 303 | result = (memcmp(DatumGetPointer(value1), |
| 304 | DatumGetPointer(value2), |
| 305 | typLen) == 0); |
| 306 | } |
| 307 | |
| 308 | return result; |
| 309 | } |
| 310 | |
| 311 | /*------------------------------------------------------------------------- |
| 312 | * datumEstimateSpace |
| 313 | * |
| 314 | * Compute the amount of space that datumSerialize will require for a |
| 315 | * particular Datum. |
| 316 | *------------------------------------------------------------------------- |
| 317 | */ |
| 318 | Size |
| 319 | datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen) |
| 320 | { |
| 321 | Size sz = sizeof(int); |
| 322 | |
| 323 | if (!isnull) |
| 324 | { |
| 325 | /* no need to use add_size, can't overflow */ |
| 326 | if (typByVal) |
| 327 | sz += sizeof(Datum); |
| 328 | else if (typLen == -1 && |
| 329 | VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value))) |
| 330 | { |
| 331 | /* Expanded objects need to be flattened, see comment below */ |
| 332 | sz += EOH_get_flat_size(DatumGetEOHP(value)); |
| 333 | } |
| 334 | else |
| 335 | sz += datumGetSize(value, typByVal, typLen); |
| 336 | } |
| 337 | |
| 338 | return sz; |
| 339 | } |
| 340 | |
| 341 | /*------------------------------------------------------------------------- |
| 342 | * datumSerialize |
| 343 | * |
| 344 | * Serialize a possibly-NULL datum into caller-provided storage. |
| 345 | * |
| 346 | * Note: "expanded" objects are flattened so as to produce a self-contained |
| 347 | * representation, but other sorts of toast pointers are transferred as-is. |
| 348 | * This is because the intended use of this function is to pass the value |
| 349 | * to another process within the same database server. The other process |
| 350 | * could not access an "expanded" object within this process's memory, but |
| 351 | * we assume it can dereference the same TOAST pointers this one can. |
| 352 | * |
| 353 | * The format is as follows: first, we write a 4-byte header word, which |
| 354 | * is either the length of a pass-by-reference datum, -1 for a |
| 355 | * pass-by-value datum, or -2 for a NULL. If the value is NULL, nothing |
| 356 | * further is written. If it is pass-by-value, sizeof(Datum) bytes |
| 357 | * follow. Otherwise, the number of bytes indicated by the header word |
| 358 | * follow. The caller is responsible for ensuring that there is enough |
| 359 | * storage to store the number of bytes that will be written; use |
| 360 | * datumEstimateSpace() to find out how many will be needed. |
| 361 | * *start_address is updated to point to the byte immediately following |
| 362 | * those written. |
| 363 | *------------------------------------------------------------------------- |
| 364 | */ |
| 365 | void |
| 366 | datumSerialize(Datum value, bool isnull, bool typByVal, int typLen, |
| 367 | char **start_address) |
| 368 | { |
| 369 | ExpandedObjectHeader *eoh = NULL; |
| 370 | int ; |
| 371 | |
| 372 | /* Write header word. */ |
| 373 | if (isnull) |
| 374 | header = -2; |
| 375 | else if (typByVal) |
| 376 | header = -1; |
| 377 | else if (typLen == -1 && |
| 378 | VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value))) |
| 379 | { |
| 380 | eoh = DatumGetEOHP(value); |
| 381 | header = EOH_get_flat_size(eoh); |
| 382 | } |
| 383 | else |
| 384 | header = datumGetSize(value, typByVal, typLen); |
| 385 | memcpy(*start_address, &header, sizeof(int)); |
| 386 | *start_address += sizeof(int); |
| 387 | |
| 388 | /* If not null, write payload bytes. */ |
| 389 | if (!isnull) |
| 390 | { |
| 391 | if (typByVal) |
| 392 | { |
| 393 | memcpy(*start_address, &value, sizeof(Datum)); |
| 394 | *start_address += sizeof(Datum); |
| 395 | } |
| 396 | else if (eoh) |
| 397 | { |
| 398 | char *tmp; |
| 399 | |
| 400 | /* |
| 401 | * EOH_flatten_into expects the target address to be maxaligned, |
| 402 | * so we can't store directly to *start_address. |
| 403 | */ |
| 404 | tmp = (char *) palloc(header); |
| 405 | EOH_flatten_into(eoh, (void *) tmp, header); |
| 406 | memcpy(*start_address, tmp, header); |
| 407 | *start_address += header; |
| 408 | |
| 409 | /* be tidy. */ |
| 410 | pfree(tmp); |
| 411 | } |
| 412 | else |
| 413 | { |
| 414 | memcpy(*start_address, DatumGetPointer(value), header); |
| 415 | *start_address += header; |
| 416 | } |
| 417 | } |
| 418 | } |
| 419 | |
| 420 | /*------------------------------------------------------------------------- |
| 421 | * datumRestore |
| 422 | * |
| 423 | * Restore a possibly-NULL datum previously serialized by datumSerialize. |
| 424 | * *start_address is updated according to the number of bytes consumed. |
| 425 | *------------------------------------------------------------------------- |
| 426 | */ |
| 427 | Datum |
| 428 | datumRestore(char **start_address, bool *isnull) |
| 429 | { |
| 430 | int ; |
| 431 | void *d; |
| 432 | |
| 433 | /* Read header word. */ |
| 434 | memcpy(&header, *start_address, sizeof(int)); |
| 435 | *start_address += sizeof(int); |
| 436 | |
| 437 | /* If this datum is NULL, we can stop here. */ |
| 438 | if (header == -2) |
| 439 | { |
| 440 | *isnull = true; |
| 441 | return (Datum) 0; |
| 442 | } |
| 443 | |
| 444 | /* OK, datum is not null. */ |
| 445 | *isnull = false; |
| 446 | |
| 447 | /* If this datum is pass-by-value, sizeof(Datum) bytes follow. */ |
| 448 | if (header == -1) |
| 449 | { |
| 450 | Datum val; |
| 451 | |
| 452 | memcpy(&val, *start_address, sizeof(Datum)); |
| 453 | *start_address += sizeof(Datum); |
| 454 | return val; |
| 455 | } |
| 456 | |
| 457 | /* Pass-by-reference case; copy indicated number of bytes. */ |
| 458 | Assert(header > 0); |
| 459 | d = palloc(header); |
| 460 | memcpy(d, *start_address, header); |
| 461 | *start_address += header; |
| 462 | return PointerGetDatum(d); |
| 463 | } |
| 464 | |