| 1 | /* |
| 2 | * This file is part of the MicroPython project, http://micropython.org/ |
| 3 | * |
| 4 | * The MIT License (MIT) |
| 5 | * |
| 6 | * Copyright (c) 2013, 2014 Damien P. George |
| 7 | * |
| 8 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 9 | * of this software and associated documentation files (the "Software"), to deal |
| 10 | * in the Software without restriction, including without limitation the rights |
| 11 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 12 | * copies of the Software, and to permit persons to whom the Software is |
| 13 | * furnished to do so, subject to the following conditions: |
| 14 | * |
| 15 | * The above copyright notice and this permission notice shall be included in |
| 16 | * all copies or substantial portions of the Software. |
| 17 | * |
| 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 21 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 23 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 24 | * THE SOFTWARE. |
| 25 | */ |
| 26 | |
| 27 | #include <stdint.h> |
| 28 | #include <stdlib.h> |
| 29 | #include <string.h> |
| 30 | #include <assert.h> |
| 31 | |
| 32 | #include "py/mpconfig.h" |
| 33 | #include "py/misc.h" |
| 34 | #include "py/runtime.h" |
| 35 | |
// Debug output control for this file.  DEBUG_PRINT records whether verbose
// debugging is enabled.  When it is disabled, DEBUG_printf() expands to a
// no-op expression; when it is enabled this file does not define DEBUG_printf
// itself.
#if !MICROPY_DEBUG_VERBOSE
// verbose debugging disabled: compile the debug prints away
#define DEBUG_PRINT (0)
#define DEBUG_printf(...) (void)0
#else
// verbose debugging enabled
#define DEBUG_PRINT (1)
#endif
| 42 | |
| 43 | // This table of sizes is used to control the growth of hash tables. |
| 44 | // The first set of sizes are chosen so the allocation fits exactly in a |
| 45 | // 4-word GC block, and it's not so important for these small values to be |
| 46 | // prime. The latter sizes are prime and increase at an increasing rate. |
| 47 | STATIC const uint16_t hash_allocation_sizes[] = { |
| 48 | 0, 2, 4, 6, 8, 10, 12, // +2 |
| 49 | 17, 23, 29, 37, 47, 59, 73, // *1.25 |
| 50 | 97, 127, 167, 223, 293, 389, 521, 691, 919, 1223, 1627, 2161, // *1.33 |
| 51 | 3229, 4831, 7243, 10861, 16273, 24407, 36607, 54907, // *1.5 |
| 52 | }; |
| 53 | |
| 54 | STATIC size_t get_hash_alloc_greater_or_equal_to(size_t x) { |
| 55 | for (size_t i = 0; i < MP_ARRAY_SIZE(hash_allocation_sizes); i++) { |
| 56 | if (hash_allocation_sizes[i] >= x) { |
| 57 | return hash_allocation_sizes[i]; |
| 58 | } |
| 59 | } |
| 60 | // ran out of primes in the table! |
| 61 | // return something sensible, at least make it odd |
| 62 | return (x + x / 2) | 1; |
| 63 | } |
| 64 | |
| 65 | /******************************************************************************/ |
| 66 | /* map */ |
| 67 | |
| 68 | void mp_map_init(mp_map_t *map, size_t n) { |
| 69 | if (n == 0) { |
| 70 | map->alloc = 0; |
| 71 | map->table = NULL; |
| 72 | } else { |
| 73 | map->alloc = n; |
| 74 | map->table = m_new0(mp_map_elem_t, map->alloc); |
| 75 | } |
| 76 | map->used = 0; |
| 77 | map->all_keys_are_qstrs = 1; |
| 78 | map->is_fixed = 0; |
| 79 | map->is_ordered = 0; |
| 80 | } |
| 81 | |
| 82 | void mp_map_init_fixed_table(mp_map_t *map, size_t n, const mp_obj_t *table) { |
| 83 | map->alloc = n; |
| 84 | map->used = n; |
| 85 | map->all_keys_are_qstrs = 1; |
| 86 | map->is_fixed = 1; |
| 87 | map->is_ordered = 1; |
| 88 | map->table = (mp_map_elem_t *)table; |
| 89 | } |
| 90 | |
| 91 | // Differentiate from mp_map_clear() - semantics is different |
| 92 | void mp_map_deinit(mp_map_t *map) { |
| 93 | if (!map->is_fixed) { |
| 94 | m_del(mp_map_elem_t, map->table, map->alloc); |
| 95 | } |
| 96 | map->used = map->alloc = 0; |
| 97 | } |
| 98 | |
| 99 | void mp_map_clear(mp_map_t *map) { |
| 100 | if (!map->is_fixed) { |
| 101 | m_del(mp_map_elem_t, map->table, map->alloc); |
| 102 | } |
| 103 | map->alloc = 0; |
| 104 | map->used = 0; |
| 105 | map->all_keys_are_qstrs = 1; |
| 106 | map->is_fixed = 0; |
| 107 | map->table = NULL; |
| 108 | } |
| 109 | |
| 110 | STATIC void mp_map_rehash(mp_map_t *map) { |
| 111 | size_t old_alloc = map->alloc; |
| 112 | size_t new_alloc = get_hash_alloc_greater_or_equal_to(map->alloc + 1); |
| 113 | DEBUG_printf("mp_map_rehash(%p): " UINT_FMT " -> " UINT_FMT "\n" , map, old_alloc, new_alloc); |
| 114 | mp_map_elem_t *old_table = map->table; |
| 115 | mp_map_elem_t *new_table = m_new0(mp_map_elem_t, new_alloc); |
| 116 | // If we reach this point, table resizing succeeded, now we can edit the old map. |
| 117 | map->alloc = new_alloc; |
| 118 | map->used = 0; |
| 119 | map->all_keys_are_qstrs = 1; |
| 120 | map->table = new_table; |
| 121 | for (size_t i = 0; i < old_alloc; i++) { |
| 122 | if (old_table[i].key != MP_OBJ_NULL && old_table[i].key != MP_OBJ_SENTINEL) { |
| 123 | mp_map_lookup(map, old_table[i].key, MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = old_table[i].value; |
| 124 | } |
| 125 | } |
| 126 | m_del(mp_map_elem_t, old_table, old_alloc); |
| 127 | } |
| 128 | |
| 129 | // MP_MAP_LOOKUP behaviour: |
| 130 | // - returns NULL if not found, else the slot it was found in with key,value non-null |
| 131 | // MP_MAP_LOOKUP_ADD_IF_NOT_FOUND behaviour: |
| 132 | // - returns slot, with key non-null and value=MP_OBJ_NULL if it was added |
| 133 | // MP_MAP_LOOKUP_REMOVE_IF_FOUND behaviour: |
| 134 | // - returns NULL if not found, else the slot if was found in with key null and value non-null |
| 135 | mp_map_elem_t *mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t lookup_kind) { |
| 136 | // If the map is a fixed array then we must only be called for a lookup |
| 137 | assert(!map->is_fixed || lookup_kind == MP_MAP_LOOKUP); |
| 138 | |
| 139 | // Work out if we can compare just pointers |
| 140 | bool compare_only_ptrs = map->all_keys_are_qstrs; |
| 141 | if (compare_only_ptrs) { |
| 142 | if (mp_obj_is_qstr(index)) { |
| 143 | // Index is a qstr, so can just do ptr comparison. |
| 144 | } else if (mp_obj_is_type(index, &mp_type_str)) { |
| 145 | // Index is a non-interned string. |
| 146 | // We can either intern the string, or force a full equality comparison. |
| 147 | // We chose the latter, since interning costs time and potentially RAM, |
| 148 | // and it won't necessarily benefit subsequent calls because these calls |
| 149 | // most likely won't pass the newly-interned string. |
| 150 | compare_only_ptrs = false; |
| 151 | } else if (lookup_kind != MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { |
| 152 | // If we are not adding, then we can return straight away a failed |
| 153 | // lookup because we know that the index will never be found. |
| 154 | return NULL; |
| 155 | } |
| 156 | } |
| 157 | |
| 158 | // if the map is an ordered array then we must do a brute force linear search |
| 159 | if (map->is_ordered) { |
| 160 | for (mp_map_elem_t *elem = &map->table[0], *top = &map->table[map->used]; elem < top; elem++) { |
| 161 | if (elem->key == index || (!compare_only_ptrs && mp_obj_equal(elem->key, index))) { |
| 162 | #if MICROPY_PY_COLLECTIONS_ORDEREDDICT |
| 163 | if (MP_UNLIKELY(lookup_kind == MP_MAP_LOOKUP_REMOVE_IF_FOUND)) { |
| 164 | // remove the found element by moving the rest of the array down |
| 165 | mp_obj_t value = elem->value; |
| 166 | --map->used; |
| 167 | memmove(elem, elem + 1, (top - elem - 1) * sizeof(*elem)); |
| 168 | // put the found element after the end so the caller can access it if needed |
| 169 | // note: caller must NULL the value so the GC can clean up (e.g. see dict_get_helper). |
| 170 | elem = &map->table[map->used]; |
| 171 | elem->key = MP_OBJ_NULL; |
| 172 | elem->value = value; |
| 173 | } |
| 174 | #endif |
| 175 | return elem; |
| 176 | } |
| 177 | } |
| 178 | #if MICROPY_PY_COLLECTIONS_ORDEREDDICT |
| 179 | if (MP_LIKELY(lookup_kind != MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)) { |
| 180 | return NULL; |
| 181 | } |
| 182 | if (map->used == map->alloc) { |
| 183 | // TODO: Alloc policy |
| 184 | map->alloc += 4; |
| 185 | map->table = m_renew(mp_map_elem_t, map->table, map->used, map->alloc); |
| 186 | mp_seq_clear(map->table, map->used, map->alloc, sizeof(*map->table)); |
| 187 | } |
| 188 | mp_map_elem_t *elem = map->table + map->used++; |
| 189 | elem->key = index; |
| 190 | if (!mp_obj_is_qstr(index)) { |
| 191 | map->all_keys_are_qstrs = 0; |
| 192 | } |
| 193 | return elem; |
| 194 | #else |
| 195 | return NULL; |
| 196 | #endif |
| 197 | } |
| 198 | |
| 199 | // map is a hash table (not an ordered array), so do a hash lookup |
| 200 | |
| 201 | if (map->alloc == 0) { |
| 202 | if (lookup_kind == MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { |
| 203 | mp_map_rehash(map); |
| 204 | } else { |
| 205 | return NULL; |
| 206 | } |
| 207 | } |
| 208 | |
| 209 | // get hash of index, with fast path for common case of qstr |
| 210 | mp_uint_t hash; |
| 211 | if (mp_obj_is_qstr(index)) { |
| 212 | hash = qstr_hash(MP_OBJ_QSTR_VALUE(index)); |
| 213 | } else { |
| 214 | hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index)); |
| 215 | } |
| 216 | |
| 217 | size_t pos = hash % map->alloc; |
| 218 | size_t start_pos = pos; |
| 219 | mp_map_elem_t *avail_slot = NULL; |
| 220 | for (;;) { |
| 221 | mp_map_elem_t *slot = &map->table[pos]; |
| 222 | if (slot->key == MP_OBJ_NULL) { |
| 223 | // found NULL slot, so index is not in table |
| 224 | if (lookup_kind == MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { |
| 225 | map->used += 1; |
| 226 | if (avail_slot == NULL) { |
| 227 | avail_slot = slot; |
| 228 | } |
| 229 | avail_slot->key = index; |
| 230 | avail_slot->value = MP_OBJ_NULL; |
| 231 | if (!mp_obj_is_qstr(index)) { |
| 232 | map->all_keys_are_qstrs = 0; |
| 233 | } |
| 234 | return avail_slot; |
| 235 | } else { |
| 236 | return NULL; |
| 237 | } |
| 238 | } else if (slot->key == MP_OBJ_SENTINEL) { |
| 239 | // found deleted slot, remember for later |
| 240 | if (avail_slot == NULL) { |
| 241 | avail_slot = slot; |
| 242 | } |
| 243 | } else if (slot->key == index || (!compare_only_ptrs && mp_obj_equal(slot->key, index))) { |
| 244 | // found index |
| 245 | // Note: CPython does not replace the index; try x={True:'true'};x[1]='one';x |
| 246 | if (lookup_kind == MP_MAP_LOOKUP_REMOVE_IF_FOUND) { |
| 247 | // delete element in this slot |
| 248 | map->used--; |
| 249 | if (map->table[(pos + 1) % map->alloc].key == MP_OBJ_NULL) { |
| 250 | // optimisation if next slot is empty |
| 251 | slot->key = MP_OBJ_NULL; |
| 252 | } else { |
| 253 | slot->key = MP_OBJ_SENTINEL; |
| 254 | } |
| 255 | // keep slot->value so that caller can access it if needed |
| 256 | } |
| 257 | return slot; |
| 258 | } |
| 259 | |
| 260 | // not yet found, keep searching in this table |
| 261 | pos = (pos + 1) % map->alloc; |
| 262 | |
| 263 | if (pos == start_pos) { |
| 264 | // search got back to starting position, so index is not in table |
| 265 | if (lookup_kind == MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { |
| 266 | if (avail_slot != NULL) { |
| 267 | // there was an available slot, so use that |
| 268 | map->used++; |
| 269 | avail_slot->key = index; |
| 270 | avail_slot->value = MP_OBJ_NULL; |
| 271 | if (!mp_obj_is_qstr(index)) { |
| 272 | map->all_keys_are_qstrs = 0; |
| 273 | } |
| 274 | return avail_slot; |
| 275 | } else { |
| 276 | // not enough room in table, rehash it |
| 277 | mp_map_rehash(map); |
| 278 | // restart the search for the new element |
| 279 | start_pos = pos = hash % map->alloc; |
| 280 | } |
| 281 | } else { |
| 282 | return NULL; |
| 283 | } |
| 284 | } |
| 285 | } |
| 286 | } |
| 287 | |
| 288 | /******************************************************************************/ |
| 289 | /* set */ |
| 290 | |
| 291 | #if MICROPY_PY_BUILTINS_SET |
| 292 | |
| 293 | void mp_set_init(mp_set_t *set, size_t n) { |
| 294 | set->alloc = n; |
| 295 | set->used = 0; |
| 296 | set->table = m_new0(mp_obj_t, set->alloc); |
| 297 | } |
| 298 | |
| 299 | STATIC void mp_set_rehash(mp_set_t *set) { |
| 300 | size_t old_alloc = set->alloc; |
| 301 | mp_obj_t *old_table = set->table; |
| 302 | set->alloc = get_hash_alloc_greater_or_equal_to(set->alloc + 1); |
| 303 | set->used = 0; |
| 304 | set->table = m_new0(mp_obj_t, set->alloc); |
| 305 | for (size_t i = 0; i < old_alloc; i++) { |
| 306 | if (old_table[i] != MP_OBJ_NULL && old_table[i] != MP_OBJ_SENTINEL) { |
| 307 | mp_set_lookup(set, old_table[i], MP_MAP_LOOKUP_ADD_IF_NOT_FOUND); |
| 308 | } |
| 309 | } |
| 310 | m_del(mp_obj_t, old_table, old_alloc); |
| 311 | } |
| 312 | |
| 313 | mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t lookup_kind) { |
| 314 | // Note: lookup_kind can be MP_MAP_LOOKUP_ADD_IF_NOT_FOUND_OR_REMOVE_IF_FOUND which |
| 315 | // is handled by using bitwise operations. |
| 316 | |
| 317 | if (set->alloc == 0) { |
| 318 | if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { |
| 319 | mp_set_rehash(set); |
| 320 | } else { |
| 321 | return MP_OBJ_NULL; |
| 322 | } |
| 323 | } |
| 324 | mp_uint_t hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index)); |
| 325 | size_t pos = hash % set->alloc; |
| 326 | size_t start_pos = pos; |
| 327 | mp_obj_t *avail_slot = NULL; |
| 328 | for (;;) { |
| 329 | mp_obj_t elem = set->table[pos]; |
| 330 | if (elem == MP_OBJ_NULL) { |
| 331 | // found NULL slot, so index is not in table |
| 332 | if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { |
| 333 | if (avail_slot == NULL) { |
| 334 | avail_slot = &set->table[pos]; |
| 335 | } |
| 336 | set->used++; |
| 337 | *avail_slot = index; |
| 338 | return index; |
| 339 | } else { |
| 340 | return MP_OBJ_NULL; |
| 341 | } |
| 342 | } else if (elem == MP_OBJ_SENTINEL) { |
| 343 | // found deleted slot, remember for later |
| 344 | if (avail_slot == NULL) { |
| 345 | avail_slot = &set->table[pos]; |
| 346 | } |
| 347 | } else if (mp_obj_equal(elem, index)) { |
| 348 | // found index |
| 349 | if (lookup_kind & MP_MAP_LOOKUP_REMOVE_IF_FOUND) { |
| 350 | // delete element |
| 351 | set->used--; |
| 352 | if (set->table[(pos + 1) % set->alloc] == MP_OBJ_NULL) { |
| 353 | // optimisation if next slot is empty |
| 354 | set->table[pos] = MP_OBJ_NULL; |
| 355 | } else { |
| 356 | set->table[pos] = MP_OBJ_SENTINEL; |
| 357 | } |
| 358 | } |
| 359 | return elem; |
| 360 | } |
| 361 | |
| 362 | // not yet found, keep searching in this table |
| 363 | pos = (pos + 1) % set->alloc; |
| 364 | |
| 365 | if (pos == start_pos) { |
| 366 | // search got back to starting position, so index is not in table |
| 367 | if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { |
| 368 | if (avail_slot != NULL) { |
| 369 | // there was an available slot, so use that |
| 370 | set->used++; |
| 371 | *avail_slot = index; |
| 372 | return index; |
| 373 | } else { |
| 374 | // not enough room in table, rehash it |
| 375 | mp_set_rehash(set); |
| 376 | // restart the search for the new element |
| 377 | start_pos = pos = hash % set->alloc; |
| 378 | } |
| 379 | } else { |
| 380 | return MP_OBJ_NULL; |
| 381 | } |
| 382 | } |
| 383 | } |
| 384 | } |
| 385 | |
| 386 | mp_obj_t mp_set_remove_first(mp_set_t *set) { |
| 387 | for (size_t pos = 0; pos < set->alloc; pos++) { |
| 388 | if (mp_set_slot_is_filled(set, pos)) { |
| 389 | mp_obj_t elem = set->table[pos]; |
| 390 | // delete element |
| 391 | set->used--; |
| 392 | if (set->table[(pos + 1) % set->alloc] == MP_OBJ_NULL) { |
| 393 | // optimisation if next slot is empty |
| 394 | set->table[pos] = MP_OBJ_NULL; |
| 395 | } else { |
| 396 | set->table[pos] = MP_OBJ_SENTINEL; |
| 397 | } |
| 398 | return elem; |
| 399 | } |
| 400 | } |
| 401 | return MP_OBJ_NULL; |
| 402 | } |
| 403 | |
| 404 | void mp_set_clear(mp_set_t *set) { |
| 405 | m_del(mp_obj_t, set->table, set->alloc); |
| 406 | set->alloc = 0; |
| 407 | set->used = 0; |
| 408 | set->table = NULL; |
| 409 | } |
| 410 | |
| 411 | #endif // MICROPY_PY_BUILTINS_SET |
| 412 | |
#if defined(DEBUG_PRINT) && DEBUG_PRINT
// Debug helper: print one line per slot of `map` in table order, showing the
// key (or "(nil)" for a slot whose key is MP_OBJ_NULL) followed by the value
// pointer, and finish with a "---" separator line.
void mp_map_dump(mp_map_t *map) {
    size_t slot = 0;
    while (slot < map->alloc) {
        if (map->table[slot].key == MP_OBJ_NULL) {
            DEBUG_printf("(nil)");
        } else {
            mp_obj_print(map->table[slot].key, PRINT_REPR);
        }
        DEBUG_printf(": %p\n", map->table[slot].value);
        slot++;
    }
    DEBUG_printf("---\n");
}
#endif
| 426 | |