| 1 | #include <stdarg.h> |
| 2 | #include <string.h> |
| 3 | #include <assert.h> |
| 4 | #include <string.h> |
| 5 | #include <stdio.h> |
| 6 | #include <stdlib.h> |
| 7 | #include <stdint.h> |
| 8 | #include <limits.h> |
| 9 | |
| 10 | #include "config.h" |
| 11 | #include "cmark_ctype.h" |
| 12 | #include "buffer.h" |
| 13 | |
/* Shared one-byte placeholder that a freshly initialised cmark_strbuf points
 * at.  Because it has static storage it is zero-filled, so callers may treat
 * buf->ptr as a non-NULL, NUL-terminated string even before anything has
 * been allocated for the buffer.
 */
unsigned char cmark_strbuf__initbuf[1] = {0};
| 18 | |
/* Smaller of two values.  Every argument and the whole expansion are
 * parenthesised so that operands containing low-precedence operators
 * (e.g. `a | b`) are compared as written.  Arguments may still be evaluated
 * more than once, so do not pass expressions with side effects.
 */
#ifndef MIN
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
| 22 | |
| 23 | void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf, |
| 24 | bufsize_t initial_size) { |
| 25 | buf->mem = mem; |
| 26 | buf->asize = 0; |
| 27 | buf->size = 0; |
| 28 | buf->ptr = cmark_strbuf__initbuf; |
| 29 | |
| 30 | if (initial_size > 0) |
| 31 | cmark_strbuf_grow(buf, initial_size); |
| 32 | } |
| 33 | |
| 34 | static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) { |
| 35 | cmark_strbuf_grow(buf, buf->size + add); |
| 36 | } |
| 37 | |
| 38 | void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) { |
| 39 | assert(target_size > 0); |
| 40 | |
| 41 | if (target_size < buf->asize) |
| 42 | return; |
| 43 | |
| 44 | if (target_size > (bufsize_t)(INT32_MAX / 2)) { |
| 45 | fprintf(stderr, |
| 46 | "[cmark] cmark_strbuf_grow requests buffer with size > %d, aborting\n" , |
| 47 | (INT32_MAX / 2)); |
| 48 | abort(); |
| 49 | } |
| 50 | |
| 51 | /* Oversize the buffer by 50% to guarantee amortized linear time |
| 52 | * complexity on append operations. */ |
| 53 | bufsize_t new_size = target_size + target_size / 2; |
| 54 | new_size += 1; |
| 55 | new_size = (new_size + 7) & ~7; |
| 56 | |
| 57 | buf->ptr = (unsigned char *)buf->mem->realloc(buf->asize ? buf->ptr : NULL, |
| 58 | new_size); |
| 59 | buf->asize = new_size; |
| 60 | } |
| 61 | |
| 62 | bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; } |
| 63 | |
| 64 | void cmark_strbuf_free(cmark_strbuf *buf) { |
| 65 | if (!buf) |
| 66 | return; |
| 67 | |
| 68 | if (buf->ptr != cmark_strbuf__initbuf) |
| 69 | buf->mem->free(buf->ptr); |
| 70 | |
| 71 | cmark_strbuf_init(buf->mem, buf, 0); |
| 72 | } |
| 73 | |
| 74 | void cmark_strbuf_clear(cmark_strbuf *buf) { |
| 75 | buf->size = 0; |
| 76 | |
| 77 | if (buf->asize > 0) |
| 78 | buf->ptr[0] = '\0'; |
| 79 | } |
| 80 | |
| 81 | void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, |
| 82 | bufsize_t len) { |
| 83 | if (len <= 0 || data == NULL) { |
| 84 | cmark_strbuf_clear(buf); |
| 85 | } else { |
| 86 | if (data != buf->ptr) { |
| 87 | if (len >= buf->asize) |
| 88 | cmark_strbuf_grow(buf, len); |
| 89 | memmove(buf->ptr, data, len); |
| 90 | } |
| 91 | buf->size = len; |
| 92 | buf->ptr[buf->size] = '\0'; |
| 93 | } |
| 94 | } |
| 95 | |
| 96 | void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) { |
| 97 | cmark_strbuf_set(buf, (const unsigned char *)string, |
| 98 | string ? strlen(string) : 0); |
| 99 | } |
| 100 | |
| 101 | void cmark_strbuf_putc(cmark_strbuf *buf, int c) { |
| 102 | S_strbuf_grow_by(buf, 1); |
| 103 | buf->ptr[buf->size++] = (unsigned char)(c & 0xFF); |
| 104 | buf->ptr[buf->size] = '\0'; |
| 105 | } |
| 106 | |
| 107 | void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, |
| 108 | bufsize_t len) { |
| 109 | if (len <= 0) |
| 110 | return; |
| 111 | |
| 112 | S_strbuf_grow_by(buf, len); |
| 113 | memmove(buf->ptr + buf->size, data, len); |
| 114 | buf->size += len; |
| 115 | buf->ptr[buf->size] = '\0'; |
| 116 | } |
| 117 | |
| 118 | void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) { |
| 119 | cmark_strbuf_put(buf, (const unsigned char *)string, strlen(string)); |
| 120 | } |
| 121 | |
| 122 | void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, |
| 123 | const cmark_strbuf *buf) { |
| 124 | bufsize_t copylen; |
| 125 | |
| 126 | assert(buf); |
| 127 | if (!data || datasize <= 0) |
| 128 | return; |
| 129 | |
| 130 | data[0] = '\0'; |
| 131 | |
| 132 | if (buf->size == 0 || buf->asize <= 0) |
| 133 | return; |
| 134 | |
| 135 | copylen = buf->size; |
| 136 | if (copylen > datasize - 1) |
| 137 | copylen = datasize - 1; |
| 138 | memmove(data, buf->ptr, copylen); |
| 139 | data[copylen] = '\0'; |
| 140 | } |
| 141 | |
| 142 | void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b) { |
| 143 | cmark_strbuf t = *buf_a; |
| 144 | *buf_a = *buf_b; |
| 145 | *buf_b = t; |
| 146 | } |
| 147 | |
| 148 | unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) { |
| 149 | unsigned char *data = buf->ptr; |
| 150 | |
| 151 | if (buf->asize == 0) { |
| 152 | /* return an empty string */ |
| 153 | return (unsigned char *)buf->mem->calloc(1, 1); |
| 154 | } |
| 155 | |
| 156 | cmark_strbuf_init(buf->mem, buf, 0); |
| 157 | return data; |
| 158 | } |
| 159 | |
| 160 | int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) { |
| 161 | int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size)); |
| 162 | return (result != 0) ? result |
| 163 | : (a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0; |
| 164 | } |
| 165 | |
| 166 | bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos) { |
| 167 | if (pos >= buf->size) |
| 168 | return -1; |
| 169 | if (pos < 0) |
| 170 | pos = 0; |
| 171 | |
| 172 | const unsigned char *p = |
| 173 | (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos); |
| 174 | if (!p) |
| 175 | return -1; |
| 176 | |
| 177 | return (bufsize_t)(p - (const unsigned char *)buf->ptr); |
| 178 | } |
| 179 | |
| 180 | bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos) { |
| 181 | if (pos < 0 || buf->size == 0) |
| 182 | return -1; |
| 183 | if (pos >= buf->size) |
| 184 | pos = buf->size - 1; |
| 185 | |
| 186 | bufsize_t i; |
| 187 | for (i = pos; i >= 0; i--) { |
| 188 | if (buf->ptr[i] == (unsigned char)c) |
| 189 | return i; |
| 190 | } |
| 191 | |
| 192 | return -1; |
| 193 | } |
| 194 | |
| 195 | void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) { |
| 196 | if (len < 0) |
| 197 | len = 0; |
| 198 | |
| 199 | if (len < buf->size) { |
| 200 | buf->size = len; |
| 201 | buf->ptr[buf->size] = '\0'; |
| 202 | } |
| 203 | } |
| 204 | |
| 205 | void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n) { |
| 206 | if (n > 0) { |
| 207 | if (n > buf->size) |
| 208 | n = buf->size; |
| 209 | buf->size = buf->size - n; |
| 210 | if (buf->size) |
| 211 | memmove(buf->ptr, buf->ptr + n, buf->size); |
| 212 | |
| 213 | buf->ptr[buf->size] = '\0'; |
| 214 | } |
| 215 | } |
| 216 | |
| 217 | void cmark_strbuf_rtrim(cmark_strbuf *buf) { |
| 218 | if (!buf->size) |
| 219 | return; |
| 220 | |
| 221 | while (buf->size > 0) { |
| 222 | if (!cmark_isspace(buf->ptr[buf->size - 1])) |
| 223 | break; |
| 224 | |
| 225 | buf->size--; |
| 226 | } |
| 227 | |
| 228 | buf->ptr[buf->size] = '\0'; |
| 229 | } |
| 230 | |
| 231 | void cmark_strbuf_trim(cmark_strbuf *buf) { |
| 232 | bufsize_t i = 0; |
| 233 | |
| 234 | if (!buf->size) |
| 235 | return; |
| 236 | |
| 237 | while (i < buf->size && cmark_isspace(buf->ptr[i])) |
| 238 | i++; |
| 239 | |
| 240 | cmark_strbuf_drop(buf, i); |
| 241 | |
| 242 | cmark_strbuf_rtrim(buf); |
| 243 | } |
| 244 | |
| 245 | // Destructively modify string, collapsing consecutive |
| 246 | // space and newline characters into a single space. |
| 247 | void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) { |
| 248 | bool last_char_was_space = false; |
| 249 | bufsize_t r, w; |
| 250 | |
| 251 | for (r = 0, w = 0; r < s->size; ++r) { |
| 252 | if (cmark_isspace(s->ptr[r])) { |
| 253 | if (!last_char_was_space) { |
| 254 | s->ptr[w++] = ' '; |
| 255 | last_char_was_space = true; |
| 256 | } |
| 257 | } else { |
| 258 | s->ptr[w++] = s->ptr[r]; |
| 259 | last_char_was_space = false; |
| 260 | } |
| 261 | } |
| 262 | |
| 263 | cmark_strbuf_truncate(s, w); |
| 264 | } |
| 265 | |
| 266 | // Destructively unescape a string: remove backslashes before punctuation chars. |
| 267 | extern void cmark_strbuf_unescape(cmark_strbuf *buf) { |
| 268 | bufsize_t r, w; |
| 269 | |
| 270 | for (r = 0, w = 0; r < buf->size; ++r) { |
| 271 | if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1])) |
| 272 | r++; |
| 273 | |
| 274 | buf->ptr[w++] = buf->ptr[r]; |
| 275 | } |
| 276 | |
| 277 | cmark_strbuf_truncate(buf, w); |
| 278 | } |
| 279 | |