1 | #include <stdarg.h> |
2 | #include <string.h> |
3 | #include <assert.h> |
4 | #include <string.h> |
5 | #include <stdio.h> |
6 | #include <stdlib.h> |
7 | #include <stdint.h> |
8 | #include <limits.h> |
9 | |
10 | #include "config.h" |
11 | #include "cmark_ctype.h" |
12 | #include "buffer.h" |
13 | |
/* Shared one-byte placeholder used as the default value for
 * cmark_strbuf->ptr, so that callers can always assume ptr is non-NULL
 * and zero terminated, even for a freshly initialised cmark_strbuf.
 *
 * As a file-scope object with no initializer it starts out zeroed.
 * A buffer whose ptr equals this address owns no allocated storage;
 * cmark_strbuf_free compares against it before calling the allocator's
 * free.
 */
unsigned char cmark_strbuf__initbuf[1];
18 | |
#ifndef MIN
/* Smaller of two values.
 *
 * Every use of an argument and the whole expansion are parenthesized so
 * that operands containing lower-precedence operators (e.g. `a & b`,
 * `c ? d : e`) are compared as a unit. Each argument may still be
 * evaluated twice, so do not pass expressions with side effects.
 */
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
22 | |
23 | void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf, |
24 | bufsize_t initial_size) { |
25 | buf->mem = mem; |
26 | buf->asize = 0; |
27 | buf->size = 0; |
28 | buf->ptr = cmark_strbuf__initbuf; |
29 | |
30 | if (initial_size > 0) |
31 | cmark_strbuf_grow(buf, initial_size); |
32 | } |
33 | |
34 | static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) { |
35 | cmark_strbuf_grow(buf, buf->size + add); |
36 | } |
37 | |
38 | void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) { |
39 | assert(target_size > 0); |
40 | |
41 | if (target_size < buf->asize) |
42 | return; |
43 | |
44 | if (target_size > (bufsize_t)(INT32_MAX / 2)) { |
45 | fprintf(stderr, |
46 | "[cmark] cmark_strbuf_grow requests buffer with size > %d, aborting\n" , |
47 | (INT32_MAX / 2)); |
48 | abort(); |
49 | } |
50 | |
51 | /* Oversize the buffer by 50% to guarantee amortized linear time |
52 | * complexity on append operations. */ |
53 | bufsize_t new_size = target_size + target_size / 2; |
54 | new_size += 1; |
55 | new_size = (new_size + 7) & ~7; |
56 | |
57 | buf->ptr = (unsigned char *)buf->mem->realloc(buf->asize ? buf->ptr : NULL, |
58 | new_size); |
59 | buf->asize = new_size; |
60 | } |
61 | |
62 | bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; } |
63 | |
64 | void cmark_strbuf_free(cmark_strbuf *buf) { |
65 | if (!buf) |
66 | return; |
67 | |
68 | if (buf->ptr != cmark_strbuf__initbuf) |
69 | buf->mem->free(buf->ptr); |
70 | |
71 | cmark_strbuf_init(buf->mem, buf, 0); |
72 | } |
73 | |
74 | void cmark_strbuf_clear(cmark_strbuf *buf) { |
75 | buf->size = 0; |
76 | |
77 | if (buf->asize > 0) |
78 | buf->ptr[0] = '\0'; |
79 | } |
80 | |
81 | void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, |
82 | bufsize_t len) { |
83 | if (len <= 0 || data == NULL) { |
84 | cmark_strbuf_clear(buf); |
85 | } else { |
86 | if (data != buf->ptr) { |
87 | if (len >= buf->asize) |
88 | cmark_strbuf_grow(buf, len); |
89 | memmove(buf->ptr, data, len); |
90 | } |
91 | buf->size = len; |
92 | buf->ptr[buf->size] = '\0'; |
93 | } |
94 | } |
95 | |
96 | void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) { |
97 | cmark_strbuf_set(buf, (const unsigned char *)string, |
98 | string ? strlen(string) : 0); |
99 | } |
100 | |
101 | void cmark_strbuf_putc(cmark_strbuf *buf, int c) { |
102 | S_strbuf_grow_by(buf, 1); |
103 | buf->ptr[buf->size++] = (unsigned char)(c & 0xFF); |
104 | buf->ptr[buf->size] = '\0'; |
105 | } |
106 | |
107 | void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, |
108 | bufsize_t len) { |
109 | if (len <= 0) |
110 | return; |
111 | |
112 | S_strbuf_grow_by(buf, len); |
113 | memmove(buf->ptr + buf->size, data, len); |
114 | buf->size += len; |
115 | buf->ptr[buf->size] = '\0'; |
116 | } |
117 | |
118 | void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) { |
119 | cmark_strbuf_put(buf, (const unsigned char *)string, strlen(string)); |
120 | } |
121 | |
122 | void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, |
123 | const cmark_strbuf *buf) { |
124 | bufsize_t copylen; |
125 | |
126 | assert(buf); |
127 | if (!data || datasize <= 0) |
128 | return; |
129 | |
130 | data[0] = '\0'; |
131 | |
132 | if (buf->size == 0 || buf->asize <= 0) |
133 | return; |
134 | |
135 | copylen = buf->size; |
136 | if (copylen > datasize - 1) |
137 | copylen = datasize - 1; |
138 | memmove(data, buf->ptr, copylen); |
139 | data[copylen] = '\0'; |
140 | } |
141 | |
142 | void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b) { |
143 | cmark_strbuf t = *buf_a; |
144 | *buf_a = *buf_b; |
145 | *buf_b = t; |
146 | } |
147 | |
148 | unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) { |
149 | unsigned char *data = buf->ptr; |
150 | |
151 | if (buf->asize == 0) { |
152 | /* return an empty string */ |
153 | return (unsigned char *)buf->mem->calloc(1, 1); |
154 | } |
155 | |
156 | cmark_strbuf_init(buf->mem, buf, 0); |
157 | return data; |
158 | } |
159 | |
160 | int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) { |
161 | int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size)); |
162 | return (result != 0) ? result |
163 | : (a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0; |
164 | } |
165 | |
166 | bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos) { |
167 | if (pos >= buf->size) |
168 | return -1; |
169 | if (pos < 0) |
170 | pos = 0; |
171 | |
172 | const unsigned char *p = |
173 | (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos); |
174 | if (!p) |
175 | return -1; |
176 | |
177 | return (bufsize_t)(p - (const unsigned char *)buf->ptr); |
178 | } |
179 | |
180 | bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos) { |
181 | if (pos < 0 || buf->size == 0) |
182 | return -1; |
183 | if (pos >= buf->size) |
184 | pos = buf->size - 1; |
185 | |
186 | bufsize_t i; |
187 | for (i = pos; i >= 0; i--) { |
188 | if (buf->ptr[i] == (unsigned char)c) |
189 | return i; |
190 | } |
191 | |
192 | return -1; |
193 | } |
194 | |
195 | void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) { |
196 | if (len < 0) |
197 | len = 0; |
198 | |
199 | if (len < buf->size) { |
200 | buf->size = len; |
201 | buf->ptr[buf->size] = '\0'; |
202 | } |
203 | } |
204 | |
205 | void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n) { |
206 | if (n > 0) { |
207 | if (n > buf->size) |
208 | n = buf->size; |
209 | buf->size = buf->size - n; |
210 | if (buf->size) |
211 | memmove(buf->ptr, buf->ptr + n, buf->size); |
212 | |
213 | buf->ptr[buf->size] = '\0'; |
214 | } |
215 | } |
216 | |
217 | void cmark_strbuf_rtrim(cmark_strbuf *buf) { |
218 | if (!buf->size) |
219 | return; |
220 | |
221 | while (buf->size > 0) { |
222 | if (!cmark_isspace(buf->ptr[buf->size - 1])) |
223 | break; |
224 | |
225 | buf->size--; |
226 | } |
227 | |
228 | buf->ptr[buf->size] = '\0'; |
229 | } |
230 | |
231 | void cmark_strbuf_trim(cmark_strbuf *buf) { |
232 | bufsize_t i = 0; |
233 | |
234 | if (!buf->size) |
235 | return; |
236 | |
237 | while (i < buf->size && cmark_isspace(buf->ptr[i])) |
238 | i++; |
239 | |
240 | cmark_strbuf_drop(buf, i); |
241 | |
242 | cmark_strbuf_rtrim(buf); |
243 | } |
244 | |
245 | // Destructively modify string, collapsing consecutive |
246 | // space and newline characters into a single space. |
247 | void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) { |
248 | bool last_char_was_space = false; |
249 | bufsize_t r, w; |
250 | |
251 | for (r = 0, w = 0; r < s->size; ++r) { |
252 | if (cmark_isspace(s->ptr[r])) { |
253 | if (!last_char_was_space) { |
254 | s->ptr[w++] = ' '; |
255 | last_char_was_space = true; |
256 | } |
257 | } else { |
258 | s->ptr[w++] = s->ptr[r]; |
259 | last_char_was_space = false; |
260 | } |
261 | } |
262 | |
263 | cmark_strbuf_truncate(s, w); |
264 | } |
265 | |
266 | // Destructively unescape a string: remove backslashes before punctuation chars. |
267 | extern void cmark_strbuf_unescape(cmark_strbuf *buf) { |
268 | bufsize_t r, w; |
269 | |
270 | for (r = 0, w = 0; r < buf->size; ++r) { |
271 | if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1])) |
272 | r++; |
273 | |
274 | buf->ptr[w++] = buf->ptr[r]; |
275 | } |
276 | |
277 | cmark_strbuf_truncate(buf, w); |
278 | } |
279 | |