1/*
2 * QEMU Enhanced Disk Format Consistency Check
3 *
4 * Copyright IBM, Corp. 2010
5 *
6 * Authors:
7 * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
10 * See the COPYING.LIB file in the top-level directory.
11 *
12 */
13
14#include "qemu/osdep.h"
15#include "qed.h"
16
/*
 * Working state for one consistency-check pass over a QED image.
 */
typedef struct {
    BDRVQEDState *s;            /* driver state of the image being checked */
    BdrvCheckResult *result;    /* counters updated while checking */
    bool fix; /* whether to fix invalid offsets */

    uint64_t nclusters;         /* s->file_size converted to clusters */
    uint32_t *used_clusters; /* referenced cluster bitmap, 32 bits per word */

    QEDRequest request;         /* passed to the L2 table read/write helpers */
} QEDCheck;
27
/**
 * Test a bit in a bitmap made of 32-bit words
 *
 * @bitmap: Bitmap to read
 * @n: Bit index
 * @ret: true if bit @n is set, false otherwise
 */
static bool qed_test_bit(uint32_t *bitmap, uint64_t n)
{
    /* Shift an unsigned value: a signed 1 << 31 would be undefined */
    return (bitmap[n / 32] & ((uint32_t)1 << (n % 32))) != 0;
}
31
/**
 * Set a bit in a bitmap made of 32-bit words
 *
 * @bitmap: Bitmap to modify
 * @n: Bit index
 */
static void qed_set_bit(uint32_t *bitmap, uint64_t n)
{
    /* Shift an unsigned value: a signed 1 << 31 would be undefined */
    bitmap[n / 32] |= (uint32_t)1 << (n % 32);
}
35
36/**
37 * Set bitmap bits for clusters
38 *
39 * @check: Check structure
40 * @offset: Starting offset in bytes
41 * @n: Number of clusters
42 */
43static bool qed_set_used_clusters(QEDCheck *check, uint64_t offset,
44 unsigned int n)
45{
46 uint64_t cluster = qed_bytes_to_clusters(check->s, offset);
47 unsigned int corruptions = 0;
48
49 while (n-- != 0) {
50 /* Clusters should only be referenced once */
51 if (qed_test_bit(check->used_clusters, cluster)) {
52 corruptions++;
53 }
54
55 qed_set_bit(check->used_clusters, cluster);
56 cluster++;
57 }
58
59 check->result->corruptions += corruptions;
60 return corruptions == 0;
61}
62
63/**
64 * Check an L2 table
65 *
66 * @ret: Number of invalid cluster offsets
67 */
68static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table)
69{
70 BDRVQEDState *s = check->s;
71 unsigned int i, num_invalid = 0;
72 uint64_t last_offset = 0;
73
74 for (i = 0; i < s->table_nelems; i++) {
75 uint64_t offset = table->offsets[i];
76
77 if (qed_offset_is_unalloc_cluster(offset) ||
78 qed_offset_is_zero_cluster(offset)) {
79 continue;
80 }
81 check->result->bfi.allocated_clusters++;
82 if (last_offset && (last_offset + s->header.cluster_size != offset)) {
83 check->result->bfi.fragmented_clusters++;
84 }
85 last_offset = offset;
86
87 /* Detect invalid cluster offset */
88 if (!qed_check_cluster_offset(s, offset)) {
89 if (check->fix) {
90 table->offsets[i] = 0;
91 check->result->corruptions_fixed++;
92 } else {
93 check->result->corruptions++;
94 }
95
96 num_invalid++;
97 continue;
98 }
99
100 qed_set_used_clusters(check, offset, 1);
101 }
102
103 return num_invalid;
104}
105
106/**
107 * Descend tables and check each cluster is referenced once only
108 */
109static int coroutine_fn qed_check_l1_table(QEDCheck *check, QEDTable *table)
110{
111 BDRVQEDState *s = check->s;
112 unsigned int i, num_invalid_l1 = 0;
113 int ret, last_error = 0;
114
115 /* Mark L1 table clusters used */
116 qed_set_used_clusters(check, s->header.l1_table_offset,
117 s->header.table_size);
118
119 for (i = 0; i < s->table_nelems; i++) {
120 unsigned int num_invalid_l2;
121 uint64_t offset = table->offsets[i];
122
123 if (qed_offset_is_unalloc_cluster(offset)) {
124 continue;
125 }
126
127 /* Detect invalid L2 offset */
128 if (!qed_check_table_offset(s, offset)) {
129 /* Clear invalid offset */
130 if (check->fix) {
131 table->offsets[i] = 0;
132 check->result->corruptions_fixed++;
133 } else {
134 check->result->corruptions++;
135 }
136
137 num_invalid_l1++;
138 continue;
139 }
140
141 if (!qed_set_used_clusters(check, offset, s->header.table_size)) {
142 continue; /* skip an invalid table */
143 }
144
145 ret = qed_read_l2_table_sync(s, &check->request, offset);
146 if (ret) {
147 check->result->check_errors++;
148 last_error = ret;
149 continue;
150 }
151
152 num_invalid_l2 = qed_check_l2_table(check,
153 check->request.l2_table->table);
154
155 /* Write out fixed L2 table */
156 if (num_invalid_l2 > 0 && check->fix) {
157 ret = qed_write_l2_table_sync(s, &check->request, 0,
158 s->table_nelems, false);
159 if (ret) {
160 check->result->check_errors++;
161 last_error = ret;
162 continue;
163 }
164 }
165 }
166
167 /* Drop reference to final table */
168 qed_unref_l2_cache_entry(check->request.l2_table);
169 check->request.l2_table = NULL;
170
171 /* Write out fixed L1 table */
172 if (num_invalid_l1 > 0 && check->fix) {
173 ret = qed_write_l1_table_sync(s, 0, s->table_nelems);
174 if (ret) {
175 check->result->check_errors++;
176 last_error = ret;
177 }
178 }
179
180 return last_error;
181}
182
183/**
184 * Check for unreferenced (leaked) clusters
185 */
186static void qed_check_for_leaks(QEDCheck *check)
187{
188 BDRVQEDState *s = check->s;
189 uint64_t i;
190
191 for (i = s->header.header_size; i < check->nclusters; i++) {
192 if (!qed_test_bit(check->used_clusters, i)) {
193 check->result->leaks++;
194 }
195 }
196}
197
198/**
199 * Mark an image clean once it passes check or has been repaired
200 */
201static void qed_check_mark_clean(BDRVQEDState *s, BdrvCheckResult *result)
202{
203 /* Skip if there were unfixable corruptions or I/O errors */
204 if (result->corruptions > 0 || result->check_errors > 0) {
205 return;
206 }
207
208 /* Skip if image is already marked clean */
209 if (!(s->header.features & QED_F_NEED_CHECK)) {
210 return;
211 }
212
213 /* Ensure fixes reach storage before clearing check bit */
214 bdrv_flush(s->bs);
215
216 s->header.features &= ~QED_F_NEED_CHECK;
217 qed_write_header_sync(s);
218}
219
220/* Called with table_lock held. */
221int coroutine_fn qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix)
222{
223 QEDCheck check = {
224 .s = s,
225 .result = result,
226 .nclusters = qed_bytes_to_clusters(s, s->file_size),
227 .request = { .l2_table = NULL },
228 .fix = fix,
229 };
230 int ret;
231
232 check.used_clusters = g_try_new0(uint32_t, (check.nclusters + 31) / 32);
233 if (check.nclusters && check.used_clusters == NULL) {
234 return -ENOMEM;
235 }
236
237 check.result->bfi.total_clusters =
238 DIV_ROUND_UP(s->header.image_size, s->header.cluster_size);
239 ret = qed_check_l1_table(&check, s->l1_table);
240 if (ret == 0) {
241 /* Only check for leaks if entire image was scanned successfully */
242 qed_check_for_leaks(&check);
243
244 if (fix) {
245 qed_check_mark_clean(s, result);
246 }
247 }
248
249 g_free(check.used_clusters);
250 return ret;
251}
252