/*
 * QEMU Enhanced Disk Format
 *
 * Copyright IBM, Corp. 2010
 *
 * Authors:
 *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */

#ifndef BLOCK_QED_H
#define BLOCK_QED_H

#include "block/block_int.h"
#include "qemu/cutils.h"

/* The layout of a QED file is as follows:
 *
 * +--------+----------+----------+----------+-----+
 * | header | L1 table | cluster0 | cluster1 | ... |
 * +--------+----------+----------+----------+-----+
 *
 * There is a 2-level pagetable for cluster allocation:
 *
 *                     +----------+
 *                     | L1 table |
 *                     +----------+
 *                ,------'  |  '------.
 *           +----------+   |    +----------+
 *           | L2 table |  ...   | L2 table |
 *           +----------+        +----------+
 *                ,------'  |  '------.
 *           +----------+   |    +----------+
 *           |   Data   |  ...   |   Data   |
 *           +----------+        +----------+
 *
 * The L1 table is fixed size and always present.  L2 tables are allocated on
 * demand.  The L1 table size determines the maximum possible image size; it
 * can be influenced using the cluster_size and table_size values.
 *
 * All fields are little-endian on disk.
 */
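/* Illustrative arithmetic only (not part of the format definition): each
 * table holds table_size * cluster_size / sizeof(uint64_t) offsets, so with
 * the defaults below (table_size = 4, 64 KB clusters) an L1 or L2 table has
 * 32768 entries and the maximum image size is
 * 32768 * 32768 * 65536 bytes = 64 TiB.
 */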
#define QED_DEFAULT_CLUSTER_SIZE 65536
enum {
    QED_MAGIC = 'Q' | 'E' << 8 | 'D' << 16 | '\0' << 24,

    /* The image supports a backing file */
    QED_F_BACKING_FILE = 0x01,

    /* The image needs a consistency check before use */
    QED_F_NEED_CHECK = 0x02,

    /* The backing file format must not be probed, treat as raw image */
    QED_F_BACKING_FORMAT_NO_PROBE = 0x04,

    /* Feature bits must be used when the on-disk format changes */
    QED_FEATURE_MASK = QED_F_BACKING_FILE |    /* supported feature bits */
                       QED_F_NEED_CHECK |
                       QED_F_BACKING_FORMAT_NO_PROBE,
    QED_COMPAT_FEATURE_MASK = 0,               /* supported compat feature bits */
    QED_AUTOCLEAR_FEATURE_MASK = 0,            /* supported autoclear feature bits */

    /* Data is stored in groups of sectors called clusters.  Cluster size must
     * be large to avoid keeping too much metadata.  I/O requests that have
     * sub-cluster size will require read-modify-write.
     */
    QED_MIN_CLUSTER_SIZE = 4 * 1024,    /* in bytes */
    QED_MAX_CLUSTER_SIZE = 64 * 1024 * 1024,

    /* Allocated clusters are tracked using a 2-level pagetable.  Table size is
     * a multiple of clusters so large maximum image sizes can be supported
     * without jacking up the cluster size too much.
     */
    QED_MIN_TABLE_SIZE = 1,             /* in clusters */
    QED_MAX_TABLE_SIZE = 16,
    QED_DEFAULT_TABLE_SIZE = 4,

    /* Delay to flush and clean image after last allocating write completes */
    QED_NEED_CHECK_TIMEOUT = 5,         /* in seconds */
};
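
/* A minimal sketch of how the masks above are meant to be consumed when an
 * image is opened (illustration only, not a declaration of this driver's
 * open path; 'header' stands for a cpu-endian copy of the on-disk header):
 *
 *     if (header.features & ~(uint64_t)QED_FEATURE_MASK) {
 *         return -ENOTSUP;   // unknown incompatible feature bit
 *     }
 *     // compat_features bits outside QED_COMPAT_FEATURE_MASK may be ignored;
 *     // autoclear_features bits outside QED_AUTOCLEAR_FEATURE_MASK must be
 *     // cleared before the image is modified.
 */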

typedef struct {
    uint32_t magic;                 /* QED\0 */

    uint32_t cluster_size;          /* in bytes */
    uint32_t table_size;            /* for L1 and L2 tables, in clusters */
    uint32_t header_size;           /* in clusters */

    uint64_t features;              /* format feature bits */
    uint64_t compat_features;       /* compatible feature bits */
    uint64_t autoclear_features;    /* self-resetting feature bits */

    uint64_t l1_table_offset;       /* in bytes */
    uint64_t image_size;            /* total logical image size, in bytes */

    /* if (features & QED_F_BACKING_FILE) */
    uint32_t backing_filename_offset;    /* in bytes from start of header */
    uint32_t backing_filename_size;      /* in bytes */
} QEMU_PACKED QEDHeader;

typedef struct {
    uint64_t offsets[0];            /* in bytes */
} QEDTable;

/* The L2 cache is a simple write-through cache for L2 structures */
typedef struct CachedL2Table {
    QEDTable *table;
    uint64_t offset;                /* offset=0 indicates an invalid entry */
    QTAILQ_ENTRY(CachedL2Table) node;
    int ref;
} CachedL2Table;

typedef struct {
    QTAILQ_HEAD(, CachedL2Table) entries;
    unsigned int n_entries;
} L2TableCache;

typedef struct QEDRequest {
    CachedL2Table *l2_table;
} QEDRequest;

enum {
    QED_AIOCB_WRITE = 0x0001,       /* read or write? */
    QED_AIOCB_ZERO  = 0x0002,       /* zero write, used with QED_AIOCB_WRITE */
};

typedef struct QEDAIOCB {
    BlockDriverState *bs;
    QSIMPLEQ_ENTRY(QEDAIOCB) next;  /* next request */
    int flags;                      /* QED_AIOCB_* bits ORed together */
    uint64_t end_pos;               /* request end on block device, in bytes */

    /* User scatter-gather list */
    QEMUIOVector *qiov;
    size_t qiov_offset;             /* byte count already processed */

    /* Current cluster scatter-gather list */
    QEMUIOVector cur_qiov;
    QEMUIOVector *backing_qiov;
    uint64_t cur_pos;               /* position on block device, in bytes */
    uint64_t cur_cluster;           /* cluster offset in image file */
    unsigned int cur_nclusters;     /* number of clusters being accessed */
    int find_cluster_ret;           /* used for L1/L2 update */

    QEDRequest request;
} QEDAIOCB;

typedef struct {
    BlockDriverState *bs;           /* device */

    /* Written only by an allocating write or the timer handler (the latter
     * while allocating reqs are plugged).
     */
    QEDHeader header;               /* always cpu-endian */

    /* Protected by table_lock. */
    CoMutex table_lock;
    QEDTable *l1_table;
    L2TableCache l2_cache;          /* l2 table cache */
    uint32_t table_nelems;
    uint32_t l1_shift;
    uint32_t l2_shift;
    uint32_t l2_mask;
    uint64_t file_size;             /* length of image file, in bytes */

    /* Allocating write request queue */
    QEDAIOCB *allocating_acb;
    CoQueue allocating_write_reqs;
    bool allocating_write_reqs_plugged;

    /* Periodic flush and clear need check flag */
    QEMUTimer *need_check_timer;
} BDRVQEDState;

enum {
    QED_CLUSTER_FOUND,              /* cluster found */
    QED_CLUSTER_ZERO,               /* zero cluster found */
    QED_CLUSTER_L2,                 /* cluster missing in L2 */
    QED_CLUSTER_L1,                 /* cluster missing in L1 */
};
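
/* Roughly how callers are expected to act on these statuses, returned by
 * qed_find_cluster() declared below (a hedged sketch, not the driver's
 * actual I/O path):
 *
 *     ret = qed_find_cluster(s, &request, pos, &len, &offset);
 *     switch (ret) {
 *     case QED_CLUSTER_FOUND: // read len bytes at offset in the image file
 *     case QED_CLUSTER_ZERO:  // the range reads as zeroes
 *     case QED_CLUSTER_L2:    // unallocated: fall back to the backing file,
 *     case QED_CLUSTER_L1:    //   or zeroes if there is none
 *     }
 */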

/**
 * Header functions
 */
int qed_write_header_sync(BDRVQEDState *s);

/**
 * L2 cache functions
 */
void qed_init_l2_cache(L2TableCache *l2_cache);
void qed_free_l2_cache(L2TableCache *l2_cache);
CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache);
void qed_unref_l2_cache_entry(CachedL2Table *entry);
CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset);
void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
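
/* A minimal sketch of the intended lookup/insert pattern (illustration only;
 * the real call sequence lives in the table I/O code, not in this header):
 *
 *     entry = qed_find_l2_cache_entry(&s->l2_cache, offset);
 *     if (!entry) {
 *         entry = qed_alloc_l2_cache_entry(&s->l2_cache);
 *         entry->table = qed_alloc_table(s);
 *         entry->offset = offset;
 *         // ... read the L2 table from disk into entry->table ...
 *         qed_commit_l2_cache_entry(&s->l2_cache, entry);
 *     }
 *     // ... use entry->table ...
 *     qed_unref_l2_cache_entry(entry);
 */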

/**
 * Table I/O functions
 */
int coroutine_fn qed_read_l1_table_sync(BDRVQEDState *s);
int coroutine_fn qed_write_l1_table(BDRVQEDState *s, unsigned int index,
                                    unsigned int n);
int coroutine_fn qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
                                         unsigned int n);
int coroutine_fn qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                                        uint64_t offset);
int coroutine_fn qed_read_l2_table(BDRVQEDState *s, QEDRequest *request,
                                   uint64_t offset);
int coroutine_fn qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
                                    unsigned int index, unsigned int n,
                                    bool flush);
int coroutine_fn qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                                         unsigned int index, unsigned int n,
                                         bool flush);

/**
 * Cluster functions
 */
int coroutine_fn qed_find_cluster(BDRVQEDState *s, QEDRequest *request,
                                  uint64_t pos, size_t *len,
                                  uint64_t *img_offset);

/**
 * Consistency check
 */
int coroutine_fn qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix);

QEDTable *qed_alloc_table(BDRVQEDState *s);

/**
 * Round down to the start of a cluster
 */
static inline uint64_t qed_start_of_cluster(BDRVQEDState *s, uint64_t offset)
{
    return offset & ~(uint64_t)(s->header.cluster_size - 1);
}

static inline uint64_t qed_offset_into_cluster(BDRVQEDState *s, uint64_t offset)
{
    return offset & (s->header.cluster_size - 1);
}

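/* Convert a byte count into the number of clusters needed to hold it.  For
 * example, with the default 64 KB cluster size, 1 byte occupies 1 cluster and
 * 65537 bytes occupy 2 clusters (worked example only; the defaults are
 * defined above).
 */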
static inline uint64_t qed_bytes_to_clusters(BDRVQEDState *s, uint64_t bytes)
{
    return qed_start_of_cluster(s, bytes + (s->header.cluster_size - 1)) /
           s->header.cluster_size;
}

static inline unsigned int qed_l1_index(BDRVQEDState *s, uint64_t pos)
{
    return pos >> s->l1_shift;
}

static inline unsigned int qed_l2_index(BDRVQEDState *s, uint64_t pos)
{
    return (pos >> s->l2_shift) & s->l2_mask;
}
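
/* Worked example of the index math (assuming the driver initialises l2_shift
 * to log2(cluster_size), l2_mask to table_nelems - 1, and l1_shift to
 * l2_shift + log2(table_nelems); those fields are set up outside this
 * header).  With the defaults (64 KB clusters, table_size = 4, hence 32768
 * entries per table): l2_shift = 16, l1_shift = 31, and for pos = 0x123456789:
 *
 *     qed_l1_index()            = pos >> 31            = 2
 *     qed_l2_index()            = (pos >> 16) & 0x7fff = 0x2345
 *     qed_offset_into_cluster() = pos & 0xffff         = 0x6789
 */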

/**
 * Test if a cluster offset is valid
 */
static inline bool qed_check_cluster_offset(BDRVQEDState *s, uint64_t offset)
{
    uint64_t header_size = (uint64_t)s->header.header_size *
                           s->header.cluster_size;

    if (offset & (s->header.cluster_size - 1)) {
        return false;
    }
    return offset >= header_size && offset < s->file_size;
}

/**
 * Test if a table offset is valid
 */
static inline bool qed_check_table_offset(BDRVQEDState *s, uint64_t offset)
{
    uint64_t end_offset = offset + (s->header.table_size - 1) *
                          s->header.cluster_size;

    /* Overflow check */
    if (end_offset <= offset) {
        return false;
    }

    return qed_check_cluster_offset(s, offset) &&
           qed_check_cluster_offset(s, end_offset);
}

static inline bool qed_offset_is_cluster_aligned(BDRVQEDState *s,
                                                 uint64_t offset)
{
    if (qed_offset_into_cluster(s, offset)) {
        return false;
    }
    return true;
}

static inline bool qed_offset_is_unalloc_cluster(uint64_t offset)
{
    if (offset == 0) {
        return true;
    }
    return false;
}

static inline bool qed_offset_is_zero_cluster(uint64_t offset)
{
    if (offset == 1) {
        return true;
    }
    return false;
}
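
/* Why 0 and 1 are safe as table-entry markers: qed_check_cluster_offset()
 * above rejects any offset that is not cluster-aligned or that falls inside
 * the header region, so the values 0 ("unallocated") and 1 ("reads as
 * zeroes") can never collide with a real cluster offset.  (Reasoning note
 * only; the conventions themselves are defined by the two helpers above.)
 */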

#endif /* BLOCK_QED_H */