1 | /* |
2 | * Block driver for Hyper-V VHDX Images |
3 | * |
4 | * Copyright (c) 2013 Red Hat, Inc., |
5 | * |
6 | * Authors: |
7 | * Jeff Cody <jcody@redhat.com> |
8 | * |
9 | * This is based on the "VHDX Format Specification v1.00", published 8/25/2012 |
10 | * by Microsoft: |
11 | * https://www.microsoft.com/en-us/download/details.aspx?id=34750 |
12 | * |
13 | * This work is licensed under the terms of the GNU LGPL, version 2 or later. |
14 | * See the COPYING.LIB file in the top-level directory. |
15 | * |
16 | */ |
17 | |
18 | #ifndef BLOCK_VHDX_H |
19 | #define BLOCK_VHDX_H |
20 | #include "qemu/units.h" |
21 | |
22 | #define DEFAULT_LOG_SIZE 1048576 /* 1MiB */ |
23 | /* Note: can't use 1 * MiB, because it's passed to stringify() */ |
24 | |
25 | /* Structures and fields present in the VHDX file */ |
26 | |
27 | /* The header section has the following blocks, |
28 | * each block is 64KB: |
29 | * |
30 | * _____________________________________________________________________________ |
31 | * | File Id. | Header 1 | Header 2 | Region Table | Reserved (768KB) | |
32 | * |----------|---------------|------------|--------------|--------------------| |
33 | * | | | | | | |
34 | * 0.........64KB...........128KB........192KB..........256KB................1MB |
35 | */ |
36 | |
37 | #define (64 * KiB) |
38 | |
39 | #define VHDX_FILE_ID_OFFSET 0 |
40 | #define (VHDX_HEADER_BLOCK_SIZE * 1) |
41 | #define (VHDX_HEADER_BLOCK_SIZE * 2) |
42 | #define VHDX_REGION_TABLE_OFFSET (VHDX_HEADER_BLOCK_SIZE * 3) |
43 | #define VHDX_REGION_TABLE2_OFFSET (VHDX_HEADER_BLOCK_SIZE * 4) |
44 | |
45 | #define (1 * MiB) |
46 | /* |
47 | * A note on the use of MS-GUID fields. For more details on the GUID, |
48 | * please see: https://en.wikipedia.org/wiki/Globally_unique_identifier. |
49 | * |
50 | * The VHDX specification only states that these are MS GUIDs, and which |
51 | * bytes are data1-data4. It makes no mention of what algorithm should be used |
52 | * to generate the GUID, nor what standard. However, looking at the specified |
53 | * known GUID fields, it appears the GUIDs are: |
54 | * Standard/DCE GUID type (noted by 10b in the MSB of byte 0 of .data4) |
55 | * Random algorithm (noted by 0x4XXX for .data3) |
56 | */ |
57 | |
58 | /* ---- HEADER SECTION STRUCTURES ---- */ |
59 | |
60 | /* These structures are ones that are defined in the VHDX specification |
61 | * document */ |
62 | |
63 | #define VHDX_FILE_SIGNATURE 0x656C696678646876ULL /* "vhdxfile" in ASCII */ |
64 | typedef struct VHDXFileIdentifier { |
65 | uint64_t signature; /* "vhdxfile" in ASCII */ |
66 | uint16_t creator[256]; /* optional; utf-16 string to identify |
67 | the vhdx file creator. Diagnostic |
68 | only */ |
69 | } VHDXFileIdentifier; |
70 | |
71 | |
72 | /* the guid is a 16 byte unique ID - the definition for this used by |
73 | * Microsoft is not just 16 bytes though - it is a structure that is defined, |
74 | * so we need to follow it here so that endianness does not trip us up */ |
75 | |
76 | typedef struct QEMU_PACKED MSGUID { |
77 | uint32_t data1; |
78 | uint16_t data2; |
79 | uint16_t data3; |
80 | uint8_t data4[8]; |
81 | } MSGUID; |
82 | |
83 | #define guid_eq(a, b) \ |
84 | (memcmp(&(a), &(b), sizeof(MSGUID)) == 0) |
85 | |
86 | #define (4 * KiB) /* although the vhdx_header struct in disk |
87 | is only 582 bytes, for purposes of crc |
88 | the header is the first 4KB of the 64KB |
89 | block */ |
90 | |
91 | /* The full header is 4KB, although the actual header data is much smaller. |
92 | * But for the checksum calculation, it is over the entire 4KB structure, |
93 | * not just the defined portion of it */ |
94 | #define 0x64616568 |
95 | typedef struct QEMU_PACKED { |
96 | uint32_t ; /* "head" in ASCII */ |
97 | uint32_t ; /* CRC-32C hash of the whole header */ |
98 | uint64_t ; /* Seq number of this header. Each |
99 | VHDX file has 2 of these headers, |
100 | and only the header with the highest |
101 | sequence number is valid */ |
102 | MSGUID ; /* 128 bit unique identifier. Must be |
103 | updated to new, unique value before |
104 | the first modification is made to |
105 | file */ |
106 | MSGUID ; /* 128 bit unique identifier. Must be |
107 | updated to new, unique value before |
108 | the first modification is made to |
109 | visible data. Visbile data is |
110 | defined as: |
111 | - system & user metadata |
112 | - raw block data |
113 | - disk size |
114 | - any change that will |
115 | cause the virtual disk |
116 | sector read to differ |
117 | |
118 | This does not need to change if |
119 | blocks are re-arranged */ |
120 | MSGUID ; /* 128 bit unique identifier. If zero, |
121 | there is no valid log. If non-zero, |
122 | log entries with this guid are |
123 | valid. */ |
124 | uint16_t ; /* version of the log format. Must be |
125 | set to zero */ |
126 | uint16_t ; /* version of the vhdx file. Currently, |
127 | only supported version is "1" */ |
128 | uint32_t ; /* length of the log. Must be multiple |
129 | of 1MB */ |
130 | uint64_t ; /* byte offset in the file of the log. |
131 | Must also be a multiple of 1MB */ |
132 | } ; |
133 | |
134 | /* Header for the region table block */ |
135 | #define VHDX_REGION_SIGNATURE 0x69676572 /* "regi" in ASCII */ |
136 | typedef struct QEMU_PACKED { |
137 | uint32_t ; /* "regi" in ASCII */ |
138 | uint32_t ; /* CRC-32C hash of the 64KB table */ |
139 | uint32_t ; /* number of valid entries */ |
140 | uint32_t ; |
141 | } ; |
142 | |
143 | /* Individual region table entry. There may be a maximum of 2047 of these |
144 | * |
145 | * There are two known region table properties. Both are required. |
146 | * BAT (block allocation table): 2DC27766F62342009D64115E9BFD4A08 |
147 | * Metadata: 8B7CA20647904B9AB8FE575F050F886E |
148 | */ |
149 | #define VHDX_REGION_ENTRY_REQUIRED 0x01 /* if set, parser must understand |
150 | this entry in order to open |
151 | file */ |
152 | typedef struct QEMU_PACKED VHDXRegionTableEntry { |
153 | MSGUID guid; /* 128-bit unique identifier */ |
154 | uint64_t file_offset; /* offset of the object in the file. |
155 | Must be multiple of 1MB */ |
156 | uint32_t length; /* length, in bytes, of the object */ |
157 | uint32_t data_bits; |
158 | } VHDXRegionTableEntry; |
159 | |
160 | |
161 | /* ---- LOG ENTRY STRUCTURES ---- */ |
162 | #define VHDX_LOG_MIN_SIZE (1 * MiB) |
163 | #define VHDX_LOG_SECTOR_SIZE (4 * KiB) |
164 | #define VHDX_LOG_HDR_SIZE 64 |
165 | #define VHDX_LOG_SIGNATURE 0x65676f6c |
166 | typedef struct QEMU_PACKED { |
167 | uint32_t ; /* "loge" in ASCII */ |
168 | uint32_t ; /* CRC-32C hash of the 64KB table */ |
169 | uint32_t ; /* length in bytes, multiple of 1MB */ |
170 | uint32_t ; /* byte offset of first log entry of a |
171 | seq, where this entry is the last |
172 | entry */ |
173 | uint64_t ; /* incremented with each log entry. |
174 | May not be zero. */ |
175 | uint32_t ; /* number of descriptors in this log |
176 | entry, must be >= 0 */ |
177 | uint32_t ; |
178 | MSGUID ; /* value of the log_guid from |
179 | vhdx_header. If not found in |
180 | vhdx_header, it is invalid */ |
181 | uint64_t ; /* see spec for full details - this |
182 | should be vhdx file size in bytes */ |
183 | uint64_t ; /* size in bytes that all allocated |
184 | file structures fit into */ |
185 | } ; |
186 | |
187 | #define VHDX_LOG_DESC_SIZE 32 |
188 | #define VHDX_LOG_DESC_SIGNATURE 0x63736564 |
189 | #define VHDX_LOG_ZERO_SIGNATURE 0x6f72657a |
190 | typedef struct QEMU_PACKED VHDXLogDescriptor { |
191 | uint32_t signature; /* "zero" or "desc" in ASCII */ |
192 | union { |
193 | uint32_t reserved; /* zero desc */ |
194 | uint32_t trailing_bytes; /* data desc: bytes 4092-4096 of the |
195 | data sector */ |
196 | }; |
197 | union { |
198 | uint64_t zero_length; /* zero desc: length of the section to |
199 | zero */ |
200 | uint64_t leading_bytes; /* data desc: bytes 0-7 of the data |
201 | sector */ |
202 | }; |
203 | uint64_t file_offset; /* file offset to write zeros - multiple |
204 | of 4kB */ |
205 | uint64_t sequence_number; /* must match same field in |
206 | vhdx_log_entry_header */ |
207 | } VHDXLogDescriptor; |
208 | |
209 | #define VHDX_LOG_DATA_SIGNATURE 0x61746164 |
210 | typedef struct QEMU_PACKED VHDXLogDataSector { |
211 | uint32_t data_signature; /* "data" in ASCII */ |
212 | uint32_t sequence_high; /* 4 MSB of 8 byte sequence_number */ |
213 | uint8_t data[4084]; /* raw data, bytes 8-4091 (inclusive). |
214 | see the data descriptor field for the |
215 | other mising bytes */ |
216 | uint32_t sequence_low; /* 4 LSB of 8 byte sequence_number */ |
217 | } VHDXLogDataSector; |
218 | |
219 | |
220 | |
221 | /* block states - different state values depending on whether it is a |
222 | * payload block, or a sector block. */ |
223 | |
224 | #define PAYLOAD_BLOCK_NOT_PRESENT 0 |
225 | #define PAYLOAD_BLOCK_UNDEFINED 1 |
226 | #define PAYLOAD_BLOCK_ZERO 2 |
227 | #define PAYLOAD_BLOCK_UNMAPPED 3 |
228 | #define PAYLOAD_BLOCK_UNMAPPED_v095 5 |
229 | #define PAYLOAD_BLOCK_FULLY_PRESENT 6 |
230 | #define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7 |
231 | |
232 | #define SB_BLOCK_NOT_PRESENT 0 |
233 | #define SB_BLOCK_PRESENT 6 |
234 | |
235 | /* per the spec */ |
236 | #define VHDX_MAX_SECTORS_PER_BLOCK (1 << 23) |
237 | |
238 | /* upper 44 bits are the file offset in 1MB units lower 3 bits are the state |
239 | other bits are reserved */ |
240 | #define VHDX_BAT_STATE_BIT_MASK 0x07 |
241 | #define VHDX_BAT_FILE_OFF_MASK 0xFFFFFFFFFFF00000ULL /* upper 44 bits */ |
242 | typedef uint64_t VHDXBatEntry; |
243 | |
244 | /* ---- METADATA REGION STRUCTURES ---- */ |
245 | |
246 | #define VHDX_METADATA_ENTRY_SIZE 32 |
247 | #define VHDX_METADATA_MAX_ENTRIES 2047 /* not including the header */ |
248 | #define VHDX_METADATA_TABLE_MAX_SIZE \ |
249 | (VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES+1)) |
250 | #define VHDX_METADATA_SIGNATURE 0x617461646174656DULL /* "metadata" in ASCII */ |
251 | typedef struct QEMU_PACKED { |
252 | uint64_t ; /* "metadata" in ASCII */ |
253 | uint16_t ; |
254 | uint16_t ; /* number table entries. <= 2047 */ |
255 | uint32_t [5]; |
256 | } ; |
257 | |
258 | #define VHDX_META_FLAGS_IS_USER 0x01 /* max 1024 entries */ |
259 | #define VHDX_META_FLAGS_IS_VIRTUAL_DISK 0x02 /* virtual disk metadata if set, |
260 | otherwise file metdata */ |
261 | #define VHDX_META_FLAGS_IS_REQUIRED 0x04 /* parse must understand this |
262 | entry to open the file */ |
263 | typedef struct QEMU_PACKED VHDXMetadataTableEntry { |
264 | MSGUID item_id; /* 128-bit identifier for metadata */ |
265 | uint32_t offset; /* byte offset of the metadata. At |
266 | least 64kB. Relative to start of |
267 | metadata region */ |
268 | /* note: if length = 0, so is offset */ |
269 | uint32_t length; /* length of metadata. <= 1MB. */ |
270 | uint32_t data_bits; /* least-significant 3 bits are flags, |
271 | the rest are reserved (see above) */ |
272 | uint32_t reserved2; |
273 | } VHDXMetadataTableEntry; |
274 | |
275 | #define VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED 0x01 /* Do not change any blocks to |
276 | be BLOCK_NOT_PRESENT. |
277 | If set indicates a fixed |
278 | size VHDX file */ |
279 | #define VHDX_PARAMS_HAS_PARENT 0x02 /* has parent / backing file */ |
280 | #define VHDX_BLOCK_SIZE_MIN (1 * MiB) |
281 | #define VHDX_BLOCK_SIZE_MAX (256 * MiB) |
282 | typedef struct QEMU_PACKED VHDXFileParameters { |
283 | uint32_t block_size; /* size of each payload block, always |
284 | power of 2, <= 256MB and >= 1MB. */ |
285 | uint32_t data_bits; /* least-significant 2 bits are flags, |
286 | the rest are reserved (see above) */ |
287 | } VHDXFileParameters; |
288 | |
289 | #define VHDX_MAX_IMAGE_SIZE ((uint64_t) 64 * TiB) |
290 | typedef struct QEMU_PACKED VHDXVirtualDiskSize { |
291 | uint64_t virtual_disk_size; /* Size of the virtual disk, in bytes. |
292 | Must be multiple of the sector size, |
293 | max of 64TB */ |
294 | } VHDXVirtualDiskSize; |
295 | |
296 | typedef struct QEMU_PACKED VHDXPage83Data { |
297 | MSGUID page_83_data; /* unique id for scsi devices that |
298 | support page 0x83 */ |
299 | } VHDXPage83Data; |
300 | |
301 | typedef struct QEMU_PACKED VHDXVirtualDiskLogicalSectorSize { |
302 | uint32_t logical_sector_size; /* virtual disk sector size (in bytes). |
303 | Can only be 512 or 4096 bytes */ |
304 | } VHDXVirtualDiskLogicalSectorSize; |
305 | |
306 | typedef struct QEMU_PACKED VHDXVirtualDiskPhysicalSectorSize { |
307 | uint32_t physical_sector_size; /* physical sector size (in bytes). |
308 | Can only be 512 or 4096 bytes */ |
309 | } VHDXVirtualDiskPhysicalSectorSize; |
310 | |
311 | typedef struct QEMU_PACKED { |
312 | MSGUID ; /* type of the parent virtual disk. */ |
313 | uint16_t ; |
314 | uint16_t ; /* number of key/value pairs for this |
315 | locator */ |
316 | } ; |
317 | |
318 | /* key and value strings are UNICODE strings, UTF-16 LE encoding, no NULs */ |
319 | typedef struct QEMU_PACKED VHDXParentLocatorEntry { |
320 | uint32_t key_offset; /* offset in metadata for key, > 0 */ |
321 | uint32_t value_offset; /* offset in metadata for value, >0 */ |
322 | uint16_t key_length; /* length of entry key, > 0 */ |
323 | uint16_t value_length; /* length of entry value, > 0 */ |
324 | } VHDXParentLocatorEntry; |
325 | |
326 | |
327 | /* ----- END VHDX SPECIFICATION STRUCTURES ---- */ |
328 | |
329 | typedef struct VHDXMetadataEntries { |
330 | VHDXMetadataTableEntry file_parameters_entry; |
331 | VHDXMetadataTableEntry virtual_disk_size_entry; |
332 | VHDXMetadataTableEntry page83_data_entry; |
333 | VHDXMetadataTableEntry logical_sector_size_entry; |
334 | VHDXMetadataTableEntry phys_sector_size_entry; |
335 | VHDXMetadataTableEntry parent_locator_entry; |
336 | uint16_t present; |
337 | } VHDXMetadataEntries; |
338 | |
339 | typedef struct VHDXLogEntries { |
340 | uint64_t offset; |
341 | uint64_t length; |
342 | uint32_t write; |
343 | uint32_t read; |
344 | VHDXLogEntryHeader *hdr; |
345 | void *desc_buffer; |
346 | uint64_t sequence; |
347 | uint32_t tail; |
348 | } VHDXLogEntries; |
349 | |
350 | typedef struct VHDXRegionEntry { |
351 | uint64_t start; |
352 | uint64_t end; |
353 | QLIST_ENTRY(VHDXRegionEntry) entries; |
354 | } VHDXRegionEntry; |
355 | |
356 | typedef struct BDRVVHDXState { |
357 | CoMutex lock; |
358 | |
359 | int ; |
360 | VHDXHeader *[2]; |
361 | |
362 | VHDXRegionTableHeader rt; |
363 | VHDXRegionTableEntry bat_rt; /* region table for the BAT */ |
364 | VHDXRegionTableEntry metadata_rt; /* region table for the metadata */ |
365 | |
366 | VHDXMetadataTableHeader metadata_hdr; |
367 | VHDXMetadataEntries metadata_entries; |
368 | |
369 | VHDXFileParameters params; |
370 | uint32_t block_size; |
371 | uint32_t block_size_bits; |
372 | uint32_t sectors_per_block; |
373 | uint32_t sectors_per_block_bits; |
374 | |
375 | uint64_t virtual_disk_size; |
376 | uint32_t logical_sector_size; |
377 | uint32_t physical_sector_size; |
378 | |
379 | uint64_t chunk_ratio; |
380 | uint32_t chunk_ratio_bits; |
381 | uint32_t logical_sector_size_bits; |
382 | |
383 | uint32_t bat_entries; |
384 | VHDXBatEntry *bat; |
385 | uint64_t bat_offset; |
386 | |
387 | bool first_visible_write; |
388 | MSGUID session_guid; |
389 | |
390 | VHDXLogEntries log; |
391 | |
392 | VHDXParentLocatorHeader ; |
393 | VHDXParentLocatorEntry *parent_entries; |
394 | |
395 | Error *migration_blocker; |
396 | |
397 | bool log_replayed_on_open; |
398 | |
399 | QLIST_HEAD(, VHDXRegionEntry) regions; |
400 | } BDRVVHDXState; |
401 | |
402 | void vhdx_guid_generate(MSGUID *guid); |
403 | |
404 | int (BlockDriverState *bs, BDRVVHDXState *s, bool rw, |
405 | MSGUID *log_guid); |
406 | |
407 | uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset); |
408 | uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size, |
409 | int crc_offset); |
410 | |
411 | bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset); |
412 | |
413 | int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed, |
414 | Error **errp); |
415 | |
416 | int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s, |
417 | void *data, uint32_t length, uint64_t offset); |
418 | |
419 | static inline void leguid_to_cpus(MSGUID *guid) |
420 | { |
421 | guid->data1 = le32_to_cpu(guid->data1); |
422 | guid->data2 = le16_to_cpu(guid->data2); |
423 | guid->data3 = le16_to_cpu(guid->data3); |
424 | } |
425 | |
426 | static inline void cpu_to_leguids(MSGUID *guid) |
427 | { |
428 | guid->data1 = cpu_to_le32(guid->data1); |
429 | guid->data2 = cpu_to_le16(guid->data2); |
430 | guid->data3 = cpu_to_le16(guid->data3); |
431 | } |
432 | |
433 | void (VHDXHeader *h); |
434 | void (VHDXHeader *orig_h, VHDXHeader *new_h); |
435 | void vhdx_log_desc_le_import(VHDXLogDescriptor *d); |
436 | void vhdx_log_desc_le_export(VHDXLogDescriptor *d); |
437 | void vhdx_log_data_le_import(VHDXLogDataSector *d); |
438 | void vhdx_log_data_le_export(VHDXLogDataSector *d); |
439 | void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr); |
440 | void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr); |
441 | void (VHDXRegionTableHeader *hdr); |
442 | void (VHDXRegionTableHeader *hdr); |
443 | void vhdx_region_entry_le_import(VHDXRegionTableEntry *e); |
444 | void vhdx_region_entry_le_export(VHDXRegionTableEntry *e); |
445 | void (VHDXMetadataTableHeader *hdr); |
446 | void (VHDXMetadataTableHeader *hdr); |
447 | void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e); |
448 | void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e); |
449 | int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s); |
450 | |
451 | #endif |
452 | |