1/*
2 * Block driver for Hyper-V VHDX Images
3 *
4 * Copyright (c) 2013 Red Hat, Inc.,
5 *
6 * Authors:
7 * Jeff Cody <jcody@redhat.com>
8 *
9 * This is based on the "VHDX Format Specification v1.00", published 8/25/2012
10 * by Microsoft:
11 * https://www.microsoft.com/en-us/download/details.aspx?id=34750
12 *
13 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
14 * See the COPYING.LIB file in the top-level directory.
15 *
16 */
17
18#ifndef BLOCK_VHDX_H
19#define BLOCK_VHDX_H
20#include "qemu/units.h"
21
/*
 * Default log size, in bytes (1MiB).
 * Note: this is written as a plain number and not as 1 * MiB, because the
 * macro is passed to stringify().
 */
#define DEFAULT_LOG_SIZE 1048576 /* 1MiB */
24
25/* Structures and fields present in the VHDX file */
26
/* The header section has the following blocks,
 * each block is 64KB:
 *
 * _____________________________________________________________________________
 * | File Id. |   Header 1    | Header 2   | Region Table |  Reserved (768KB)  |
 * |----------|---------------|------------|--------------|--------------------|
 * |          |               |            |              |                    |
 * 0.........64KB...........128KB........192KB..........256KB................1MB
 *
 * Note: the diagram shows a single region table.  VHDX_REGION_TABLE2_OFFSET
 * places a second region table block at 256KB, i.e. at the start of the area
 * drawn as reserved above.
 */
36
/* Size of one block of the header section */
#define VHDX_HEADER_BLOCK_SIZE      (64 * KiB)

/*
 * Offsets of the header section blocks, expressed as multiples of
 * VHDX_HEADER_BLOCK_SIZE (0, 64KB, 128KB, 192KB and 256KB respectively).
 */
#define VHDX_FILE_ID_OFFSET         0
#define VHDX_HEADER1_OFFSET         (VHDX_HEADER_BLOCK_SIZE * 1)
#define VHDX_HEADER2_OFFSET         (VHDX_HEADER_BLOCK_SIZE * 2)
#define VHDX_REGION_TABLE_OFFSET    (VHDX_HEADER_BLOCK_SIZE * 3)
#define VHDX_REGION_TABLE2_OFFSET   (VHDX_HEADER_BLOCK_SIZE * 4)

/* The header section ends at 1MB */
#define VHDX_HEADER_SECTION_END     (1 * MiB)
46/*
47 * A note on the use of MS-GUID fields. For more details on the GUID,
48 * please see: https://en.wikipedia.org/wiki/Globally_unique_identifier.
49 *
50 * The VHDX specification only states that these are MS GUIDs, and which
51 * bytes are data1-data4. It makes no mention of what algorithm should be used
52 * to generate the GUID, nor what standard. However, looking at the specified
53 * known GUID fields, it appears the GUIDs are:
54 * Standard/DCE GUID type (noted by 10b in the MSB of byte 0 of .data4)
55 * Random algorithm (noted by 0x4XXX for .data3)
56 */
57
58/* ---- HEADER SECTION STRUCTURES ---- */
59
60/* These structures are ones that are defined in the VHDX specification
61 * document */
62
/*
 * File identifier ("File Id." block of the header section).
 *
 * The signature constant spells "vhdxfile" when the 64-bit value is stored
 * least-significant byte first (0x76 'v', 0x68 'h', ... 0x65 'e').
 */
#define VHDX_FILE_SIGNATURE 0x656C696678646876ULL  /* "vhdxfile" in ASCII */
typedef struct VHDXFileIdentifier {
    uint64_t    signature;              /* "vhdxfile" in ASCII */
    uint16_t    creator[256];           /* optional; utf-16 string to identify
                                           the vhdx file creator.  Diagnostic
                                           only */
} VHDXFileIdentifier;
70
71
72/* the guid is a 16 byte unique ID - the definition for this used by
73 * Microsoft is not just 16 bytes though - it is a structure that is defined,
74 * so we need to follow it here so that endianness does not trip us up */
75
/*
 * 16-byte GUID in Microsoft's structured layout (4 + 2 + 2 + 8 bytes).
 * data1..data3 are multi-byte integers and are the fields byte-swapped by
 * leguid_to_cpus() / cpu_to_leguids(); data4 is a plain byte array.
 */
typedef struct QEMU_PACKED MSGUID {
    uint32_t  data1;        /* first 4 bytes, as one 32-bit integer */
    uint16_t  data2;        /* next 2 bytes, as one 16-bit integer */
    uint16_t  data3;        /* next 2 bytes, as one 16-bit integer */
    uint8_t   data4[8];     /* final 8 bytes, no byte order applies */
} MSGUID;
82
/*
 * Evaluates to non-zero when the MSGUID objects a and b are bytewise
 * identical over sizeof(MSGUID) bytes.  Both arguments have their address
 * taken, so they must be lvalues (objects, not pointers to them).
 */
#define guid_eq(a, b) \
    (memcmp(&(a), &(b), sizeof(MSGUID)) == 0)
85
/*
 * Although the defined header fields are much smaller, for purposes of crc
 * the header is the first 4KB of its 64KB block.
 *
 * NOTE(review): this comment used to say the header struct on disk is
 * "only 582 bytes"; the fields of the packed VHDXHeader struct declared in
 * this file add up to 80 bytes.  Confirm which size was meant.
 */
#define VHDX_HEADER_SIZE            (4 * KiB)
90
91/* The full header is 4KB, although the actual header data is much smaller.
92 * But for the checksum calculation, it is over the entire 4KB structure,
93 * not just the defined portion of it */
/*
 * VHDX header.  Two of these exist per file (see VHDX_HEADER1_OFFSET and
 * VHDX_HEADER2_OFFSET).  The signature constant spells "head" when the
 * 32-bit value is stored least-significant byte first.
 */
#define VHDX_HEADER_SIGNATURE   0x64616568  /* "head" in ASCII */
typedef struct QEMU_PACKED VHDXHeader {
    uint32_t    signature;              /* "head" in ASCII */
    uint32_t    checksum;               /* CRC-32C hash of the whole header */
    uint64_t    sequence_number;        /* Seq number of this header.  Each
                                           VHDX file has 2 of these headers,
                                           and only the header with the highest
                                           sequence number is valid */
    MSGUID      file_write_guid;        /* 128 bit unique identifier.  Must be
                                           updated to new, unique value before
                                           the first modification is made to
                                           file */
    MSGUID      data_write_guid;        /* 128 bit unique identifier.  Must be
                                           updated to new, unique value before
                                           the first modification is made to
                                           visible data.  Visible data is
                                           defined as:
                                                - system & user metadata
                                                - raw block data
                                                - disk size
                                                - any change that will
                                                  cause the virtual disk
                                                  sector read to differ

                                           This does not need to change if
                                           blocks are re-arranged */
    MSGUID      log_guid;               /* 128 bit unique identifier.  If zero,
                                           there is no valid log.  If non-zero,
                                           log entries with this guid are
                                           valid. */
    uint16_t    log_version;            /* version of the log format.  Must be
                                           set to zero */
    uint16_t    version;                /* version of the vhdx file.  Currently,
                                           only supported version is "1" */
    uint32_t    log_length;             /* length of the log.  Must be multiple
                                           of 1MB */
    uint64_t    log_offset;             /* byte offset in the file of the log.
                                           Must also be a multiple of 1MB */
} VHDXHeader;
133
134/* Header for the region table block */
/*
 * The signature constant spells "regi" when the 32-bit value is stored
 * least-significant byte first.  The packed header is 16 bytes.
 */
#define VHDX_REGION_SIGNATURE   0x69676572  /* "regi" in ASCII */
typedef struct QEMU_PACKED VHDXRegionTableHeader {
    uint32_t    signature;              /* "regi" in ASCII */
    uint32_t    checksum;               /* CRC-32C hash of the 64KB table */
    uint32_t    entry_count;            /* number of valid entries */
    uint32_t    reserved;               /* reserved */
} VHDXRegionTableHeader;
142
143/* Individual region table entry. There may be a maximum of 2047 of these
144 *
145 * There are two known region table properties. Both are required.
146 * BAT (block allocation table): 2DC27766F62342009D64115E9BFD4A08
147 * Metadata: 8B7CA20647904B9AB8FE575F050F886E
148 */
/* Packed size of one region table entry is 32 bytes (16 + 8 + 4 + 4). */
#define VHDX_REGION_ENTRY_REQUIRED  0x01    /* if set, parser must understand
                                               this entry in order to open
                                               file */
typedef struct QEMU_PACKED VHDXRegionTableEntry {
    MSGUID      guid;                   /* 128-bit unique identifier */
    uint64_t    file_offset;            /* offset of the object in the file.
                                           Must be multiple of 1MB */
    uint32_t    length;                 /* length, in bytes, of the object */
    uint32_t    data_bits;              /* NOTE(review): presumably the flag
                                           word that VHDX_REGION_ENTRY_REQUIRED
                                           is tested against - confirm */
} VHDXRegionTableEntry;
159
160
161/* ---- LOG ENTRY STRUCTURES ---- */
#define VHDX_LOG_MIN_SIZE               (1 * MiB)   /* minimum log size */
#define VHDX_LOG_SECTOR_SIZE            (4 * KiB)   /* size of a log sector */
#define VHDX_LOG_HDR_SIZE               64          /* matches the packed size
                                                       of VHDXLogEntryHeader */
#define VHDX_LOG_SIGNATURE              0x65676f6c  /* "loge" in ASCII */
typedef struct QEMU_PACKED VHDXLogEntryHeader {
    uint32_t    signature;              /* "loge" in ASCII */
    uint32_t    checksum;               /* CRC-32C hash.
                                           NOTE(review): this was described as
                                           "of the 64KB table", which looks
                                           copied from the region table header;
                                           presumably it covers the log entry -
                                           confirm */
    uint32_t    entry_length;           /* length in bytes.
                                           NOTE(review): this was described as
                                           "multiple of 1MB"; log sectors are
                                           VHDX_LOG_SECTOR_SIZE (4KB) - confirm
                                           the required alignment against the
                                           spec */
    uint32_t    tail;                   /* byte offset of first log entry of a
                                           seq, where this entry is the last
                                           entry */
    uint64_t    sequence_number;        /* incremented with each log entry.
                                           May not be zero. */
    uint32_t    descriptor_count;       /* number of descriptors in this log
                                           entry, must be >= 0 */
    uint32_t    reserved;               /* reserved */
    MSGUID      log_guid;               /* value of the log_guid from
                                           vhdx_header.  If not found in
                                           vhdx_header, it is invalid */
    uint64_t    flushed_file_offset;    /* see spec for full details - this
                                           should be vhdx file size in bytes */
    uint64_t    last_file_offset;       /* size in bytes that all allocated
                                           file structures fit into */
} VHDXLogEntryHeader;
186
/*
 * Log descriptor.  The signature selects how the two anonymous unions are
 * interpreted: "zero" descriptors use reserved/zero_length, "desc" (data)
 * descriptors use trailing_bytes/leading_bytes.
 */
#define VHDX_LOG_DESC_SIZE              32          /* matches the packed size
                                                       of VHDXLogDescriptor */
#define VHDX_LOG_DESC_SIGNATURE         0x63736564  /* "desc" in ASCII */
#define VHDX_LOG_ZERO_SIGNATURE         0x6f72657a  /* "zero" in ASCII */
typedef struct QEMU_PACKED VHDXLogDescriptor {
    uint32_t    signature;              /* "zero" or "desc" in ASCII */
    union  {
        uint32_t    reserved;           /* zero desc */
        uint32_t    trailing_bytes;     /* data desc: bytes 4092-4095 of the
                                           data sector */
    };
    union  {
        uint64_t    zero_length;        /* zero desc: length of the section to
                                           zero */
        uint64_t    leading_bytes;      /* data desc: bytes 0-7 of the data
                                           sector */
    };
    uint64_t    file_offset;            /* file offset to write zeros - multiple
                                           of 4kB */
    uint64_t    sequence_number;        /* must match same field in
                                           vhdx_log_entry_header */
} VHDXLogDescriptor;
208
/*
 * Log data sector: 4 + 4 + 4084 + 4 = 4096 bytes.  The first 8 and last 4
 * bytes of the original sector are not stored here; the descriptor fields
 * leading_bytes and trailing_bytes are documented as holding them.
 */
#define VHDX_LOG_DATA_SIGNATURE         0x61746164  /* "data" in ASCII */
typedef struct QEMU_PACKED VHDXLogDataSector {
    uint32_t    data_signature;         /* "data" in ASCII */
    uint32_t    sequence_high;          /* 4 MSB of 8 byte sequence_number */
    uint8_t     data[4084];             /* raw data, bytes 8-4091 (inclusive).
                                           see the data descriptor field for the
                                           other missing bytes */
    uint32_t    sequence_low;           /* 4 LSB of 8 byte sequence_number */
} VHDXLogDataSector;
218
219
220
221/* block states - different state values depending on whether it is a
222 * payload block, or a sector block. */
223
/*
 * State values for payload blocks.  All values fit in 3 bits; the value 4
 * is not assigned here.
 * NOTE(review): PAYLOAD_BLOCK_UNMAPPED_v095 is presumably the "unmapped"
 * value used by an older (0.95) revision of the format - confirm.
 */
#define PAYLOAD_BLOCK_NOT_PRESENT       0
#define PAYLOAD_BLOCK_UNDEFINED         1
#define PAYLOAD_BLOCK_ZERO              2
#define PAYLOAD_BLOCK_UNMAPPED          3
#define PAYLOAD_BLOCK_UNMAPPED_v095     5
#define PAYLOAD_BLOCK_FULLY_PRESENT     6
#define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7

/* State values for sector blocks */
#define SB_BLOCK_NOT_PRESENT    0
#define SB_BLOCK_PRESENT        6
234
/* per the spec: 1 << 23 = 8388608 sectors */
#define VHDX_MAX_SECTORS_PER_BLOCK  (1 << 23)

/*
 * Layout of a 64-bit BAT entry:
 *   - the upper 44 bits are the file offset, in 1MB units;
 *   - the lower 3 bits are the state;
 *   - all other bits are reserved.
 *
 * VHDX_BAT_FILE_OFF_MASK keeps the upper 44 bits in place (it does not
 * shift), so the masked value has its low 20 bits clear.
 */
#define VHDX_BAT_STATE_BIT_MASK 0x07
#define VHDX_BAT_FILE_OFF_MASK  0xFFFFFFFFFFF00000ULL /* upper 44 bits */
typedef uint64_t VHDXBatEntry;
243
244/* ---- METADATA REGION STRUCTURES ---- */
245
/* Size in bytes of one metadata table entry (and of the table header) */
#define VHDX_METADATA_ENTRY_SIZE        32
#define VHDX_METADATA_MAX_ENTRIES       2047  /* not including the header */
/* 32 * (2047 + 1) = 64KB: the header plus the maximum number of entries */
#define VHDX_METADATA_TABLE_MAX_SIZE \
    (VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES+1))
#define VHDX_METADATA_SIGNATURE 0x617461646174656DULL  /* "metadata" in ASCII */
typedef struct QEMU_PACKED VHDXMetadataTableHeader {
    uint64_t    signature;              /* "metadata" in ASCII */
    uint16_t    reserved;               /* reserved */
    uint16_t    entry_count;            /* number table entries. <= 2047 */
    uint32_t    reserved2[5];           /* reserved; pads the header to
                                           32 bytes */
} VHDXMetadataTableHeader;
257
/* Flag bits held in the 3 least-significant bits of data_bits */
#define VHDX_META_FLAGS_IS_USER         0x01  /* max 1024 entries */
#define VHDX_META_FLAGS_IS_VIRTUAL_DISK 0x02  /* virtual disk metadata if set,
                                                 otherwise file metadata */
#define VHDX_META_FLAGS_IS_REQUIRED     0x04  /* parser must understand this
                                                 entry to open the file */
/* Packed size is 32 bytes (16 + 4 + 4 + 4 + 4) */
typedef struct QEMU_PACKED VHDXMetadataTableEntry {
    MSGUID      item_id;                /* 128-bit identifier for metadata */
    uint32_t    offset;                 /* byte offset of the metadata.  At
                                           least 64kB.  Relative to start of
                                           metadata region */
                                        /* note: if length = 0, so is offset */
    uint32_t    length;                 /* length of metadata. <= 1MB. */
    uint32_t    data_bits;              /* least-significant 3 bits are flags,
                                           the rest are reserved (see above) */
    uint32_t    reserved2;              /* reserved */
} VHDXMetadataTableEntry;
274
/* Flag bits held in the 2 least-significant bits of data_bits */
#define VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED 0x01   /* Do not change any blocks to
                                                   be BLOCK_NOT_PRESENT.
                                                   If set indicates a fixed
                                                   size VHDX file */
#define VHDX_PARAMS_HAS_PARENT           0x02    /* has parent / backing file */
/* Bounds for block_size */
#define VHDX_BLOCK_SIZE_MIN             (1   * MiB)
#define VHDX_BLOCK_SIZE_MAX             (256 * MiB)
typedef struct QEMU_PACKED VHDXFileParameters {
    uint32_t    block_size;             /* size of each payload block, always
                                           power of 2, <= 256MB and >= 1MB. */
    uint32_t data_bits;                 /* least-significant 2 bits are flags,
                                           the rest are reserved (see above) */
} VHDXFileParameters;
288
/* Largest virtual disk size: 64TB.  The cast makes the product 64-bit. */
#define VHDX_MAX_IMAGE_SIZE  ((uint64_t) 64 * TiB)
typedef struct QEMU_PACKED VHDXVirtualDiskSize {
    uint64_t    virtual_disk_size;      /* Size of the virtual disk, in bytes.
                                           Must be multiple of the sector size,
                                           max of 64TB */
} VHDXVirtualDiskSize;
295
/* Metadata item carrying a single GUID (16 bytes) */
typedef struct QEMU_PACKED VHDXPage83Data {
    MSGUID      page_83_data;           /* unique id for scsi devices that
                                           support page 0x83 */
} VHDXPage83Data;
300
/* Metadata item carrying the logical sector size (one 32-bit field) */
typedef struct QEMU_PACKED VHDXVirtualDiskLogicalSectorSize {
    uint32_t    logical_sector_size;    /* virtual disk sector size (in bytes).
                                           Can only be 512 or 4096 bytes */
} VHDXVirtualDiskLogicalSectorSize;
305
/* Metadata item carrying the physical sector size (one 32-bit field) */
typedef struct QEMU_PACKED VHDXVirtualDiskPhysicalSectorSize {
    uint32_t    physical_sector_size;   /* physical sector size (in bytes).
                                           Can only be 512 or 4096 bytes */
} VHDXVirtualDiskPhysicalSectorSize;
310
/* Header of the parent locator metadata item; packed size is 20 bytes */
typedef struct QEMU_PACKED VHDXParentLocatorHeader {
    MSGUID      locator_type;           /* type of the parent virtual disk. */
    uint16_t    reserved;               /* reserved */
    uint16_t    key_value_count;        /* number of key/value pairs for this
                                           locator */
} VHDXParentLocatorHeader;
317
/*
 * One key/value pair of a parent locator; packed size is 12 bytes.
 * Key and value strings are UNICODE strings, UTF-16 LE encoding, no NULs.
 */
typedef struct QEMU_PACKED VHDXParentLocatorEntry {
    uint32_t    key_offset;             /* offset in metadata for key, > 0 */
    uint32_t    value_offset;           /* offset in metadata for value, >0 */
    uint16_t    key_length;             /* length of entry key, > 0 */
    uint16_t    value_length;           /* length of entry value, > 0 */
} VHDXParentLocatorEntry;
325
326
327/* ----- END VHDX SPECIFICATION STRUCTURES ---- */
328
/*
 * In-memory (not on-disk) collection holding one metadata table entry per
 * metadata item type declared in this header.
 */
typedef struct VHDXMetadataEntries {
    VHDXMetadataTableEntry file_parameters_entry;
    VHDXMetadataTableEntry virtual_disk_size_entry;
    VHDXMetadataTableEntry page83_data_entry;
    VHDXMetadataTableEntry logical_sector_size_entry;
    VHDXMetadataTableEntry phys_sector_size_entry;
    VHDXMetadataTableEntry parent_locator_entry;
    uint16_t present;   /* NOTE(review): presumably a bitmask recording which
                           of the entries above were found - confirm against
                           the code that sets it */
} VHDXMetadataEntries;
338
/*
 * In-memory (not on-disk) description of the log.
 * NOTE(review): the fields are not documented in this header; the notes
 * below are inferred from names and types only - confirm against the log
 * implementation.
 */
typedef struct VHDXLogEntries {
    uint64_t offset;            /* presumably the file offset of the log */
    uint64_t length;            /* presumably the length of the log */
    uint32_t write;             /* presumably the current write position */
    uint32_t read;              /* presumably the current read position */
    VHDXLogEntryHeader *hdr;    /* pointer to a log entry header */
    void *desc_buffer;          /* untyped buffer, presumably descriptors */
    uint64_t sequence;          /* presumably a log sequence number */
    uint32_t tail;              /* presumably mirrors VHDXLogEntryHeader.tail */
} VHDXLogEntries;
349
/*
 * Node of the list headed by BDRVVHDXState.regions.
 * NOTE(review): start/end presumably delimit a byte range of the image
 * file; whether end is inclusive cannot be told from this header - confirm.
 */
typedef struct VHDXRegionEntry {
    uint64_t start;
    uint64_t end;
    QLIST_ENTRY(VHDXRegionEntry) entries;   /* list linkage */
} VHDXRegionEntry;
355
/*
 * Per-image driver state for a VHDX image.
 * NOTE(review): most fields are undocumented here; notes marked
 * "presumably" are inferred from names and types and should be confirmed
 * against the driver implementation.
 */
typedef struct BDRVVHDXState {
    CoMutex lock;

    int curr_header;                /* presumably the index into headers[] of
                                       the header currently in use */
    VHDXHeader *headers[2];         /* the two headers of the file */

    VHDXRegionTableHeader rt;
    VHDXRegionTableEntry bat_rt;         /* region table for the BAT */
    VHDXRegionTableEntry metadata_rt;    /* region table for the metadata */

    VHDXMetadataTableHeader metadata_hdr;
    VHDXMetadataEntries metadata_entries;

    VHDXFileParameters params;
    uint32_t block_size;
    uint32_t block_size_bits;       /* presumably log2 of block_size */
    uint32_t sectors_per_block;
    uint32_t sectors_per_block_bits; /* presumably log2 of sectors_per_block */

    uint64_t virtual_disk_size;
    uint32_t logical_sector_size;
    uint32_t physical_sector_size;

    uint64_t chunk_ratio;
    uint32_t chunk_ratio_bits;      /* presumably log2 of chunk_ratio */
    uint32_t logical_sector_size_bits; /* presumably log2 of
                                          logical_sector_size */

    uint32_t bat_entries;           /* presumably the number of entries in
                                       bat[] */
    VHDXBatEntry *bat;
    uint64_t bat_offset;

    bool first_visible_write;
    MSGUID session_guid;

    VHDXLogEntries log;

    VHDXParentLocatorHeader parent_header;
    VHDXParentLocatorEntry *parent_entries;

    Error *migration_blocker;

    bool log_replayed_on_open;

    QLIST_HEAD(, VHDXRegionEntry) regions;  /* list of VHDXRegionEntry nodes */
} BDRVVHDXState;
401
/*
 * Generate a GUID into *guid.  Defined outside this header.
 * NOTE(review): the generation algorithm is not visible here - see the
 * definition.
 */
void vhdx_guid_generate(MSGUID *guid);
403
/*
 * Update the VHDX headers of the image.  Defined outside this header.
 * NOTE(review): the meaning of rw and log_guid, and the return convention
 * (presumably 0 on success, negative on error), must be confirmed against
 * the definition.
 */
int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, bool rw,
                        MSGUID *log_guid);
406
/*
 * Checksum helpers operating on a buffer of size bytes.  Defined outside
 * this header.
 * NOTE(review): crc_offset is presumably the byte offset of the 32-bit
 * checksum field inside buf, and the checksum presumably the CRC-32C
 * mentioned in the structure comments - confirm against the definitions.
 */
uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset);
uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
                            int crc_offset);

bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);
412
/*
 * Log handling entry points.  Defined outside this header.
 * NOTE(review): semantics of *flushed and the return conventions are not
 * visible here - confirm against the definitions.
 */
int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
                   Error **errp);

int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
                             void *data, uint32_t length, uint64_t offset);
418
419static inline void leguid_to_cpus(MSGUID *guid)
420{
421 guid->data1 = le32_to_cpu(guid->data1);
422 guid->data2 = le16_to_cpu(guid->data2);
423 guid->data3 = le16_to_cpu(guid->data3);
424}
425
426static inline void cpu_to_leguids(MSGUID *guid)
427{
428 guid->data1 = cpu_to_le32(guid->data1);
429 guid->data2 = cpu_to_le16(guid->data2);
430 guid->data3 = cpu_to_le16(guid->data3);
431}
432
/*
 * Byte-order conversion helpers for the structures declared above; all are
 * defined outside this header.
 * NOTE(review): by naming, *_le_import presumably converts a structure read
 * from disk (little-endian) to host order and *_le_export does the reverse -
 * confirm against the definitions.  Every helper takes a single structure
 * pointer except vhdx_header_le_export(), which takes separate orig_h and
 * new_h pointers.
 */
void vhdx_header_le_import(VHDXHeader *h);
void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h);
void vhdx_log_desc_le_import(VHDXLogDescriptor *d);
void vhdx_log_desc_le_export(VHDXLogDescriptor *d);
void vhdx_log_data_le_import(VHDXLogDataSector *d);
void vhdx_log_data_le_export(VHDXLogDataSector *d);
void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr);
void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr);
void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr);
void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr);
void vhdx_region_entry_le_import(VHDXRegionTableEntry *e);
void vhdx_region_entry_le_export(VHDXRegionTableEntry *e);
void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr);
void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr);
void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e);
void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e);
/*
 * Defined outside this header.
 * NOTE(review): presumably called before a write that changes
 * guest-visible data (cf. BDRVVHDXState.first_visible_write) - confirm.
 */
int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s);
450
451#endif
452