Oleksandr Andrushchenko | 365d88a | 2020-08-06 12:42:46 +0300 | [diff] [blame] | 1 | /* SPDX-License-Identifier: MIT |
| 2 | * |
| 3 | * blkif.h |
| 4 | * |
| 5 | * Unified block-device I/O interface for Xen guest OSes. |
| 6 | * |
| 7 | * Copyright (c) 2003-2004, Keir Fraser |
| 8 | * Copyright (c) 2012, Spectra Logic Corporation |
| 9 | */ |
| 10 | |
| 11 | #ifndef __XEN_PUBLIC_IO_BLKIF_H__ |
| 12 | #define __XEN_PUBLIC_IO_BLKIF_H__ |
| 13 | |
| 14 | #include "ring.h" |
| 15 | #include "../grant_table.h" |
| 16 | |
| 17 | /* |
| 18 | * Front->back notifications: When enqueuing a new request, sending a |
| 19 | * notification can be made conditional on req_event (i.e., the generic |
| 20 | * hold-off mechanism provided by the ring macros). Backends must set |
| 21 | * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()). |
| 22 | * |
| 23 | * Back->front notifications: When enqueuing a new response, sending a |
| 24 | * notification can be made conditional on rsp_event (i.e., the generic |
| 25 | * hold-off mechanism provided by the ring macros). Frontends must set |
| 26 | * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). |
| 27 | */ |
| 28 | |
| 29 | #ifndef blkif_vdev_t /* an including file may define blkif_vdev_t first to override the default below */ |
| 30 | #define blkif_vdev_t u16 /* type of the 'handle' field in the request structures */ |
| 31 | #endif |
| 32 | #define blkif_sector_t u64 /* type of the 'sector_number' fields; defined unconditionally (no #ifndef guard) */ |
| 33 | |
| 34 | /* |
| 35 | * Feature and Parameter Negotiation |
| 36 | * ================================= |
| 37 | * The two halves of a Xen block driver utilize nodes within the XenStore to |
| 38 | * communicate capabilities and to negotiate operating parameters. This |
| 39 | * section enumerates these nodes which reside in the respective front and |
| 40 | * backend portions of the XenStore, following the XenBus convention. |
| 41 | * |
| 42 | * All data in the XenStore is stored as strings. Nodes specifying numeric |
| 43 | * values are encoded in decimal. Integer value ranges listed below are |
| 44 | * expressed as fixed sized integer types capable of storing the conversion |
| 45 | * of a properly formatted node string, without loss of information. |
| 46 | * |
| 47 | * Any specified default value is in effect if the corresponding XenBus node |
| 48 | * is not present in the XenStore. |
| 49 | * |
| 50 | * XenStore nodes in sections marked "PRIVATE" are solely for use by the |
| 51 | * driver side whose XenBus tree contains them. |
| 52 | * |
| 53 | * XenStore nodes marked "DEPRECATED" in their notes section should only be |
| 54 | * used to provide interoperability with legacy implementations. |
| 55 | * |
| 56 | * See the XenBus state transition diagram below for details on when XenBus |
| 57 | * nodes must be published and when they can be queried. |
| 58 | * |
| 59 | ***************************************************************************** |
| 60 | * Backend XenBus Nodes |
| 61 | ***************************************************************************** |
| 62 | * |
| 63 | *------------------ Backend Device Identification (PRIVATE) ------------------ |
| 64 | * |
| 65 | * mode |
| 66 | * Values: "r" (read only), "w" (writable) |
| 67 | * |
| 68 | * The read or write access permissions to the backing store to be |
| 69 | * granted to the frontend. |
| 70 | * |
| 71 | * params |
| 72 | * Values: string |
| 73 | * |
| 74 | * A free formatted string providing sufficient information for the |
| 75 | * hotplug script to attach the device and provide a suitable |
| 76 | * handler (i.e., a block device) for blkback to use. |
| 77 | * |
| 78 | * physical-device |
| 79 | * Values: "MAJOR:MINOR" |
| 80 | * Notes: 11 |
| 81 | * |
| 82 | * MAJOR and MINOR are the major number and minor number of the |
| 83 | * backing device respectively. |
| 84 | * |
| 85 | * physical-device-path |
| 86 | * Values: path string |
| 87 | * |
| 88 | * A string that contains the absolute path to the disk image. On |
| 89 | * NetBSD and Linux this is always a block device, while on FreeBSD |
| 90 | * it can be either a block device or a regular file. |
| 91 | * |
| 92 | * type |
| 93 | * Values: "file", "phy", "tap" |
| 94 | * |
| 95 | * The type of the backing device/object. |
| 96 | * |
| 97 | * |
| 98 | * direct-io-safe |
| 99 | * Values: 0/1 (boolean) |
| 100 | * Default Value: 0 |
| 101 | * |
| 102 | * The underlying storage is not affected by the direct IO memory |
| 103 | * lifetime bug. See: |
| 104 | * http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html |
| 105 | * |
| 106 | * Therefore this option gives the backend permission to use |
| 107 | * O_DIRECT, notwithstanding that bug. |
| 108 | * |
| 109 | * That is, if this option is enabled, use of O_DIRECT is safe, |
| 110 | * in circumstances where we would normally have avoided it as a |
| 111 | * workaround for that bug. This option is not relevant for all |
| 112 | * backends, and even not necessarily supported for those for |
| 113 | * which it is relevant. A backend which knows that it is not |
| 114 | * affected by the bug can ignore this option. |
| 115 | * |
| 116 | * This option doesn't require a backend to use O_DIRECT, so it |
| 117 | * should not be used to try to control the caching behaviour. |
| 118 | * |
| 119 | *--------------------------------- Features --------------------------------- |
| 120 | * |
| 121 | * feature-barrier |
| 122 | * Values: 0/1 (boolean) |
| 123 | * Default Value: 0 |
| 124 | * |
| 125 | * A value of "1" indicates that the backend can process requests |
| 126 | * containing the BLKIF_OP_WRITE_BARRIER request opcode. Requests |
| 127 | * of this type may still be returned at any time with the |
| 128 | * BLKIF_RSP_EOPNOTSUPP result code. |
| 129 | * |
| 130 | * feature-flush-cache |
| 131 | * Values: 0/1 (boolean) |
| 132 | * Default Value: 0 |
| 133 | * |
| 134 | * A value of "1" indicates that the backend can process requests |
| 135 | * containing the BLKIF_OP_FLUSH_DISKCACHE request opcode. Requests |
| 136 | * of this type may still be returned at any time with the |
| 137 | * BLKIF_RSP_EOPNOTSUPP result code. |
| 138 | * |
| 139 | * feature-discard |
| 140 | * Values: 0/1 (boolean) |
| 141 | * Default Value: 0 |
| 142 | * |
| 143 | * A value of "1" indicates that the backend can process requests |
| 144 | * containing the BLKIF_OP_DISCARD request opcode. Requests |
| 145 | * of this type may still be returned at any time with the |
| 146 | * BLKIF_RSP_EOPNOTSUPP result code. |
| 147 | * |
| 148 | * feature-persistent |
| 149 | * Values: 0/1 (boolean) |
| 150 | * Default Value: 0 |
| 151 | * Notes: 7 |
| 152 | * |
| 153 | * A value of "1" indicates that the backend can keep the grants used |
| 154 | * by the frontend driver mapped, so the same set of grants should be |
| 155 | * used in all transactions. The maximum number of grants the backend |
| 156 | * can map persistently depends on the implementation, but ideally it |
| 157 | * should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this |
| 158 | * feature the backend doesn't need to unmap each grant, preventing |
| 159 | * costly TLB flushes. The backend driver should only map grants |
| 160 | * persistently if the frontend supports it. If a backend driver chooses |
| 161 | * to use the persistent protocol when the frontend doesn't support it, |
| 162 | * it will probably hit the maximum number of persistently mapped grants |
| 163 | * (due to the fact that the frontend won't be reusing the same grants), |
| 164 | * and fall back to non-persistent mode. Backend implementations may |
| 165 | * shrink or expand the number of persistently mapped grants without |
| 166 | * notifying the frontend depending on memory constraints (this might |
| 167 | * cause a performance degradation). |
| 168 | * |
| 169 | * If a backend driver wants to limit the maximum number of persistently |
| 170 | * mapped grants to a value less than RING_SIZE * |
| 171 | * BLKIF_MAX_SEGMENTS_PER_REQUEST an LRU strategy should be used to |
| 172 | * discard the grants that are less commonly used. Using an LRU in the |
| 173 | * backend driver paired with a LIFO queue in the frontend will |
| 174 | * allow us to have better performance in this scenario. |
| 175 | * |
| 176 | *----------------------- Request Transport Parameters ------------------------ |
| 177 | * |
| 178 | * max-ring-page-order |
| 179 | * Values: <uint32_t> |
| 180 | * Default Value: 0 |
| 181 | * Notes: 1, 3 |
| 182 | * |
| 183 | * The maximum supported size of the request ring buffer in units of |
| 184 | * lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages, |
| 185 | * etc.). |
| 186 | * |
| 187 | * max-ring-pages |
| 188 | * Values: <uint32_t> |
| 189 | * Default Value: 1 |
| 190 | * Notes: DEPRECATED, 2, 3 |
| 191 | * |
| 192 | * The maximum supported size of the request ring buffer in units of |
| 193 | * machine pages. The value must be a power of 2. |
| 194 | * |
| 195 | *------------------------- Backend Device Properties ------------------------- |
| 196 | * |
| 197 | * discard-enable |
| 198 | * Values: 0/1 (boolean) |
| 199 | * Default Value: 1 |
| 200 | * |
| 201 | * This optional property, set by the toolstack, instructs the backend |
| 202 | * to offer (or not to offer) discard to the frontend. If the property |
| 203 | * is missing the backend should offer discard if the backing storage |
| 204 | * actually supports it. |
| 205 | * |
| 206 | * discard-alignment |
| 207 | * Values: <uint32_t> |
| 208 | * Default Value: 0 |
| 209 | * Notes: 4, 5 |
| 210 | * |
| 211 | * The offset, in bytes from the beginning of the virtual block device, |
| 212 | * to the first, addressable, discard extent on the underlying device. |
| 213 | * |
| 214 | * discard-granularity |
| 215 | * Values: <uint32_t> |
| 216 | * Default Value: <"sector-size"> |
| 217 | * Notes: 4 |
| 218 | * |
| 219 | * The size, in bytes, of the individually addressable discard extents |
| 220 | * of the underlying device. |
| 221 | * |
| 222 | * discard-secure |
| 223 | * Values: 0/1 (boolean) |
| 224 | * Default Value: 0 |
| 225 | * Notes: 10 |
| 226 | * |
| 227 | * A value of "1" indicates that the backend can process BLKIF_OP_DISCARD |
| 228 | * requests with the BLKIF_DISCARD_SECURE flag set. |
| 229 | * |
| 230 | * info |
| 231 | * Values: <uint32_t> (bitmap) |
| 232 | * |
| 233 | * A collection of bit flags describing attributes of the backing |
| 234 | * device. The VDISK_* macros define the meaning of each bit |
| 235 | * location. |
| 236 | * |
| 237 | * sector-size |
| 238 | * Values: <uint32_t> |
| 239 | * |
| 240 | * The logical block size, in bytes, of the underlying storage. This |
| 241 | * must be a power of two with a minimum value of 512. |
| 242 | * |
| 243 | * NOTE: Because of implementation bugs in some frontends this must be |
| 244 | * set to 512, unless the frontend advertises a non-zero value |
| 245 | * in its "feature-large-sector-size" xenbus node. (See below). |
| 246 | * |
| 247 | * physical-sector-size |
| 248 | * Values: <uint32_t> |
| 249 | * Default Value: <"sector-size"> |
| 250 | * |
| 251 | * The physical block size, in bytes, of the backend storage. This |
| 252 | * must be an integer multiple of "sector-size". |
| 253 | * |
| 254 | * sectors |
| 255 | * Values: <u64> |
| 256 | * |
| 257 | * The size of the backend device, expressed in units of "sector-size". |
| 258 | * The product of "sector-size" and "sectors" must also be an integer |
| 259 | * multiple of "physical-sector-size", if that node is present. |
| 260 | * |
| 261 | ***************************************************************************** |
| 262 | * Frontend XenBus Nodes |
| 263 | ***************************************************************************** |
| 264 | * |
| 265 | *----------------------- Request Transport Parameters ----------------------- |
| 266 | * |
| 267 | * event-channel |
| 268 | * Values: <uint32_t> |
| 269 | * |
| 270 | * The identifier of the Xen event channel used to signal activity |
| 271 | * in the ring buffer. |
| 272 | * |
| 273 | * ring-ref |
| 274 | * Values: <uint32_t> |
| 275 | * Notes: 6 |
| 276 | * |
| 277 | * The Xen grant reference granting permission for the backend to map |
| 278 | * the sole page in a single page sized ring buffer. |
| 279 | * |
| 280 | * ring-ref%u |
| 281 | * Values: <uint32_t> |
| 282 | * Notes: 6 |
| 283 | * |
| 284 | * For a frontend providing a multi-page ring, a "number of ring pages" |
| 285 | * sized list of nodes, each containing a Xen grant reference granting |
| 286 | * permission for the backend to map the page of the ring located |
| 287 | * at page index "%u". Page indexes are zero based. |
| 288 | * |
| 289 | * protocol |
| 290 | * Values: string (XEN_IO_PROTO_ABI_*) |
| 291 | * Default Value: XEN_IO_PROTO_ABI_NATIVE |
| 292 | * |
| 293 | * The machine ABI rules governing the format of all ring request and |
| 294 | * response structures. |
| 295 | * |
| 296 | * ring-page-order |
| 297 | * Values: <uint32_t> |
| 298 | * Default Value: 0 |
| 299 | * Maximum Value: MAX(ffs(max-ring-pages) - 1, max-ring-page-order) |
| 300 | * Notes: 1, 3 |
| 301 | * |
| 302 | * The size of the frontend allocated request ring buffer in units |
| 303 | * of lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages, |
| 304 | * etc.). |
| 305 | * |
| 306 | * num-ring-pages |
| 307 | * Values: <uint32_t> |
| 308 | * Default Value: 1 |
| 309 | * Maximum Value: MAX(max-ring-pages,(0x1 << max-ring-page-order)) |
| 310 | * Notes: DEPRECATED, 2, 3 |
| 311 | * |
| 312 | * The size of the frontend allocated request ring buffer in units of |
| 313 | * machine pages. The value must be a power of 2. |
| 314 | * |
| 315 | *--------------------------------- Features --------------------------------- |
| 316 | * |
| 317 | * feature-persistent |
| 318 | * Values: 0/1 (boolean) |
| 319 | * Default Value: 0 |
| 320 | * Notes: 7, 8, 9 |
| 321 | * |
| 322 | * A value of "1" indicates that the frontend will reuse the same grants |
| 323 | * for all transactions, allowing the backend to map them with write |
| 324 | * access (even when it should be read-only). If the frontend hits the |
| 325 | * maximum number of allowed persistently mapped grants, it can fall back |
| 326 | * to non-persistent mode. This will cause a performance degradation, |
| 327 | * since the backend driver will still try to map those grants |
| 328 | * persistently. Since the persistent grants protocol is compatible with |
| 329 | * the previous protocol, a frontend driver can choose to work in |
| 330 | * persistent mode even when the backend doesn't support it. |
| 331 | * |
| 332 | * It is recommended that the frontend driver stores the persistently |
| 333 | * mapped grants in a LIFO queue, so a subset of all persistently mapped |
| 334 | * grants gets used commonly. This is done in case the backend driver |
| 335 | * decides to limit the maximum number of persistently mapped grants |
| 336 | * to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. |
| 337 | * |
| 338 | * feature-large-sector-size |
| 339 | * Values: 0/1 (boolean) |
| 340 | * Default Value: 0 |
| 341 | * |
| 342 | * A value of "1" indicates that the frontend will correctly supply and |
| 343 | * interpret all sector-based quantities in terms of the "sector-size" |
| 344 | * value supplied in the backend info, whatever that may be set to. |
| 345 | * If this node is not present or its value is "0" then it is assumed |
| 346 | * that the frontend requires that the logical block size is 512 as it |
| 347 | * is hardcoded (which is the case in some frontend implementations). |
| 348 | * |
| 349 | *------------------------- Virtual Device Properties ------------------------- |
| 350 | * |
| 351 | * device-type |
| 352 | * Values: "disk", "cdrom", "floppy", etc. |
| 353 | * |
| 354 | * virtual-device |
| 355 | * Values: <uint32_t> |
| 356 | * |
| 357 | * A value indicating the physical device to virtualize within the |
| 358 | * frontend's domain. (e.g. "The first ATA disk", "The third SCSI |
| 359 | * disk", etc.) |
| 360 | * |
| 361 | * See docs/misc/vbd-interface.txt for details on the format of this |
| 362 | * value. |
| 363 | * |
| 364 | * Notes |
| 365 | * ----- |
| 366 | * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer |
| 367 | * PV drivers. |
| 368 | * (2) Multi-page ring buffer scheme first used in some RedHat distributions |
| 369 | * including a distribution deployed on certain nodes of the Amazon |
| 370 | * EC2 cluster. |
| 371 | * (3) Support for multi-page ring buffers was implemented independently, |
| 372 | * in slightly different forms, by both Citrix and RedHat/Amazon. |
| 373 | * For full interoperability, block front and backends should publish |
| 374 | * identical ring parameters, adjusted for unit differences, to the |
| 375 | * XenStore nodes used in both schemes. |
| 376 | * (4) Devices that support discard functionality may internally allocate space |
| 377 | * (discardable extents) in units that are larger than the exported logical |
| 378 | * block size. If the backing device has such discardable extents the |
| 379 | * backend should provide both discard-granularity and discard-alignment. |
| 380 | * Providing just one of the two may be considered an error by the frontend. |
| 381 | * Backends supporting discard should include discard-granularity and |
| 382 | * discard-alignment even if they support discarding individual sectors. |
| 383 | * Frontends should assume discard-alignment == 0 and discard-granularity |
| 384 | * == sector size if these keys are missing. |
| 385 | * (5) The discard-alignment parameter allows a physical device to be |
| 386 | * partitioned into virtual devices that do not necessarily begin or |
| 387 | * end on a discardable extent boundary. |
| 388 | * (6) When there is only a single page allocated to the request ring, |
| 389 | * 'ring-ref' is used to communicate the grant reference for this |
| 390 | * page to the backend. When using a multi-page ring, the 'ring-ref' |
| 391 | * node is not created. Instead 'ring-ref0' - 'ring-refN' are used. |
| 392 | * (7) When using persistent grants data has to be copied from/to the page |
| 393 | * where the grant is currently mapped. The overhead of doing this copy |
| 394 | * however doesn't outweigh the speed improvement of not having to unmap |
| 395 | * the grants. |
| 396 | * (8) The frontend driver has to allow the backend driver to map all grants |
| 397 | * with write access, even when they should be mapped read-only, since |
| 398 | * further requests may reuse these grants and require write permissions. |
| 399 | * (9) Linux implementation doesn't have a limit on the maximum number of |
| 400 | * grants that can be persistently mapped in the frontend driver, but |
| 401 | * due to the frontend driver implementation it should never be bigger |
| 402 | * than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. |
| 403 | *(10) The discard-secure property may be present and will be set to 1 if the |
| 404 | * backing device supports secure discard. |
| 405 | *(11) Only used by Linux and NetBSD. |
| 406 | */ |
| 407 | |
| 408 | /* |
| 409 | * Multiple hardware queues/rings: |
| 410 | * If supported, the backend will write the key "multi-queue-max-queues" to |
| 411 | * the directory for that vbd, and set its value to the maximum supported |
| 412 | * number of queues. |
| 413 | * Frontends that are aware of this feature and wish to use it can write the |
| 414 | * key "multi-queue-num-queues" with the number they wish to use, which must be |
| 415 | * greater than zero, and no more than the value reported by the backend in |
| 416 | * "multi-queue-max-queues". |
| 417 | * |
| 418 | * For frontends requesting just one queue, the usual event-channel and |
| 419 | * ring-ref keys are written as before, simplifying the backend processing |
| 420 | * to avoid distinguishing between a frontend that doesn't understand the |
| 421 | * multi-queue feature, and one that does, but requested only one queue. |
| 422 | * |
| 423 | * Frontends requesting two or more queues must not write the toplevel |
| 424 | * event-channel and ring-ref keys, instead writing those keys under sub-keys |
| 425 | * having the name "queue-N" where N is the integer ID of the queue/ring for |
| 426 | * which those keys belong. Queues are indexed from zero. |
| 427 | * For example, a frontend with two queues must write the following set of |
| 428 | * queue-related keys: |
| 429 | * |
| 430 | * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" |
| 431 | * /local/domain/1/device/vbd/0/queue-0 = "" |
| 432 | * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>" |
| 433 | * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>" |
| 434 | * /local/domain/1/device/vbd/0/queue-1 = "" |
| 435 | * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>" |
| 436 | * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>" |
| 437 | * |
| 438 | * It is also possible to use multiple queues/rings together with |
| 439 | * the multi-page ring buffer feature. |
| 440 | * For example, a frontend that requests two queues/rings, each with a ring |
| 441 | * buffer of two pages, must write the following set of related keys: |
| 442 | * |
| 443 | * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" |
| 444 | * /local/domain/1/device/vbd/0/ring-page-order = "1" |
| 445 | * /local/domain/1/device/vbd/0/queue-0 = "" |
| 446 | * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>" |
| 447 | * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>" |
| 448 | * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>" |
| 449 | * /local/domain/1/device/vbd/0/queue-1 = "" |
| 450 | * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>" |
| 451 | * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>" |
| 452 | * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>" |
| 453 | * |
| 454 | */ |
| 455 | |
| 456 | /* |
| 457 | * STATE DIAGRAMS |
| 458 | * |
| 459 | ***************************************************************************** |
| 460 | * Startup * |
| 461 | ***************************************************************************** |
| 462 | * |
| 463 | * Tool stack creates front and back nodes with state XenbusStateInitialising. |
| 464 | * |
| 465 | * Front Back |
| 466 | * ================================= ===================================== |
| 467 | * XenbusStateInitialising XenbusStateInitialising |
| 468 | * o Query virtual device o Query backend device identification |
| 469 | * properties. data. |
| 470 | * o Setup OS device instance. o Open and validate backend device. |
| 471 | * o Publish backend features and |
| 472 | * transport parameters. |
| 473 | * | |
| 474 | * | |
| 475 | * V |
| 476 | * XenbusStateInitWait |
| 477 | * |
| 478 | * o Query backend features and |
| 479 | * transport parameters. |
| 480 | * o Allocate and initialize the |
| 481 | * request ring. |
| 482 | * o Publish transport parameters |
| 483 | * that will be in effect during |
| 484 | * this connection. |
| 485 | * | |
| 486 | * | |
| 487 | * V |
| 488 | * XenbusStateInitialised |
| 489 | * |
| 490 | * o Query frontend transport parameters. |
| 491 | * o Connect to the request ring and |
| 492 | * event channel. |
| 493 | * o Publish backend device properties. |
| 494 | * | |
| 495 | * | |
| 496 | * V |
| 497 | * XenbusStateConnected |
| 498 | * |
| 499 | * o Query backend device properties. |
| 500 | * o Finalize OS virtual device |
| 501 | * instance. |
| 502 | * | |
| 503 | * | |
| 504 | * V |
| 505 | * XenbusStateConnected |
| 506 | * |
| 507 | * Note: Drivers that do not support any optional features, or the negotiation |
| 508 | * of transport parameters, can skip certain states in the state machine: |
| 509 | * |
| 510 | * o A frontend may transition to XenbusStateInitialised without |
| 511 | * waiting for the backend to enter XenbusStateInitWait. In this |
| 512 | * case, default transport parameters are in effect and any |
| 513 | * transport parameters published by the frontend must contain |
| 514 | * their default values. |
| 515 | * |
| 516 | * o A backend may transition to XenbusStateInitialised, bypassing |
| 517 | * XenbusStateInitWait, without waiting for the frontend to first |
| 518 | * enter the XenbusStateInitialised state. In this case, default |
| 519 | * transport parameters are in effect and any transport parameters |
| 520 | * published by the backend must contain their default values. |
| 521 | * |
| 522 | * Drivers that support optional features and/or transport parameter |
| 523 | * negotiation must tolerate these additional state transition paths. |
| 524 | * In general this means performing the work of any skipped state |
| 525 | * transition, if it has not already been performed, in addition to the |
| 526 | * work associated with entry into the current state. |
| 527 | */ |
| 528 | |
| 529 | /* |
| 530 | * REQUEST CODES: values for the 'operation' field of the request structures below. |
| 531 | */ |
| 532 | #define BLKIF_OP_READ 0 |
| 533 | #define BLKIF_OP_WRITE 1 |
| 534 | /* |
| 535 | * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER |
| 536 | * operation code ("barrier request") must be completed prior to the |
| 537 | * execution of the barrier request. All writes issued after the barrier |
| 538 | * request must not execute until after the completion of the barrier request. |
| 539 | * |
| 540 | * Optional. See "feature-barrier" XenBus node documentation above. |
| 541 | */ |
| 542 | #define BLKIF_OP_WRITE_BARRIER 2 |
| 543 | /* |
| 544 | * Commit any uncommitted contents of the backing device's volatile cache |
| 545 | * to stable storage. |
| 546 | * |
| 547 | * Optional. See "feature-flush-cache" XenBus node documentation above. |
| 548 | */ |
| 549 | #define BLKIF_OP_FLUSH_DISKCACHE 3 |
| 550 | /* |
| 551 | * Used in SLES sources for device specific command packet |
| 552 | * contained within the request. Reserved for that purpose. |
| 553 | */ |
| 554 | #define BLKIF_OP_RESERVED_1 4 |
| 555 | /* |
| 556 | * Indicate to the backend device that a region of storage is no longer in |
| 557 | * use, and may be discarded at any time without impact to the client. If |
| 558 | * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the |
| 559 | * discarded region on the device must be rendered unrecoverable before the |
| 560 | * command returns. |
| 561 | * |
| 562 | * This operation is analogous to performing a trim (ATA) or unmap (SCSI) |
| 563 | * command on a native device. |
| 564 | * |
| 565 | * More information about trim/unmap operations can be found at: |
| 566 | * http://t13.org/Documents/UploadedDocuments/docs2008/ |
| 567 | * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc |
| 568 | * http://www.seagate.com/staticfiles/support/disc/manuals/ |
| 569 | * Interface%20manuals/100293068c.pdf |
| 570 | * |
| 571 | * Optional. See "feature-discard", "discard-alignment", |
| 572 | * "discard-granularity", and "discard-secure" in the XenBus node |
| 573 | * documentation above. |
| 574 | */ |
| 575 | #define BLKIF_OP_DISCARD 5 |
| 576 | |
| 577 | /* |
| 578 | * Recognized if "feature-max-indirect-segments" is present in the backend |
| 579 | * xenbus info. The "feature-max-indirect-segments" node contains the maximum |
| 580 | * number of segments allowed by the backend per request. If the node is |
| 581 | * present, the frontend might use blkif_request_indirect structs in order to |
| 582 | * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The |
| 583 | * maximum number of indirect segments is fixed by the backend, but the |
| 584 | * frontend can issue requests with any number of indirect segments as long as |
| 585 | * it's less than the number provided by the backend. The indirect_grefs field |
| 586 | * in blkif_request_indirect should be filled by the frontend with the |
| 587 | * grant references of the pages that are holding the indirect segments. |
| 588 | * These pages are filled with an array of blkif_request_segment that hold the |
| 589 | * information about the segments. The number of indirect pages to use is |
| 590 | * determined by the number of segments an indirect request contains. Every |
| 591 | * indirect page can contain a maximum of |
| 592 | * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to |
| 593 | * calculate the number of indirect pages to use we have to do |
| 594 | * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))). |
| 595 | * |
| 596 | * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not* |
| 597 | * create the "feature-max-indirect-segments" node! |
| 598 | */ |
| 599 | #define BLKIF_OP_INDIRECT 6 |
| 600 | |
| 601 | /* |
| 602 | * Maximum scatter/gather segments per request; sizes the seg[] array in struct blkif_request. |
| 603 | * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE. |
| 604 | * NB. This could be 12 if the ring indexes weren't stored in the same page. |
| 605 | */ |
| 606 | #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 |
| 607 | |
| 608 | /* |
| 609 | * Maximum number of indirect pages to use per request; sizes the indirect_grefs[] array in struct blkif_request_indirect. |
| 610 | */ |
| 611 | #define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 |
| 612 | |
| 613 | /* |
| 614 | * NB. 'first_sect' and 'last_sect' in blkif_request_segment, as well as |
| 615 | * 'sector_number' in blkif_request, blkif_request_discard and |
| 616 | * blkif_request_indirect are sector-based quantities. See the description |
| 617 | * of the "feature-large-sector-size" frontend xenbus node above for |
| 618 | * more information. |
| 619 | */ |
| 620 | struct blkif_request_segment { /* one scatter/gather segment: a granted frame plus a sector range inside it */ |
| 621 | grant_ref_t gref; /* grant reference to the I/O buffer frame */ |
| 622 | /* @first_sect: first sector in frame to transfer (inclusive). */ |
| 623 | /* @last_sect: last sector in frame to transfer (inclusive). */ |
| 624 | u8 first_sect, last_sect; |
| 625 | }; |
| 626 | |
| 627 | /* |
| 628 | * Starting ring element for any I/O request. This is the request type handed to DEFINE_RING_TYPES() below. |
| 629 | */ |
| 630 | struct blkif_request { |
| 631 | u8 operation; /* BLKIF_OP_??? request code */ |
| 632 | u8 nr_segments; /* number of entries used in seg[] (the array holds BLKIF_MAX_SEGMENTS_PER_REQUEST) */ |
| 633 | blkif_vdev_t handle; /* only for read/write requests */ |
| 634 | u64 id; /* private guest value, echoed in resp */ |
| 635 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ |
| 636 | struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
| 637 | }; |
| 638 | |
| 639 | /* |
| 640 | * Cast a ring request to this structure when blkif_request.operation == BLKIF_OP_DISCARD. |
| 641 | * Invariant: sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request). |
| 642 | */ |
| 643 | struct blkif_request_discard { |
| 644 | u8 operation; /* BLKIF_OP_DISCARD */ |
| 645 | u8 flag; /* BLKIF_DISCARD_SECURE or zero */ |
| 646 | #define BLKIF_DISCARD_SECURE (1 << 0) /* bit 0 of 'flag'; ignored if discard-secure=0 */ |
| 647 | blkif_vdev_t handle; /* same as for read/write requests */ |
| 648 | u64 id; /* private guest value, echoed in resp */ |
| 649 | blkif_sector_t sector_number;/* start sector idx on disk */ |
| 650 | u64 nr_sectors; /* number of contiguous sectors to discard */ |
| 651 | }; |
| 652 | |
| 653 | struct blkif_request_indirect { /* request layout for BLKIF_OP_INDIRECT; see the BLKIF_OP_INDIRECT description above */ |
| 654 | u8 operation; /* BLKIF_OP_INDIRECT */ |
| 655 | u8 indirect_op; /* BLKIF_OP_{READ/WRITE} */ |
| 656 | u16 nr_segments; /* number of segments described in the indirect pages */ |
| 657 | u64 id; /* private guest value, echoed in resp */ |
| 658 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ |
| 659 | blkif_vdev_t handle; /* same as for read/write requests */ |
| 660 | grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; /* grant refs of the pages holding the blkif_request_segment array */ |
| 661 | #ifdef __i386__ |
| 662 | u64 pad; /* Make it 64 byte aligned on i386 */ |
| 663 | #endif |
| 664 | }; |
| 665 | |
| 666 | struct blkif_response { /* ring response element; the response type handed to DEFINE_RING_TYPES() below */ |
| 667 | u64 id; /* copied from request */ |
| 668 | u8 operation; /* copied from request */ |
| 669 | s16 status; /* BLKIF_RSP_??? status return code */ |
| 670 | }; |
| 671 | |
| 672 | /* |
| 673 | * STATUS RETURN CODES: values for the 'status' field of struct blkif_response. |
| 674 | */ |
| 675 | /* Operation not supported (may be returned for barrier, flush-cache and discard requests; see the feature-* nodes above). */ |
| 676 | #define BLKIF_RSP_EOPNOTSUPP -2 |
| 677 | /* Operation failed for some unspecified reason (-EIO). */ |
| 678 | #define BLKIF_RSP_ERROR -1 |
| 679 | /* Operation completed successfully. */ |
| 680 | #define BLKIF_RSP_OKAY 0 |
| 681 | |
| 682 | /* |
| 683 | * Generate blkif ring structures and types, with struct blkif_request as the request element and struct blkif_response as the response element. NOTE(review): DEFINE_RING_TYPES() is not defined in this file; presumably it comes from "ring.h" -- confirm. |
| 684 | */ |
| 685 | DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); |
| 686 | |
| 687 | #define VDISK_CDROM 0x1 /* VDISK_*: bit flags of the backend "info" XenStore node (see "info" above) */ |
| 688 | #define VDISK_REMOVABLE 0x2 |
| 689 | #define VDISK_READONLY 0x4 |
| 690 | |
| 691 | #endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ |
| 692 | |
| 693 | /* |
| 694 | * Local variables: |
| 695 | * mode: C |
| 696 | * c-file-style: "BSD" |
| 697 | * c-basic-offset: 4 |
| 698 | * tab-width: 4 |
| 699 | * indent-tabs-mode: nil |
| 700 | * End: |
| 701 | */ |