[ena] Add support for low latency transmit queues

Newer generations of the ENA hardware require the use of low latency
transmit queues, where the submission queues and the initial portion
of the transmitted packet are written to on-device memory via BAR2
instead of being read from host memory.

Detect support for low latency queues and set the placement policy
appropriately.  We attempt to use low latency queues only if the
device reports that it supports inline headers, 128-byte entries, and
two descriptors prior to the inlined header, on the basis that we
don't care about using low latency queues on older versions of the
hardware since those versions will support normal host memory
submission queues anyway.

We reuse the redundant memory allocated for the submission queue as
the bounce buffer for constructing the descriptors and inlined packet
data, since this avoids needing a separate allocation just for the
bounce buffer.

We construct a metadata submission queue entry prior to the actual
submission queue entry, since experimentation suggests that newer
generations of the hardware require this to be present even though it
conveys no information beyond its own existence.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
This commit is contained in:
Michael Brown
2025-10-16 15:58:23 +01:00
parent 0d15d7f0a5
commit c1badf71ca
2 changed files with 263 additions and 17 deletions

View File

@@ -450,6 +450,7 @@ static int ena_create_sq ( struct ena_nic *ena, struct ena_sq *sq,
union ena_aq_req *req;
union ena_acq_rsp *rsp;
unsigned int i;
size_t llqe;
int rc;
/* Allocate submission queue entries */
@@ -464,8 +465,7 @@ static int ena_create_sq ( struct ena_nic *ena, struct ena_sq *sq,
req = ena_admin_req ( ena );
req->header.opcode = ENA_CREATE_SQ;
req->create_sq.direction = sq->direction;
req->create_sq.policy = cpu_to_le16 ( ENA_SQ_HOST_MEMORY |
ENA_SQ_CONTIGUOUS );
req->create_sq.policy = cpu_to_le16 ( sq->policy );
req->create_sq.cq_id = cpu_to_le16 ( cq->id );
req->create_sq.count = cpu_to_le16 ( sq->count );
req->create_sq.address = cpu_to_le64 ( virt_to_bus ( sq->sqe.raw ) );
@@ -480,6 +480,14 @@ static int ena_create_sq ( struct ena_nic *ena, struct ena_sq *sq,
/* Parse response */
sq->id = le16_to_cpu ( rsp->create_sq.id );
sq->doorbell = le32_to_cpu ( rsp->create_sq.doorbell );
llqe = le32_to_cpu ( rsp->create_sq.llqe );
if ( sq->policy & ENA_SQ_DEVICE_MEMORY ) {
assert ( ena->mem != NULL );
assert ( sq->len >= sizeof ( *sq->sqe.llq ) );
sq->llqe = ( ena->mem + llqe );
} else {
sq->llqe = NULL;
}
/* Reset producer counter and phase */
sq->prod = 0;
@@ -494,10 +502,16 @@ static int ena_create_sq ( struct ena_nic *ena, struct ena_sq *sq,
for ( i = 0 ; i < sq->count ; i++ )
sq->ids[i] = i;
DBGC ( ena, "ENA %p %s SQ%d at [%08lx,%08lx) fill %d db +%04x CQ%d\n",
ena, ena_direction ( sq->direction ), sq->id,
virt_to_phys ( sq->sqe.raw ),
( virt_to_phys ( sq->sqe.raw ) + sq->len ),
DBGC ( ena, "ENA %p %s SQ%d at ",
ena, ena_direction ( sq->direction ), sq->id );
if ( sq->policy & ENA_SQ_DEVICE_MEMORY ) {
DBGC ( ena, "LLQ [+%08zx,+%08zx)", llqe,
( llqe + ( sq->count * sizeof ( sq->sqe.llq[0] ) ) ) );
} else {
DBGC ( ena, "[%08lx,%08lx)", virt_to_phys ( sq->sqe.raw ),
( virt_to_phys ( sq->sqe.raw ) + sq->len ) );
}
DBGC ( ena, " fill %d db +%04x CQ%d\n",
sq->fill, sq->doorbell, cq->id );
return 0;
@@ -744,6 +758,101 @@ static int ena_set_host_attributes ( struct ena_nic *ena ) {
return 0;
}
/**
 * Configure low latency queues
 *
 * Query the device's supported low latency queue options, verify
 * that the minimal configuration we are prepared to drive is
 * available, and enable it.
 *
 * @v ena		ENA device
 * @ret rc		Return status code
 */
static int ena_llq_config ( struct ena_nic *ena ) {
	union ena_aq_req *req;
	union ena_acq_rsp *rsp;
	union ena_feature *feature;
	uint16_t hdr_caps;
	uint16_t size_caps;
	uint16_t desc_caps;
	uint16_t stride_caps;
	uint16_t accel_mode;
	int rc;

	/* Ask the device for its supported LLQ configuration */
	req = ena_admin_req ( ena );
	req->header.opcode = ENA_GET_FEATURE;
	req->get_feature.id = ENA_LLQ_CONFIG;
	if ( ( rc = ena_admin ( ena, req, &rsp ) ) != 0 ) {
		DBGC ( ena, "ENA %p could not get LLQ configuration: %s\n",
		       ena, strerror ( rc ) );
		return rc;
	}

	/* Extract the supported option bitmasks */
	feature = &rsp->get_feature.feature;
	hdr_caps = le16_to_cpu ( feature->llq.header.supported );
	size_caps = le16_to_cpu ( feature->llq.size.supported );
	desc_caps = le16_to_cpu ( feature->llq.desc.supported );
	stride_caps = le16_to_cpu ( feature->llq.stride.supported );
	accel_mode = le16_to_cpu ( feature->llq.mode );
	DBGC ( ena, "ENA %p LLQ supports %02x:%02x:%02x:%02x:%02x with %dx%d "
	       "entries\n", ena, hdr_caps, size_caps, desc_caps, stride_caps,
	       accel_mode, le32_to_cpu ( feature->llq.queues ),
	       le32_to_cpu ( feature->llq.count ) );

	/* Refuse to use LLQs unless the device offers the specific
	 * minimal configuration that this driver knows how to drive.
	 */
	if ( ! feature->llq.queues ) {
		DBGC ( ena, "ENA %p LLQ has no queues\n", ena );
		return -ENOTSUP;
	}
	if ( ! ( hdr_caps & ENA_LLQ_HEADER_INLINE ) ) {
		DBGC ( ena, "ENA %p LLQ does not support inline headers\n",
		       ena );
		return -ENOTSUP;
	}
	if ( ! ( size_caps & ENA_LLQ_SIZE_128 ) ) {
		DBGC ( ena, "ENA %p LLQ does not support 128-byte entries\n",
		       ena );
		return -ENOTSUP;
	}
	if ( ! ( desc_caps & ENA_LLQ_DESC_2 ) ) {
		DBGC ( ena, "ENA %p LLQ does not support two-descriptor "
		       "entries\n", ena );
		return -ENOTSUP;
	}

	/* Choose the options that we require, and an arbitrary
	 * "don't care" stride (the lowest set bit, i.e. the first
	 * supported option).
	 */
	hdr_caps = ENA_LLQ_HEADER_INLINE;
	size_caps = ENA_LLQ_SIZE_128;
	desc_caps = ENA_LLQ_DESC_2;
	stride_caps &= ( -stride_caps );
	DBGC ( ena, "ENA %p LLQ enabling %02x:%02x:%02x:%02x:%02x\n",
	       ena, hdr_caps, size_caps, desc_caps, stride_caps, accel_mode );

	/* Tell the device which options to enable */
	req = ena_admin_req ( ena );
	req->header.opcode = ENA_SET_FEATURE;
	req->set_feature.id = ENA_LLQ_CONFIG;
	feature = &req->set_feature.feature;
	feature->llq.header.enabled = cpu_to_le16 ( hdr_caps );
	feature->llq.size.enabled = cpu_to_le16 ( size_caps );
	feature->llq.desc.enabled = cpu_to_le16 ( desc_caps );
	feature->llq.stride.enabled = cpu_to_le16 ( stride_caps );
	feature->llq.mode = cpu_to_le16 ( accel_mode );
	if ( ( rc = ena_admin ( ena, req, &rsp ) ) != 0 ) {
		DBGC ( ena, "ENA %p could not set LLQ configuration: %s\n",
		       ena, strerror ( rc ) );
		return rc;
	}

	/* Switch the transmit queue to on-device memory placement,
	 * and record the maximum inline header length.
	 */
	ena->tx.sq.policy |= ENA_SQ_DEVICE_MEMORY;
	ena->tx.sq.inlined = sizeof ( ena->tx.sq.sqe.llq->inlined );

	return 0;
}
/**
* Get statistics (for debugging)
*
@@ -954,9 +1063,15 @@ static void ena_close ( struct net_device *netdev ) {
static int ena_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) {
struct ena_nic *ena = netdev->priv;
struct ena_tx_sqe *sqe;
struct ena_tx_llqe *llqe;
const uint64_t *src;
uint64_t *dest;
physaddr_t address;
unsigned int index;
unsigned int id;
unsigned int i;
uint8_t flags;
size_t inlined;
size_t len;
/* Get next submission queue entry */
@@ -968,17 +1083,50 @@ static int ena_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) {
sqe = &ena->tx.sq.sqe.tx[index];
id = ena->tx_ids[index];
/* Construct submission queue entry */
/* Construct submission queue entry values */
address = virt_to_bus ( iobuf->data );
len = iob_len ( iobuf );
inlined = ena->tx.sq.inlined;
if ( inlined > len )
inlined = len;
len -= inlined;
address += inlined;
flags = ( ENA_SQE_FIRST | ENA_SQE_LAST | ENA_SQE_CPL |
ena->tx.sq.phase );
/* Prepare low-latency queue bounce buffer, if applicable */
llqe = ena->tx.sq.sqe.llq;
if ( ena->tx.sq.llqe ) {
/* Construct zero-information metadata queue entry */
llqe->meta.meta = ENA_TX_SQE_META;
llqe->meta.flags = ( flags & ~( ENA_SQE_LAST | ENA_SQE_CPL ) );
/* Copy inlined data */
memcpy ( llqe->inlined, iobuf->data, inlined );
/* Place submission queue entry within bounce buffer */
sqe = &llqe->sqe;
flags &= ~ENA_SQE_FIRST;
}
/* Construct submission queue entry */
sqe->len = cpu_to_le16 ( len );
sqe->id = cpu_to_le16 ( id );
sqe->address = cpu_to_le64 ( address );
sqe->inlined = inlined;
wmb();
sqe->flags = ( ENA_SQE_FIRST | ENA_SQE_LAST | ENA_SQE_CPL |
ena->tx.sq.phase );
sqe->flags = flags;
wmb();
/* Copy bounce buffer to on-device memory, if applicable */
if ( ena->tx.sq.llqe ) {
src = ( ( const void * ) llqe );
dest = ( ena->tx.sq.llqe + ( index * sizeof ( *llqe ) ) );
for ( i = 0 ; i < ( sizeof ( *llqe ) / sizeof ( *src ) ); i++ )
writeq ( *(src++), dest++ );
}
/* Increment producer counter */
ena->tx.sq.prod++;
if ( ( ena->tx.sq.prod % ENA_TX_COUNT ) == 0 )
@@ -1282,6 +1430,12 @@ static int ena_probe ( struct pci_device *pci ) {
if ( ( rc = ena_get_device_attributes ( netdev ) ) != 0 )
goto err_get_device_attributes;
/* Attempt to configure low latency queues, if applicable.
* Ignore any errors and continue without using LLQs.
*/
if ( ena->mem && ( ena->features & ENA_FEATURE_LLQ ) )
ena_llq_config ( ena );
/* Register network device */
if ( ( rc = register_netdev ( netdev ) ) != 0 )
goto err_register_netdev;

View File

@@ -145,6 +145,62 @@ struct ena_device_attributes {
uint32_t mtu;
} __attribute__ (( packed ));
/** Device supports low latency queues */
#define ENA_FEATURE_LLQ 0x00000010
/** Low latency queue config */
#define ENA_LLQ_CONFIG 4
/** A low latency queue option
 *
 * Each option is a bitmask pair: the device reports the set of
 * values that it supports, and the driver enables exactly one of
 * those values.
 */
struct ena_llq_option {
/** Bitmask of supported option values */
uint16_t supported;
/** Single-entry bitmask of the enabled option value */
uint16_t enabled;
} __attribute__ (( packed ));
/** Low latency queue configuration
 *
 * Feature descriptor read via ENA_GET_FEATURE and written via
 * ENA_SET_FEATURE with feature identifier ENA_LLQ_CONFIG.
 */
struct ena_llq_config {
/** Maximum number of low latency queues */
uint32_t queues;
/** Maximum queue depth */
uint32_t count;
/** Header locations (see enum ena_llq_header) */
struct ena_llq_option header;
/** Entry sizes (see enum ena_llq_size) */
struct ena_llq_option size;
/** Descriptor counts (see enum ena_llq_desc) */
struct ena_llq_option desc;
/** Descriptor strides */
struct ena_llq_option stride;
/** Reserved */
uint8_t reserved_a[4];
/** Acceleration mode */
uint16_t mode;
/** Maximum burst size */
uint16_t burst;
/** Reserved */
uint8_t reserved_b[4];
} __attribute__ (( packed ));
/** Low latency queue header locations
 *
 * Values for the ena_llq_config header option bitmasks.
 */
enum ena_llq_header {
/** Headers are placed inline immediately after descriptors */
ENA_LLQ_HEADER_INLINE = 0x0001,
};
/** Low latency queue entry sizes
 *
 * Values for the ena_llq_config size option bitmasks.
 */
enum ena_llq_size {
/** Entries are 128 bytes */
ENA_LLQ_SIZE_128 = 0x0001,
};
/** Low latency queue descriptor counts
 *
 * Values for the ena_llq_config desc option bitmasks.
 */
enum ena_llq_desc {
/** Two descriptors before inline headers */
ENA_LLQ_DESC_2 = 0x0002,
};
/** Async event notification queue config */
#define ENA_AENQ_CONFIG 26
@@ -237,6 +293,8 @@ struct ena_host_info {
union ena_feature {
/** Device attributes */
struct ena_device_attributes device;
/** Low latency queue configuration */
struct ena_llq_config llq;
/** Async event notification queue config */
struct ena_aenq_config aenq;
/** Host attributes */
@@ -280,6 +338,8 @@ struct ena_create_sq_req {
/** Submission queue placement policy flags */
enum ena_sq_policy {
/** Use host memory */
ENA_SQ_HOST_MEMORY = 0x0001,
/** Use on-device memory (must be used in addition to host memory) */
ENA_SQ_DEVICE_MEMORY = 0x0002,
/** Memory is contiguous */
ENA_SQ_CONTIGUOUS = 0x0100,
};
@@ -291,13 +351,13 @@ struct ena_create_sq_rsp {
/** Submission queue identifier */
uint16_t id;
/** Reserved */
uint8_t reserved[2];
uint8_t reserved_a[2];
/** Doorbell register offset */
uint32_t doorbell;
/** LLQ descriptor ring offset */
uint32_t llq_desc;
/** LLQ header offset */
uint32_t llq_data;
uint32_t llqe;
/** Reserved */
uint8_t reserved_b[4];
} __attribute__ (( packed ));
/** Destroy submission queue */
@@ -563,18 +623,31 @@ struct ena_aenq {
struct ena_tx_sqe {
/** Length */
uint16_t len;
/** Reserved */
uint8_t reserved_a;
/** Metadata flags */
uint8_t meta;
/** Flags */
uint8_t flags;
/** Reserved */
uint8_t reserved_b[3];
/** Request identifier */
uint8_t id;
/** Address */
uint64_t address;
/** Address and inlined length */
union {
/** Address */
uint64_t address;
/** Inlined length */
struct {
/** Reserved */
uint8_t reserved[7];
/** Inlined length */
uint8_t inlined;
} __attribute__ (( packed ));
} __attribute__ (( packed ));
} __attribute__ (( packed ));
/** This is a metadata entry */
#define ENA_TX_SQE_META 0x80
/** Receive submission queue entry */
struct ena_rx_sqe {
/** Length */
@@ -637,6 +710,16 @@ struct ena_rx_cqe {
/** Completion queue ownership phase flag */
#define ENA_CQE_PHASE 0x01
/** Low latency transmit queue bounce buffer
 *
 * Constructed in host memory and then copied as a unit to on-device
 * memory.  The two descriptors followed by the inline header data
 * fill one 128-byte LLQ entry (matching ENA_LLQ_SIZE_128 and
 * ENA_LLQ_DESC_2).
 */
struct ena_tx_llqe {
/** Pointless metadata descriptor
 *
 * Conveys no information beyond its own existence, but newer
 * hardware appears to require its presence.
 */
struct ena_tx_sqe meta;
/** Transmit descriptor */
struct ena_tx_sqe sqe;
/** Inlined header data */
uint8_t inlined[96];
} __attribute__ (( packed ));
/** Submission queue */
struct ena_sq {
/** Entries */
@@ -645,11 +728,15 @@ struct ena_sq {
struct ena_tx_sqe *tx;
/** Receive submission queue entries */
struct ena_rx_sqe *rx;
/** Low latency queue bounce buffer */
struct ena_tx_llqe *llq;
/** Raw data */
void *raw;
} sqe;
/** Buffer IDs */
uint8_t *ids;
/** Low latency queue base */
void *llqe;
/** Doorbell register offset */
unsigned int doorbell;
/** Total length of entries */
@@ -658,6 +745,8 @@ struct ena_sq {
unsigned int prod;
/** Phase */
unsigned int phase;
/** Queue policy */
uint16_t policy;
/** Submission queue identifier */
uint16_t id;
/** Direction */
@@ -668,6 +757,8 @@ struct ena_sq {
uint8_t max;
/** Fill level (limited to completion queue size) */
uint8_t fill;
/** Maximum inline header length */
uint8_t inlined;
};
/**
@@ -685,6 +776,7 @@ ena_sq_init ( struct ena_sq *sq, unsigned int direction, unsigned int count,
unsigned int max, size_t size, uint8_t *ids ) {
sq->len = ( count * size );
sq->policy = ( ENA_SQ_HOST_MEMORY | ENA_SQ_CONTIGUOUS );
sq->direction = direction;
sq->count = count;
sq->max = max;