[gve] Use dummy interrupt to trigger completion writeback in DQO mode

When operating in the DQO operating mode, the device will defer
writing transmit and receive completions until an entire internal
cacheline (128 bytes) is full, or until an associated interrupt is
asserted.  Since each receive descriptor is 32 bytes, this will cause
received packets to be effectively delayed until up to three further
packets have arrived.  When network traffic volumes are very low (such
as during DHCP, DNS lookups, or TCP handshakes), this typically
induces delays of up to 30 seconds and results in a very poor user
experience.

Work around this hardware problem in the same way as for the Intel
40GbE and 100GbE NICs: by enabling dummy MSI-X interrupts to trick the
hardware into believing that it needs to write out completions to host
memory.

There is no documentation around the interrupt rearming mechanism.
The value written to the interrupt doorbell does not include a
consumer counter value, and so the mechanism must be relying on some
undocumented ordering constraints.  Comments in the Linux driver
source suggest that the authors believe that the device will
automatically and atomically mask an MSI-X interrupt at the point of
asserting it, that any further interrupts arriving before the doorbell
is written will be recorded in the pending bit array, and that writing
the doorbell will therefore immediately assert a new interrupt if
needed.

In the absence of any documentation, choose to rearm the interrupt
once per observed completion.  This is overkill, but is less impactful
than the alternative of rearming the interrupt unconditionally on
every poll.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
This commit is contained in:
Michael Brown
2025-10-09 17:12:20 +01:00
parent c2d7ddd0c2
commit d2e1e591ab
2 changed files with 37 additions and 3 deletions

View File

@@ -591,6 +591,7 @@ static int gve_configure ( struct gve_nic *gve ) {
struct gve_events *events = &gve->events;
struct gve_irqs *irqs = &gve->irqs;
union gve_admin_command *cmd;
uint32_t doorbell;
unsigned int db_off;
unsigned int i;
int rc;
@@ -612,12 +613,14 @@ static int gve_configure ( struct gve_nic *gve ) {
return rc;
/* Disable all interrupts */
doorbell = ( ( gve->mode & GVE_MODE_DQO ) ?
0 : bswap_32 ( GVE_GQI_IRQ_DISABLE ) );
for ( i = 0 ; i < GVE_IRQ_COUNT ; i++ ) {
db_off = ( be32_to_cpu ( irqs->irq[i].db_idx ) *
sizeof ( uint32_t ) );
DBGC ( gve, "GVE %p IRQ %d doorbell +%#04x\n", gve, i, db_off );
irqs->db[i] = ( gve->db + db_off );
writel ( bswap_32 ( GVE_IRQ_DISABLE ), irqs->db[i] );
writel ( doorbell, irqs->db[i] );
}
return 0;
@@ -810,6 +813,13 @@ static int gve_create_queue ( struct gve_nic *gve, struct gve_queue *queue ) {
queue->event = &gve->events.event[evt_idx];
assert ( queue->event->count == 0 );
/* Unmask dummy interrupt */
pci_msix_unmask ( &gve->msix, type->irq );
/* Rearm queue interrupt if applicable */
if ( gve->mode & GVE_MODE_DQO )
writel ( GVE_DQO_IRQ_REARM, gve->irqs.db[type->irq] );
return 0;
}
@@ -824,6 +834,9 @@ static int gve_destroy_queue ( struct gve_nic *gve, struct gve_queue *queue ) {
const struct gve_queue_type *type = queue->type;
int rc;
/* Mask dummy interrupt */
pci_msix_mask ( &gve->msix, type->irq );
/* Issue command */
if ( ( rc = gve_admin_simple ( gve, type->destroy, 0 ) ) != 0 )
return rc;
@@ -1496,6 +1509,9 @@ static void gve_poll_tx ( struct net_device *netdev ) {
rmb();
tx->done++;
/* Re-arm interrupt */
writel ( GVE_DQO_IRQ_REARM, gve->irqs.db[GVE_TX_IRQ] );
/* Ignore non-packet completions */
if ( ( ! ( dqo->flags & GVE_DQO_TXF_PKT ) ) ||
( dqo->tag.count < 0 ) ) {
@@ -1586,6 +1602,9 @@ static void gve_poll_rx ( struct net_device *netdev ) {
break;
rmb();
/* Re-arm interrupt */
writel ( GVE_DQO_IRQ_REARM, gve->irqs.db[GVE_RX_IRQ] );
/* Parse completion */
len = ( le16_to_cpu ( dqo->len ) &
( GVE_BUF_SIZE - 1 ) );
@@ -1917,6 +1936,10 @@ static int gve_probe ( struct pci_device *pci ) {
dma_set_mask_64bit ( gve->dma );
assert ( netdev->dma == NULL );
/* Configure dummy MSI-X interrupt */
if ( ( rc = pci_msix_enable ( pci, &gve->msix ) ) != 0 )
goto err_msix;
/* Allocate admin queue */
if ( ( rc = gve_admin_alloc ( gve ) ) != 0 )
goto err_admin;
@@ -1937,6 +1960,8 @@ static int gve_probe ( struct pci_device *pci ) {
gve_reset ( gve );
gve_admin_free ( gve );
err_admin:
pci_msix_disable ( pci, &gve->msix );
err_msix:
iounmap ( gve->db );
err_db:
iounmap ( gve->cfg );
@@ -1965,6 +1990,9 @@ static void gve_remove ( struct pci_device *pci ) {
/* Free admin queue */
gve_admin_free ( gve );
/* Disable dummy MSI-X interrupt */
pci_msix_disable ( pci, &gve->msix );
/* Unmap registers */
iounmap ( gve->db );
iounmap ( gve->cfg );

View File

@@ -16,6 +16,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
#include <stdint.h>
#include <ipxe/dma.h>
#include <ipxe/pci.h>
#include <ipxe/pcimsix.h>
#include <ipxe/in.h>
#include <ipxe/process.h>
#include <ipxe/retry.h>
@@ -443,8 +444,11 @@ struct gve_irqs {
volatile uint32_t *db[GVE_IRQ_COUNT];
};
/** Disable interrupts */
#define GVE_IRQ_DISABLE 0x40000000UL
/** Disable in-order queue interrupt
 *
 * Written (byte-swapped via bswap_32()) to each interrupt doorbell by
 * gve_configure() when the device is not operating in DQO mode.
 */
#define GVE_GQI_IRQ_DISABLE 0x40000000UL
/** Rearm out-of-order queue interrupt
 *
 * Written (without byte-swapping) to the queue's interrupt doorbell
 * when a queue is created in DQO mode, and again from gve_poll_tx()
 * and gve_poll_rx() for each observed completion.
 *
 * NOTE(review): the meaning of the individual bits in this value is
 * not visible here and is described as undocumented; confirm against
 * the Linux driver before changing it.
 */
#define GVE_DQO_IRQ_REARM 0x00000019UL
/**
* Queue resources
@@ -856,6 +860,8 @@ struct gve_nic {
struct net_device *netdev;
/** DMA device */
struct dma_device *dma;
/** Dummy MSI-X interrupt */
struct pci_msix msix;
/** Admin queue */
struct gve_admin admin;