Files
ipxe/src/drivers/net/virtio-net.c
Michael Brown b9d68b9de0 [ethernet] Use standard 1500 byte MTU unless explicitly overridden
Devices that support jumbo frames will currently default to the
largest possible MTU.  This assumption is valid for virtual adapters
such as virtio-net, where the MTU must have been configured by a
system administrator, but is unsafe in the general case of a physical
adapter.

Default to the standard Ethernet MTU, unless explicitly overridden
either by the driver or via the ${netX/mtu} setting.

Signed-off-by: Michael Brown <mcb30@ipxe.org>
2018-07-17 12:14:43 +01:00

691 lines
19 KiB
C

/*
* (c) Copyright 2010 Stefan Hajnoczi <stefanha@gmail.com>
*
* based on the Etherboot virtio-net driver
*
* (c) Copyright 2008 Bull S.A.S.
*
* Author: Laurent Vivier <Laurent.Vivier@bull.net>
*
* some parts from Linux Virtio PCI driver
*
* Copyright IBM Corp. 2007
* Authors: Anthony Liguori <aliguori@us.ibm.com>
*
* some parts from Linux Virtio Ring
*
* Copyright Rusty Russell IBM Corporation 2007
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <ipxe/list.h>
#include <ipxe/iobuf.h>
#include <ipxe/netdevice.h>
#include <ipxe/pci.h>
#include <ipxe/if_ether.h>
#include <ipxe/ethernet.h>
#include <ipxe/virtio-pci.h>
#include <ipxe/virtio-ring.h>
#include "virtio-net.h"
/*
* Virtio network device driver
*
* Specification:
* http://ozlabs.org/~rusty/virtio-spec/
*
* The virtio network device is supported by Linux virtualization software
* including QEMU/KVM and lguest. This driver supports the virtio over PCI
* transport; virtual machines have one virtio-net PCI adapter per NIC.
*
* Virtio-net is different from hardware NICs because virtio devices
* communicate with the hypervisor via virtqueues, not traditional descriptor
* rings. Virtqueues are unordered queues, they support add_buf() and
* get_buf() operations. To transmit a packet, the driver has to add the
* packet buffer onto the virtqueue. To receive a packet, the driver must
* first add an empty buffer to the virtqueue and then get the filled packet
* buffer on completion.
*
* Virtqueues are an abstraction that is commonly implemented using the vring
* descriptor ring layout. The vring is the actual shared memory structure
* that allows the virtual machine to communicate buffers with the hypervisor.
* Because the vring layout is optimized for flexibility and performance rather
* than space, it is heavy-weight and allocated like traditional descriptor
* rings in the open() function of the driver and not in probe().
*
* There is no true interrupt enable/disable. Virtqueues have callback
* enable/disable flags but these are only hints. The hypervisor may still
* raise an interrupt. Nevertheless, this driver disables callbacks in the
* hopes of avoiding interrupts.
*/
/* Driver types are declared here so virtio-net.h can be easily synced with its
* Linux source.
*/
/* Virtqueue indices */
enum {
RX_INDEX = 0,
TX_INDEX,
QUEUE_NB
};
/** Max number of pending rx packets */
#define NUM_RX_BUF 8
struct virtnet_nic {
/** Base pio register address */
unsigned long ioaddr;
/** 0 for legacy, 1 for virtio 1.0 */
int virtio_version;
/** Virtio 1.0 device data */
struct virtio_pci_modern_device vdev;
/** RX/TX virtqueues */
struct vring_virtqueue *virtqueue;
/** RX packets handed to the NIC waiting to be filled in */
struct list_head rx_iobufs;
/** Pending rx packet count */
unsigned int rx_num_iobufs;
/** Virtio net dummy packet headers */
struct virtio_net_hdr_modern empty_header[QUEUE_NB];
};
/** Add an iobuf to a virtqueue
*
* @v netdev Network device
* @v vq_idx Virtqueue index (RX_INDEX or TX_INDEX)
* @v iobuf I/O buffer
*
* The virtqueue is kicked after the iobuf has been added.
*/
static void virtnet_enqueue_iob ( struct net_device *netdev,
int vq_idx, struct io_buffer *iobuf ) {
struct virtnet_nic *virtnet = netdev->priv;
struct vring_virtqueue *vq = &virtnet->virtqueue[vq_idx];
struct virtio_net_hdr_modern *header = &virtnet->empty_header[vq_idx];
unsigned int out = ( vq_idx == TX_INDEX ) ? 2 : 0;
unsigned int in = ( vq_idx == TX_INDEX ) ? 0 : 2;
size_t header_len = ( virtnet->virtio_version ?
sizeof ( *header ) : sizeof ( header->legacy ) );
struct vring_list list[] = {
{
/* Share a single zeroed virtio net header between all
* packets in a ring. This works because this driver
* does not use any advanced features so none of the
* header fields get used.
*
* Some host implementations (notably Google Compute
* Platform) are known to unconditionally write back
* to header->flags for received packets. Work around
* this by using separate RX and TX headers.
*/
.addr = ( char* ) header,
.length = header_len,
},
{
.addr = ( char* ) iobuf->data,
.length = iob_len ( iobuf ),
},
};
DBGC2 ( virtnet, "VIRTIO-NET %p enqueuing iobuf %p on vq %d\n",
virtnet, iobuf, vq_idx );
vring_add_buf ( vq, list, out, in, iobuf, 0 );
vring_kick ( virtnet->virtio_version ? &virtnet->vdev : NULL,
virtnet->ioaddr, vq, 1 );
}
/** Try to keep rx virtqueue filled with iobufs
*
* @v netdev Network device
*/
static void virtnet_refill_rx_virtqueue ( struct net_device *netdev ) {
struct virtnet_nic *virtnet = netdev->priv;
size_t len = ( netdev->max_pkt_len + 4 /* VLAN */ );
while ( virtnet->rx_num_iobufs < NUM_RX_BUF ) {
struct io_buffer *iobuf;
/* Try to allocate a buffer, stop for now if out of memory */
iobuf = alloc_iob ( len );
if ( ! iobuf )
break;
/* Keep track of iobuf so close() can free it */
list_add ( &iobuf->list, &virtnet->rx_iobufs );
/* Mark packet length until we know the actual size */
iob_put ( iobuf, len );
virtnet_enqueue_iob ( netdev, RX_INDEX, iobuf );
virtnet->rx_num_iobufs++;
}
}
/** Helper to free all virtqueue memory
*
* @v netdev Network device
*/
static void virtnet_free_virtqueues ( struct net_device *netdev ) {
struct virtnet_nic *virtnet = netdev->priv;
int i;
for ( i = 0; i < QUEUE_NB; i++ ) {
virtio_pci_unmap_capability ( &virtnet->virtqueue[i].notification );
vp_free_vq ( &virtnet->virtqueue[i] );
}
free ( virtnet->virtqueue );
virtnet->virtqueue = NULL;
}
/** Open network device, legacy virtio 0.9.5
*
* @v netdev Network device
* @ret rc Return status code
*/
static int virtnet_open_legacy ( struct net_device *netdev ) {
struct virtnet_nic *virtnet = netdev->priv;
unsigned long ioaddr = virtnet->ioaddr;
u32 features;
int i;
/* Reset for sanity */
vp_reset ( ioaddr );
/* Allocate virtqueues */
virtnet->virtqueue = zalloc ( QUEUE_NB *
sizeof ( *virtnet->virtqueue ) );
if ( ! virtnet->virtqueue )
return -ENOMEM;
/* Initialize rx/tx virtqueues */
for ( i = 0; i < QUEUE_NB; i++ ) {
if ( vp_find_vq ( ioaddr, i, &virtnet->virtqueue[i] ) == -1 ) {
DBGC ( virtnet, "VIRTIO-NET %p cannot register queue %d\n",
virtnet, i );
virtnet_free_virtqueues ( netdev );
return -ENOENT;
}
}
/* Initialize rx packets */
INIT_LIST_HEAD ( &virtnet->rx_iobufs );
virtnet->rx_num_iobufs = 0;
virtnet_refill_rx_virtqueue ( netdev );
/* Disable interrupts before starting */
netdev_irq ( netdev, 0 );
/* Driver is ready */
features = vp_get_features ( ioaddr );
vp_set_features ( ioaddr, features & ( ( 1 << VIRTIO_NET_F_MAC ) |
( 1 << VIRTIO_NET_F_MTU ) ) );
vp_set_status ( ioaddr, VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK );
return 0;
}
/** Open network device, modern virtio 1.0
*
* @v netdev Network device
* @ret rc Return status code
*/
static int virtnet_open_modern ( struct net_device *netdev ) {
struct virtnet_nic *virtnet = netdev->priv;
u64 features;
u8 status;
/* Negotiate features */
features = vpm_get_features ( &virtnet->vdev );
if ( ! ( features & VIRTIO_F_VERSION_1 ) ) {
vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED );
return -EINVAL;
}
vpm_set_features ( &virtnet->vdev, features & (
( 1ULL << VIRTIO_NET_F_MAC ) |
( 1ULL << VIRTIO_NET_F_MTU ) |
( 1ULL << VIRTIO_F_VERSION_1 ) |
( 1ULL << VIRTIO_F_ANY_LAYOUT ) |
( 1ULL << VIRTIO_F_IOMMU_PLATFORM ) ) );
vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FEATURES_OK );
status = vpm_get_status ( &virtnet->vdev );
if ( ! ( status & VIRTIO_CONFIG_S_FEATURES_OK ) ) {
DBGC ( virtnet, "VIRTIO-NET %p device didn't accept features\n",
virtnet );
vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED );
return -EINVAL;
}
/* Allocate virtqueues */
virtnet->virtqueue = zalloc ( QUEUE_NB *
sizeof ( *virtnet->virtqueue ) );
if ( ! virtnet->virtqueue ) {
vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED );
return -ENOMEM;
}
/* Initialize rx/tx virtqueues */
if ( vpm_find_vqs ( &virtnet->vdev, QUEUE_NB, virtnet->virtqueue ) ) {
DBGC ( virtnet, "VIRTIO-NET %p cannot register queues\n",
virtnet );
virtnet_free_virtqueues ( netdev );
vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_FAILED );
return -ENOENT;
}
/* Disable interrupts before starting */
netdev_irq ( netdev, 0 );
vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_DRIVER_OK );
/* Initialize rx packets */
INIT_LIST_HEAD ( &virtnet->rx_iobufs );
virtnet->rx_num_iobufs = 0;
virtnet_refill_rx_virtqueue ( netdev );
return 0;
}
/** Open network device
*
* @v netdev Network device
* @ret rc Return status code
*/
static int virtnet_open ( struct net_device *netdev ) {
struct virtnet_nic *virtnet = netdev->priv;
if ( virtnet->virtio_version ) {
return virtnet_open_modern ( netdev );
} else {
return virtnet_open_legacy ( netdev );
}
}
/** Close network device
*
* @v netdev Network device
*/
static void virtnet_close ( struct net_device *netdev ) {
struct virtnet_nic *virtnet = netdev->priv;
struct io_buffer *iobuf;
struct io_buffer *next_iobuf;
if ( virtnet->virtio_version ) {
vpm_reset ( &virtnet->vdev );
} else {
vp_reset ( virtnet->ioaddr );
}
/* Virtqueues can be freed now that NIC is reset */
virtnet_free_virtqueues ( netdev );
/* Free rx iobufs */
list_for_each_entry_safe ( iobuf, next_iobuf, &virtnet->rx_iobufs, list ) {
free_iob ( iobuf );
}
INIT_LIST_HEAD ( &virtnet->rx_iobufs );
virtnet->rx_num_iobufs = 0;
}
/** Transmit packet
*
* @v netdev Network device
* @v iobuf I/O buffer
* @ret rc Return status code
*/
static int virtnet_transmit ( struct net_device *netdev,
struct io_buffer *iobuf ) {
virtnet_enqueue_iob ( netdev, TX_INDEX, iobuf );
return 0;
}
/** Complete packet transmission
*
* @v netdev Network device
*/
static void virtnet_process_tx_packets ( struct net_device *netdev ) {
struct virtnet_nic *virtnet = netdev->priv;
struct vring_virtqueue *tx_vq = &virtnet->virtqueue[TX_INDEX];
while ( vring_more_used ( tx_vq ) ) {
struct io_buffer *iobuf = vring_get_buf ( tx_vq, NULL );
DBGC2 ( virtnet, "VIRTIO-NET %p tx complete iobuf %p\n",
virtnet, iobuf );
netdev_tx_complete ( netdev, iobuf );
}
}
/** Complete packet reception
*
* @v netdev Network device
*/
static void virtnet_process_rx_packets ( struct net_device *netdev ) {
struct virtnet_nic *virtnet = netdev->priv;
struct vring_virtqueue *rx_vq = &virtnet->virtqueue[RX_INDEX];
while ( vring_more_used ( rx_vq ) ) {
unsigned int len;
struct io_buffer *iobuf = vring_get_buf ( rx_vq, &len );
/* Release ownership of iobuf */
list_del ( &iobuf->list );
virtnet->rx_num_iobufs--;
/* Update iobuf length */
iob_unput ( iobuf, iob_len ( iobuf ) );
iob_put ( iobuf, len - sizeof ( struct virtio_net_hdr ) );
DBGC2 ( virtnet, "VIRTIO-NET %p rx complete iobuf %p len %zd\n",
virtnet, iobuf, iob_len ( iobuf ) );
/* Pass completed packet to the network stack */
netdev_rx ( netdev, iobuf );
}
virtnet_refill_rx_virtqueue ( netdev );
}
/** Poll for completed and received packets
*
* @v netdev Network device
*/
static void virtnet_poll ( struct net_device *netdev ) {
struct virtnet_nic *virtnet = netdev->priv;
/* Acknowledge interrupt. This is necessary for UNDI operation and
* interrupts that are raised despite VRING_AVAIL_F_NO_INTERRUPT being
* set (that flag is just a hint and the hypervisor does not have to
* honor it).
*/
if ( virtnet->virtio_version ) {
vpm_get_isr ( &virtnet->vdev );
} else {
vp_get_isr ( virtnet->ioaddr );
}
virtnet_process_tx_packets ( netdev );
virtnet_process_rx_packets ( netdev );
}
/** Enable or disable interrupts
*
* @v netdev Network device
* @v enable Interrupts should be enabled
*/
static void virtnet_irq ( struct net_device *netdev, int enable ) {
struct virtnet_nic *virtnet = netdev->priv;
int i;
for ( i = 0; i < QUEUE_NB; i++ ) {
if ( enable )
vring_enable_cb ( &virtnet->virtqueue[i] );
else
vring_disable_cb ( &virtnet->virtqueue[i] );
}
}
/** virtio-net device operations */
static struct net_device_operations virtnet_operations = {
.open = virtnet_open,
.close = virtnet_close,
.transmit = virtnet_transmit,
.poll = virtnet_poll,
.irq = virtnet_irq,
};
/**
* Probe PCI device, legacy virtio 0.9.5
*
* @v pci PCI device
* @ret rc Return status code
*/
static int virtnet_probe_legacy ( struct pci_device *pci ) {
unsigned long ioaddr = pci->ioaddr;
struct net_device *netdev;
struct virtnet_nic *virtnet;
u32 features;
u16 mtu;
int rc;
/* Allocate and hook up net device */
netdev = alloc_etherdev ( sizeof ( *virtnet ) );
if ( ! netdev )
return -ENOMEM;
netdev_init ( netdev, &virtnet_operations );
virtnet = netdev->priv;
virtnet->ioaddr = ioaddr;
pci_set_drvdata ( pci, netdev );
netdev->dev = &pci->dev;
DBGC ( virtnet, "VIRTIO-NET %p busaddr=%s ioaddr=%#lx irq=%d\n",
virtnet, pci->dev.name, ioaddr, pci->irq );
/* Enable PCI bus master and reset NIC */
adjust_pci_device ( pci );
vp_reset ( ioaddr );
/* Load MAC address and MTU */
features = vp_get_features ( ioaddr );
if ( features & ( 1 << VIRTIO_NET_F_MAC ) ) {
vp_get ( ioaddr, offsetof ( struct virtio_net_config, mac ),
netdev->hw_addr, ETH_ALEN );
DBGC ( virtnet, "VIRTIO-NET %p mac=%s\n", virtnet,
eth_ntoa ( netdev->hw_addr ) );
}
if ( features & ( 1ULL << VIRTIO_NET_F_MTU ) ) {
vp_get ( ioaddr, offsetof ( struct virtio_net_config, mtu ),
&mtu, sizeof ( mtu ) );
DBGC ( virtnet, "VIRTIO-NET %p mtu=%d\n", virtnet, mtu );
netdev->max_pkt_len = ( mtu + ETH_HLEN );
netdev->mtu = mtu;
}
/* Register network device */
if ( ( rc = register_netdev ( netdev ) ) != 0 )
goto err_register_netdev;
/* Mark link as up, control virtqueue is not used */
netdev_link_up ( netdev );
return 0;
unregister_netdev ( netdev );
err_register_netdev:
vp_reset ( ioaddr );
netdev_nullify ( netdev );
netdev_put ( netdev );
return rc;
}
/**
* Probe PCI device, modern virtio 1.0
*
* @v pci PCI device
* @v found_dev Set to non-zero if modern device was found (probe may still fail)
* @ret rc Return status code
*/
static int virtnet_probe_modern ( struct pci_device *pci, int *found_dev ) {
struct net_device *netdev;
struct virtnet_nic *virtnet;
u64 features;
u16 mtu;
int rc, common, isr, notify, config, device;
common = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_COMMON_CFG );
if ( ! common ) {
DBG ( "Common virtio capability not found!\n" );
return -ENODEV;
}
*found_dev = 1;
isr = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_ISR_CFG );
notify = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_NOTIFY_CFG );
config = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_PCI_CFG );
if ( ! isr || ! notify || ! config ) {
DBG ( "Missing virtio capabilities %i/%i/%i/%i\n",
common, isr, notify, config );
return -EINVAL;
}
device = virtio_pci_find_capability ( pci, VIRTIO_PCI_CAP_DEVICE_CFG );
/* Allocate and hook up net device */
netdev = alloc_etherdev ( sizeof ( *virtnet ) );
if ( ! netdev )
return -ENOMEM;
netdev_init ( netdev, &virtnet_operations );
virtnet = netdev->priv;
pci_set_drvdata ( pci, netdev );
netdev->dev = &pci->dev;
DBGC ( virtnet, "VIRTIO-NET modern %p busaddr=%s irq=%d\n",
virtnet, pci->dev.name, pci->irq );
virtnet->vdev.pci = pci;
rc = virtio_pci_map_capability ( pci, common,
sizeof ( struct virtio_pci_common_cfg ), 4,
0, sizeof ( struct virtio_pci_common_cfg ),
&virtnet->vdev.common );
if ( rc )
goto err_map_common;
rc = virtio_pci_map_capability ( pci, isr, sizeof ( u8 ), 1,
0, 1,
&virtnet->vdev.isr );
if ( rc )
goto err_map_isr;
virtnet->vdev.notify_cap_pos = notify;
virtnet->vdev.cfg_cap_pos = config;
/* Map the device capability */
if ( device ) {
rc = virtio_pci_map_capability ( pci, device,
0, 4, 0, sizeof ( struct virtio_net_config ),
&virtnet->vdev.device );
if ( rc )
goto err_map_device;
}
/* Enable the PCI device */
adjust_pci_device ( pci );
/* Reset the device and set initial status bits */
vpm_reset ( &virtnet->vdev );
vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_ACKNOWLEDGE );
vpm_add_status ( &virtnet->vdev, VIRTIO_CONFIG_S_DRIVER );
/* Load MAC address and MTU */
if ( device ) {
features = vpm_get_features ( &virtnet->vdev );
if ( features & ( 1ULL << VIRTIO_NET_F_MAC ) ) {
vpm_get ( &virtnet->vdev,
offsetof ( struct virtio_net_config, mac ),
netdev->hw_addr, ETH_ALEN );
DBGC ( virtnet, "VIRTIO-NET %p mac=%s\n", virtnet,
eth_ntoa ( netdev->hw_addr ) );
}
if ( features & ( 1ULL << VIRTIO_NET_F_MTU ) ) {
vpm_get ( &virtnet->vdev,
offsetof ( struct virtio_net_config, mtu ),
&mtu, sizeof ( mtu ) );
DBGC ( virtnet, "VIRTIO-NET %p mtu=%d\n", virtnet,
mtu );
netdev->max_pkt_len = ( mtu + ETH_HLEN );
}
}
/* We need a valid MAC address */
if ( ! is_valid_ether_addr ( netdev->hw_addr ) ) {
rc = -EADDRNOTAVAIL;
goto err_mac_address;
}
/* Register network device */
if ( ( rc = register_netdev ( netdev ) ) != 0 )
goto err_register_netdev;
/* Mark link as up, control virtqueue is not used */
netdev_link_up ( netdev );
virtnet->virtio_version = 1;
return 0;
unregister_netdev ( netdev );
err_register_netdev:
err_mac_address:
vpm_reset ( &virtnet->vdev );
netdev_nullify ( netdev );
netdev_put ( netdev );
virtio_pci_unmap_capability ( &virtnet->vdev.device );
err_map_device:
virtio_pci_unmap_capability ( &virtnet->vdev.isr );
err_map_isr:
virtio_pci_unmap_capability ( &virtnet->vdev.common );
err_map_common:
return rc;
}
/**
* Probe PCI device
*
* @v pci PCI device
* @ret rc Return status code
*/
static int virtnet_probe ( struct pci_device *pci ) {
int found_modern = 0;
int rc = virtnet_probe_modern ( pci, &found_modern );
if ( ! found_modern && pci->device < 0x1040 ) {
/* fall back to the legacy probe */
rc = virtnet_probe_legacy ( pci );
}
return rc;
}
/**
* Remove device
*
* @v pci PCI device
*/
static void virtnet_remove ( struct pci_device *pci ) {
struct net_device *netdev = pci_get_drvdata ( pci );
struct virtnet_nic *virtnet = netdev->priv;
virtio_pci_unmap_capability ( &virtnet->vdev.device );
virtio_pci_unmap_capability ( &virtnet->vdev.isr );
virtio_pci_unmap_capability ( &virtnet->vdev.common );
unregister_netdev ( netdev );
netdev_nullify ( netdev );
netdev_put ( netdev );
}
static struct pci_device_id virtnet_nics[] = {
PCI_ROM(0x1af4, 0x1000, "virtio-net", "Virtio Network Interface", 0),
PCI_ROM(0x1af4, 0x1041, "virtio-net", "Virtio Network Interface 1.0", 0),
};
struct pci_driver virtnet_driver __pci_driver = {
.ids = virtnet_nics,
.id_count = ( sizeof ( virtnet_nics ) / sizeof ( virtnet_nics[0] ) ),
.probe = virtnet_probe,
.remove = virtnet_remove,
};