Files
ipxe/src/net/infiniband.c
Christian Iversen b9de7e6eda [infiniband] Require drivers to specify the number of ports
Require drivers to report the total number of Infiniband ports.  This
is necessary to report the correct number of ports on devices with
dynamic port types.

For example, dual-port Mellanox cards configured for (eth, ib) would
be rejected by the subnet manager, because they report using "port 2,
out of 1".

Signed-off-by: Christian Iversen <ci@iversenit.dk>
2021-01-27 01:15:35 +00:00

1040 lines
26 KiB
C

/*
* Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*
* You can also choose to distribute this program under the terms of
* the Unmodified Binary Distribution Licence (as given in the file
* COPYING.UBDL), provided that you have satisfied its requirements.
*/
FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <byteswap.h>
#include <errno.h>
#include <assert.h>
#include <ipxe/list.h>
#include <ipxe/errortab.h>
#include <ipxe/if_arp.h>
#include <ipxe/netdevice.h>
#include <ipxe/iobuf.h>
#include <ipxe/process.h>
#include <ipxe/profile.h>
#include <ipxe/infiniband.h>
#include <ipxe/ib_mi.h>
#include <ipxe/ib_sma.h>
/** @file
*
* Infiniband protocol
*
*/
/** List of Infiniband devices */
struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
/** List of open Infiniband devices, in reverse order of opening */
static struct list_head open_ib_devices = LIST_HEAD_INIT ( open_ib_devices );
/** Infiniband device index */
static unsigned int ibdev_index = 0;
/** Post send work queue entry profiler */
static struct profiler ib_post_send_profiler __profiler =
{ .name = "ib.post_send" };
/** Post receive work queue entry profiler */
static struct profiler ib_post_recv_profiler __profiler =
{ .name = "ib.post_recv" };
/* Disambiguate the various possible EINPROGRESSes */
#define EINPROGRESS_INIT __einfo_error ( EINFO_EINPROGRESS_INIT )
#define EINFO_EINPROGRESS_INIT __einfo_uniqify \
( EINFO_EINPROGRESS, 0x01, "Initialising" )
#define EINPROGRESS_ARMED __einfo_error ( EINFO_EINPROGRESS_ARMED )
#define EINFO_EINPROGRESS_ARMED __einfo_uniqify \
( EINFO_EINPROGRESS, 0x02, "Armed" )
/** Human-readable message for the link statuses */
struct errortab infiniband_errors[] __errortab = {
__einfo_errortab ( EINFO_EINPROGRESS_INIT ),
__einfo_errortab ( EINFO_EINPROGRESS_ARMED ),
};
/***************************************************************************
*
* Completion queues
*
***************************************************************************
*/
/**
* Create completion queue
*
* @v ibdev Infiniband device
* @v num_cqes Number of completion queue entries
* @v op Completion queue operations
* @v new_cq New completion queue to fill in
* @ret rc Return status code
*/
int ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
struct ib_completion_queue_operations *op,
struct ib_completion_queue **new_cq ) {
struct ib_completion_queue *cq;
int rc;
DBGC ( ibdev, "IBDEV %s creating completion queue\n", ibdev->name );
/* Allocate and initialise data structure */
cq = zalloc ( sizeof ( *cq ) );
if ( ! cq ) {
rc = -ENOMEM;
goto err_alloc_cq;
}
cq->ibdev = ibdev;
list_add_tail ( &cq->list, &ibdev->cqs );
cq->num_cqes = num_cqes;
INIT_LIST_HEAD ( &cq->work_queues );
cq->op = op;
/* Perform device-specific initialisation and get CQN */
if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
DBGC ( ibdev, "IBDEV %s could not initialise completion "
"queue: %s\n", ibdev->name, strerror ( rc ) );
goto err_dev_create_cq;
}
DBGC ( ibdev, "IBDEV %s created %d-entry completion queue %p (%p) "
"with CQN %#lx\n", ibdev->name, num_cqes, cq,
ib_cq_get_drvdata ( cq ), cq->cqn );
*new_cq = cq;
return 0;
ibdev->op->destroy_cq ( ibdev, cq );
err_dev_create_cq:
list_del ( &cq->list );
free ( cq );
err_alloc_cq:
return rc;
}
/**
* Destroy completion queue
*
* @v ibdev Infiniband device
* @v cq Completion queue
*/
void ib_destroy_cq ( struct ib_device *ibdev,
struct ib_completion_queue *cq ) {
DBGC ( ibdev, "IBDEV %s destroying completion queue %#lx\n",
ibdev->name, cq->cqn );
assert ( list_empty ( &cq->work_queues ) );
ibdev->op->destroy_cq ( ibdev, cq );
list_del ( &cq->list );
free ( cq );
}
/**
* Poll completion queue
*
* @v ibdev Infiniband device
* @v cq Completion queue
*/
void ib_poll_cq ( struct ib_device *ibdev,
struct ib_completion_queue *cq ) {
struct ib_work_queue *wq;
/* Poll completion queue */
ibdev->op->poll_cq ( ibdev, cq );
/* Refill receive work queues */
list_for_each_entry ( wq, &cq->work_queues, list ) {
if ( ! wq->is_send )
ib_refill_recv ( ibdev, wq->qp );
}
}
/***************************************************************************
*
* Work queues
*
***************************************************************************
*/
/**
* Create queue pair
*
* @v ibdev Infiniband device
* @v type Queue pair type
* @v num_send_wqes Number of send work queue entries
* @v send_cq Send completion queue
* @v num_recv_wqes Number of receive work queue entries
* @v recv_cq Receive completion queue
* @v op Queue pair operations
* @v name Queue pair name
* @v new_qp New queue pair to fill in
* @ret rc Return status code
*
* The queue pair will be left in the INIT state; you must call
* ib_modify_qp() before it is ready to use for sending and receiving.
*/
int ib_create_qp ( struct ib_device *ibdev, enum ib_queue_pair_type type,
unsigned int num_send_wqes,
struct ib_completion_queue *send_cq,
unsigned int num_recv_wqes,
struct ib_completion_queue *recv_cq,
struct ib_queue_pair_operations *op, const char *name,
struct ib_queue_pair **new_qp ) {
struct ib_queue_pair *qp;
size_t total_size;
int rc;
DBGC ( ibdev, "IBDEV %s creating queue pair\n", ibdev->name );
/* Allocate and initialise data structure */
total_size = ( sizeof ( *qp ) +
( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
qp = zalloc ( total_size );
if ( ! qp ) {
rc = -ENOMEM;
goto err_alloc_qp;
}
qp->ibdev = ibdev;
list_add_tail ( &qp->list, &ibdev->qps );
qp->type = type;
qp->send.qp = qp;
qp->send.is_send = 1;
qp->send.cq = send_cq;
list_add_tail ( &qp->send.list, &send_cq->work_queues );
qp->send.psn = ( random() & 0xffffffUL );
qp->send.num_wqes = num_send_wqes;
qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
qp->recv.qp = qp;
qp->recv.cq = recv_cq;
list_add_tail ( &qp->recv.list, &recv_cq->work_queues );
qp->recv.psn = ( random() & 0xffffffUL );
qp->recv.num_wqes = num_recv_wqes;
qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
INIT_LIST_HEAD ( &qp->mgids );
qp->op = op;
qp->name = name;
/* Perform device-specific initialisation and get QPN */
if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
DBGC ( ibdev, "IBDEV %s could not initialise queue pair: "
"%s\n", ibdev->name, strerror ( rc ) );
goto err_dev_create_qp;
}
DBGC ( ibdev, "IBDEV %s created queue pair %p (%p) with QPN %#lx\n",
ibdev->name, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
DBGC ( ibdev, "IBDEV %s QPN %#lx has %d send entries at [%p,%p)\n",
ibdev->name, qp->qpn, num_send_wqes, qp->send.iobufs,
qp->recv.iobufs );
DBGC ( ibdev, "IBDEV %s QPN %#lx has %d receive entries at [%p,%p)\n",
ibdev->name, qp->qpn, num_recv_wqes, qp->recv.iobufs,
( ( ( void * ) qp ) + total_size ) );
/* Calculate externally-visible QPN */
switch ( type ) {
case IB_QPT_SMI:
qp->ext_qpn = IB_QPN_SMI;
break;
case IB_QPT_GSI:
qp->ext_qpn = IB_QPN_GSI;
break;
default:
qp->ext_qpn = qp->qpn;
break;
}
if ( qp->ext_qpn != qp->qpn ) {
DBGC ( ibdev, "IBDEV %s QPN %#lx has external QPN %#lx\n",
ibdev->name, qp->qpn, qp->ext_qpn );
}
*new_qp = qp;
return 0;
ibdev->op->destroy_qp ( ibdev, qp );
err_dev_create_qp:
list_del ( &qp->send.list );
list_del ( &qp->recv.list );
list_del ( &qp->list );
free ( qp );
err_alloc_qp:
return rc;
}
/**
* Modify queue pair
*
* @v ibdev Infiniband device
* @v qp Queue pair
* @ret rc Return status code
*/
int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
int rc;
DBGC ( ibdev, "IBDEV %s modifying QPN %#lx\n", ibdev->name, qp->qpn );
if ( ( rc = ibdev->op->modify_qp ( ibdev, qp ) ) != 0 ) {
DBGC ( ibdev, "IBDEV %s could not modify QPN %#lx: %s\n",
ibdev->name, qp->qpn, strerror ( rc ) );
return rc;
}
return 0;
}
/**
* Destroy queue pair
*
* @v ibdev Infiniband device
* @v qp Queue pair
*/
void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
struct io_buffer *iobuf;
unsigned int i;
DBGC ( ibdev, "IBDEV %s destroying QPN %#lx\n",
ibdev->name, qp->qpn );
assert ( list_empty ( &qp->mgids ) );
/* Perform device-specific destruction */
ibdev->op->destroy_qp ( ibdev, qp );
/* Complete any remaining I/O buffers with errors */
for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
}
for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
ib_complete_recv ( ibdev, qp, NULL, NULL, iobuf,
-ECANCELED );
}
}
/* Remove work queues from completion queue */
list_del ( &qp->send.list );
list_del ( &qp->recv.list );
/* Free QP */
list_del ( &qp->list );
free ( qp );
}
/**
* Find queue pair by QPN
*
* @v ibdev Infiniband device
* @v qpn Queue pair number
* @ret qp Queue pair, or NULL
*/
struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
unsigned long qpn ) {
struct ib_queue_pair *qp;
list_for_each_entry ( qp, &ibdev->qps, list ) {
if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
return qp;
}
return NULL;
}
/**
* Find queue pair by multicast GID
*
* @v ibdev Infiniband device
* @v gid Multicast GID
* @ret qp Queue pair, or NULL
*/
struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
union ib_gid *gid ) {
struct ib_queue_pair *qp;
struct ib_multicast_gid *mgid;
list_for_each_entry ( qp, &ibdev->qps, list ) {
list_for_each_entry ( mgid, &qp->mgids, list ) {
if ( memcmp ( &mgid->gid, gid,
sizeof ( mgid->gid ) ) == 0 ) {
return qp;
}
}
}
return NULL;
}
/**
* Find work queue belonging to completion queue
*
* @v cq Completion queue
* @v qpn Queue pair number
* @v is_send Find send work queue (rather than receive)
* @ret wq Work queue, or NULL if not found
*/
struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
unsigned long qpn, int is_send ) {
struct ib_work_queue *wq;
list_for_each_entry ( wq, &cq->work_queues, list ) {
if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
return wq;
}
return NULL;
}
/**
* Post send work queue entry
*
* @v ibdev Infiniband device
* @v qp Queue pair
* @v dest Destination address vector
* @v iobuf I/O buffer
* @ret rc Return status code
*/
int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
struct ib_address_vector *dest,
struct io_buffer *iobuf ) {
struct ib_address_vector dest_copy;
int rc;
/* Start profiling */
profile_start ( &ib_post_send_profiler );
/* Check queue fill level */
if ( qp->send.fill >= qp->send.num_wqes ) {
DBGC ( ibdev, "IBDEV %s QPN %#lx send queue full\n",
ibdev->name, qp->qpn );
return -ENOBUFS;
}
/* Use default address vector if none specified */
if ( ! dest )
dest = &qp->av;
/* Make modifiable copy of address vector */
memcpy ( &dest_copy, dest, sizeof ( dest_copy ) );
dest = &dest_copy;
/* Fill in optional parameters in address vector */
if ( ! dest->qkey )
dest->qkey = qp->qkey;
if ( ! dest->rate )
dest->rate = IB_RATE_2_5;
/* Post to hardware */
if ( ( rc = ibdev->op->post_send ( ibdev, qp, dest, iobuf ) ) != 0 ) {
DBGC ( ibdev, "IBDEV %s QPN %#lx could not post send WQE: "
"%s\n", ibdev->name, qp->qpn, strerror ( rc ) );
return rc;
}
/* Increase fill level */
qp->send.fill++;
/* Stop profiling */
profile_stop ( &ib_post_send_profiler );
return 0;
}
/**
* Post receive work queue entry
*
* @v ibdev Infiniband device
* @v qp Queue pair
* @v iobuf I/O buffer
* @ret rc Return status code
*/
int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
struct io_buffer *iobuf ) {
int rc;
/* Start profiling */
profile_start ( &ib_post_recv_profiler );
/* Check packet length */
if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
DBGC ( ibdev, "IBDEV %s QPN %#lx wrong RX buffer size (%zd)\n",
ibdev->name, qp->qpn, iob_tailroom ( iobuf ) );
return -EINVAL;
}
/* Check queue fill level */
if ( qp->recv.fill >= qp->recv.num_wqes ) {
DBGC ( ibdev, "IBDEV %s QPN %#lx receive queue full\n",
ibdev->name, qp->qpn );
return -ENOBUFS;
}
/* Post to hardware */
if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
DBGC ( ibdev, "IBDEV %s QPN %#lx could not post receive WQE: "
"%s\n", ibdev->name, qp->qpn, strerror ( rc ) );
return rc;
}
/* Increase fill level */
qp->recv.fill++;
/* Stop profiling */
profile_stop ( &ib_post_recv_profiler );
return 0;
}
/**
* Complete send work queue entry
*
* @v ibdev Infiniband device
* @v qp Queue pair
* @v iobuf I/O buffer
* @v rc Completion status code
*/
void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
struct io_buffer *iobuf, int rc ) {
if ( qp->send.cq->op->complete_send ) {
qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
} else {
free_iob ( iobuf );
}
qp->send.fill--;
}
/**
* Complete receive work queue entry
*
* @v ibdev Infiniband device
* @v qp Queue pair
* @v dest Destination address vector, or NULL
* @v source Source address vector, or NULL
* @v iobuf I/O buffer
* @v rc Completion status code
*/
void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
struct ib_address_vector *dest,
struct ib_address_vector *source,
struct io_buffer *iobuf, int rc ) {
if ( qp->recv.cq->op->complete_recv ) {
qp->recv.cq->op->complete_recv ( ibdev, qp, dest, source,
iobuf, rc );
} else {
free_iob ( iobuf );
}
qp->recv.fill--;
}
/**
* Refill receive work queue
*
* @v ibdev Infiniband device
* @v qp Queue pair
*/
void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
struct io_buffer *iobuf;
int rc;
/* Keep filling while unfilled entries remain */
while ( qp->recv.fill < qp->recv.num_wqes ) {
/* Allocate I/O buffer */
iobuf = qp->op->alloc_iob ( IB_MAX_PAYLOAD_SIZE );
if ( ! iobuf ) {
/* Non-fatal; we will refill on next attempt */
return;
}
/* Post I/O buffer */
if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
DBGC ( ibdev, "IBDEV %s could not refill: %s\n",
ibdev->name, strerror ( rc ) );
free_iob ( iobuf );
/* Give up */
return;
}
}
}
/***************************************************************************
*
* Link control
*
***************************************************************************
*/
/**
* Get link state
*
* @v ibdev Infiniband device
* @ret rc Link status code
*/
int ib_link_rc ( struct ib_device *ibdev ) {
switch ( ibdev->port_state ) {
case IB_PORT_STATE_DOWN: return -ENOTCONN;
case IB_PORT_STATE_INIT: return -EINPROGRESS_INIT;
case IB_PORT_STATE_ARMED: return -EINPROGRESS_ARMED;
case IB_PORT_STATE_ACTIVE: return 0;
default: return -EINVAL;
}
}
/**
* Textual representation of Infiniband link state
*
* @v ibdev Infiniband device
* @ret link_text Link state text
*/
static const char * ib_link_state_text ( struct ib_device *ibdev ) {
switch ( ibdev->port_state ) {
case IB_PORT_STATE_DOWN: return "DOWN";
case IB_PORT_STATE_INIT: return "INIT";
case IB_PORT_STATE_ARMED: return "ARMED";
case IB_PORT_STATE_ACTIVE: return "ACTIVE";
default: return "UNKNOWN";
}
}
/**
* Notify drivers of Infiniband device or link state change
*
* @v ibdev Infiniband device
*/
static void ib_notify ( struct ib_device *ibdev ) {
struct ib_driver *driver;
for_each_table_entry ( driver, IB_DRIVERS )
driver->notify ( ibdev );
}
/**
* Notify of Infiniband link state change
*
* @v ibdev Infiniband device
*/
void ib_link_state_changed ( struct ib_device *ibdev ) {
DBGC ( ibdev, "IBDEV %s link state is %s\n",
ibdev->name, ib_link_state_text ( ibdev ) );
/* Notify drivers of link state change */
ib_notify ( ibdev );
}
/**
* Open port
*
* @v ibdev Infiniband device
* @ret rc Return status code
*/
int ib_open ( struct ib_device *ibdev ) {
int rc;
/* Increment device open request counter */
if ( ibdev->open_count++ > 0 ) {
/* Device was already open; do nothing */
return 0;
}
/* Open device */
if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
DBGC ( ibdev, "IBDEV %s could not open: %s\n",
ibdev->name, strerror ( rc ) );
goto err_open;
}
/* Create subnet management interface */
if ( ( rc = ib_create_mi ( ibdev, IB_QPT_SMI, &ibdev->smi ) ) != 0 ) {
DBGC ( ibdev, "IBDEV %s could not create SMI: %s\n",
ibdev->name, strerror ( rc ) );
goto err_create_smi;
}
/* Create subnet management agent */
if ( ( rc = ib_create_sma ( ibdev, ibdev->smi ) ) != 0 ) {
DBGC ( ibdev, "IBDEV %s could not create SMA: %s\n",
ibdev->name, strerror ( rc ) );
goto err_create_sma;
}
/* Create general services interface */
if ( ( rc = ib_create_mi ( ibdev, IB_QPT_GSI, &ibdev->gsi ) ) != 0 ) {
DBGC ( ibdev, "IBDEV %s could not create GSI: %s\n",
ibdev->name, strerror ( rc ) );
goto err_create_gsi;
}
/* Add to head of open devices list */
list_add ( &ibdev->open_list, &open_ib_devices );
/* Notify drivers of device state change */
ib_notify ( ibdev );
assert ( ibdev->open_count == 1 );
return 0;
ib_destroy_mi ( ibdev, ibdev->gsi );
err_create_gsi:
ib_destroy_sma ( ibdev, ibdev->smi );
err_create_sma:
ib_destroy_mi ( ibdev, ibdev->smi );
err_create_smi:
ibdev->op->close ( ibdev );
err_open:
assert ( ibdev->open_count == 1 );
ibdev->open_count = 0;
return rc;
}
/**
* Close port
*
* @v ibdev Infiniband device
*/
void ib_close ( struct ib_device *ibdev ) {
/* Decrement device open request counter */
ibdev->open_count--;
/* Close device if this was the last remaining requested opening */
if ( ibdev->open_count == 0 ) {
ib_notify ( ibdev );
list_del ( &ibdev->open_list );
ib_destroy_mi ( ibdev, ibdev->gsi );
ib_destroy_sma ( ibdev, ibdev->smi );
ib_destroy_mi ( ibdev, ibdev->smi );
ibdev->op->close ( ibdev );
ibdev->port_state = IB_PORT_STATE_DOWN;
}
}
/***************************************************************************
*
* Multicast
*
***************************************************************************
*/
/**
* Attach to multicast group
*
* @v ibdev Infiniband device
* @v qp Queue pair
* @v gid Multicast GID
* @ret rc Return status code
*
* Note that this function handles only the local device's attachment
* to the multicast GID; it does not issue the relevant MADs to join
* the multicast group on the subnet.
*/
int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
union ib_gid *gid ) {
struct ib_multicast_gid *mgid;
int rc;
/* Sanity check */
assert ( qp != NULL );
/* Add to software multicast GID list */
mgid = zalloc ( sizeof ( *mgid ) );
if ( ! mgid ) {
rc = -ENOMEM;
goto err_alloc_mgid;
}
memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
list_add_tail ( &mgid->list, &qp->mgids );
/* Add to hardware multicast GID list */
if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
goto err_dev_mcast_attach;
return 0;
err_dev_mcast_attach:
list_del ( &mgid->list );
free ( mgid );
err_alloc_mgid:
return rc;
}
/**
* Detach from multicast group
*
* @v ibdev Infiniband device
* @v qp Queue pair
* @v gid Multicast GID
*/
void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
union ib_gid *gid ) {
struct ib_multicast_gid *mgid;
/* Sanity check */
assert ( qp != NULL );
/* Remove from hardware multicast GID list */
ibdev->op->mcast_detach ( ibdev, qp, gid );
/* Remove from software multicast GID list */
list_for_each_entry ( mgid, &qp->mgids, list ) {
if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
list_del ( &mgid->list );
free ( mgid );
break;
}
}
}
/***************************************************************************
*
* Miscellaneous
*
***************************************************************************
*/
/**
* Set port information
*
* @v ibdev Infiniband device
* @v mad Set port information MAD
*/
int ib_set_port_info ( struct ib_device *ibdev, union ib_mad *mad ) {
int rc;
/* Adapters with embedded SMAs do not need to support this method */
if ( ! ibdev->op->set_port_info ) {
DBGC ( ibdev, "IBDEV %s does not support setting port "
"information\n", ibdev->name );
return -ENOTSUP;
}
if ( ( rc = ibdev->op->set_port_info ( ibdev, mad ) ) != 0 ) {
DBGC ( ibdev, "IBDEV %s could not set port information: %s\n",
ibdev->name, strerror ( rc ) );
return rc;
}
return 0;
};
/**
* Set partition key table
*
* @v ibdev Infiniband device
* @v mad Set partition key table MAD
*/
int ib_set_pkey_table ( struct ib_device *ibdev, union ib_mad *mad ) {
int rc;
/* Adapters with embedded SMAs do not need to support this method */
if ( ! ibdev->op->set_pkey_table ) {
DBGC ( ibdev, "IBDEV %s does not support setting partition "
"key table\n", ibdev->name );
return -ENOTSUP;
}
if ( ( rc = ibdev->op->set_pkey_table ( ibdev, mad ) ) != 0 ) {
DBGC ( ibdev, "IBDEV %s could not set partition key table: "
"%s\n", ibdev->name, strerror ( rc ) );
return rc;
}
return 0;
};
/***************************************************************************
*
* Event queues
*
***************************************************************************
*/
/**
* Poll event queue
*
* @v ibdev Infiniband device
*/
void ib_poll_eq ( struct ib_device *ibdev ) {
struct ib_completion_queue *cq;
/* Poll device's event queue */
ibdev->op->poll_eq ( ibdev );
/* Poll all completion queues */
list_for_each_entry ( cq, &ibdev->cqs, list )
ib_poll_cq ( ibdev, cq );
}
/**
* Single-step the Infiniband event queue
*
* @v process Infiniband event queue process
*/
static void ib_step ( struct process *process __unused ) {
struct ib_device *ibdev;
list_for_each_entry ( ibdev, &open_ib_devices, open_list )
ib_poll_eq ( ibdev );
}
/** Infiniband event queue process */
PERMANENT_PROCESS ( ib_process, ib_step );
/***************************************************************************
*
* Infiniband device creation/destruction
*
***************************************************************************
*/
/**
* Allocate Infiniband device
*
* @v priv_size Size of driver private data area
* @ret ibdev Infiniband device, or NULL
*/
struct ib_device * alloc_ibdev ( size_t priv_size ) {
struct ib_device *ibdev;
void *drv_priv;
size_t total_len;
total_len = ( sizeof ( *ibdev ) + priv_size );
ibdev = zalloc ( total_len );
if ( ibdev ) {
drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
ib_set_drvdata ( ibdev, drv_priv );
INIT_LIST_HEAD ( &ibdev->list );
INIT_LIST_HEAD ( &ibdev->open_list );
INIT_LIST_HEAD ( &ibdev->cqs );
INIT_LIST_HEAD ( &ibdev->qps );
ibdev->port_state = IB_PORT_STATE_DOWN;
ibdev->lid = IB_LID_NONE;
ibdev->pkey = IB_PKEY_DEFAULT;
}
return ibdev;
}
/**
* Register Infiniband device
*
* @v ibdev Infiniband device
* @ret rc Return status code
*/
int register_ibdev ( struct ib_device *ibdev ) {
struct ib_driver *driver;
int rc;
/* Record device index and create device name */
if ( ibdev->name[0] == '\0' ) {
snprintf ( ibdev->name, sizeof ( ibdev->name ), "inf%d",
ibdev_index );
}
ibdev->index = ++ibdev_index;
/* Add to device list */
ibdev_get ( ibdev );
list_add_tail ( &ibdev->list, &ib_devices );
DBGC ( ibdev, "IBDEV %s registered (phys %s)\n", ibdev->name,
ibdev->dev->name );
/* Probe device */
for_each_table_entry ( driver, IB_DRIVERS ) {
if ( ( rc = driver->probe ( ibdev ) ) != 0 ) {
DBGC ( ibdev, "IBDEV %s could not add %s device: %s\n",
ibdev->name, driver->name, strerror ( rc ) );
goto err_probe;
}
}
return 0;
err_probe:
for_each_table_entry_continue_reverse ( driver, IB_DRIVERS )
driver->remove ( ibdev );
list_del ( &ibdev->list );
ibdev_put ( ibdev );
return rc;
}
/**
* Unregister Infiniband device
*
* @v ibdev Infiniband device
*/
void unregister_ibdev ( struct ib_device *ibdev ) {
struct ib_driver *driver;
/* Remove device */
for_each_table_entry_reverse ( driver, IB_DRIVERS )
driver->remove ( ibdev );
/* Remove from device list */
list_del ( &ibdev->list );
ibdev_put ( ibdev );
DBGC ( ibdev, "IBDEV %s unregistered\n", ibdev->name );
/* Reset device index if no devices remain */
if ( list_empty ( &ib_devices ) )
ibdev_index = 0;
}
/**
* Find Infiniband device by GID
*
* @v gid GID
* @ret ibdev Infiniband device, or NULL
*/
struct ib_device * find_ibdev ( union ib_gid *gid ) {
struct ib_device *ibdev;
for_each_ibdev ( ibdev ) {
if ( memcmp ( gid, &ibdev->gid, sizeof ( *gid ) ) == 0 )
return ibdev;
}
return NULL;
}
/**
* Get most recently opened Infiniband device
*
* @ret ibdev Most recently opened Infiniband device, or NULL
*/
struct ib_device * last_opened_ibdev ( void ) {
struct ib_device *ibdev;
ibdev = list_first_entry ( &open_ib_devices, struct ib_device,
open_list );
if ( ! ibdev )
return NULL;
assert ( ibdev->open_count != 0 );
return ibdev;
}
/* Drag in objects via register_ibdev() */
REQUIRING_SYMBOL ( register_ibdev );
/* Drag in Infiniband configuration */
REQUIRE_OBJECT ( config_infiniband );