From: Doug Ledford <dledford@redhat.com> Date: Tue, 14 Apr 2009 15:23:42 -0400 Subject: [openib] add support for XRC queues Message-id: 1239737023-31222-16-git-send-email-dledford@redhat.com O-Subject: [Patch RHEL5.4 15/16] [InfiniBand] Add support for XRC queues Bugzilla: 476301 Signed-off-by: Doug Ledford <dledford@redhat.com> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 179f753..31ee171 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -299,6 +299,8 @@ int ib_register_device(struct ib_device *device) INIT_LIST_HEAD(&device->client_data_list); spin_lock_init(&device->event_handler_lock); spin_lock_init(&device->client_data_lock); + device->ib_uverbs_xrcd_table = RB_ROOT; + mutex_init(&device->xrcd_table_mutex); ret = read_port_table_lengths(device); if (ret) { diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index d62fcad..2b84289 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -133,6 +133,11 @@ struct ib_ucq_object { u32 async_events_reported; }; +struct ib_uxrcd_object { + struct ib_uobject uobject; + struct list_head xrc_reg_qp_list; +}; + extern spinlock_t ib_uverbs_idr_lock; extern struct idr ib_uverbs_pd_idr; extern struct idr ib_uverbs_mr_idr; @@ -141,6 +146,7 @@ extern struct idr ib_uverbs_ah_idr; extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; +extern struct idr ib_uverbs_xrc_domain_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); @@ -160,6 +166,12 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); +void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event, + void *context_ptr); +void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev, + struct ib_xrcd *xrcd); +int ib_uverbs_cleanup_xrc_rcv_qp(struct ib_uverbs_file *file, + struct ib_xrcd *xrcd, u32 qp_num); #define IB_UVERBS_DECLARE_CMD(name) \ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ @@ -194,5 +206,14 @@ IB_UVERBS_DECLARE_CMD(create_srq); IB_UVERBS_DECLARE_CMD(modify_srq); IB_UVERBS_DECLARE_CMD(query_srq); IB_UVERBS_DECLARE_CMD(destroy_srq); +IB_UVERBS_DECLARE_CMD(create_xrc_srq); +IB_UVERBS_DECLARE_CMD(open_xrc_domain); +IB_UVERBS_DECLARE_CMD(close_xrc_domain); +IB_UVERBS_DECLARE_CMD(create_xrc_rcv_qp); +IB_UVERBS_DECLARE_CMD(modify_xrc_rcv_qp); +IB_UVERBS_DECLARE_CMD(query_xrc_rcv_qp); +IB_UVERBS_DECLARE_CMD(reg_xrc_rcv_qp); +IB_UVERBS_DECLARE_CMD(unreg_xrc_rcv_qp); + #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 56feab6..fe34627 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -37,6 +37,7 @@ #include <linux/fs.h> #include <asm/uaccess.h> +#include <asm/fcntl.h> #include "uverbs.h" @@ -254,6 +255,20 @@ static void put_srq_read(struct ib_srq *srq) put_uobj_read(srq->uobject); } +static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, + struct ib_ucontext *context, + struct ib_uobject **uobj) +{ + *uobj = idr_read_uobj(&ib_uverbs_xrc_domain_idr, xrcd_handle, + context, 0); + return *uobj ? (*uobj)->object : NULL; +} + +static void put_xrcd_read(struct ib_uobject *uobj) +{ + put_uobj_read(uobj); +} + ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -297,6 +312,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->qp_list); INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); + INIT_LIST_HEAD(&ucontext->xrc_domain_list); ucontext->closing = 0; resp.num_comp_vectors = file->device->num_comp_vectors; @@ -1026,6 +1042,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, struct ib_srq *srq; struct ib_qp *qp; struct ib_qp_init_attr attr; + struct ib_xrcd *xrcd; + struct ib_uobject *xrcd_uobj; int ret; if (out_len < sizeof resp) @@ -1045,17 +1063,22 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); down_write(&obj->uevent.uobject.mutex); - srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL; + srq = (cmd.is_srq && cmd.qp_type != IB_QPT_XRC) ? + idr_read_srq(cmd.srq_handle, file->ucontext) : NULL; + xrcd = cmd.qp_type == IB_QPT_XRC ? + idr_read_xrcd(cmd.srq_handle, file->ucontext, &xrcd_uobj) : NULL; pd = idr_read_pd(cmd.pd_handle, file->ucontext); scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0); rcq = cmd.recv_cq_handle == cmd.send_cq_handle ? scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1); - if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) { + if (!pd || !scq || !rcq || (cmd.is_srq && !srq) || + (cmd.qp_type == IB_QPT_XRC && !xrcd)) { ret = -EINVAL; goto err_put; } + attr.create_flags = 0; attr.event_handler = ib_uverbs_qp_event_handler; attr.qp_context = file; attr.send_cq = scq; @@ -1063,6 +1086,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, attr.srq = srq; attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; attr.qp_type = cmd.qp_type; + attr.xrc_domain = xrcd; attr.create_flags = 0; attr.cap.max_send_wr = cmd.max_send_wr; @@ -1090,11 +1114,14 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, qp->event_handler = attr.event_handler; qp->qp_context = attr.qp_context; qp->qp_type = attr.qp_type; + qp->xrcd = attr.xrc_domain; atomic_inc(&pd->usecnt); atomic_inc(&attr.send_cq->usecnt); atomic_inc(&attr.recv_cq->usecnt); if (attr.srq) atomic_inc(&attr.srq->usecnt); + else if (attr.xrc_domain) + atomic_inc(&attr.xrc_domain->usecnt); obj->uevent.uobject.object = qp; ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); @@ -1122,6 +1149,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, put_cq_read(rcq); if (srq) put_srq_read(srq); + if (xrcd) + put_xrcd_read(xrcd_uobj); mutex_lock(&file->mutex); list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); @@ -1148,6 +1177,8 @@ err_put: put_cq_read(rcq); if (srq) put_srq_read(srq); + if (xrcd) + put_xrcd_read(xrcd_uobj); put_uobj_write(&obj->uevent.uobject); return ret; @@ -2000,6 +2031,8 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, srq->uobject = &obj->uobject; srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; + srq->xrc_cq = NULL; + srq->xrcd = NULL; atomic_inc(&pd->usecnt); atomic_set(&srq->usecnt, 0); @@ -2045,6 +2078,137 @@ err: return ret; } +ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_xrc_srq cmd; + struct ib_uverbs_create_srq_resp resp; + struct ib_udata udata; + struct ib_uevent_object *obj; + struct ib_pd *pd; + struct ib_srq *srq; + struct ib_cq *xrc_cq; + struct ib_xrcd *xrcd; + struct ib_srq_init_attr attr; + struct ib_uobject *xrcd_uobj; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) + return -ENOMEM; + + init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, + &srq_lock_key); + down_write(&obj->uobject.mutex); + + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { + ret = -EINVAL; + goto err; + } + + xrc_cq = idr_read_cq(cmd.xrc_cq, file->ucontext, 0); + if (!xrc_cq) { + ret = -EINVAL; + goto err_put_pd; + } + + xrcd = idr_read_xrcd(cmd.xrcd_handle, file->ucontext, &xrcd_uobj); + if (!xrcd) { + ret = -EINVAL; + goto err_put_cq; + } + + + attr.event_handler = ib_uverbs_srq_event_handler; + attr.srq_context = file; + attr.attr.max_wr = cmd.max_wr; + attr.attr.max_sge = cmd.max_sge; + attr.attr.srq_limit = cmd.srq_limit; + + obj->events_reported = 0; + INIT_LIST_HEAD(&obj->event_list); + + srq = pd->device->create_xrc_srq(pd, xrc_cq, xrcd, &attr, &udata); + if (IS_ERR(srq)) { + ret = PTR_ERR(srq); + goto err_put; + } + + srq->device = pd->device; + srq->pd = pd; + srq->uobject = &obj->uobject; + srq->event_handler = attr.event_handler; + srq->srq_context = attr.srq_context; + srq->xrc_cq = xrc_cq; + srq->xrcd = xrcd; + atomic_inc(&pd->usecnt); + atomic_inc(&xrc_cq->usecnt); + atomic_inc(&xrcd->usecnt); + + atomic_set(&srq->usecnt, 0); + + obj->uobject.object = srq; + ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject); + if (ret) + goto err_destroy; + + memset(&resp, 0, sizeof resp); + resp.srq_handle = obj->uobject.id; + resp.max_wr = attr.attr.max_wr; + resp.max_sge = attr.attr.max_sge; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_copy; + } + + put_xrcd_read(xrcd_uobj); + put_cq_read(xrc_cq); + put_pd_read(pd); + + mutex_lock(&file->mutex); + list_add_tail(&obj->uobject.list, &file->ucontext->srq_list); + mutex_unlock(&file->mutex); + + obj->uobject.live = 1; + + up_write(&obj->uobject.mutex); + + return in_len; + +err_copy: + idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject); + +err_destroy: + ib_destroy_srq(srq); + +err_put: + put_xrcd_read(xrcd_uobj); + +err_put_cq: + put_cq_read(xrc_cq); + +err_put_pd: + put_pd_read(pd); + +err: + put_uobj_write(&obj->uobject); + return ret; +} + ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -2163,3 +2327,695 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, return ret ? ret : in_len; } + +static struct inode *xrc_file2inode(struct file *f) +{ + return f->f_dentry->d_inode; +} + +struct xrcd_table_entry { + struct rb_node node; + struct inode *inode; + struct ib_xrcd *xrcd; +}; + +static int xrcd_table_insert(struct ib_device *dev, + struct inode *i_n, + struct ib_xrcd *xrcd) +{ + struct xrcd_table_entry *entry, *scan; + struct rb_node **p = &dev->ib_uverbs_xrcd_table.rb_node; + struct rb_node *parent = NULL; + + entry = kmalloc(sizeof(struct xrcd_table_entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->inode = i_n; + entry->xrcd = xrcd; + + while (*p) { + parent = *p; + scan = rb_entry(parent, struct xrcd_table_entry, node); + + if (i_n < scan->inode) + p = &(*p)->rb_left; + else if (i_n > scan->inode) + p = &(*p)->rb_right; + else { + kfree(entry); + return -EEXIST; + } + } + + rb_link_node(&entry->node, parent, p); + rb_insert_color(&entry->node, &dev->ib_uverbs_xrcd_table); + igrab(i_n); + return 0; +} + +static struct xrcd_table_entry *xrcd_table_search(struct ib_device *dev, + struct inode *i_n) +{ + struct xrcd_table_entry *scan; + struct rb_node **p = &dev->ib_uverbs_xrcd_table.rb_node; + struct rb_node *parent = NULL; + + while (*p) { + parent = *p; + scan = rb_entry(parent, struct xrcd_table_entry, node); + + if (i_n < scan->inode) + p = &(*p)->rb_left; + else if (i_n > scan->inode) + p = &(*p)->rb_right; + else + return scan; + } + return NULL; +} + +static int find_xrcd(struct ib_device *dev, struct inode *i_n, + struct ib_xrcd **xrcd) +{ + struct xrcd_table_entry *entry; + + entry = xrcd_table_search(dev, i_n); + if (!entry) + return -EINVAL; + + *xrcd = entry->xrcd; + return 0; +} + + +static void xrcd_table_delete(struct ib_device *dev, + struct inode *i_n) +{ + struct xrcd_table_entry *entry = xrcd_table_search(dev, i_n); + + if (entry) { + iput(i_n); + rb_erase(&entry->node, &dev->ib_uverbs_xrcd_table); + kfree(entry); + } +} + +ssize_t ib_uverbs_open_xrc_domain(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_open_xrc_domain cmd; + struct ib_uverbs_open_xrc_domain_resp resp; + struct ib_udata udata; + struct ib_uobject *uobj; + struct ib_uxrcd_object *xrcd_uobj; + struct ib_xrcd *xrcd = NULL; + struct file *f = NULL; + struct inode *inode = NULL; + int ret = 0; + int new_xrcd = 0; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + mutex_lock(&file->device->ib_dev->xrcd_table_mutex); + if (cmd.fd != (u32) (-1)) { + /* search for file descriptor */ + f = fget(cmd.fd); + if (!f) { + ret = -EBADF; + goto err_table_mutex_unlock; + } + + inode = xrc_file2inode(f); + if (!inode) { + ret = -EBADF; + goto err_table_mutex_unlock; + } + + ret = find_xrcd(file->device->ib_dev, inode, &xrcd); + if (ret && !(cmd.oflags & O_CREAT)) { + /* no file descriptor. Need CREATE flag */ + ret = -EAGAIN; + goto err_table_mutex_unlock; + } + + if (xrcd && cmd.oflags & O_EXCL) { + ret = -EINVAL; + goto err_table_mutex_unlock; + } + } + + xrcd_uobj = kmalloc(sizeof *xrcd_uobj, GFP_KERNEL); + if (!xrcd_uobj) { + ret = -ENOMEM; + goto err_table_mutex_unlock; + } + + uobj = &xrcd_uobj->uobject; + init_uobj(uobj, 0, file->ucontext, &pd_lock_key); + down_write(&uobj->mutex); + + if (!xrcd) { + xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev, + file->ucontext, &udata); + if (IS_ERR(xrcd)) { + ret = PTR_ERR(xrcd); + goto err; + } + xrcd->uobject = (cmd.fd == -1) ? uobj : NULL; + xrcd->inode = inode; + xrcd->device = file->device->ib_dev; + atomic_set(&xrcd->usecnt, 0); + new_xrcd = 1; + } + + uobj->object = xrcd; + ret = idr_add_uobj(&ib_uverbs_xrc_domain_idr, uobj); + if (ret) + goto err_idr; + + memset(&resp, 0, sizeof resp); + resp.xrcd_handle = uobj->id; + + if (inode) { + if (new_xrcd) { + /* create new inode/xrcd table entry */ + ret = xrcd_table_insert(file->device->ib_dev, inode, xrcd); + if (ret) + goto err_insert_xrcd; + } + atomic_inc(&xrcd->usecnt); + } + if (f) + fput(f); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_copy; + } + + INIT_LIST_HEAD(&xrcd_uobj->xrc_reg_qp_list); + + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->xrc_domain_list); + mutex_unlock(&file->mutex); + + uobj->live = 1; + + up_write(&uobj->mutex); + + mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); + return in_len; + +err_copy: + + if (inode) { + if (new_xrcd) + xrcd_table_delete(file->device->ib_dev, inode); + atomic_dec(&xrcd->usecnt); + } + +err_insert_xrcd: + idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj); + +err_idr: + ib_dealloc_xrcd(xrcd); + +err: + put_uobj_write(uobj); + +err_table_mutex_unlock: + + if (f) + fput(f); + mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); + return ret; +} + +ssize_t ib_uverbs_close_xrc_domain(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_close_xrc_domain cmd; + struct ib_uobject *uobj, *t_uobj; + struct ib_uxrcd_object *xrcd_uobj; + struct ib_xrcd *xrcd = NULL; + struct inode *inode = NULL; + int ret = 0; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + mutex_lock(&file->device->ib_dev->xrcd_table_mutex); + uobj = idr_write_uobj(&ib_uverbs_xrc_domain_idr, cmd.xrcd_handle, + file->ucontext); + if (!uobj) { + ret = -EINVAL; + goto err_unlock_mutex; + } + + mutex_lock(&file->mutex); + if (!ret) { + list_for_each_entry(t_uobj, &file->ucontext->qp_list, list) { + struct ib_qp *qp = t_uobj->object; + if (qp->xrcd && qp->xrcd == uobj->object) { + ret = -EBUSY; + break; + } + } + } + if (!ret) { + list_for_each_entry(t_uobj, &file->ucontext->srq_list, list) { + struct ib_srq *srq = t_uobj->object; + if (srq->xrcd && srq->xrcd == uobj->object) { + ret = -EBUSY; + break; + } + } + } + mutex_unlock(&file->mutex); + if (ret) { + put_uobj_write(uobj); + goto err_unlock_mutex; + } + + xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject); + if (!list_empty(&xrcd_uobj->xrc_reg_qp_list)) { + ret = -EBUSY; + put_uobj_write(uobj); + goto err_unlock_mutex; + } + + xrcd = (struct ib_xrcd *) (uobj->object); + inode = xrcd->inode; + + if (inode) + atomic_dec(&xrcd->usecnt); + + ret = ib_dealloc_xrcd(uobj->object); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + if (ret && !inode) + goto err_unlock_mutex; + + if (!ret && inode) + xrcd_table_delete(file->device->ib_dev, inode); + + idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + + mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); + return in_len; + +err_unlock_mutex: + mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); + return ret; +} + +void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev, + struct ib_xrcd *xrcd) +{ + struct inode *inode = NULL; + int ret = 0; + + inode = xrcd->inode; + if (inode) + atomic_dec(&xrcd->usecnt); + + ret = ib_dealloc_xrcd(xrcd); + if (!ret && inode) + xrcd_table_delete(ib_dev, inode); +} + +ssize_t ib_uverbs_create_xrc_rcv_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_xrc_rcv_qp cmd; + struct ib_uverbs_create_xrc_rcv_qp_resp resp; + struct ib_uxrc_rcv_object *obj; + struct ib_qp_init_attr init_attr; + struct ib_xrcd *xrcd; + struct ib_uobject *uobj; + struct ib_uxrcd_object *xrcd_uobj; + u32 qp_num; + int err; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + obj = kzalloc(sizeof *obj, GFP_KERNEL); + if (!obj) + return -ENOMEM; + + xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj); + if (!xrcd) { + err = -EINVAL; + goto err_out; + } + + init_attr.event_handler = ib_uverbs_xrc_rcv_qp_event_handler; + init_attr.qp_context = file; + init_attr.srq = NULL; + init_attr.sq_sig_type = + cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; + init_attr.qp_type = IB_QPT_XRC; + init_attr.xrc_domain = xrcd; + + init_attr.cap.max_send_wr = 1; + init_attr.cap.max_recv_wr = 0; + init_attr.cap.max_send_sge = 1; + init_attr.cap.max_recv_sge = 0; + init_attr.cap.max_inline_data = 0; + + err = xrcd->device->create_xrc_rcv_qp(&init_attr, &qp_num); + if (err) + goto err_put; + + memset(&resp, 0, sizeof resp); + resp.qpn = qp_num; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + err = -EFAULT; + goto err_destroy; + } + + atomic_inc(&xrcd->usecnt); + put_xrcd_read(uobj); + obj->qp_num = qp_num; + obj->domain_handle = cmd.xrc_domain_handle; + xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject); + mutex_lock(&file->device->ib_dev->xrcd_table_mutex); + list_add_tail(&obj->list, &xrcd_uobj->xrc_reg_qp_list); + mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); + + return in_len; + +err_destroy: + xrcd->device->unreg_xrc_rcv_qp(xrcd, file, qp_num); +err_put: + put_xrcd_read(uobj); +err_out: + kfree(obj); + return err; +} + +ssize_t ib_uverbs_modify_xrc_rcv_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_modify_xrc_rcv_qp cmd; + struct ib_qp_attr *attr; + struct ib_xrcd *xrcd; + struct ib_uobject *uobj; + int err; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + attr = kzalloc(sizeof *attr, GFP_KERNEL); + if (!attr) + return -ENOMEM; + + xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj); + if (!xrcd) { + kfree(attr); + return -EINVAL; + } + + attr->qp_state = cmd.qp_state; + attr->cur_qp_state = cmd.cur_qp_state; + attr->qp_access_flags = cmd.qp_access_flags; + attr->pkey_index = cmd.pkey_index; + attr->port_num = cmd.port_num; + attr->path_mtu = cmd.path_mtu; + attr->path_mig_state = cmd.path_mig_state; + attr->qkey = cmd.qkey; + attr->rq_psn = cmd.rq_psn; + attr->sq_psn = cmd.sq_psn; + attr->dest_qp_num = cmd.dest_qp_num; + attr->alt_pkey_index = cmd.alt_pkey_index; + attr->en_sqd_async_notify = cmd.en_sqd_async_notify; + attr->max_rd_atomic = cmd.max_rd_atomic; + attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; + attr->min_rnr_timer = cmd.min_rnr_timer; + attr->port_num = cmd.port_num; + attr->timeout = cmd.timeout; + attr->retry_cnt = cmd.retry_cnt; + attr->rnr_retry = cmd.rnr_retry; + attr->alt_port_num = cmd.alt_port_num; + attr->alt_timeout = cmd.alt_timeout; + + memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); + attr->ah_attr.grh.flow_label = cmd.dest.flow_label; + attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; + attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; + attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; + attr->ah_attr.dlid = cmd.dest.dlid; + attr->ah_attr.sl = cmd.dest.sl; + attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; + attr->ah_attr.static_rate = cmd.dest.static_rate; + attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0; + attr->ah_attr.port_num = cmd.dest.port_num; + + memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); + attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; + attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; + attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; + attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class; + attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; + attr->alt_ah_attr.sl = cmd.alt_dest.sl; + attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits; + attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate; + attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; + attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; + + err = xrcd->device->modify_xrc_rcv_qp(xrcd, cmd.qp_num, attr, cmd.attr_mask); + put_xrcd_read(uobj); + kfree(attr); + return err ? err : in_len; +} + +ssize_t ib_uverbs_query_xrc_rcv_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_query_xrc_rcv_qp cmd; + struct ib_uverbs_query_qp_resp resp; + struct ib_qp_attr *attr; + struct ib_qp_init_attr *init_attr; + struct ib_xrcd *xrcd; + struct ib_uobject *uobj; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + attr = kmalloc(sizeof *attr, GFP_KERNEL); + init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL); + if (!attr || !init_attr) { + ret = -ENOMEM; + goto out; + } + + xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj); + if (!xrcd) { + ret = -EINVAL; + goto out; + } + + ret = xrcd->device->query_xrc_rcv_qp(xrcd, cmd.qp_num, attr, + cmd.attr_mask, init_attr); + + put_xrcd_read(uobj); + + if (ret) + goto out; + + memset(&resp, 0, sizeof resp); + resp.qp_state = attr->qp_state; + resp.cur_qp_state = attr->cur_qp_state; + resp.path_mtu = attr->path_mtu; + resp.path_mig_state = attr->path_mig_state; + resp.qkey = attr->qkey; + resp.rq_psn = attr->rq_psn; + resp.sq_psn = attr->sq_psn; + resp.dest_qp_num = attr->dest_qp_num; + resp.qp_access_flags = attr->qp_access_flags; + resp.pkey_index = attr->pkey_index; + resp.alt_pkey_index = attr->alt_pkey_index; + resp.sq_draining = attr->sq_draining; + resp.max_rd_atomic = attr->max_rd_atomic; + resp.max_dest_rd_atomic = attr->max_dest_rd_atomic; + resp.min_rnr_timer = attr->min_rnr_timer; + resp.port_num = attr->port_num; + resp.timeout = attr->timeout; + resp.retry_cnt = attr->retry_cnt; + resp.rnr_retry = attr->rnr_retry; + resp.alt_port_num = attr->alt_port_num; + resp.alt_timeout = attr->alt_timeout; + + memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16); + resp.dest.flow_label = attr->ah_attr.grh.flow_label; + resp.dest.sgid_index = attr->ah_attr.grh.sgid_index; + resp.dest.hop_limit = attr->ah_attr.grh.hop_limit; + resp.dest.traffic_class = attr->ah_attr.grh.traffic_class; + resp.dest.dlid = attr->ah_attr.dlid; + resp.dest.sl = attr->ah_attr.sl; + resp.dest.src_path_bits = attr->ah_attr.src_path_bits; + resp.dest.static_rate = attr->ah_attr.static_rate; + resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH); + resp.dest.port_num = attr->ah_attr.port_num; + + memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16); + resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label; + resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index; + resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit; + resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class; + resp.alt_dest.dlid = attr->alt_ah_attr.dlid; + resp.alt_dest.sl = attr->alt_ah_attr.sl; + resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits; + resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate; + resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH); + resp.alt_dest.port_num = attr->alt_ah_attr.port_num; + + resp.max_send_wr = init_attr->cap.max_send_wr; + resp.max_recv_wr = init_attr->cap.max_recv_wr; + resp.max_send_sge = init_attr->cap.max_send_sge; + resp.max_recv_sge = init_attr->cap.max_recv_sge; + resp.max_inline_data = init_attr->cap.max_inline_data; + resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + kfree(attr); + kfree(init_attr); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_reg_xrc_rcv_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_reg_xrc_rcv_qp cmd; + struct ib_uxrc_rcv_object *qp_obj, *tmp; + struct ib_xrcd *xrcd; + struct ib_uobject *uobj; + struct ib_uxrcd_object *xrcd_uobj; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + qp_obj = kmalloc(sizeof *qp_obj, GFP_KERNEL); + if (!qp_obj) + return -ENOMEM; + + xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj); + if (!xrcd) { + ret = -EINVAL; + goto err_out; + } + + ret = xrcd->device->reg_xrc_rcv_qp(xrcd, file, cmd.qp_num); + if (ret) + goto err_put; + + xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject); + mutex_lock(&file->device->ib_dev->xrcd_table_mutex); + list_for_each_entry(tmp, &xrcd_uobj->xrc_reg_qp_list, list) + if (cmd.qp_num == tmp->qp_num) { + kfree(qp_obj); + mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); + put_xrcd_read(uobj); + return in_len; + } + qp_obj->qp_num = cmd.qp_num; + qp_obj->domain_handle = cmd.xrc_domain_handle; + list_add_tail(&qp_obj->list, &xrcd_uobj->xrc_reg_qp_list); + mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); + atomic_inc(&xrcd->usecnt); + put_xrcd_read(uobj); + return in_len; + +err_put: + put_xrcd_read(uobj); +err_out: + + kfree(qp_obj); + return ret; +} + +int ib_uverbs_cleanup_xrc_rcv_qp(struct ib_uverbs_file *file, + struct ib_xrcd *xrcd, u32 qp_num) +{ + int err; + err = xrcd->device->unreg_xrc_rcv_qp(xrcd, file, qp_num); + if (!err) + atomic_dec(&xrcd->usecnt); + return err; +} + +ssize_t ib_uverbs_unreg_xrc_rcv_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_unreg_xrc_rcv_qp cmd; + struct ib_uxrc_rcv_object *qp_obj, *tmp; + struct ib_xrcd *xrcd; + struct ib_uobject *uobj; + struct ib_uxrcd_object *xrcd_uobj; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &uobj); + if (!xrcd) + return -EINVAL; + + ret = xrcd->device->unreg_xrc_rcv_qp(xrcd, file, cmd.qp_num); + if (ret) { + put_xrcd_read(uobj); + return -EINVAL; + } + atomic_dec(&xrcd->usecnt); + + xrcd_uobj = container_of(uobj, struct ib_uxrcd_object, uobject); + mutex_lock(&file->device->ib_dev->xrcd_table_mutex); + list_for_each_entry_safe(qp_obj, tmp, &xrcd_uobj->xrc_reg_qp_list, list) + if (cmd.qp_num == qp_obj->qp_num) { + list_del(&qp_obj->list); + kfree(qp_obj); + break; + } + mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); + put_xrcd_read(uobj); + return in_len; +} diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 68b75e7..24c01b6 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -72,6 +72,7 @@ DEFINE_IDR(ib_uverbs_ah_idr); DEFINE_IDR(ib_uverbs_cq_idr); DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); +DEFINE_IDR(ib_uverbs_xrc_domain_idr); static spinlock_t map_lock; static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES]; @@ -108,6 +109,14 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, + [IB_USER_VERBS_CMD_CREATE_XRC_SRQ] = ib_uverbs_create_xrc_srq, + [IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN] = ib_uverbs_open_xrc_domain, + [IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN] = ib_uverbs_close_xrc_domain, + [IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP] = ib_uverbs_create_xrc_rcv_qp, + [IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP] = ib_uverbs_modify_xrc_rcv_qp, + [IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP] = ib_uverbs_query_xrc_rcv_qp, + [IB_USER_VERBS_CMD_REG_XRC_RCV_QP] = ib_uverbs_reg_xrc_rcv_qp, + [IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP] = ib_uverbs_unreg_xrc_rcv_qp, }; static struct vfsmount *uverbs_event_mnt; @@ -211,17 +220,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uqp); } - list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { - struct ib_cq *cq = uobj->object; - struct ib_uverbs_event_file *ev_file = cq->cq_context; - struct ib_ucq_object *ucq = - container_of(uobj, struct ib_ucq_object, uobject); - - idr_remove_uobj(&ib_uverbs_cq_idr, uobj); - ib_destroy_cq(cq); - ib_uverbs_release_ucq(file, ev_file, ucq); - kfree(ucq); - } list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { struct ib_srq *srq = uobj->object; @@ -234,6 +232,18 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uevent); } + list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { + struct ib_cq *cq = uobj->object; + struct ib_uverbs_event_file *ev_file = cq->cq_context; + struct ib_ucq_object *ucq = + container_of(uobj, struct ib_ucq_object, uobject); + + idr_remove_uobj(&ib_uverbs_cq_idr, uobj); + ib_destroy_cq(cq); + ib_uverbs_release_ucq(file, ev_file, ucq); + kfree(ucq); + } + /* XXX Free MWs */ list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { @@ -244,6 +254,27 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uobj); } + mutex_lock(&file->device->ib_dev->xrcd_table_mutex); + list_for_each_entry_safe(uobj, tmp, &context->xrc_domain_list, list) { + struct ib_xrcd *xrcd = uobj->object; + struct ib_uxrc_rcv_object *xrc_qp_obj, *tmp1; + struct ib_uxrcd_object *xrcd_uobj = + container_of(uobj, struct ib_uxrcd_object, uobject); + + list_for_each_entry_safe(xrc_qp_obj, tmp1, + &xrcd_uobj->xrc_reg_qp_list, list) { + list_del(&xrc_qp_obj->list); + ib_uverbs_cleanup_xrc_rcv_qp(file, xrcd, + xrc_qp_obj->qp_num); + kfree(xrc_qp_obj); + } + + idr_remove_uobj(&ib_uverbs_xrc_domain_idr, uobj); + ib_uverbs_dealloc_xrcd(file->device->ib_dev, xrcd); + kfree(uobj); + } + mutex_unlock(&file->device->ib_dev->xrcd_table_mutex); + list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { struct ib_pd *pd = uobj->object; @@ -489,6 +520,13 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler, NULL, NULL); } +void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event, + void *context_ptr) +{ + ib_uverbs_async_handler(context_ptr, event->element.xrc_qp_num, + event->event, NULL, NULL); +} + struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, int is_async, int *fd) { diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index a7da9be..41dddfa 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -234,6 +234,8 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, srq->uobject = NULL; srq->event_handler = srq_init_attr->event_handler; srq->srq_context = srq_init_attr->srq_context; + srq->xrc_cq = NULL; + srq->xrcd = NULL; atomic_inc(&pd->usecnt); atomic_set(&srq->usecnt, 0); } @@ -242,6 +244,36 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, } EXPORT_SYMBOL(ib_create_srq); +struct ib_srq *ib_create_xrc_srq(struct ib_pd *pd, + struct ib_cq *xrc_cq, + struct ib_xrcd *xrcd, + struct ib_srq_init_attr *srq_init_attr) +{ + struct ib_srq *srq; + + if (!pd->device->create_xrc_srq) + return ERR_PTR(-ENOSYS); + + srq = pd->device->create_xrc_srq(pd, xrc_cq, xrcd, srq_init_attr, NULL); + + if (!IS_ERR(srq)) { + srq->device = pd->device; + srq->pd = pd; + srq->uobject = NULL; + srq->event_handler = srq_init_attr->event_handler; + srq->srq_context = srq_init_attr->srq_context; + srq->xrc_cq = xrc_cq; + srq->xrcd = xrcd; + atomic_inc(&pd->usecnt); + atomic_inc(&xrcd->usecnt); + atomic_inc(&xrc_cq->usecnt); + atomic_set(&srq->usecnt, 0); + } + + return srq; +} +EXPORT_SYMBOL(ib_create_xrc_srq); + int ib_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask) @@ -263,16 +295,25 @@ EXPORT_SYMBOL(ib_query_srq); int ib_destroy_srq(struct ib_srq *srq) { struct ib_pd *pd; + struct ib_cq *xrc_cq; + struct ib_xrcd *xrcd; int ret; if (atomic_read(&srq->usecnt)) return -EBUSY; pd = srq->pd; + xrc_cq = srq->xrc_cq; + xrcd = srq->xrcd; ret = srq->device->destroy_srq(srq); - if (!ret) + if (!ret) { atomic_dec(&pd->usecnt); + if (xrc_cq) + atomic_dec(&xrc_cq->usecnt); + if (xrcd) + atomic_dec(&xrcd->usecnt); + } return ret; } @@ -297,11 +338,15 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->event_handler = qp_init_attr->event_handler; qp->qp_context = qp_init_attr->qp_context; qp->qp_type = qp_init_attr->qp_type; + qp->xrcd = qp->qp_type == IB_QPT_XRC ? + qp_init_attr->xrc_domain : NULL; atomic_inc(&pd->usecnt); atomic_inc(&qp_init_attr->send_cq->usecnt); atomic_inc(&qp_init_attr->recv_cq->usecnt); if (qp_init_attr->srq) atomic_inc(&qp_init_attr->srq->usecnt); + if (qp->qp_type == IB_QPT_XRC) + atomic_inc(&qp->xrcd->usecnt); } return qp; @@ -327,6 +372,9 @@ static const struct { [IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), + [IB_QPT_XRC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | @@ -349,6 +397,9 @@ static const struct { [IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), + [IB_QPT_XRC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | @@ -368,6 +419,12 @@ static const struct { IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER), + [IB_QPT_XRC] = (IB_QP_AV | + IB_QP_PATH_MTU | + IB_QP_DEST_QPN | + IB_QP_RQ_PSN | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_MIN_RNR_TIMER), }, .opt_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | @@ -378,6 +435,9 @@ static const struct { [IB_QPT_RC] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), + [IB_QPT_XRC] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | @@ -398,6 +458,11 @@ static const struct { IB_QP_RNR_RETRY | IB_QP_SQ_PSN | IB_QP_MAX_QP_RD_ATOMIC), + [IB_QPT_XRC] = (IB_QP_TIMEOUT | + IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_SQ_PSN | + IB_QP_MAX_QP_RD_ATOMIC), [IB_QPT_SMI] = IB_QP_SQ_PSN, [IB_QPT_GSI] = IB_QP_SQ_PSN, }, @@ -413,6 +478,11 @@ static const struct { IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | @@ -437,6 +507,11 @@ static const struct { IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE | IB_QP_MIN_RNR_TIMER), + [IB_QPT_XRC] = (IB_QP_CUR_STATE | + IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE | + IB_QP_MIN_RNR_TIMER), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | @@ -449,6 +524,7 @@ static const struct { [IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY, + [IB_QPT_XRC] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY } @@ -471,6 +547,11 @@ static const struct { IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | @@ -499,6 +580,18 @@ static const struct { IB_QP_PKEY_INDEX | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), + [IB_QPT_XRC] = (IB_QP_PORT | + IB_QP_AV | + IB_QP_TIMEOUT | + IB_QP_RETRY_CNT | + IB_QP_RNR_RETRY | + IB_QP_MAX_QP_RD_ATOMIC | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_MIN_RNR_TIMER | + IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | @@ -583,12 +676,15 @@ int ib_destroy_qp(struct ib_qp *qp) struct ib_pd *pd; struct ib_cq *scq, *rcq; struct ib_srq *srq; + struct ib_xrcd *xrcd; + enum ib_qp_type qp_type = qp->qp_type; int ret; pd = qp->pd; scq = qp->send_cq; rcq = qp->recv_cq; srq = qp->srq; + xrcd = qp->xrcd; ret = qp->device->destroy_qp(qp); if (!ret) { @@ -597,6 +693,8 @@ int ib_destroy_qp(struct ib_qp *qp) atomic_dec(&rcq->usecnt); if (srq) atomic_dec(&srq->usecnt); + if (qp_type == IB_QPT_XRC) + atomic_dec(&xrcd->usecnt); } return ret; @@ -904,3 +1002,32 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) return qp->device->detach_mcast(qp, gid, lid); } EXPORT_SYMBOL(ib_detach_mcast); + +int ib_dealloc_xrcd(struct ib_xrcd *xrcd) +{ + if (atomic_read(&xrcd->usecnt)) + return -EBUSY; + + return xrcd->device->dealloc_xrcd(xrcd); +} +EXPORT_SYMBOL(ib_dealloc_xrcd); + +struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device) +{ + struct ib_xrcd *xrcd; + + if (!device->alloc_xrcd) + return ERR_PTR(-ENOSYS); + + xrcd = device->alloc_xrcd(device, NULL, NULL); + if (!IS_ERR(xrcd)) { + xrcd->device = device; + xrcd->inode = NULL; + xrcd->uobject = NULL; + atomic_set(&xrcd->usecnt, 0); + } + return xrcd; +} +EXPORT_SYMBOL(ib_alloc_xrcd); + + diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index ed68324..8008ab8 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -33,6 +33,7 @@ #include <linux/mlx4/cq.h> #include <linux/mlx4/qp.h> +#include <linux/mlx4/srq.h> #include "mlx4_ib.h" #include "user.h" @@ -179,7 +180,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector return ERR_PTR(-EINVAL); } - cq = kmalloc(sizeof *cq, GFP_KERNEL); + cq = kzalloc(sizeof *cq, GFP_KERNEL); if (!cq) return ERR_PTR(-ENOMEM); @@ -553,9 +554,11 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, struct mlx4_qp *mqp; struct mlx4_ib_wq *wq; struct mlx4_ib_srq *srq; + struct mlx4_srq *msrq; int is_send; int is_error; u32 g_mlpath_rqpn; + int is_xrc_recv = 0; u16 wqe_ctr; repoll: @@ -597,7 +600,24 @@ repoll: goto repoll; } - if (!*cur_qp || + if ((be32_to_cpu(cqe->vlan_my_qpn) & (1 << 23)) && !is_send) { + /* + * We do not have to take the XRC SRQ table lock here, + * because CQs will be locked while XRC SRQs are removed + * from the table. + */ + msrq = __mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev, + be32_to_cpu(cqe->g_mlpath_rqpn) & + 0xffffff); + if (unlikely(!msrq)) { + printk(KERN_WARNING "CQ %06x with entry for unknown " + "XRC SRQ %06x\n", cq->mcq.cqn, + be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff); + return -EINVAL; + } + is_xrc_recv = 1; + srq = to_mibsrq(msrq); + } else if (!*cur_qp || (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) != (*cur_qp)->mqp.qpn) { /* * We do not have to take the QP table lock here, @@ -615,7 +635,7 @@ repoll: *cur_qp = to_mibqp(mqp); } - wc->qp = &(*cur_qp)->ibqp; + wc->qp = is_xrc_recv ? NULL: &(*cur_qp)->ibqp; if (is_send) { wq = &(*cur_qp)->sq; @@ -625,6 +645,10 @@ repoll: } wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; ++wq->tail; + } else if (is_xrc_recv) { + wqe_ctr = be16_to_cpu(cqe->wqe_index); + wc->wr_id = srq->wrid[wqe_ctr]; + mlx4_ib_free_srq_wqe(srq, wqe_ctr); } else if ((*cur_qp)->ibqp.srq) { srq = to_msrq((*cur_qp)->ibqp.srq); wqe_ctr = be16_to_cpu(cqe->wqe_index); @@ -764,6 +788,10 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) int nfreed = 0; struct mlx4_cqe *cqe, *dest; u8 owner_bit; + int is_xrc_srq = 0; + + if (srq && srq->ibsrq.xrc_cq) + is_xrc_srq = 1; /* * First we need to find the current producer index, so we @@ -782,7 +810,10 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) */ while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); - if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) { + if (((be32_to_cpu(cqe->vlan_my_qpn) & 0xffffff) == qpn) || + (is_xrc_srq && + (be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff) == + srq->msrq.srqn)) { if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index)); ++nfreed; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 4a9dfc2..d5e1d15 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -120,6 +120,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) && (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR)) props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) + props->device_cap_flags |= IB_DEVICE_XRC; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_RAW_ETY) props->max_raw_ethy_qp = dev->ib_dev.phys_port_cnt; @@ -432,7 +434,7 @@ static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev, struct mlx4_ib_pd *pd; int err; - pd = kmalloc(sizeof *pd, GFP_KERNEL); + pd = kzalloc(sizeof *pd, GFP_KERNEL); if (!pd) return ERR_PTR(-ENOMEM); @@ -474,6 +476,80 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) &to_mqp(ibqp)->mqp, gid->raw); } +static void mlx4_dummy_comp_handler(struct ib_cq *cq, void *cq_context) +{ +} + +static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mlx4_ib_xrcd *xrcd; + struct mlx4_ib_dev *mdev = to_mdev(ibdev); + struct ib_pd *pd; + struct ib_cq *cq; + int err; + + if (!(mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) + return ERR_PTR(-ENOSYS); + + xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL); + if (!xrcd) + return ERR_PTR(-ENOMEM); + + err = mlx4_xrcd_alloc(mdev->dev, &xrcd->xrcdn); + if (err) + goto err_xrcd; + + pd = mlx4_ib_alloc_pd(ibdev, NULL, NULL); + if (IS_ERR(pd)) { + err = PTR_ERR(pd); + goto err_pd; + } + pd->device = ibdev; + + cq = mlx4_ib_create_cq(ibdev, 1, 0, NULL, NULL); + if (IS_ERR(cq)) { + err = PTR_ERR(cq); + goto err_cq; + } + cq->device = ibdev; + cq->comp_handler = mlx4_dummy_comp_handler; + + if (context) + if (ib_copy_to_udata(udata, &xrcd->xrcdn, sizeof(__u32))) { + err = -EFAULT; + goto err_copy; + } + + xrcd->cq = cq; + xrcd->pd = pd; + return &xrcd->ibxrcd; + +err_copy: + mlx4_ib_destroy_cq(cq); +err_cq: + mlx4_ib_dealloc_pd(pd); +err_pd: + mlx4_xrcd_free(mdev->dev, xrcd->xrcdn); +err_xrcd: + kfree(xrcd); + return ERR_PTR(err); +} + +static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd) +{ + struct mlx4_ib_xrcd *mxrcd = to_mxrcd(xrcd); + + mlx4_ib_destroy_cq(mxrcd->cq); + mlx4_ib_dealloc_pd(mxrcd->pd); + mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn); + kfree(xrcd); + + return 0; +} + + static int init_node_data(struct mlx4_ib_dev *dev) { struct ib_smp *in_mad = NULL; @@ -792,12 +868,32 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { + ibdev->ib_dev.create_xrc_srq = mlx4_ib_create_xrc_srq; + ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; + ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd; + ibdev->ib_dev.create_xrc_rcv_qp = mlx4_ib_create_xrc_rcv_qp; + ibdev->ib_dev.modify_xrc_rcv_qp = mlx4_ib_modify_xrc_rcv_qp; + ibdev->ib_dev.query_xrc_rcv_qp = mlx4_ib_query_xrc_rcv_qp; + ibdev->ib_dev.reg_xrc_rcv_qp = mlx4_ib_reg_xrc_rcv_qp; + ibdev->ib_dev.unreg_xrc_rcv_qp = mlx4_ib_unreg_xrc_rcv_qp; + ibdev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_CREATE_XRC_SRQ) | + (1ull << IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN) | + (1ull << IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN) | + (1ull << IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP) | + (1ull << IB_USER_VERBS_CMD_REG_XRC_RCV_QP) | + (1ull << IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP); + } if (init_node_data(ibdev)) goto err_map; spin_lock_init(&ibdev->sm_lock); mutex_init(&ibdev->cap_mask_mutex); + mutex_init(&ibdev->xrc_reg_mutex); if (ib_register_device(&ibdev->ib_dev)) goto err_map; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 10a23e9..8c5263f 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -82,6 +82,13 @@ struct mlx4_ib_pd { u32 pdn; }; +struct mlx4_ib_xrcd { + struct ib_xrcd ibxrcd; + u32 xrcdn; + struct ib_pd *pd; + struct ib_cq *cq; +}; + struct mlx4_ib_cq_buf { struct mlx4_buf buf; struct mlx4_mtt mtt; @@ -135,6 +142,7 @@ struct mlx4_ib_wq { enum mlx4_ib_qp_flags { MLX4_IB_QP_LSO = 1 << 0, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1, + MLX4_IB_XRC_RCV = 1 << 2, }; struct mlx4_ib_qp { @@ -157,6 +165,8 @@ struct mlx4_ib_qp { int buf_size; struct mutex mutex; u32 flags; + struct list_head xrc_reg_list; + u16 xrcdn; u8 port; u8 alt_port; u8 atomic_rd_en; @@ -200,6 +210,7 @@ struct mlx4_ib_dev { spinlock_t sm_lock; struct mutex cap_mask_mutex; + struct mutex xrc_reg_mutex; }; static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) @@ -217,6 +228,11 @@ static inline struct mlx4_ib_pd *to_mpd(struct ib_pd *ibpd) return container_of(ibpd, struct mlx4_ib_pd, ibpd); } +static inline struct mlx4_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd) +{ + return container_of(ibxrcd, struct mlx4_ib_xrcd, ibxrcd); +} + static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq) { return container_of(ibcq, struct mlx4_ib_cq, ibcq); @@ -301,6 +317,11 @@ int mlx4_ib_destroy_ah(struct ib_ah *ah); struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); +struct ib_srq *mlx4_ib_create_xrc_srq(struct ib_pd *pd, + struct ib_cq *xrc_cq, + struct ib_xrcd *xrcd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); @@ -337,6 +358,16 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages, u64 iova); int mlx4_ib_unmap_fmr(struct list_head *fmr_list); int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr); +int mlx4_ib_create_xrc_rcv_qp(struct ib_qp_init_attr *init_attr, + u32 *qp_num); +int mlx4_ib_modify_xrc_rcv_qp(struct ib_xrcd *xrcd, u32 qp_num, + struct ib_qp_attr *attr, int attr_mask); +int mlx4_ib_query_xrc_rcv_qp(struct ib_xrcd *xrcd, u32 qp_num, + struct ib_qp_attr *attr, int attr_mask, + struct ib_qp_init_attr *init_attr); +int mlx4_ib_reg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num); +int mlx4_ib_unreg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num); + static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index c5a2fc3..d75019f 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -58,6 +58,12 @@ enum { MLX4_IB_MAX_RAW_ETY_HDR_SIZE = 12 }; + +struct mlx4_ib_xrc_reg_entry { + struct list_head list; + void *context; +}; + struct mlx4_ib_sqp { struct mlx4_ib_qp qp; int pkey_index; @@ -207,14 +213,15 @@ static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind) static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) { struct ib_event event; - struct ib_qp *ibqp = &to_mibqp(qp)->ibqp; + struct mlx4_ib_qp *mqp = to_mibqp(qp); + struct ib_qp *ibqp = &mqp->ibqp; + struct mlx4_ib_xrc_reg_entry *ctx_entry; if (type == MLX4_EVENT_TYPE_PATH_MIG) to_mibqp(qp)->port = to_mibqp(qp)->alt_port; if (ibqp->event_handler) { event.device = ibqp->device; - event.element.qp = ibqp; switch (type) { case MLX4_EVENT_TYPE_PATH_MIG: event.event = IB_EVENT_PATH_MIG; @@ -246,6 +253,15 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) return; } + if (unlikely(ibqp->qp_type == IB_QPT_XRC && + mqp->flags & MLX4_IB_XRC_RCV)) { + event.event |= IB_XRC_QP_EVENT_FLAG; + event.element.xrc_qp_num = ibqp->qp_num; + list_for_each_entry(ctx_entry, &mqp->xrc_reg_list, list) + ibqp->event_handler(&event, ctx_entry->context); + return; + } + event.element.qp = ibqp; ibqp->event_handler(&event, ibqp->qp_context); } } @@ -266,6 +282,7 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags) case IB_QPT_UC: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_raddr_seg); + case IB_QPT_XRC: case IB_QPT_RC: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_atomic_seg) + @@ -293,7 +310,7 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags) } static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - int is_user, int has_srq, struct mlx4_ib_qp *qp) + int is_user, int has_srq_or_is_xrc, struct mlx4_ib_qp *qp) { /* Sanity check RQ size before proceeding */ if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE || @@ -303,7 +320,7 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, return -EINVAL; } - if (has_srq) { + if (has_srq_or_is_xrc) { /* QPs attached to an SRQ should have no RQ */ if (cap->max_recv_wr) { mlx4_ib_dbg("non-zero RQ size for QP using SRQ"); @@ -501,7 +518,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp); + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + !!init_attr->srq || !!init_attr->xrc_domain , qp); if (err) goto err; @@ -540,7 +558,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_mtt; } - if (!init_attr->srq) { + if (!init_attr->srq && init_attr->qp_type != IB_QPT_XRC) { err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), ucmd.db_addr, &qp->db); if (err) { @@ -561,7 +579,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err; - if (!init_attr->srq) { + if (!init_attr->srq && init_attr->qp_type != IB_QPT_XRC) { err = mlx4_db_alloc(dev->dev, &qp->db, 0); if (err) goto err; @@ -607,6 +625,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_wrid; } + if (init_attr->qp_type == IB_QPT_XRC) + qp->mqp.qpn |= (1 << 23); + /* * Hardware wants QPN written in big-endian order (after * shifting) for send doorbell. Precompute this value to save @@ -620,7 +641,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, err_wrid: if (pd->uobject) { - if (!init_attr->srq) + if (!init_attr->srq && init_attr->qp_type != IB_QPT_XRC) mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); } else { @@ -638,7 +659,7 @@ err_buf: mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); err_db: - if (!pd->uobject && !init_attr->srq) + if (!pd->uobject && !init_attr->srq && init_attr->qp_type != IB_QPT_XRC) mlx4_db_free(dev->dev, &qp->db); err: @@ -720,7 +741,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, mlx4_mtt_cleanup(dev->dev, &qp->mtt); if (is_user) { - if (!qp->ibqp.srq) + if (!qp->ibqp.srq && qp->ibqp.qp_type != IB_QPT_XRC) mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context), &qp->db); ib_umem_release(qp->umem); @@ -728,7 +749,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, kfree(qp->sq.wrid); kfree(qp->rq.wrid); mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); - if (!qp->ibqp.srq) + if (!qp->ibqp.srq && qp->ibqp.qp_type != IB_QPT_XRC) mlx4_db_free(dev->dev, &qp->db); } } @@ -755,6 +776,9 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); switch (init_attr->qp_type) { + case IB_QPT_XRC: + if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) + return ERR_PTR(-ENOSYS); case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_UD: @@ -769,6 +793,11 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, return ERR_PTR(err); } + if (init_attr->qp_type == IB_QPT_XRC) + qp->xrcdn = to_mxrcd(init_attr->xrc_domain)->xrcdn; + else + qp->xrcdn = 0; + qp->ibqp.qp_num = qp->mqp.qpn; break; @@ -840,6 +869,7 @@ static int to_mlx4_st(enum ib_qp_type type) case IB_QPT_RC: return MLX4_QP_ST_RC; case IB_QPT_UC: return MLX4_QP_ST_UC; case IB_QPT_UD: return MLX4_QP_ST_UD; + case IB_QPT_XRC: return MLX4_QP_ST_XRC; case IB_QPT_RAW_ETY: case IB_QPT_SMI: case IB_QPT_GSI: return MLX4_QP_ST_MLX; @@ -992,8 +1022,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3; context->sq_size_stride |= qp->sq.wqe_shift - 4; - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { context->sq_size_stride |= !!qp->sq_no_prefetch << 7; + if (ibqp->qp_type == IB_QPT_XRC) + context->xrcd = cpu_to_be32((u32) qp->xrcdn); + } if (qp->ibqp.uobject) context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index); @@ -1121,7 +1154,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (ibqp->srq) context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn); - if (!ibqp->srq && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + if (!ibqp->srq && ibqp->qp_type != IB_QPT_XRC && + cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->db_rec_addr = cpu_to_be64(qp->db.dma); if (cur_state == IB_QPS_INIT && @@ -1214,7 +1248,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, qp->sq.head = 0; qp->sq.tail = 0; qp->sq_next_wqe = 0; - if (!ibqp->srq) + if (!ibqp->srq && ibqp->qp_type != IB_QPT_XRC) *qp->db.db = 0; } @@ -1692,6 +1726,10 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size = sizeof *ctrl / 16; switch (ibqp->qp_type) { + case IB_QPT_XRC: + ctrl->srcrb_flags |= + cpu_to_be32(wr->xrc_remote_srq_num << 8); + /* fall thru */ case IB_QPT_RC: case IB_QPT_UC: switch (wr->opcode) { @@ -2041,7 +2079,8 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr qp_attr->qp_access_flags = to_ib_qp_access_flags(be32_to_cpu(context.params2)); - if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_XRC) { to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path); to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path); qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; @@ -2101,3 +2140,282 @@ out: return err; } +int mlx4_ib_create_xrc_rcv_qp(struct ib_qp_init_attr *init_attr, + u32 *qp_num) +{ + struct mlx4_ib_dev *dev = to_mdev(init_attr->xrc_domain->device); + struct mlx4_ib_xrcd *xrcd = to_mxrcd(init_attr->xrc_domain); + struct mlx4_ib_qp *qp; + struct ib_qp *ibqp; + struct mlx4_ib_xrc_reg_entry *ctx_entry; + int err; + + if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) + return -ENOSYS; + + if (init_attr->qp_type != IB_QPT_XRC) + return -EINVAL; + + ctx_entry = kmalloc(sizeof *ctx_entry, GFP_KERNEL); + if (!ctx_entry) + return -ENOMEM; + + qp = kzalloc(sizeof *qp, GFP_KERNEL); + if (!qp) { + kfree(ctx_entry); + return -ENOMEM; + } + qp->flags = MLX4_IB_XRC_RCV; + qp->xrcdn = to_mxrcd(init_attr->xrc_domain)->xrcdn; + INIT_LIST_HEAD(&qp->xrc_reg_list); + err = create_qp_common(dev, xrcd->pd, init_attr, NULL, 0, qp); + if (err) { + kfree(ctx_entry); + kfree(qp); + return err; + } + + ibqp = &qp->ibqp; + /* set the ibpq attributes which will be used by the mlx4 module */ + ibqp->qp_num = qp->mqp.qpn; + ibqp->device = init_attr->xrc_domain->device; + ibqp->pd = xrcd->pd; + ibqp->send_cq = ibqp->recv_cq = xrcd->cq; + ibqp->event_handler = init_attr->event_handler; + ibqp->qp_context = init_attr->qp_context; + ibqp->qp_type = init_attr->qp_type; + ibqp->xrcd = init_attr->xrc_domain; + + mutex_lock(&qp->mutex); + ctx_entry->context = init_attr->qp_context; + list_add_tail(&ctx_entry->list, &qp->xrc_reg_list); + mutex_unlock(&qp->mutex); + *qp_num = qp->mqp.qpn; + return 0; +} + +int mlx4_ib_modify_xrc_rcv_qp(struct ib_xrcd *ibxrcd, u32 qp_num, + struct ib_qp_attr *attr, int attr_mask) +{ + struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device); + struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd); + struct mlx4_qp *mqp; + int err = -EINVAL; + + if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) + return -ENOSYS; + + mutex_lock(&dev->xrc_reg_mutex); + mqp = __mlx4_qp_lookup(dev->dev, qp_num); + if (unlikely(!mqp)) { + printk(KERN_WARNING "mlx4_ib_reg_xrc_rcv_qp: " + "unknown QPN %06x\n", qp_num); + goto err_out; + } + + if (xrcd->xrcdn != to_mxrcd(to_mibqp(mqp)->ibqp.xrcd)->xrcdn) + goto err_out; + + err = mlx4_ib_modify_qp(&(to_mibqp(mqp)->ibqp), attr, attr_mask, NULL); + mutex_unlock(&dev->xrc_reg_mutex); + return err; + +err_out: + mutex_unlock(&dev->xrc_reg_mutex); + return err; +} + +int mlx4_ib_query_xrc_rcv_qp(struct ib_xrcd *ibxrcd, u32 qp_num, + struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device); + struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd); + struct mlx4_ib_qp *qp; + struct mlx4_qp *mqp; + struct mlx4_qp_context context; + int mlx4_state; + int err = -EINVAL; + + if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) + return -ENOSYS; + + mutex_lock(&dev->xrc_reg_mutex); + mqp = __mlx4_qp_lookup(dev->dev, qp_num); + if (unlikely(!mqp)) { + printk(KERN_WARNING "mlx4_ib_reg_xrc_rcv_qp: " + "unknown QPN %06x\n", qp_num); + goto err_out; + } + + qp = to_mibqp(mqp); + if (xrcd->xrcdn != to_mxrcd(qp->ibqp.xrcd)->xrcdn) + goto err_out; + + if (qp->state == IB_QPS_RESET) { + qp_attr->qp_state = IB_QPS_RESET; + goto done; + } + + err = mlx4_qp_query(dev->dev, mqp, &context); + if (err) + goto err_out; + + mlx4_state = be32_to_cpu(context.flags) >> 28; + + qp_attr->qp_state = to_ib_qp_state(mlx4_state); + qp_attr->path_mtu = context.mtu_msgmax >> 5; + qp_attr->path_mig_state = + to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3); + qp_attr->qkey = be32_to_cpu(context.qkey); + qp_attr->rq_psn = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff; + qp_attr->sq_psn = be32_to_cpu(context.next_send_psn) & 0xffffff; + qp_attr->dest_qp_num = be32_to_cpu(context.remote_qpn) & 0xffffff; + qp_attr->qp_access_flags = + to_ib_qp_access_flags(be32_to_cpu(context.params2)); + + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_XRC) { + to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path); + to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, + &context.alt_path); + qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; + qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; + } + + qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f; + if (qp_attr->qp_state == IB_QPS_INIT) + qp_attr->port_num = qp->port; + else + qp_attr->port_num = context.pri_path.sched_queue & 0x40 ? 2 : 1; + + /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ + qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING; + + qp_attr->max_rd_atomic = + 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7); + + qp_attr->max_dest_rd_atomic = + 1 << ((be32_to_cpu(context.params2) >> 21) & 0x7); + qp_attr->min_rnr_timer = + (be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f; + qp_attr->timeout = context.pri_path.ackto >> 3; + qp_attr->retry_cnt = (be32_to_cpu(context.params1) >> 16) & 0x7; + qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7; + qp_attr->alt_timeout = context.alt_path.ackto >> 3; + +done: + qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->cap.max_recv_wr = 0; + qp_attr->cap.max_recv_sge = 0; + qp_attr->cap.max_send_wr = 0; + qp_attr->cap.max_send_sge = 0; + qp_attr->cap.max_inline_data = 0; + qp_init_attr->cap = qp_attr->cap; + + mutex_unlock(&dev->xrc_reg_mutex); + return 0; + +err_out: + mutex_unlock(&dev->xrc_reg_mutex); + return err; +} + +int mlx4_ib_reg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num) +{ + + struct mlx4_ib_xrcd *mxrcd = to_mxrcd(xrcd); + + struct mlx4_qp *mqp; + struct mlx4_ib_qp *mibqp; + struct mlx4_ib_xrc_reg_entry *ctx_entry, *tmp; + int err = -EINVAL; + + mutex_lock(&to_mdev(xrcd->device)->xrc_reg_mutex); + mqp = __mlx4_qp_lookup(to_mdev(xrcd->device)->dev, qp_num); + if (unlikely(!mqp)) { + printk(KERN_WARNING "mlx4_ib_reg_xrc_rcv_qp: " + "unknown QPN %06x\n", qp_num); + goto err_out; + } + + mibqp = to_mibqp(mqp); + + if (mxrcd->xrcdn != to_mxrcd(mibqp->ibqp.xrcd)->xrcdn) + goto err_out; + + ctx_entry = kmalloc(sizeof *ctx_entry, GFP_KERNEL); + if (!ctx_entry) { + err = -ENOMEM; + goto err_out; + } + + mutex_lock(&mibqp->mutex); + list_for_each_entry(tmp, &mibqp->xrc_reg_list, list) + if (tmp->context == context) { + mutex_unlock(&mibqp->mutex); + kfree(ctx_entry); + mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex); + return 0; + } + + ctx_entry->context = context; + list_add_tail(&ctx_entry->list, &mibqp->xrc_reg_list); + mutex_unlock(&mibqp->mutex); + mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex); + return 0; + +err_out: + mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex); + return err; +} + +int mlx4_ib_unreg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num) +{ + + struct mlx4_ib_xrcd *mxrcd = to_mxrcd(xrcd); + + struct mlx4_qp *mqp; + struct mlx4_ib_qp *mibqp; + struct mlx4_ib_xrc_reg_entry *ctx_entry, *tmp; + int found = 0; + int err = -EINVAL; + + mutex_lock(&to_mdev(xrcd->device)->xrc_reg_mutex); + mqp = __mlx4_qp_lookup(to_mdev(xrcd->device)->dev, qp_num); + if (unlikely(!mqp)) { + printk(KERN_WARNING "mlx4_ib_unreg_xrc_rcv_qp: " + "unknown QPN %06x\n", qp_num); + goto err_out; + } + + mibqp = to_mibqp(mqp); + + if (mxrcd->xrcdn != (mibqp->xrcdn & 0xffff)) + goto err_out; + + mutex_lock(&mibqp->mutex); + list_for_each_entry_safe(ctx_entry, tmp, &mibqp->xrc_reg_list, list) + if (ctx_entry->context == context) { + found = 1; + list_del(&ctx_entry->list); + kfree(ctx_entry); + break; + } + + mutex_unlock(&mibqp->mutex); + if (!found) + goto err_out; + + /* destroy the QP if the registration list is empty */ + if (list_empty(&mibqp->xrc_reg_list)) + mlx4_ib_destroy_qp(&mibqp->ibqp); + + mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex); + return 0; + +err_out: + mutex_unlock(&to_mdev(xrcd->device)->xrc_reg_mutex); + return err; +} + diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 688b05f..dcb91e6 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -67,13 +67,17 @@ static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type) } } -struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +struct ib_srq *mlx4_ib_create_xrc_srq(struct ib_pd *pd, + struct ib_cq *xrc_cq, + struct ib_xrcd *xrcd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_srq *srq; struct mlx4_wqe_srq_next_seg *next; + u32 cqn; + u16 xrcdn; int desc_size; int buf_size; int err; @@ -171,18 +175,24 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, } } - err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, &srq->mtt, + cqn = xrc_cq ? (u32) (to_mcq(xrc_cq)->mcq.cqn) : 0; + xrcdn = xrcd ? (u16) (to_mxrcd(xrcd)->xrcdn) : + (u16) dev->dev->caps.reserved_xrcds; + + err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcdn, &srq->mtt, srq->db.dma, &srq->msrq); if (err) goto err_wrid; srq->msrq.event = mlx4_ib_srq_event; - if (pd->uobject) + if (pd->uobject) { if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) { err = -EFAULT; goto err_wrid; } + } else + srq->ibsrq.xrc_srq_num = srq->msrq.srqn; init_attr->attr.max_wr = srq->msrq.max - 1; @@ -243,6 +253,13 @@ int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, return 0; } +struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + return mlx4_ib_create_xrc_srq(pd, NULL, NULL, init_attr, udata); +} + int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) { struct mlx4_ib_dev *dev = to_mdev(ibsrq->device); @@ -265,6 +282,18 @@ int mlx4_ib_destroy_srq(struct ib_srq *srq) { struct mlx4_ib_dev *dev = to_mdev(srq->device); struct mlx4_ib_srq *msrq = to_msrq(srq); + struct mlx4_ib_cq *cq; + + mlx4_srq_invalidate(dev->dev, &msrq->msrq); + + if (srq->xrc_cq && !srq->uobject) { + cq = to_mcq(srq->xrc_cq); + spin_lock_irq(&cq->lock); + __mlx4_ib_cq_clean(cq, -1, msrq); + mlx4_srq_remove(dev->dev, &msrq->msrq); + spin_unlock_irq(&cq->lock); + } else + mlx4_srq_remove(dev->dev, &msrq->msrq); mlx4_srq_free(dev->dev, &msrq->msrq); mlx4_mtt_cleanup(dev->dev, &msrq->mtt); diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile index d3ecd67..5b96ef4 100644 --- a/drivers/net/mlx4/Makefile +++ b/drivers/net/mlx4/Makefile @@ -1,8 +1,9 @@ obj-$(CONFIG_MLX4_CORE) += mlx4_core.o mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \ - mr.o pd.o profile.o qp.o reset.o srq.o port.o sense.o + mr.o pd.o profile.o qp.o reset.o srq.o port.o sense.o xrcd.o obj-$(CONFIG_MLX4_EN) += mlx4_en.o -mlx4_en-y := en_main.o en_tx.o en_rx.o en_params.o en_port.o en_cq.o en_resources.o en_netdev.o en_frag.o +mlx4_en-y := en_main.o en_tx.o en_rx.o en_params.o en_port.o en_cq.o \ + en_resources.o en_netdev.o en_frag.o diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index fe2ff64..0cc1f20 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -195,6 +195,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_MCG_OFFSET 0x63 #define QUERY_DEV_CAP_RSVD_PD_OFFSET 0x64 #define QUERY_DEV_CAP_MAX_PD_OFFSET 0x65 +#define QUERY_DEV_CAP_RSVD_XRC_OFFSET 0x66 +#define QUERY_DEV_CAP_MAX_XRC_OFFSET 0x67 #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80 #define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82 #define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET 0x84 @@ -305,6 +307,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET); dev_cap->max_pds = 1 << (field & 0x3f); + MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_XRC_OFFSET); + dev_cap->reserved_xrcds = field >> 4; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_XRC_OFFSET); + dev_cap->max_xrcds = 1 << (field & 0x1f); + MLX4_GET(size, outbox, QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET); dev_cap->rdmarc_entry_sz = size; MLX4_GET(size, outbox, QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET); diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h index cabcb87..39a3608 100644 --- a/drivers/net/mlx4/fw.h +++ b/drivers/net/mlx4/fw.h @@ -93,6 +93,8 @@ struct mlx4_dev_cap { int max_mcgs; int reserved_pds; int max_pds; + int reserved_xrcds; + int max_xrcds; int qpc_entry_sz; int rdmarc_entry_sz; int altc_entry_sz; diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 9358528..960618f 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -296,6 +296,10 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.reserved_lkey = dev_cap->reserved_lkey; dev->caps.stat_rate_support = dev_cap->stat_rate_support; dev->caps.max_gso_sz = dev_cap->max_gso_sz; + dev->caps.reserved_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? + dev_cap->reserved_xrcds : 0; + dev->caps.max_xrcds = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ? + dev_cap->max_xrcds : 0; dev->caps.log_num_macs = log_num_mac; dev->caps.log_num_vlans = log_num_vlan; @@ -973,11 +977,18 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) goto err_kar_unmap; } + err = mlx4_init_xrcd_table(dev); + if (err) { + mlx4_err(dev, "Failed to initialize extended " + "reliably connected domain table, aborting.\n"); + goto err_pd_table_free; + } + err = mlx4_init_mr_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " "memory region table, aborting.\n"); - goto err_pd_table_free; + goto err_xrcd_table_free; } err = mlx4_init_eq_table(dev); @@ -1080,6 +1091,9 @@ err_eq_table_free: err_mr_table_free: mlx4_cleanup_mr_table(dev); +err_xrcd_table_free: + mlx4_cleanup_xrcd_table(dev); + err_pd_table_free: mlx4_cleanup_pd_table(dev); @@ -1324,6 +1338,7 @@ err_port: mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); mlx4_cleanup_mr_table(dev); + mlx4_cleanup_xrcd_table(dev); mlx4_cleanup_pd_table(dev); mlx4_cleanup_uar_table(dev); @@ -1385,6 +1400,7 @@ static void mlx4_remove_one(struct pci_dev *pdev) mlx4_cmd_use_polling(dev); mlx4_cleanup_eq_table(dev); mlx4_cleanup_mr_table(dev); + mlx4_cleanup_xrcd_table(dev); mlx4_cleanup_pd_table(dev); iounmap(priv->kar); diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index f2ce940..c156a3b 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -228,7 +228,6 @@ struct mlx4_eq_table { struct mlx4_srq_table { struct mlx4_bitmap bitmap; spinlock_t lock; - struct radix_tree_root tree; struct mlx4_icm_table table; struct mlx4_icm_table cmpt_table; }; @@ -314,6 +313,7 @@ struct mlx4_priv { struct mlx4_cmd cmd; struct mlx4_bitmap pd_bitmap; + struct mlx4_bitmap xrcd_bitmap; struct mlx4_uar_table uar_table; struct mlx4_mr_table mr_table; struct mlx4_cq_table cq_table; @@ -354,6 +354,7 @@ void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap); int mlx4_reset(struct mlx4_dev *dev); int mlx4_init_pd_table(struct mlx4_dev *dev); +int mlx4_init_xrcd_table(struct mlx4_dev *dev); int mlx4_init_uar_table(struct mlx4_dev *dev); int mlx4_init_mr_table(struct mlx4_dev *dev); int mlx4_init_eq_table(struct mlx4_dev *dev); @@ -370,6 +371,7 @@ void mlx4_cleanup_cq_table(struct mlx4_dev *dev); void mlx4_cleanup_qp_table(struct mlx4_dev *dev); void mlx4_cleanup_srq_table(struct mlx4_dev *dev); void mlx4_cleanup_mcg_table(struct mlx4_dev *dev); +void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev); void mlx4_start_catas_poll(struct mlx4_dev *dev); void mlx4_stop_catas_poll(struct mlx4_dev *dev); diff --git a/drivers/net/mlx4/qp.c b/drivers/net/mlx4/qp.c index 9c9f1a2..c2499d5 100644 --- a/drivers/net/mlx4/qp.c +++ b/drivers/net/mlx4/qp.c @@ -283,6 +283,8 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) * We reserve 2 extra QPs per port for the special QPs. The * block of special QPs must be aligned to a multiple of 8, so * round up. + * We also reserve the MSB of the 24-bit QP number to indicate + * an XRC qp. */ dev->caps.sqp_start = ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8); diff --git a/drivers/net/mlx4/srq.c b/drivers/net/mlx4/srq.c index fe9f218..7a527f1 100644 --- a/drivers/net/mlx4/srq.c +++ b/drivers/net/mlx4/srq.c @@ -41,20 +41,20 @@ struct mlx4_srq_context { __be32 state_logsize_srqn; u8 logstride; - u8 reserved1[3]; - u8 pg_offset; - u8 reserved2[3]; - u32 reserved3; + u8 reserved1; + __be16 xrc_domain; + __be32 pg_offset_cqn; + u32 reserved2; u8 log_page_size; - u8 reserved4[2]; + u8 reserved3[2]; u8 mtt_base_addr_h; __be32 mtt_base_addr_l; __be32 pd; __be16 limit_watermark; __be16 wqe_cnt; - u16 reserved5; + u16 reserved4; __be16 wqe_counter; - u32 reserved6; + u32 reserved5; __be64 db_rec_addr; }; @@ -65,7 +65,8 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type) spin_lock(&srq_table->lock); - srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); + srq = radix_tree_lookup(&dev->srq_table_tree, + srqn & (dev->caps.num_srqs - 1)); if (srq) atomic_inc(&srq->refcount); @@ -110,8 +111,8 @@ static int mlx4_QUERY_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox MLX4_CMD_TIME_CLASS_A); } -int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt, - u64 db_rec, struct mlx4_srq *srq) +int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd, + struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq) { struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; struct mlx4_cmd_mailbox *mailbox; @@ -132,7 +133,7 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt, goto err_put; spin_lock_irq(&srq_table->lock); - err = radix_tree_insert(&srq_table->tree, srq->srqn, srq); + err = radix_tree_insert(&dev->srq_table_tree, srq->srqn, srq); spin_unlock_irq(&srq_table->lock); if (err) goto err_cmpt_put; @@ -149,6 +150,8 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt, srq_context->state_logsize_srqn = cpu_to_be32((ilog2(srq->max) << 24) | srq->srqn); srq_context->logstride = srq->wqe_shift - 4; + srq_context->xrc_domain = cpu_to_be16(xrcd); + srq_context->pg_offset_cqn = cpu_to_be32(cqn & 0xffffff); srq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT; mtt_addr = mlx4_mtt_addr(dev, mtt); @@ -169,7 +172,7 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt, err_radix: spin_lock_irq(&srq_table->lock); - radix_tree_delete(&srq_table->tree, srq->srqn); + radix_tree_delete(&dev->srq_table_tree, srq->srqn); spin_unlock_irq(&srq_table->lock); err_cmpt_put: @@ -185,18 +188,29 @@ err_out: } EXPORT_SYMBOL_GPL(mlx4_srq_alloc); -void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq) +void mlx4_srq_invalidate(struct mlx4_dev *dev, struct mlx4_srq *srq) { - struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; int err; err = mlx4_HW2SW_SRQ(dev, NULL, srq->srqn); if (err) mlx4_warn(dev, "HW2SW_SRQ failed (%d) for SRQN %06x\n", err, srq->srqn); +} +EXPORT_SYMBOL_GPL(mlx4_srq_invalidate); + +void mlx4_srq_remove(struct mlx4_dev *dev, struct mlx4_srq *srq) +{ + struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; spin_lock_irq(&srq_table->lock); - radix_tree_delete(&srq_table->tree, srq->srqn); + radix_tree_delete(&dev->srq_table_tree, srq->srqn); spin_unlock_irq(&srq_table->lock); +} +EXPORT_SYMBOL_GPL(mlx4_srq_remove); + +void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq) +{ + struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; if (atomic_dec_and_test(&srq->refcount)) complete(&srq->free); @@ -242,7 +256,7 @@ int mlx4_init_srq_table(struct mlx4_dev *dev) int err; spin_lock_init(&srq_table->lock); - INIT_RADIX_TREE(&srq_table->tree, GFP_ATOMIC); + INIT_RADIX_TREE(&dev->srq_table_tree, GFP_ATOMIC); err = mlx4_bitmap_init(&srq_table->bitmap, dev->caps.num_srqs, dev->caps.num_srqs - 1, dev->caps.reserved_srqs, 0); diff --git a/drivers/net/mlx4/xrcd.c b/drivers/net/mlx4/xrcd.c new file mode 100644 index 0000000..d1bfc11 --- /dev/null +++ b/drivers/net/mlx4/xrcd.c @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/init.h> +#include <linux/errno.h> + +#include "mlx4.h" + +int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + *xrcdn = mlx4_bitmap_alloc(&priv->xrcd_bitmap); + if (*xrcdn == -1) + return -ENOMEM; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_xrcd_alloc); + +void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn) +{ + mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn); +} +EXPORT_SYMBOL_GPL(mlx4_xrcd_free); + +int __devinit mlx4_init_xrcd_table(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + + return mlx4_bitmap_init(&priv->xrcd_bitmap, (1 << 16), + (1 << 16) - 1, dev->caps.reserved_xrcds + 1, 0); +} + +void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev) +{ + mlx4_bitmap_cleanup(&mlx4_priv(dev)->xrcd_bitmap); +} + + diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b3ed1cb..a6b4293 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -56,6 +56,7 @@ enum { MLX4_DEV_CAP_FLAG_RC = 1 << 0, MLX4_DEV_CAP_FLAG_UC = 1 << 1, MLX4_DEV_CAP_FLAG_UD = 1 << 2, + MLX4_DEV_CAP_FLAG_XRC = 1 << 3, MLX4_DEV_CAP_FLAG_SRQ = 1 << 6, MLX4_DEV_CAP_FLAG_IPOIB_CSUM = 1 << 7, MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1 << 8, @@ -224,6 +225,8 @@ struct mlx4_caps { int num_pds; int reserved_pds; int mtt_entry_sz; + int reserved_xrcds; + int max_xrcds; u32 max_msg_sz; u32 page_size_cap; u32 flags; @@ -379,6 +382,7 @@ struct mlx4_dev { unsigned long flags; struct mlx4_caps caps; struct radix_tree_root qp_table_tree; + struct radix_tree_root srq_table_tree; u32 rev_id; char board_id[MLX4_BOARD_ID_LEN]; }; @@ -425,6 +429,9 @@ static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn); void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn); +int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn); +void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn); + int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar); void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar); @@ -467,8 +474,8 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp); void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp); -int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt, - u64 db_rec, struct mlx4_srq *srq); +int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd, + struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq); void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq); int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark); int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark); diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 9a628a6..bb5bc75 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -74,6 +74,7 @@ enum { MLX4_QP_ST_UC = 0x1, MLX4_QP_ST_RD = 0x2, MLX4_QP_ST_UD = 0x3, + MLX4_QP_ST_XRC = 0x6, MLX4_QP_ST_MLX = 0x7 }; @@ -136,7 +137,7 @@ struct mlx4_qp_context { __be32 ssn; __be32 params2; __be32 rnr_nextrecvpsn; - __be32 srcd; + __be32 xrcd; __be32 cqn_recv; __be64 db_rec_addr; __be32 qkey; diff --git a/include/linux/mlx4/srq.h b/include/linux/mlx4/srq.h index 799a069..5e041e5 100644 --- a/include/linux/mlx4/srq.h +++ b/include/linux/mlx4/srq.h @@ -33,10 +33,22 @@ #ifndef MLX4_SRQ_H #define MLX4_SRQ_H +#include <linux/types.h> +#include <linux/mlx4/device.h> + struct mlx4_wqe_srq_next_seg { u16 reserved1; __be16 next_wqe_index; u32 reserved2[3]; }; +void mlx4_srq_invalidate(struct mlx4_dev *dev, struct mlx4_srq *srq); +void mlx4_srq_remove(struct mlx4_dev *dev, struct mlx4_srq *srq); + +static inline struct mlx4_srq *__mlx4_srq_lookup(struct mlx4_dev *dev, u32 srqn) +{ + return radix_tree_lookup(&dev->srq_table_tree, + srqn & (dev->caps.num_srqs - 1)); +} + #endif /* MLX4_SRQ_H */ diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index a17f771..0df90d8 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -81,7 +81,15 @@ enum { IB_USER_VERBS_CMD_MODIFY_SRQ, IB_USER_VERBS_CMD_QUERY_SRQ, IB_USER_VERBS_CMD_DESTROY_SRQ, - IB_USER_VERBS_CMD_POST_SRQ_RECV + IB_USER_VERBS_CMD_POST_SRQ_RECV, + IB_USER_VERBS_CMD_CREATE_XRC_SRQ, + IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN, + IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN, + IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP, + IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP, + IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP, + IB_USER_VERBS_CMD_REG_XRC_RCV_QP, + IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP, }; /* @@ -647,6 +655,18 @@ struct ib_uverbs_create_srq { __u64 driver_data[0]; }; +struct ib_uverbs_create_xrc_srq { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 max_wr; + __u32 max_sge; + __u32 srq_limit; + __u32 xrcd_handle; + __u32 xrc_cq; + __u64 driver_data[0]; +}; + struct ib_uverbs_create_srq_resp { __u32 srq_handle; __u32 max_wr; @@ -686,4 +706,95 @@ struct ib_uverbs_destroy_srq_resp { __u32 events_reported; }; +struct ib_uverbs_open_xrc_domain { + __u64 response; + __u32 fd; + __u32 oflags; + __u64 driver_data[0]; +}; + +struct ib_uverbs_open_xrc_domain_resp { + __u32 xrcd_handle; +}; + +struct ib_uverbs_close_xrc_domain { + __u64 response; + __u32 xrcd_handle; + __u32 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_xrc_rcv_qp { + __u64 response; + __u64 user_handle; + __u32 xrc_domain_handle; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u8 sq_sig_all; + __u8 qp_type; + __u8 reserved[6]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_xrc_rcv_qp_resp { + __u32 qpn; + __u32 reserved; +}; + +struct ib_uverbs_modify_xrc_rcv_qp { + __u32 xrc_domain_handle; + __u32 qp_num; + struct ib_uverbs_qp_dest dest; + struct ib_uverbs_qp_dest alt_dest; + __u32 attr_mask; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 qp_state; + __u8 cur_qp_state; + __u8 path_mtu; + __u8 path_mig_state; + __u8 en_sqd_async_notify; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 reserved[6]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_xrc_rcv_qp { + __u64 response; + __u32 xrc_domain_handle; + __u32 qp_num; + __u32 attr_mask; + __u32 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_reg_xrc_rcv_qp { + __u32 xrc_domain_handle; + __u32 qp_num; + __u64 driver_data[0]; +}; + +struct ib_uverbs_unreg_xrc_rcv_qp { + __u32 xrc_domain_handle; + __u32 qp_num; + __u64 driver_data[0]; +}; + + #endif /* IB_USER_VERBS_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6fbfbae..17ff45c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -50,6 +50,8 @@ #include <asm/atomic.h> #include <asm/uaccess.h> +#include <linux/rbtree.h> +#include <linux/mutex.h> union ib_gid { u8 raw[16]; @@ -103,6 +105,7 @@ enum ib_device_cap_flags { */ IB_DEVICE_UD_IP_CSUM = (1<<18), IB_DEVICE_UD_TSO = (1<<19), + IB_DEVICE_XRC = (1<<20), IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21), IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22), }; @@ -343,6 +346,10 @@ enum ib_event_type { IB_EVENT_CLIENT_REREGISTER }; +enum ib_event_flags { + IB_XRC_QP_EVENT_FLAG = 0x80000000, +}; + struct ib_event { struct ib_device *device; union { @@ -350,6 +357,7 @@ struct ib_event { struct ib_qp *qp; struct ib_srq *srq; u8 port_num; + u32 xrc_qp_num; } element; enum ib_event_type event; }; @@ -551,6 +559,7 @@ enum ib_qp_type { IB_QPT_RC, IB_QPT_UC, IB_QPT_UD, + IB_QPT_XRC, IB_QPT_RAW_IPV6, IB_QPT_RAW_ETY }; @@ -570,6 +579,7 @@ struct ib_qp_init_attr { enum ib_sig_type sq_sig_type; enum ib_qp_type qp_type; enum ib_qp_create_flags create_flags; + struct ib_xrcd *xrc_domain; /* XRC qp's only */ u8 port_num; /* special QP types only */ }; @@ -758,6 +768,7 @@ struct ib_send_wr { u8 static_rate; } raw_ety; } wr; + u32 xrc_remote_srq_num; /* valid for XRC sends only */ }; struct ib_recv_wr { @@ -819,6 +830,7 @@ struct ib_ucontext { struct list_head qp_list; struct list_head srq_list; struct list_head ah_list; + struct list_head xrc_domain_list; int closing; }; @@ -840,12 +852,27 @@ struct ib_udata { size_t outlen; }; +struct ib_uxrc_rcv_object { + struct list_head list; /* link to context's list */ + u32 qp_num; + u32 domain_handle; +}; + struct ib_pd { struct ib_device *device; struct ib_uobject *uobject; atomic_t usecnt; /* count all resources */ }; +struct ib_xrcd { + struct ib_device *device; + struct ib_uobject *uobject; + struct inode *inode; + struct rb_node node; + atomic_t usecnt; /* count all resources */ +}; + + struct ib_ah { struct ib_device *device; struct ib_pd *pd; @@ -867,10 +894,13 @@ struct ib_cq { struct ib_srq { struct ib_device *device; struct ib_pd *pd; + struct ib_cq *xrc_cq; + struct ib_xrcd *xrcd; struct ib_uobject *uobject; void (*event_handler)(struct ib_event *, void *); void *srq_context; atomic_t usecnt; + u32 xrc_srq_num; }; struct ib_qp { @@ -884,6 +914,7 @@ struct ib_qp { void *qp_context; u32 qp_num; enum ib_qp_type qp_type; + struct ib_xrcd *xrcd; /* XRC QPs only */ }; struct ib_mr { @@ -1135,6 +1166,32 @@ struct ib_device { struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad); + struct ib_srq * (*create_xrc_srq)(struct ib_pd *pd, + struct ib_cq *xrc_cq, + struct ib_xrcd *xrcd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); + struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device, + struct ib_ucontext *context, + struct ib_udata *udata); + int (*dealloc_xrcd)(struct ib_xrcd *xrcd); + int (*create_xrc_rcv_qp)(struct ib_qp_init_attr *init_attr, + u32 *qp_num); + int (*modify_xrc_rcv_qp)(struct ib_xrcd *xrcd, + u32 qp_num, + struct ib_qp_attr *attr, + int attr_mask); + int (*query_xrc_rcv_qp)(struct ib_xrcd *xrcd, + u32 qp_num, + struct ib_qp_attr *attr, + int attr_mask, + struct ib_qp_init_attr *init_attr); + int (*reg_xrc_rcv_qp)(struct ib_xrcd *xrcd, + void *context, + u32 qp_num); + int (*unreg_xrc_rcv_qp)(struct ib_xrcd *xrcd, + void *context, + u32 qp_num); struct ib_dma_mapping_ops *dma_ops; @@ -1158,6 +1215,8 @@ struct ib_device { u32 local_dma_lkey; u8 node_type; u8 phys_port_cnt; + struct rb_root ib_uverbs_xrcd_table; + struct mutex xrcd_table_mutex; }; struct ib_client { @@ -1318,8 +1377,28 @@ int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); int ib_destroy_ah(struct ib_ah *ah); /** - * ib_create_srq - Creates a SRQ associated with the specified protection - * domain. + * ib_create_xrc_srq - Creates an XRC SRQ associated with the specified + * protection domain, cq, and xrc domain. + * @pd: The protection domain associated with the SRQ. + * @xrc_cq: The cq to be associated with the XRC SRQ. + * @xrcd: The XRC domain to be associated with the XRC SRQ. + * @srq_init_attr: A list of initial attributes required to create the + * XRC SRQ. If XRC SRQ creation succeeds, then the attributes are updated + * to the actual capabilities of the created XRC SRQ. + * + * srq_attr->max_wr and srq_attr->max_sge are read the determine the + * requested size of the XRC SRQ, and set to the actual values allocated + * on return. If ib_create_xrc_srq() succeeds, then max_wr and max_sge + * will always be at least as large as the requested values. + */ +struct ib_srq *ib_create_xrc_srq(struct ib_pd *pd, + struct ib_cq *xrc_cq, + struct ib_xrcd *xrcd, + struct ib_srq_init_attr *srq_init_attr); + +/** + * ib_create_srq - Creates an SRQ associated with the specified + * protection domain. * @pd: The protection domain associated with the SRQ. * @srq_init_attr: A list of initial attributes required to create the * SRQ. If SRQ creation succeeds, then the attributes are updated to @@ -2045,4 +2124,17 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); */ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +/** + * ib_dealloc_xrcd - Deallocates an extended reliably connected domain. + * @xrcd: The xrc domain to deallocate. + */ +int ib_dealloc_xrcd(struct ib_xrcd *xrcd); + +/** + * ib_alloc_xrcd - Allocates an extended reliably connected domain. + * @device: The device on which to allocate the xrcd. + */ +struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device); + #endif /* IB_VERBS_H */