diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 50fb1cd447b76d80de09d6f565e737cd37101af5..189e5d4b9b17ed271027e2c015b252f929fbd32d 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -12,7 +12,7 @@ ib_core-y :=			packer.o ud_header.o verbs.o sysfs.o \
 
 ib_mad-y :=			mad.o smi.o agent.o mad_rmpp.o
 
-ib_sa-y :=			sa_query.o
+ib_sa-y :=			sa_query.o multicast.o
 
 ib_cm-y :=			cm.o
 
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index db88e609bf429f6a1fb9f37527f0c21651d14320..f8d69b3fa30796f8c4258a858a3664c0f5bff647 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -71,6 +71,7 @@ static struct workqueue_struct *cma_wq;
 static DEFINE_IDR(sdp_ps);
 static DEFINE_IDR(tcp_ps);
 static DEFINE_IDR(udp_ps);
+static DEFINE_IDR(ipoib_ps);
 static int next_port;
 
 struct cma_device {
@@ -116,6 +117,7 @@ struct rdma_id_private {
 	struct list_head	list;
 	struct list_head	listen_list;
 	struct cma_device	*cma_dev;
+	struct list_head	mc_list;
 
 	enum cma_state		state;
 	spinlock_t		lock;
@@ -134,10 +136,23 @@ struct rdma_id_private {
 	} cm_id;
 
 	u32			seq_num;
+	u32			qkey;
 	u32			qp_num;
 	u8			srq;
 };
 
+struct cma_multicast {
+	struct rdma_id_private *id_priv;
+	union {
+		struct ib_sa_multicast *ib;
+	} multicast;
+	struct list_head	list;
+	void			*context;
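+	/* addr plus pad can hold either a sockaddr_in or a sockaddr_in6 */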
+	struct sockaddr		addr;
+	u8			pad[sizeof(struct sockaddr_in6) -
+				    sizeof(struct sockaddr)];
+};
+
 struct cma_work {
 	struct work_struct	work;
 	struct rdma_id_private	*id;
@@ -243,6 +258,11 @@ static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
 	hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
 }
 
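+/* Port spaces that map onto unreliable datagram (UD) QP services. */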
+static inline int cma_is_ud_ps(enum rdma_port_space ps)
+{
+	return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
+}
+
 static void cma_attach_to_dev(struct rdma_id_private *id_priv,
 			      struct cma_device *cma_dev)
 {
@@ -265,19 +285,41 @@ static void cma_detach_from_dev(struct rdma_id_private *id_priv)
 	id_priv->cma_dev = NULL;
 }
 
+static int cma_set_qkey(struct ib_device *device, u8 port_num,
+			enum rdma_port_space ps,
+			struct rdma_dev_addr *dev_addr, u32 *qkey)
+{
+	struct ib_sa_mcmember_rec rec;
+	int ret = 0;
+
+	switch (ps) {
+	case RDMA_PS_UDP:
+		*qkey = RDMA_UDP_QKEY;
+		break;
+	case RDMA_PS_IPOIB:
+		ib_addr_get_mgid(dev_addr, &rec.mgid);
+		ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec);
+		if (!ret)
+			*qkey = be32_to_cpu(rec.qkey);
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
 static int cma_acquire_dev(struct rdma_id_private *id_priv)
 {
-	enum rdma_node_type dev_type = id_priv->id.route.addr.dev_addr.dev_type;
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 	struct cma_device *cma_dev;
 	union ib_gid gid;
 	int ret = -ENODEV;
 
-	switch (rdma_node_get_transport(dev_type)) {
+	switch (rdma_node_get_transport(dev_addr->dev_type)) {
 	case RDMA_TRANSPORT_IB:
-		ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+		ib_addr_get_sgid(dev_addr, &gid);
 		break;
 	case RDMA_TRANSPORT_IWARP:
-		iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+		iw_addr_get_sgid(dev_addr, &gid);
 		break;
 	default:
 		return -ENODEV;
@@ -287,7 +329,12 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
 		ret = ib_find_cached_gid(cma_dev->device, &gid,
 					 &id_priv->id.port_num, NULL);
 		if (!ret) {
-			cma_attach_to_dev(id_priv, cma_dev);
+			ret = cma_set_qkey(cma_dev->device,
+					   id_priv->id.port_num,
+					   id_priv->id.ps, dev_addr,
+					   &id_priv->qkey);
+			if (!ret)
+				cma_attach_to_dev(id_priv, cma_dev);
 			break;
 		}
 	}
@@ -325,40 +372,50 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
 	init_waitqueue_head(&id_priv->wait_remove);
 	atomic_set(&id_priv->dev_remove, 0);
 	INIT_LIST_HEAD(&id_priv->listen_list);
+	INIT_LIST_HEAD(&id_priv->mc_list);
 	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
 
 	return &id_priv->id;
 }
 EXPORT_SYMBOL(rdma_create_id);
 
-static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
 {
 	struct ib_qp_attr qp_attr;
-	struct rdma_dev_addr *dev_addr;
-	int ret;
+	int qp_attr_mask, ret;
 
-	dev_addr = &id_priv->id.route.addr.dev_addr;
-	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
-				  ib_addr_get_pkey(dev_addr),
-				  &qp_attr.pkey_index);
+	qp_attr.qp_state = IB_QPS_INIT;
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
 	if (ret)
 		return ret;
 
-	qp_attr.qp_state = IB_QPS_INIT;
-	qp_attr.qp_access_flags = 0;
-	qp_attr.port_num = id_priv->id.port_num;
-	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS |
-					  IB_QP_PKEY_INDEX | IB_QP_PORT);
+	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+	if (ret)
+		return ret;
+
+	qp_attr.qp_state = IB_QPS_RTR;
+	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+	if (ret)
+		return ret;
+
+	qp_attr.qp_state = IB_QPS_RTS;
+	qp_attr.sq_psn = 0;
+	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
+
+	return ret;
 }
 
-static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
 {
 	struct ib_qp_attr qp_attr;
+	int qp_attr_mask, ret;
 
 	qp_attr.qp_state = IB_QPS_INIT;
-	qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
+	if (ret)
+		return ret;
 
-	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS);
+	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 }
 
 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
@@ -376,18 +433,10 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
 	if (IS_ERR(qp))
 		return PTR_ERR(qp);
 
-	switch (rdma_node_get_transport(id->device->node_type)) {
-	case RDMA_TRANSPORT_IB:
-		ret = cma_init_ib_qp(id_priv, qp);
-		break;
-	case RDMA_TRANSPORT_IWARP:
-		ret = cma_init_iw_qp(id_priv, qp);
-		break;
-	default:
-		ret = -ENOSYS;
-		break;
-	}
-
+	if (cma_is_ud_ps(id_priv->id.ps))
+		ret = cma_init_ud_qp(id_priv, qp);
+	else
+		ret = cma_init_conn_qp(id_priv, qp);
 	if (ret)
 		goto err;
 
@@ -460,23 +509,55 @@ static int cma_modify_qp_err(struct rdma_cm_id *id)
 	return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
 }
 
+static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
+			       struct ib_qp_attr *qp_attr, int *qp_attr_mask)
+{
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+	int ret;
+
+	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
+				  ib_addr_get_pkey(dev_addr),
+				  &qp_attr->pkey_index);
+	if (ret)
+		return ret;
+
+	qp_attr->port_num = id_priv->id.port_num;
+	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
+
+	if (cma_is_ud_ps(id_priv->id.ps)) {
+		qp_attr->qkey = id_priv->qkey;
+		*qp_attr_mask |= IB_QP_QKEY;
+	} else {
+		qp_attr->qp_access_flags = 0;
+		*qp_attr_mask |= IB_QP_ACCESS_FLAGS;
+	}
+	return 0;
+}
+
 int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
 		       int *qp_attr_mask)
 {
 	struct rdma_id_private *id_priv;
-	int ret;
+	int ret = 0;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
 	case RDMA_TRANSPORT_IB:
-		ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
-					 qp_attr_mask);
+		if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
+			ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
+		else
+			ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
+						 qp_attr_mask);
 		if (qp_attr->qp_state == IB_QPS_RTR)
 			qp_attr->rq_psn = id_priv->seq_num;
 		break;
 	case RDMA_TRANSPORT_IWARP:
-		ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
-					qp_attr_mask);
+		if (!id_priv->cm_id.iw) {
+			qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+			*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
+		} else
+			ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
+						 qp_attr_mask);
 		break;
 	default:
 		ret = -ENOSYS;
@@ -698,6 +779,19 @@ static void cma_release_port(struct rdma_id_private *id_priv)
 	mutex_unlock(&lock);
 }
 
+static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
+{
+	struct cma_multicast *mc;
+
+	while (!list_empty(&id_priv->mc_list)) {
+		mc = container_of(id_priv->mc_list.next,
+				  struct cma_multicast, list);
+		list_del(&mc->list);
+		ib_sa_free_multicast(mc->multicast.ib);
+		kfree(mc);
+	}
+}
+
 void rdma_destroy_id(struct rdma_cm_id *id)
 {
 	struct rdma_id_private *id_priv;
@@ -722,6 +816,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 		default:
 			break;
 		}
+		cma_leave_mc_groups(id_priv);
 		mutex_lock(&lock);
 		cma_detach_from_dev(id_priv);
 	}
@@ -972,7 +1067,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	memset(&event, 0, sizeof event);
 	offset = cma_user_data_offset(listen_id->id.ps);
 	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
-	if (listen_id->id.ps == RDMA_PS_UDP) {
+	if (cma_is_ud_ps(listen_id->id.ps)) {
 		conn_id = cma_new_udp_id(&listen_id->id, ib_event);
 		event.param.ud.private_data = ib_event->private_data + offset;
 		event.param.ud.private_data_len =
@@ -1725,7 +1820,7 @@ static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
 	struct rdma_bind_list *bind_list;
 	int port, ret;
 
-	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
+	bind_list = kmalloc(sizeof *bind_list, GFP_KERNEL);
 	if (!bind_list)
 		return -ENOMEM;
 
@@ -1847,6 +1942,9 @@ static int cma_get_port(struct rdma_id_private *id_priv)
 	case RDMA_PS_UDP:
 		ps = &udp_ps;
 		break;
+	case RDMA_PS_IPOIB:
+		ps = &ipoib_ps;
+		break;
 	default:
 		return -EPROTONOSUPPORT;
 	}
@@ -1961,7 +2059,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
 			event.status = ib_event->param.sidr_rep_rcvd.status;
 			break;
 		}
-		if (rep->qkey != RDMA_UD_QKEY) {
+		if (id_priv->qkey != rep->qkey) {
 			event.event = RDMA_CM_EVENT_UNREACHABLE;
 			event.status = -EINVAL;
 			break;
@@ -2160,7 +2258,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 
 	switch (rdma_node_get_transport(id->device->node_type)) {
 	case RDMA_TRANSPORT_IB:
-		if (id->ps == RDMA_PS_UDP)
+		if (cma_is_ud_ps(id->ps))
 			ret = cma_resolve_ib_udp(id_priv, conn_param);
 		else
 			ret = cma_connect_ib(id_priv, conn_param);
@@ -2256,7 +2354,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
 	rep.status = status;
 	if (status == IB_SIDR_SUCCESS) {
 		rep.qp_num = id_priv->qp_num;
-		rep.qkey = RDMA_UD_QKEY;
+		rep.qkey = id_priv->qkey;
 	}
 	rep.private_data = private_data;
 	rep.private_data_len = private_data_len;
@@ -2280,7 +2378,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 
 	switch (rdma_node_get_transport(id->device->node_type)) {
 	case RDMA_TRANSPORT_IB:
-		if (id->ps == RDMA_PS_UDP)
+		if (cma_is_ud_ps(id->ps))
 			ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
 						conn_param->private_data,
 						conn_param->private_data_len);
@@ -2341,7 +2439,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
 
 	switch (rdma_node_get_transport(id->device->node_type)) {
 	case RDMA_TRANSPORT_IB:
-		if (id->ps == RDMA_PS_UDP)
+		if (cma_is_ud_ps(id->ps))
 			ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
 						private_data, private_data_len);
 		else
@@ -2392,6 +2490,178 @@ int rdma_disconnect(struct rdma_cm_id *id)
 }
 EXPORT_SYMBOL(rdma_disconnect);
 
+static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
+{
+	struct rdma_id_private *id_priv;
+	struct cma_multicast *mc = multicast->context;
+	struct rdma_cm_event event;
+	int ret;
+
+	id_priv = mc->id_priv;
+	atomic_inc(&id_priv->dev_remove);
+	if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
+	    !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+		goto out;
+
+	if (!status && id_priv->id.qp)
+		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
+					 multicast->rec.mlid);
+
+	memset(&event, 0, sizeof event);
+	event.status = status;
+	event.param.ud.private_data = mc->context;
+	if (!status) {
+		event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
+		ib_init_ah_from_mcmember(id_priv->id.device,
+					 id_priv->id.port_num, &multicast->rec,
+					 &event.param.ud.ah_attr);
+		event.param.ud.qp_num = 0xFFFFFF;	/* well-known multicast QPN */
+		event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
+	} else
+		event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+
+	ret = id_priv->id.event_handler(&id_priv->id, &event);
+	if (ret) {
+		cma_exch(id_priv, CMA_DESTROYING);
+		cma_release_remove(id_priv);
+		rdma_destroy_id(&id_priv->id);
+		return 0;
+	}
+out:
+	cma_release_remove(id_priv);
+	return 0;
+}
+
+static void cma_set_mgid(struct rdma_id_private *id_priv,
+			 struct sockaddr *addr, union ib_gid *mgid)
+{
+	unsigned char mc_map[MAX_ADDR_LEN];
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+	struct sockaddr_in *sin = (struct sockaddr_in *) addr;
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
+
+	if (cma_any_addr(addr)) {
+		memset(mgid, 0, sizeof *mgid);
+	} else if ((addr->sa_family == AF_INET6) &&
+		   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
+								 0xFF10A01B)) {
+		/* IPv6 address is an SA assigned MGID. */
+		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
+	} else {
+		ip_ib_mc_map(sin->sin_addr.s_addr, mc_map);
+		if (id_priv->id.ps == RDMA_PS_UDP)
+			mc_map[7] = 0x01;	/* Use RDMA CM signature */
+		mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8;
+		mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr);
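+		/*
+		 * ip_ib_mc_map() built the 20-byte IPoIB hardware address;
+		 * bytes 4-19 hold the GID, so the MGID starts at offset 4.
+		 */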
+		*mgid = *(union ib_gid *) (mc_map + 4);
+	}
+}
+
+static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
+				 struct cma_multicast *mc)
+{
+	struct ib_sa_mcmember_rec rec;
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+	ib_sa_comp_mask comp_mask;
+	int ret;
+
+	ib_addr_get_mgid(dev_addr, &rec.mgid);
+	ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
+				     &rec.mgid, &rec);
+	if (ret)
+		return ret;
+
+	cma_set_mgid(id_priv, &mc->addr, &rec.mgid);
+	if (id_priv->id.ps == RDMA_PS_UDP)
+		rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+	ib_addr_get_sgid(dev_addr, &rec.port_gid);
+	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
+	rec.join_state = 1;
+
+	comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
+		    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
+		    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
+		    IB_SA_MCMEMBER_REC_FLOW_LABEL |
+		    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
+
+	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
+						id_priv->id.port_num, &rec,
+						comp_mask, GFP_KERNEL,
+						cma_ib_mc_handler, mc);
+	if (IS_ERR(mc->multicast.ib))
+		return PTR_ERR(mc->multicast.ib);
+
+	return 0;
+}
+
+int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
+			void *context)
+{
+	struct rdma_id_private *id_priv;
+	struct cma_multicast *mc;
+	int ret;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
+	    !cma_comp(id_priv, CMA_ADDR_RESOLVED))
+		return -EINVAL;
+
+	mc = kmalloc(sizeof *mc, GFP_KERNEL);
+	if (!mc)
+		return -ENOMEM;
+
+	memcpy(&mc->addr, addr, ip_addr_size(addr));
+	mc->context = context;
+	mc->id_priv = id_priv;
+
+	spin_lock_irq(&id_priv->lock);
+	list_add(&mc->list, &id_priv->mc_list);
+	spin_unlock_irq(&id_priv->lock);
+
+	switch (rdma_node_get_transport(id->device->node_type)) {
+	case RDMA_TRANSPORT_IB:
+		ret = cma_join_ib_multicast(id_priv, mc);
+		break;
+	default:
+		ret = -ENOSYS;
+		break;
+	}
+
+	if (ret) {
+		spin_lock_irq(&id_priv->lock);
+		list_del(&mc->list);
+		spin_unlock_irq(&id_priv->lock);
+		kfree(mc);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(rdma_join_multicast);
+
+void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
+{
+	struct rdma_id_private *id_priv;
+	struct cma_multicast *mc;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	spin_lock_irq(&id_priv->lock);
+	list_for_each_entry(mc, &id_priv->mc_list, list) {
+		if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
+			list_del(&mc->list);
+			spin_unlock_irq(&id_priv->lock);
+
+			if (id->qp)
+				ib_detach_mcast(id->qp,
+						&mc->multicast.ib->rec.mgid,
+						mc->multicast.ib->rec.mlid);
+			ib_sa_free_multicast(mc->multicast.ib);
+			kfree(mc);
+			return;
+		}
+	}
+	spin_unlock_irq(&id_priv->lock);
+}
+EXPORT_SYMBOL(rdma_leave_multicast);
+
 static void cma_add_one(struct ib_device *device)
 {
 	struct cma_device *cma_dev;
@@ -2522,6 +2792,7 @@ static void cma_cleanup(void)
 	idr_destroy(&sdp_ps);
 	idr_destroy(&tcp_ps);
 	idr_destroy(&udp_ps);
+	idr_destroy(&ipoib_ps);
 }
 
 module_init(cma_init);
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 8926a2bd4a8707d7b765432253a400c3719e9860..1d796e7c81991e6845c16ff3e6d1f323286afd92 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -301,7 +301,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 
 	{
 		struct ib_pool_fmr *fmr;
-		struct ib_fmr_attr attr = {
+		struct ib_fmr_attr fmr_attr = {
 			.max_pages  = params->max_pages_per_fmr,
 			.max_maps   = pool->max_remaps,
 			.page_shift = params->page_shift
@@ -321,7 +321,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 			fmr->ref_count        = 0;
 			INIT_HLIST_NODE(&fmr->cache_node);
 
-			fmr->fmr = ib_alloc_fmr(pd, params->access, &attr);
+			fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
 			if (IS_ERR(fmr->fmr)) {
 				printk(KERN_WARNING "fmr_create failed for FMR %d", i);
 				kfree(fmr);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 1039ad57d53b4b0f3ef2d252c51fb9a47bfd68f0..891d1fa7b2eb44a3fc4edbd5dc93dcb22b02c473 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -146,6 +146,12 @@ static int copy_private_data(struct iw_cm_event *event)
 	return 0;
 }
 
+static void free_cm_id(struct iwcm_id_private *cm_id_priv)
+{
+	dealloc_work_entries(cm_id_priv);
+	kfree(cm_id_priv);
+}
+
 /*
  * Release a reference on cm_id. If the last reference is being
  * released, enable the waiting thread (in iw_destroy_cm_id) to
@@ -153,21 +159,14 @@ static int copy_private_data(struct iw_cm_event *event)
  */
 static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
 {
-	int ret = 0;
-
 	BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
 	if (atomic_dec_and_test(&cm_id_priv->refcount)) {
 		BUG_ON(!list_empty(&cm_id_priv->work_list));
-		if (waitqueue_active(&cm_id_priv->destroy_comp.wait)) {
-			BUG_ON(cm_id_priv->state != IW_CM_STATE_DESTROYING);
-			BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY,
-					&cm_id_priv->flags));
-			ret = 1;
-		}
 		complete(&cm_id_priv->destroy_comp);
+		return 1;
 	}
 
-	return ret;
+	return 0;
 }
 
 static void add_ref(struct iw_cm_id *cm_id)
@@ -181,7 +180,11 @@ static void rem_ref(struct iw_cm_id *cm_id)
 {
 	struct iwcm_id_private *cm_id_priv;
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
-	iwcm_deref_id(cm_id_priv);
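+	/*
+	 * If the last reference is gone and the id was destroyed from the
+	 * event handler (IWCM_F_CALLBACK_DESTROY is set), free it here,
+	 * since iw_destroy_cm_id() will never be called for it.
+	 */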
+	if (iwcm_deref_id(cm_id_priv) &&
+	    test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
+		BUG_ON(!list_empty(&cm_id_priv->work_list));
+		free_cm_id(cm_id_priv);
+	}
 }
 
 static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
@@ -355,7 +358,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
 	case IW_CM_STATE_CONN_RECV:
 		/*
 		 * App called destroy before/without calling accept after
-		 * receiving connection request event notification.
+		 * receiving connection request event notification or
+		 * returned non-zero from the event callback function.
+		 * In either case, we must tell the provider to reject.
 		 */
 		cm_id_priv->state = IW_CM_STATE_DESTROYING;
 		break;
@@ -391,9 +396,7 @@ void iw_destroy_cm_id(struct iw_cm_id *cm_id)
 
 	wait_for_completion(&cm_id_priv->destroy_comp);
 
-	dealloc_work_entries(cm_id_priv);
-
-	kfree(cm_id_priv);
+	free_cm_id(cm_id_priv);
 }
 EXPORT_SYMBOL(iw_destroy_cm_id);
 
@@ -647,10 +650,11 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
 	/* Call the client CM handler */
 	ret = cm_id->cm_handler(cm_id, iw_event);
 	if (ret) {
+		iw_cm_reject(cm_id, NULL, 0);
 		set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
 		destroy_cm_id(cm_id);
 		if (atomic_read(&cm_id_priv->refcount)==0)
-			kfree(cm_id);
+			free_cm_id(cm_id_priv);
 	}
 
 out:
@@ -854,13 +858,12 @@ static void cm_work_handler(struct work_struct *_work)
 			destroy_cm_id(&cm_id_priv->id);
 		}
 		BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
-		if (iwcm_deref_id(cm_id_priv))
-			return;
-
-		if (atomic_read(&cm_id_priv->refcount)==0 &&
-		    test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
-			dealloc_work_entries(cm_id_priv);
-			kfree(cm_id_priv);
+		if (iwcm_deref_id(cm_id_priv)) {
+			if (test_bit(IWCM_F_CALLBACK_DESTROY,
+				     &cm_id_priv->flags)) {
+				BUG_ON(!list_empty(&cm_id_priv->work_list));
+				free_cm_id(cm_id_priv);
+			}
 			return;
 		}
 		spin_lock_irqsave(&cm_id_priv->lock, flags);
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
new file mode 100644
index 0000000000000000000000000000000000000000..4a579b3a1c90c87105a4400a1cd467c695f6a211
--- /dev/null
+++ b/drivers/infiniband/core/multicast.c
@@ -0,0 +1,837 @@
+/*
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/completion.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/bitops.h>
+#include <linux/random.h>
+
+#include <rdma/ib_cache.h>
+#include "sa.h"
+
+static void mcast_add_one(struct ib_device *device);
+static void mcast_remove_one(struct ib_device *device);
+
+static struct ib_client mcast_client = {
+	.name   = "ib_multicast",
+	.add    = mcast_add_one,
+	.remove = mcast_remove_one
+};
+
+static struct ib_sa_client	sa_client;
+static struct workqueue_struct	*mcast_wq;
+static union ib_gid mgid0;
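+/* The all-zero MGID requests an SA-assigned MGID when joining. */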
+
+struct mcast_device;
+
+struct mcast_port {
+	struct mcast_device	*dev;
+	spinlock_t		lock;
+	struct rb_root		table;
+	atomic_t		refcount;
+	struct completion	comp;
+	u8			port_num;
+};
+
+struct mcast_device {
+	struct ib_device	*device;
+	struct ib_event_handler	event_handler;
+	int			start_port;
+	int			end_port;
+	struct mcast_port	port[0];
+};
+
+enum mcast_state {
+	MCAST_IDLE,
+	MCAST_JOINING,
+	MCAST_MEMBER,
+	MCAST_BUSY,
+	MCAST_ERROR
+};
+
+struct mcast_member;
+
+struct mcast_group {
+	struct ib_sa_mcmember_rec rec;
+	struct rb_node		node;
+	struct mcast_port	*port;
+	spinlock_t		lock;
+	struct work_struct	work;
+	struct list_head	pending_list;
+	struct list_head	active_list;
+	struct mcast_member	*last_join;
+	int			members[3];
+	atomic_t		refcount;
+	enum mcast_state	state;
+	struct ib_sa_query	*query;
+	int			query_id;
+};
+
+struct mcast_member {
+	struct ib_sa_multicast	multicast;
+	struct ib_sa_client	*client;
+	struct mcast_group	*group;
+	struct list_head	list;
+	enum mcast_state	state;
+	atomic_t		refcount;
+	struct completion	comp;
+};
+
+static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
+			 void *context);
+static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
+			  void *context);
+
+static struct mcast_group *mcast_find(struct mcast_port *port,
+				      union ib_gid *mgid)
+{
+	struct rb_node *node = port->table.rb_node;
+	struct mcast_group *group;
+	int ret;
+
+	while (node) {
+		group = rb_entry(node, struct mcast_group, node);
+		ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
+		if (!ret)
+			return group;
+
+		if (ret < 0)
+			node = node->rb_left;
+		else
+			node = node->rb_right;
+	}
+	return NULL;
+}
+
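+/*
+ * Insert a group into the port's rb-tree, keyed by MGID.  allow_duplicates
+ * permits multiple groups with the same key; joins issued with the zero
+ * MGID stay keyed at zero until the SA assigns a real MGID.
+ */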
+static struct mcast_group *mcast_insert(struct mcast_port *port,
+					struct mcast_group *group,
+					int allow_duplicates)
+{
+	struct rb_node **link = &port->table.rb_node;
+	struct rb_node *parent = NULL;
+	struct mcast_group *cur_group;
+	int ret;
+
+	while (*link) {
+		parent = *link;
+		cur_group = rb_entry(parent, struct mcast_group, node);
+
+		ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
+			     sizeof group->rec.mgid);
+		if (ret < 0)
+			link = &(*link)->rb_left;
+		else if (ret > 0)
+			link = &(*link)->rb_right;
+		else if (allow_duplicates)
+			link = &(*link)->rb_left;
+		else
+			return cur_group;
+	}
+	rb_link_node(&group->node, parent, link);
+	rb_insert_color(&group->node, &port->table);
+	return NULL;
+}
+
+static void deref_port(struct mcast_port *port)
+{
+	if (atomic_dec_and_test(&port->refcount))
+		complete(&port->comp);
+}
+
+static void release_group(struct mcast_group *group)
+{
+	struct mcast_port *port = group->port;
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	if (atomic_dec_and_test(&group->refcount)) {
+		rb_erase(&group->node, &port->table);
+		spin_unlock_irqrestore(&port->lock, flags);
+		kfree(group);
+		deref_port(port);
+	} else
+		spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void deref_member(struct mcast_member *member)
+{
+	if (atomic_dec_and_test(&member->refcount))
+		complete(&member->comp);
+}
+
+static void queue_join(struct mcast_member *member)
+{
+	struct mcast_group *group = member->group;
+	unsigned long flags;
+
+	spin_lock_irqsave(&group->lock, flags);
+	/* FIFO order: the head of pending_list is the join in progress */
+	list_add_tail(&member->list, &group->pending_list);
+	if (group->state == MCAST_IDLE) {
+		group->state = MCAST_BUSY;
+		atomic_inc(&group->refcount);
+		queue_work(mcast_wq, &group->work);
+	}
+	spin_unlock_irqrestore(&group->lock, flags);
+}
+
+/*
+ * A multicast group has three types of members: full member, non-member, and
+ * send-only member.  We need to keep track of the number of members of each
+ * type based on their join state: bit 0 of the join state marks a full
+ * member, bit 1 a non-member, and bit 2 a send-only member.  Adjust the
+ * number of members that belong to the specified join states.
+ */
+static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
+{
+	int i;
+
+	for (i = 0; i < 3; i++, join_state >>= 1)
+		if (join_state & 0x1)
+			group->members[i] += inc;
+}
+
+/*
+ * If a multicast group has zero members left for a particular join state, but
+ * the group is still joined with the SA for that state, we need to leave that
+ * join state.  Determine which join states we still belong to, but that no
+ * longer have any active members.  For example, if we joined as both a full
+ * member (0x1) and a non-member (0x2) and all non-member users have since
+ * left, this returns 0x2.
+ */
+static u8 get_leave_state(struct mcast_group *group)
+{
+	u8 leave_state = 0;
+	int i;
+
+	for (i = 0; i < 3; i++)
+		if (!group->members[i])
+			leave_state |= (0x1 << i);
+
+	return leave_state & group->rec.join_state;
+}
+
+static int check_selector(ib_sa_comp_mask comp_mask,
+			  ib_sa_comp_mask selector_mask,
+			  ib_sa_comp_mask value_mask,
+			  u8 selector, u8 src_value, u8 dst_value)
+{
+	int err;
+
+	if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
+		return 0;
+
+	switch (selector) {
+	case IB_SA_GT:
+		err = (src_value <= dst_value);
+		break;
+	case IB_SA_LT:
+		err = (src_value >= dst_value);
+		break;
+	case IB_SA_EQ:
+		err = (src_value != dst_value);
+		break;
+	default:
+		err = 0;
+		break;
+	}
+
+	return err;
+}
+
+static int cmp_rec(struct ib_sa_mcmember_rec *src,
+		   struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask)
+{
+	/* MGID must already match */
+
+	if (comp_mask & IB_SA_MCMEMBER_REC_PORT_GID &&
+	    memcmp(&src->port_gid, &dst->port_gid, sizeof src->port_gid))
+		return -EINVAL;
+	if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
+		return -EINVAL;
+	if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
+		return -EINVAL;
+	if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
+			   IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector,
+			   src->mtu, dst->mtu))
+		return -EINVAL;
+	if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
+	    src->traffic_class != dst->traffic_class)
+		return -EINVAL;
+	if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
+		return -EINVAL;
+	if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
+			   IB_SA_MCMEMBER_REC_RATE, dst->rate_selector,
+			   src->rate, dst->rate))
+		return -EINVAL;
+	if (check_selector(comp_mask,
+			   IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
+			   IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
+			   dst->packet_life_time_selector,
+			   src->packet_life_time, dst->packet_life_time))
+		return -EINVAL;
+	if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl)
+		return -EINVAL;
+	if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
+	    src->flow_label != dst->flow_label)
+		return -EINVAL;
+	if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
+	    src->hop_limit != dst->hop_limit)
+		return -EINVAL;
+	if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && src->scope != dst->scope)
+		return -EINVAL;
+
+	/* join_state checked separately, proxy_join ignored */
+
+	return 0;
+}
+
+static int send_join(struct mcast_group *group, struct mcast_member *member)
+{
+	struct mcast_port *port = group->port;
+	int ret;
+
+	group->last_join = member;
+	ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
+				       port->port_num, IB_MGMT_METHOD_SET,
+				       &member->multicast.rec,
+				       member->multicast.comp_mask,
+				       3000, GFP_KERNEL, join_handler, group,
+				       &group->query);
+	if (ret >= 0) {
+		group->query_id = ret;
+		ret = 0;
+	}
+	return ret;
+}
+
+static int send_leave(struct mcast_group *group, u8 leave_state)
+{
+	struct mcast_port *port = group->port;
+	struct ib_sa_mcmember_rec rec;
+	int ret;
+
+	rec = group->rec;
+	rec.join_state = leave_state;
+
+	ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device,
+				       port->port_num, IB_SA_METHOD_DELETE, &rec,
+				       IB_SA_MCMEMBER_REC_MGID     |
+				       IB_SA_MCMEMBER_REC_PORT_GID |
+				       IB_SA_MCMEMBER_REC_JOIN_STATE,
+				       3000, GFP_KERNEL, leave_handler,
+				       group, &group->query);
+	if (ret >= 0) {
+		group->query_id = ret;
+		ret = 0;
+	}
+	return ret;
+}
+
+static void join_group(struct mcast_group *group, struct mcast_member *member,
+		       u8 join_state)
+{
+	member->state = MCAST_MEMBER;
+	adjust_membership(group, join_state, 1);
+	group->rec.join_state |= join_state;
+	member->multicast.rec = group->rec;
+	member->multicast.rec.join_state = join_state;
+	list_move(&member->list, &group->active_list);
+}
+
+static int fail_join(struct mcast_group *group, struct mcast_member *member,
+		     int status)
+{
+	spin_lock_irq(&group->lock);
+	list_del_init(&member->list);
+	spin_unlock_irq(&group->lock);
+	return member->multicast.callback(status, &member->multicast);
+}
+
+static void process_group_error(struct mcast_group *group)
+{
+	struct mcast_member *member;
+	int ret;
+
+	spin_lock_irq(&group->lock);
+	while (!list_empty(&group->active_list)) {
+		member = list_entry(group->active_list.next,
+				    struct mcast_member, list);
+		atomic_inc(&member->refcount);
+		list_del_init(&member->list);
+		adjust_membership(group, member->multicast.rec.join_state, -1);
+		member->state = MCAST_ERROR;
+		spin_unlock_irq(&group->lock);
+
+		ret = member->multicast.callback(-ENETRESET,
+						 &member->multicast);
+		deref_member(member);
+		if (ret)
+			ib_sa_free_multicast(&member->multicast);
+		spin_lock_irq(&group->lock);
+	}
+
+	group->rec.join_state = 0;
+	group->state = MCAST_BUSY;
+	spin_unlock_irq(&group->lock);
+}
+
+static void mcast_work_handler(struct work_struct *work)
+{
+	struct mcast_group *group;
+	struct mcast_member *member;
+	struct ib_sa_multicast *multicast;
+	int status, ret;
+	u8 join_state;
+
+	group = container_of(work, typeof(*group), work);
+retest:
+	spin_lock_irq(&group->lock);
+	while (!list_empty(&group->pending_list) ||
+	       (group->state == MCAST_ERROR)) {
+
+		if (group->state == MCAST_ERROR) {
+			spin_unlock_irq(&group->lock);
+			process_group_error(group);
+			goto retest;
+		}
+
+		member = list_entry(group->pending_list.next,
+				    struct mcast_member, list);
+		multicast = &member->multicast;
+		join_state = multicast->rec.join_state;
+		atomic_inc(&member->refcount);
+
+		if (join_state == (group->rec.join_state & join_state)) {
+			status = cmp_rec(&group->rec, &multicast->rec,
+					 multicast->comp_mask);
+			if (!status)
+				join_group(group, member, join_state);
+			else
+				list_del_init(&member->list);
+			spin_unlock_irq(&group->lock);
+			ret = multicast->callback(status, multicast);
+		} else {
+			spin_unlock_irq(&group->lock);
+			status = send_join(group, member);
+			if (!status) {
+				deref_member(member);
+				return;
+			}
+			ret = fail_join(group, member, status);
+		}
+
+		deref_member(member);
+		if (ret)
+			ib_sa_free_multicast(&member->multicast);
+		spin_lock_irq(&group->lock);
+	}
+
+	join_state = get_leave_state(group);
+	if (join_state) {
+		group->rec.join_state &= ~join_state;
+		spin_unlock_irq(&group->lock);
+		if (send_leave(group, join_state))
+			goto retest;
+	} else {
+		group->state = MCAST_IDLE;
+		spin_unlock_irq(&group->lock);
+		release_group(group);
+	}
+}
+
+/*
+ * Fail a join request if it is still active, i.e. still at the head of the
+ * pending queue.
+ */
+static void process_join_error(struct mcast_group *group, int status)
+{
+	struct mcast_member *member;
+	int ret;
+
+	spin_lock_irq(&group->lock);
+	member = list_entry(group->pending_list.next,
+			    struct mcast_member, list);
+	if (group->last_join == member) {
+		atomic_inc(&member->refcount);
+		list_del_init(&member->list);
+		spin_unlock_irq(&group->lock);
+		ret = member->multicast.callback(status, &member->multicast);
+		deref_member(member);
+		if (ret)
+			ib_sa_free_multicast(&member->multicast);
+	} else
+		spin_unlock_irq(&group->lock);
+}
+
+static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
+			 void *context)
+{
+	struct mcast_group *group = context;
+
+	if (status)
+		process_join_error(group, status);
+	else {
+		spin_lock_irq(&group->port->lock);
+		if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
+			/*
+			 * The join used the zero MGID, so the SA assigned
+			 * one: re-key the group in the port table under the
+			 * assigned MGID.  Test the old MGID before it is
+			 * overwritten by the response record.
+			 */
+			group->rec = *rec;
+			rb_erase(&group->node, &group->port->table);
+			mcast_insert(group->port, group, 1);
+		} else
+			group->rec = *rec;
+		spin_unlock_irq(&group->port->lock);
+	}
+	mcast_work_handler(&group->work);
+}
+
+static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
+			  void *context)
+{
+	struct mcast_group *group = context;
+
+	mcast_work_handler(&group->work);
+}
+
+static struct mcast_group *acquire_group(struct mcast_port *port,
+					 union ib_gid *mgid, gfp_t gfp_mask)
+{
+	struct mcast_group *group, *cur_group;
+	unsigned long flags;
+	int is_mgid0;
+
+	is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
+	if (!is_mgid0) {
+		spin_lock_irqsave(&port->lock, flags);
+		group = mcast_find(port, mgid);
+		if (group)
+			goto found;
+		spin_unlock_irqrestore(&port->lock, flags);
+	}
+
+	group = kzalloc(sizeof *group, gfp_mask);
+	if (!group)
+		return NULL;
+
+	group->port = port;
+	group->rec.mgid = *mgid;
+	INIT_LIST_HEAD(&group->pending_list);
+	INIT_LIST_HEAD(&group->active_list);
+	INIT_WORK(&group->work, mcast_work_handler);
+	spin_lock_init(&group->lock);
+
+	spin_lock_irqsave(&port->lock, flags);
+	cur_group = mcast_insert(port, group, is_mgid0);
+	if (cur_group) {
+		kfree(group);
+		group = cur_group;
+	} else
+		atomic_inc(&port->refcount);
+found:
+	atomic_inc(&group->refcount);
+	spin_unlock_irqrestore(&port->lock, flags);
+	return group;
+}
+
+/*
+ * We serialize all join requests to a single group to make our lives much
+ * easier.  Otherwise, two users could try to join the same group
+ * simultaneously, with different configurations, one could leave while the
+ * join is in progress, etc., which makes locking around error recovery
+ * difficult.
+ */
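+/*
+ * If the join callback returns a non-zero value, the membership is released
+ * on the user's behalf by a call to ib_sa_free_multicast().
+ */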
+struct ib_sa_multicast *
+ib_sa_join_multicast(struct ib_sa_client *client,
+		     struct ib_device *device, u8 port_num,
+		     struct ib_sa_mcmember_rec *rec,
+		     ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
+		     int (*callback)(int status,
+				     struct ib_sa_multicast *multicast),
+		     void *context)
+{
+	struct mcast_device *dev;
+	struct mcast_member *member;
+	struct ib_sa_multicast *multicast;
+	int ret;
+
+	dev = ib_get_client_data(device, &mcast_client);
+	if (!dev)
+		return ERR_PTR(-ENODEV);
+
+	member = kmalloc(sizeof *member, gfp_mask);
+	if (!member)
+		return ERR_PTR(-ENOMEM);
+
+	ib_sa_client_get(client);
+	member->client = client;
+	member->multicast.rec = *rec;
+	member->multicast.comp_mask = comp_mask;
+	member->multicast.callback = callback;
+	member->multicast.context = context;
+	init_completion(&member->comp);
+	atomic_set(&member->refcount, 1);
+	member->state = MCAST_JOINING;
+
+	member->group = acquire_group(&dev->port[port_num - dev->start_port],
+				      &rec->mgid, gfp_mask);
+	if (!member->group) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/*
+	 * The user will get the multicast structure in their callback.  They
+	 * could then free the multicast structure before we can return from
+	 * this routine.  So we save the pointer to return before queuing
+	 * any callback.
+	 */
+	multicast = &member->multicast;
+	queue_join(member);
+	return multicast;
+
+err:
+	ib_sa_client_put(client);
+	kfree(member);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_sa_join_multicast);
+
+void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
+{
+	struct mcast_member *member;
+	struct mcast_group *group;
+
+	member = container_of(multicast, struct mcast_member, multicast);
+	group = member->group;
+
+	spin_lock_irq(&group->lock);
+	if (member->state == MCAST_MEMBER)
+		adjust_membership(group, multicast->rec.join_state, -1);
+
+	list_del_init(&member->list);
+
+	if (group->state == MCAST_IDLE) {
+		group->state = MCAST_BUSY;
+		spin_unlock_irq(&group->lock);
+		/* Continue to hold reference on group until callback */
+		queue_work(mcast_wq, &group->work);
+	} else {
+		spin_unlock_irq(&group->lock);
+		release_group(group);
+	}
+
+	deref_member(member);
+	wait_for_completion(&member->comp);
+	ib_sa_client_put(member->client);
+	kfree(member);
+}
+EXPORT_SYMBOL(ib_sa_free_multicast);
+
+int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
+			   union ib_gid *mgid, struct ib_sa_mcmember_rec *rec)
+{
+	struct mcast_device *dev;
+	struct mcast_port *port;
+	struct mcast_group *group;
+	unsigned long flags;
+	int ret = 0;
+
+	dev = ib_get_client_data(device, &mcast_client);
+	if (!dev)
+		return -ENODEV;
+
+	port = &dev->port[port_num - dev->start_port];
+	spin_lock_irqsave(&port->lock, flags);
+	group = mcast_find(port, mgid);
+	if (group)
+		*rec = group->rec;
+	else
+		ret = -EADDRNOTAVAIL;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
+
+int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
+			     struct ib_sa_mcmember_rec *rec,
+			     struct ib_ah_attr *ah_attr)
+{
+	int ret;
+	u16 gid_index;
+	u8 p;
+
+	ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index);
+	if (ret)
+		return ret;
+
+	memset(ah_attr, 0, sizeof *ah_attr);
+	ah_attr->dlid = be16_to_cpu(rec->mlid);
+	ah_attr->sl = rec->sl;
+	ah_attr->port_num = port_num;
+	ah_attr->static_rate = rec->rate;
+
+	ah_attr->ah_flags = IB_AH_GRH;
+	ah_attr->grh.dgid = rec->mgid;
+
+	ah_attr->grh.sgid_index = (u8) gid_index;
+	ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
+	ah_attr->grh.hop_limit = rec->hop_limit;
+	ah_attr->grh.traffic_class = rec->traffic_class;
+
+	return 0;
+}
+EXPORT_SYMBOL(ib_init_ah_from_mcmember);
+
+static void mcast_groups_lost(struct mcast_port *port)
+{
+	struct mcast_group *group;
+	struct rb_node *node;
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	for (node = rb_first(&port->table); node; node = rb_next(node)) {
+		group = rb_entry(node, struct mcast_group, node);
+		spin_lock(&group->lock);
+		if (group->state == MCAST_IDLE) {
+			atomic_inc(&group->refcount);
+			queue_work(mcast_wq, &group->work);
+		}
+		group->state = MCAST_ERROR;
+		spin_unlock(&group->lock);
+	}
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void mcast_event_handler(struct ib_event_handler *handler,
+				struct ib_event *event)
+{
+	struct mcast_device *dev;
+
+	dev = container_of(handler, struct mcast_device, event_handler);
+
+	switch (event->event) {
+	case IB_EVENT_PORT_ERR:
+	case IB_EVENT_LID_CHANGE:
+	case IB_EVENT_SM_CHANGE:
+	case IB_EVENT_CLIENT_REREGISTER:
+		mcast_groups_lost(&dev->port[event->element.port_num -
+					     dev->start_port]);
+		break;
+	default:
+		break;
+	}
+}
+
+static void mcast_add_one(struct ib_device *device)
+{
+	struct mcast_device *dev;
+	struct mcast_port *port;
+	int i;
+
+	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
+	dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
+		      GFP_KERNEL);
+	if (!dev)
+		return;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH)
+		dev->start_port = dev->end_port = 0;
+	else {
+		dev->start_port = 1;
+		dev->end_port = device->phys_port_cnt;
+	}
+
+	for (i = 0; i <= dev->end_port - dev->start_port; i++) {
+		port = &dev->port[i];
+		port->dev = dev;
+		port->port_num = dev->start_port + i;
+		spin_lock_init(&port->lock);
+		port->table = RB_ROOT;
+		init_completion(&port->comp);
+		atomic_set(&port->refcount, 1);
+	}
+
+	dev->device = device;
+	ib_set_client_data(device, &mcast_client, dev);
+
+	INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
+	ib_register_event_handler(&dev->event_handler);
+}
+
+static void mcast_remove_one(struct ib_device *device)
+{
+	struct mcast_device *dev;
+	struct mcast_port *port;
+	int i;
+
+	dev = ib_get_client_data(device, &mcast_client);
+	if (!dev)
+		return;
+
+	ib_unregister_event_handler(&dev->event_handler);
+	flush_workqueue(mcast_wq);
+
+	for (i = 0; i <= dev->end_port - dev->start_port; i++) {
+		port = &dev->port[i];
+		deref_port(port);
+		wait_for_completion(&port->comp);
+	}
+
+	kfree(dev);
+}
+
+int mcast_init(void)
+{
+	int ret;
+
+	mcast_wq = create_singlethread_workqueue("ib_mcast");
+	if (!mcast_wq)
+		return -ENOMEM;
+
+	ib_sa_register_client(&sa_client);
+
+	ret = ib_register_client(&mcast_client);
+	if (ret)
+		goto err;
+	return 0;
+
+err:
+	ib_sa_unregister_client(&sa_client);
+	destroy_workqueue(mcast_wq);
+	return ret;
+}
+
+void mcast_cleanup(void)
+{
+	ib_unregister_client(&mcast_client);
+	ib_sa_unregister_client(&sa_client);
+	destroy_workqueue(mcast_wq);
+}
diff --git a/drivers/infiniband/core/sa.h b/drivers/infiniband/core/sa.h
new file mode 100644
index 0000000000000000000000000000000000000000..24c93fd320fb8716fb605c2c89afb9f9fd17ce6a
--- /dev/null
+++ b/drivers/infiniband/core/sa.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef SA_H
+#define SA_H
+
+#include <rdma/ib_sa.h>
+
+static inline void ib_sa_client_get(struct ib_sa_client *client)
+{
+	atomic_inc(&client->users);
+}
+
+static inline void ib_sa_client_put(struct ib_sa_client *client)
+{
+	if (atomic_dec_and_test(&client->users))
+		complete(&client->comp);
+}
+
+int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
+			     struct ib_device *device, u8 port_num,
+			     u8 method,
+			     struct ib_sa_mcmember_rec *rec,
+			     ib_sa_comp_mask comp_mask,
+			     int timeout_ms, gfp_t gfp_mask,
+			     void (*callback)(int status,
+					      struct ib_sa_mcmember_rec *resp,
+					      void *context),
+			     void *context,
+			     struct ib_sa_query **sa_query);
+
+int mcast_init(void);
+void mcast_cleanup(void);
+
+#endif /* SA_H */
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index e45afba75341ee86d80464406564c3492366edbd..68db633711c5dfd58711c9018ea11d599a9cc3c0 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -47,8 +47,8 @@
 #include <linux/workqueue.h>
 
 #include <rdma/ib_pack.h>
-#include <rdma/ib_sa.h>
 #include <rdma/ib_cache.h>
+#include "sa.h"
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("InfiniBand subnet administration query support");
@@ -425,17 +425,6 @@ void ib_sa_register_client(struct ib_sa_client *client)
 }
 EXPORT_SYMBOL(ib_sa_register_client);
 
-static inline void ib_sa_client_get(struct ib_sa_client *client)
-{
-	atomic_inc(&client->users);
-}
-
-static inline void ib_sa_client_put(struct ib_sa_client *client)
-{
-	if (atomic_dec_and_test(&client->users))
-		complete(&client->comp);
-}
-
 void ib_sa_unregister_client(struct ib_sa_client *client)
 {
 	ib_sa_client_put(client);
@@ -482,6 +471,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
 	ah_attr->sl = rec->sl;
 	ah_attr->src_path_bits = be16_to_cpu(rec->slid) & 0x7f;
 	ah_attr->port_num = port_num;
+	ah_attr->static_rate = rec->rate;
 
 	if (rec->hop_limit > 1) {
 		ah_attr->ah_flags = IB_AH_GRH;
@@ -901,7 +891,6 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
 	kfree(query);
 	return ret;
 }
-EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
 
 static void send_handler(struct ib_mad_agent *agent,
 			 struct ib_mad_send_wc *mad_send_wc)
@@ -1053,14 +1042,27 @@ static int __init ib_sa_init(void)
 	get_random_bytes(&tid, sizeof tid);
 
 	ret = ib_register_client(&sa_client);
-	if (ret)
+	if (ret) {
 		printk(KERN_ERR "Couldn't register ib_sa client\n");
+		goto err1;
+	}
+
+	ret = mcast_init();
+	if (ret) {
+		printk(KERN_ERR "Couldn't initialize multicast handling\n");
+		goto err2;
+	}
 
+	return 0;
+err2:
+	ib_unregister_client(&sa_client);
+err1:
 	return ret;
 }
 
 static void __exit ib_sa_cleanup(void)
 {
+	mcast_cleanup();
 	ib_unregister_client(&sa_client);
 	idr_destroy(&query_idr);
 }
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 709323c14c5d3fff1fba12f50c6290e0a17d6a4a..000c086bf2e9bf9d6ec0e8d90e7d9c5cec7fc4ec 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -714,8 +714,6 @@ int ib_device_register_sysfs(struct ib_device *device)
 		if (ret)
 			goto err_put;
 	} else {
-		int i;
-
 		for (i = 1; i <= device->phys_port_cnt; ++i) {
 			ret = add_port(device, i);
 			if (ret)
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 6b81b98961c7b8014120cfd79cf939f478778e12..b516b93b85505b5a243a88ad00a670defab4c452 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -70,10 +70,24 @@ struct ucma_context {
 	u64			uid;
 
 	struct list_head	list;
+	struct list_head	mc_list;
+};
+
+struct ucma_multicast {
+	struct ucma_context	*ctx;
+	int			id;
+	int			events_reported;
+
+	u64			uid;
+	struct list_head	list;
+	struct sockaddr		addr;
+	u8			pad[sizeof(struct sockaddr_in6) -
+				    sizeof(struct sockaddr)];
 };
 
 struct ucma_event {
 	struct ucma_context	*ctx;
+	struct ucma_multicast	*mc;
 	struct list_head	list;
 	struct rdma_cm_id	*cm_id;
 	struct rdma_ucm_event_resp resp;
@@ -81,6 +95,7 @@ struct ucma_event {
 
 static DEFINE_MUTEX(mut);
 static DEFINE_IDR(ctx_idr);
+static DEFINE_IDR(multicast_idr);
 
 static inline struct ucma_context *_ucma_find_context(int id,
 						      struct ucma_file *file)
@@ -124,6 +139,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
 
 	atomic_set(&ctx->ref, 1);
 	init_completion(&ctx->comp);
+	INIT_LIST_HEAD(&ctx->mc_list);
 	ctx->file = file;
 
 	do {
@@ -147,6 +163,37 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
 	return NULL;
 }
 
+static struct ucma_multicast *ucma_alloc_multicast(struct ucma_context *ctx)
+{
+	struct ucma_multicast *mc;
+	int ret;
+
+	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
+	if (!mc)
+		return NULL;
+
+	do {
+		ret = idr_pre_get(&multicast_idr, GFP_KERNEL);
+		if (!ret)
+			goto error;
+
+		mutex_lock(&mut);
+		ret = idr_get_new(&multicast_idr, mc, &mc->id);
+		mutex_unlock(&mut);
+	} while (ret == -EAGAIN);
+
+	if (ret)
+		goto error;
+
+	mc->ctx = ctx;
+	list_add_tail(&mc->list, &ctx->mc_list);
+	return mc;
+
+error:
+	kfree(mc);
+	return NULL;
+}
+
 static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
 				 struct rdma_conn_param *src)
 {
@@ -180,8 +227,19 @@ static void ucma_set_event_context(struct ucma_context *ctx,
 				   struct ucma_event *uevent)
 {
 	uevent->ctx = ctx;
-	uevent->resp.uid = ctx->uid;
-	uevent->resp.id = ctx->id;
+	switch (event->event) {
+	case RDMA_CM_EVENT_MULTICAST_JOIN:
+	case RDMA_CM_EVENT_MULTICAST_ERROR:
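+		/*
+		 * For multicast events, ud.private_data carries the context
+		 * pointer passed to rdma_join_multicast(), i.e. the
+		 * ucma_multicast for this membership.
+		 */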
+		uevent->mc = (struct ucma_multicast *)
+			     event->param.ud.private_data;
+		uevent->resp.uid = uevent->mc->uid;
+		uevent->resp.id = uevent->mc->id;
+		break;
+	default:
+		uevent->resp.uid = ctx->uid;
+		uevent->resp.id = ctx->id;
+		break;
+	}
 }
 
 static int ucma_event_handler(struct rdma_cm_id *cm_id,
@@ -199,7 +257,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
 	ucma_set_event_context(ctx, event, uevent);
 	uevent->resp.event = event->event;
 	uevent->resp.status = event->status;
-	if (cm_id->ps == RDMA_PS_UDP)
+	if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB)
 		ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
 	else
 		ucma_copy_conn_event(&uevent->resp.param.conn,
@@ -290,6 +348,8 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
 
 	list_del(&uevent->list);
 	uevent->ctx->events_reported++;
+	if (uevent->mc)
+		uevent->mc->events_reported++;
 	kfree(uevent);
 done:
 	mutex_unlock(&file->mut);
@@ -342,6 +402,19 @@ static ssize_t ucma_create_id(struct ucma_file *file,
 	return ret;
 }
 
+static void ucma_cleanup_multicast(struct ucma_context *ctx)
+{
+	struct ucma_multicast *mc, *tmp;
+
+	mutex_lock(&mut);
+	list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
+		list_del(&mc->list);
+		idr_remove(&multicast_idr, mc->id);
+		kfree(mc);
+	}
+	mutex_unlock(&mut);
+}
+
 static void ucma_cleanup_events(struct ucma_context *ctx)
 {
 	struct ucma_event *uevent, *tmp;
@@ -360,6 +433,19 @@ static void ucma_cleanup_events(struct ucma_context *ctx)
 	}
 }
 
+static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
+{
+	struct ucma_event *uevent, *tmp;
+
+	list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
+		if (uevent->mc != mc)
+			continue;
+
+		list_del(&uevent->list);
+		kfree(uevent);
+	}
+}
+
 static int ucma_free_ctx(struct ucma_context *ctx)
 {
 	int events_reported;
@@ -367,6 +453,8 @@ static int ucma_free_ctx(struct ucma_context *ctx)
 	/* No new events will be generated after destroying the id. */
 	rdma_destroy_id(ctx->cm_id);
 
+	ucma_cleanup_multicast(ctx);
+
 	/* Cleanup events not yet reported to the user. */
 	mutex_lock(&ctx->file->mut);
 	ucma_cleanup_events(ctx);
@@ -731,6 +819,114 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
 	return ret;
 }
 
+static ssize_t ucma_join_multicast(struct ucma_file *file,
+				   const char __user *inbuf,
+				   int in_len, int out_len)
+{
+	struct rdma_ucm_join_mcast cmd;
+	struct rdma_ucm_create_id_resp resp;
+	struct ucma_context *ctx;
+	struct ucma_multicast *mc;
+	int ret;
+
+	if (out_len < sizeof(resp))
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	ctx = ucma_get_ctx(file, cmd.id);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	mutex_lock(&file->mut);
+	mc = ucma_alloc_multicast(ctx);
+	if (!mc) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	mc->uid = cmd.uid;
+	memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr);
+	ret = rdma_join_multicast(ctx->cm_id, &mc->addr, mc);
+	if (ret)
+		goto err2;
+
+	resp.id = mc->id;
+	if (copy_to_user((void __user *)(unsigned long)cmd.response,
+			 &resp, sizeof(resp))) {
+		ret = -EFAULT;
+		goto err3;
+	}
+
+	mutex_unlock(&file->mut);
+	ucma_put_ctx(ctx);
+	return 0;
+
+err3:
+	rdma_leave_multicast(ctx->cm_id, &mc->addr);
+	ucma_cleanup_mc_events(mc);
+err2:
+	mutex_lock(&mut);
+	idr_remove(&multicast_idr, mc->id);
+	mutex_unlock(&mut);
+	list_del(&mc->list);
+	kfree(mc);
+err1:
+	mutex_unlock(&file->mut);
+	ucma_put_ctx(ctx);
+	return ret;
+}
+
+static ssize_t ucma_leave_multicast(struct ucma_file *file,
+				    const char __user *inbuf,
+				    int in_len, int out_len)
+{
+	struct rdma_ucm_destroy_id cmd;
+	struct rdma_ucm_destroy_id_resp resp;
+	struct ucma_multicast *mc;
+	int ret = 0;
+
+	if (out_len < sizeof(resp))
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+		return -EFAULT;
+
+	mutex_lock(&mut);
+	mc = idr_find(&multicast_idr, cmd.id);
+	if (!mc)
+		mc = ERR_PTR(-ENOENT);
+	else if (mc->ctx->file != file)
+		mc = ERR_PTR(-EINVAL);
+	else {
+		idr_remove(&multicast_idr, mc->id);
+		atomic_inc(&mc->ctx->ref);
+	}
+	mutex_unlock(&mut);
+
+	if (IS_ERR(mc)) {
+		ret = PTR_ERR(mc);
+		goto out;
+	}
+
+	rdma_leave_multicast(mc->ctx->cm_id, &mc->addr);
+	mutex_lock(&mc->ctx->file->mut);
+	ucma_cleanup_mc_events(mc);
+	list_del(&mc->list);
+	mutex_unlock(&mc->ctx->file->mut);
+
+	ucma_put_ctx(mc->ctx);
+	resp.events_reported = mc->events_reported;
+	kfree(mc);
+
+	if (copy_to_user((void __user *)(unsigned long)cmd.response,
+			 &resp, sizeof(resp)))
+		ret = -EFAULT;
+out:
+	return ret;
+}
+
 static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
 				   const char __user *inbuf,
 				   int in_len, int out_len) = {
@@ -750,6 +946,8 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
 	[RDMA_USER_CM_CMD_GET_OPTION]	= NULL,
 	[RDMA_USER_CM_CMD_SET_OPTION]	= NULL,
 	[RDMA_USER_CM_CMD_NOTIFY]	= ucma_notify,
+	[RDMA_USER_CM_CMD_JOIN_MCAST]	= ucma_join_multicast,
+	[RDMA_USER_CM_CMD_LEAVE_MCAST]	= ucma_leave_multicast,
 };
 
 static ssize_t ucma_write(struct file *filp, const char __user *buf,
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
index 5a7306f5efae8433b2769cc59853ddc49f04b9c4..75f7b16a271d6652a9aac3326c6a56ffc18f5ffe 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 82fa72041989e54695a7bfc2b13279b4ea4fe1f2..114ac3b775dc2fb527222a0c575d782af5d7e736 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 1b97e80b87805d98f7b519f744334d1258c02337..8ab04a7c6f6eec1558546a57c8e2397e920e643c 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index 997aa32cbf0794102677b448d3b03599d71e3869..65bf577311aa628af1e980f073e4c4c9268e1383 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.h b/drivers/infiniband/hw/cxgb3/cxio_resource.h
index a6bbe8370d818c8328fbf48ff1e6c34bd93922c0..a2703a3d882d0c2279a215f0d90a6aa63ddb357f 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.h
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index 103fc42d6976471a1d4b87ed51321b327d6d3c4c..90d7b8972cb46d21a554393abf88cd5d69730eff 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index 4611afa5222003d9f121658116c364175920d4ef..0315c9d9fce931c680f99107c7cfc322f7d86533 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
index 6517ef85026feaa7b73a8b2c6ec52514b6d0b159..caf4e6007a44cc4aef2e47c08f2ebb5c0db4e875 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index a522b1baa3b40854fcdbca1b166e4052385b6024..e5442e34b788566f4adba514fa4bc332db8f8698 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index 7c810d9042796c7fc87ec38cb1657bb4d94274b7..0c6f281bd4a03726c4875ccb03c75771bde28f7b 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index 98b3bdb5de9e99872d8762eeed2a1b4131d42415..d7624c170ee73e16a13ef7c35c0366d37faa473a 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c
index a6efa8fe15d8cd000dece3cfaac93c2a86fb8469..54362afbf72f6f410b5224e8c297afb92559a6e3 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_ev.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index 2b6cd53bb3fc741344577cd461700c9174637627..a6c2c4ba29e69244b896849b376698a5c2ab7cd2 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 6861087d776cfecca15ab8e276f84284ede49eb4..2aef122f9955b63cce07d008aee911282cc14a40 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 61e3278fd7a88dd0cb5dc18f86399c0bbe9d7163..2af3e93b607f2a380f13b21148432e95bbd03557 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index e066727504b6db81ce4f9bfe406e43251c5b2f10..4dda2f6da2dec84869e0838e5ced54e5f86d3ff2 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -846,6 +845,8 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
 			break;
 		case IWCH_QP_STATE_TERMINATE:
 			qhp->attr.state = IWCH_QP_STATE_TERMINATE;
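+			/* T3B: mark the WQ in error before issuing the terminate */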
+			if (t3b_device(qhp->rhp))
+				cxio_set_wq_in_error(&qhp->wq);
 			if (!internal)
 				terminate = 1;
 			break;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/drivers/infiniband/hw/cxgb3/iwch_user.h
index c4e7fbea8bbdb83d1839905c9d46b8c1772f06e4..cb7086f558c1e769ad66eee48fee14d1bbe97081 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_user.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_user.h
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig
index 727b10d896868516e1776938ef5fdbafedd0cba7..1a854598e0e68d01ca4a4ede2bb765cc13e2dac5 100644
--- a/drivers/infiniband/hw/ehca/Kconfig
+++ b/drivers/infiniband/hw/ehca/Kconfig
@@ -7,11 +7,3 @@ config INFINIBAND_EHCA
 	To compile the driver as a module, choose M here. The module
 	will be called ib_ehca.
 
-config INFINIBAND_EHCA_SCALING
-	bool "Scaling support (EXPERIMENTAL)"
-	depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL
-	default y
-	---help---
-	eHCA scaling support schedules the CQ callbacks to different CPUs.
-
-	To enable this feature choose Y here.
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index cf95ee474b0f6a1105551ce69f342168d5c2034e..40404c9e28179558d45feccc83880feb8b4132b5 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -42,8 +42,6 @@
 #ifndef __EHCA_CLASSES_H__
 #define __EHCA_CLASSES_H__
 
-#include "ehca_classes.h"
-#include "ipz_pt_fn.h"
 
 struct ehca_module;
 struct ehca_qp;
@@ -54,14 +52,22 @@ struct ehca_mw;
 struct ehca_pd;
 struct ehca_av;
 
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+
 #ifdef CONFIG_PPC64
 #include "ehca_classes_pSeries.h"
 #endif
+#include "ipz_pt_fn.h"
+#include "ehca_qes.h"
+#include "ehca_irq.h"
 
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_user_verbs.h>
+#define EHCA_EQE_CACHE_SIZE 20
 
-#include "ehca_irq.h"
+struct ehca_eqe_cache_entry {
+	struct ehca_eqe *eqe;
+	struct ehca_cq *cq;
+};
 
 struct ehca_eq {
 	u32 length;
@@ -74,6 +80,8 @@ struct ehca_eq {
 	spinlock_t spinlock;
 	struct tasklet_struct interrupt_task;
 	u32 ist;
+	spinlock_t irq_spinlock;
+	struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
 };
 
 struct ehca_sport {
@@ -269,6 +277,7 @@ extern struct idr ehca_cq_idr;
 extern int ehca_static_rate;
 extern int ehca_port_act_time;
 extern int ehca_use_hp_mr;
+extern int ehca_scaling_code;
 
 struct ipzu_queue_resp {
 	u32 qe_size;      /* queue entry size */
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index 24ceab0bae4a5b9407e6a2b82ff115668830c04d..4961eb88827cd85bb6438a5777c1458297f349e0 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -61,6 +61,7 @@ int ehca_create_eq(struct ehca_shca *shca,
 	struct ib_device *ib_dev = &shca->ib_device;
 
 	spin_lock_init(&eq->spinlock);
+	spin_lock_init(&eq->irq_spinlock);
 	eq->is_initialized = 0;
 
 	if (type != EHCA_EQ && type != EHCA_NEQ) {
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index b7be950ab47c3a700737feae370d8bd09a2d2b69..30eb45df9f0b8d8207173c0023b481af89e6a95f 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -162,6 +162,9 @@ int ehca_query_port(struct ib_device *ibdev,
 	props->active_width    = IB_WIDTH_12X;
 	props->active_speed    = 0x1;
 
+	/* at the moment (logical) link state is always LINK_UP */
+	props->phys_state      = 0x5;
+
 query_port1:
 	ehca_free_fw_ctrlblock(rblock);
 
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 6c4f9f91b15df3e10f9e417f3deac51abcfd4077..3ec53c687d0823c2eb0e7d34409f8684d7bdbe5e 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -63,15 +63,11 @@
 #define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52,63)
 #define ERROR_DATA_TYPE        EHCA_BMASK_IBM(0,7)
 
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
-
 static void queue_comp_task(struct ehca_cq *__cq);
 
 static struct ehca_comp_pool* pool;
 static struct notifier_block comp_pool_callback_nb;
 
-#endif
-
 static inline void comp_event_callback(struct ehca_cq *cq)
 {
 	if (!cq->ib_cq.comp_handler)
@@ -206,7 +202,7 @@ static void qp_event_callback(struct ehca_shca *shca,
 }
 
 static void cq_event_callback(struct ehca_shca *shca,
-					  u64 eqe)
+			      u64 eqe)
 {
 	struct ehca_cq *cq;
 	unsigned long flags;
@@ -318,7 +314,7 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
 			  "disruptive port %x configuration change", port);
 
 		ehca_info(&shca->ib_device,
-			 "port %x is inactive.", port);
+			  "port %x is inactive.", port);
 		event.device = &shca->ib_device;
 		event.event = IB_EVENT_PORT_ERR;
 		event.element.port_num = port;
@@ -326,7 +322,7 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
 		ib_dispatch_event(&event);
 
 		ehca_info(&shca->ib_device,
-			 "port %x is active.", port);
+			  "port %x is active.", port);
 		event.device = &shca->ib_device;
 		event.event = IB_EVENT_PORT_ACTIVE;
 		event.element.port_num = port;
@@ -401,115 +397,170 @@ irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-void ehca_tasklet_eq(unsigned long data)
-{
-	struct ehca_shca *shca = (struct ehca_shca*)data;
-	struct ehca_eqe *eqe;
-	int int_state;
-	int query_cnt = 0;
 
-	do {
-		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
+{
+	u64 eqe_value;
+	u32 token;
+	unsigned long flags;
+	struct ehca_cq *cq;
+	eqe_value = eqe->entry;
+	ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value);
+	if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
+		ehca_dbg(&shca->ib_device, "... completion event");
+		token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
+		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		cq = idr_find(&ehca_cq_idr, token);
+		if (cq == NULL) {
+			spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+			ehca_err(&shca->ib_device,
+				 "Invalid eqe for non-existing cq token=%x",
+				 token);
+			return;
+		}
+		reset_eq_pending(cq);
+		if (ehca_scaling_code) {
+			queue_comp_task(cq);
+			spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+		} else {
+			spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+			comp_event_callback(cq);
+		}
+	} else {
+		ehca_dbg(&shca->ib_device,
+			 "Got non completion event");
+		parse_identifier(shca, eqe_value);
+	}
+}
 
-		if ((shca->hw_level >= 2) && eqe)
-			int_state = 1;
-		else
-			int_state = 0;
-
-		while ((int_state == 1) || eqe) {
-			while (eqe) {
-				u64 eqe_value = eqe->entry;
-
-				ehca_dbg(&shca->ib_device,
-					 "eqe_value=%lx", eqe_value);
-
-				/* TODO: better structure */
-				if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT,
-						   eqe_value)) {
-					unsigned long flags;
-					u32 token;
-					struct ehca_cq *cq;
-
-					ehca_dbg(&shca->ib_device,
-						 "... completion event");
-					token =
-						EHCA_BMASK_GET(EQE_CQ_TOKEN,
-							       eqe_value);
-					spin_lock_irqsave(&ehca_cq_idr_lock,
-							  flags);
-					cq = idr_find(&ehca_cq_idr, token);
-
-					if (cq == NULL) {
-						spin_unlock_irqrestore(&ehca_cq_idr_lock,
-								       flags);
-						break;
-					}
-
-					reset_eq_pending(cq);
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
-					queue_comp_task(cq);
-					spin_unlock_irqrestore(&ehca_cq_idr_lock,
-							       flags);
-#else
-					spin_unlock_irqrestore(&ehca_cq_idr_lock,
-							       flags);
-					comp_event_callback(cq);
-#endif
-				} else {
-					ehca_dbg(&shca->ib_device,
-						 "... non completion event");
-					parse_identifier(shca, eqe_value);
-				}
-				eqe =
-					(struct ehca_eqe *)ehca_poll_eq(shca,
-								    &shca->eq);
-			}
+void ehca_process_eq(struct ehca_shca *shca, int is_irq)
+{
+	struct ehca_eq *eq = &shca->eq;
+	struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
+	u64 eqe_value;
+	unsigned long flags;
+	int eqe_cnt, i;
+	int eq_empty = 0;
+
+	spin_lock_irqsave(&eq->irq_spinlock, flags);
+	if (is_irq) {
+		const int max_query_cnt = 100;
+		int query_cnt = 0;
+		int int_state = 1;
+		do {
+			int_state = hipz_h_query_int_state(
+				shca->ipz_hca_handle, eq->ist);
+			query_cnt++;
+			iosync();
+		} while (int_state && query_cnt < max_query_cnt);
+		if (unlikely(query_cnt == max_query_cnt))
+			ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x",
+				 int_state, query_cnt);
+	}
 
-			if (shca->hw_level >= 2) {
-				int_state =
-				    hipz_h_query_int_state(shca->ipz_hca_handle,
-							   shca->eq.ist);
-				query_cnt++;
-				iosync();
-				if (query_cnt >= 100) {
-					query_cnt = 0;
-					int_state = 0;
-				}
+	/* read out all eqes */
+	eqe_cnt = 0;
+	do {
+		u32 token;
+		eqe_cache[eqe_cnt].eqe =
+			(struct ehca_eqe *)ehca_poll_eq(shca, eq);
+		if (!eqe_cache[eqe_cnt].eqe)
+			break;
+		eqe_value = eqe_cache[eqe_cnt].eqe->entry;
+		if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
+			token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
+			spin_lock(&ehca_cq_idr_lock);
+			eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
+			if (!eqe_cache[eqe_cnt].cq) {
+				spin_unlock(&ehca_cq_idr_lock);
+				ehca_err(&shca->ib_device,
+					 "Invalid eqe for non-existing cq "
+					 "token=%x", token);
+				continue;
 			}
-			eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
-
+			spin_unlock(&ehca_cq_idr_lock);
+		} else
+			eqe_cache[eqe_cnt].cq = NULL;
+		eqe_cnt++;
+	} while (eqe_cnt < EHCA_EQE_CACHE_SIZE);
+	if (!eqe_cnt) {
+		if (is_irq)
+			ehca_dbg(&shca->ib_device,
+				 "No eqe found for irq event");
+		goto unlock_irq_spinlock;
+	} else if (!is_irq)
+		ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt);
+	if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE))
+		ehca_dbg(&shca->ib_device, "too many eqes for one irq event");
+	/* enable irq for new packets */
+	for (i = 0; i < eqe_cnt; i++) {
+		if (eq->eqe_cache[i].cq)
+			reset_eq_pending(eq->eqe_cache[i].cq);
+	}
+	/* check eq */
+	spin_lock(&eq->spinlock);
+	eq_empty = !ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue);
+	spin_unlock(&eq->spinlock);
+	/* call completion handler for cached eqes */
+	for (i = 0; i < eqe_cnt; i++)
+		if (eq->eqe_cache[i].cq) {
+			if (ehca_scaling_code) {
+				spin_lock(&ehca_cq_idr_lock);
+				queue_comp_task(eq->eqe_cache[i].cq);
+				spin_unlock(&ehca_cq_idr_lock);
+			} else
+				comp_event_callback(eq->eqe_cache[i].cq);
+		} else {
+			ehca_dbg(&shca->ib_device, "Got non completion event");
+			parse_identifier(shca, eq->eqe_cache[i].eqe->entry);
 		}
-	} while (int_state != 0);
-
-	return;
+	/* poll eq if not empty */
+	if (eq_empty)
+		goto unlock_irq_spinlock;
+	do {
+		struct ehca_eqe *eqe;
+		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
+		if (!eqe)
+			break;
+		process_eqe(shca, eqe);
+		eqe_cnt++;
+	} while (1);
+
+unlock_irq_spinlock:
+	spin_unlock_irqrestore(&eq->irq_spinlock, flags);
 }
 
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
+void ehca_tasklet_eq(unsigned long data)
+{
+	ehca_process_eq((struct ehca_shca *)data, 1);
+}
 
 static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
 {
-	unsigned long flags_last_cpu;
+	int cpu;
+	unsigned long flags;
 
+	WARN_ON_ONCE(!in_interrupt());
 	if (ehca_debug_level)
 		ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
 
-	spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
-	pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
-	if (pool->last_cpu == NR_CPUS)
-		pool->last_cpu = first_cpu(cpu_online_map);
-	spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
+	spin_lock_irqsave(&pool->last_cpu_lock, flags);
+	cpu = next_cpu(pool->last_cpu, cpu_online_map);
+	if (cpu == NR_CPUS)
+		cpu = first_cpu(cpu_online_map);
+	pool->last_cpu = cpu;
+	spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
 
-	return pool->last_cpu;
+	return cpu;
 }
 
 static void __queue_comp_task(struct ehca_cq *__cq,
 			      struct ehca_cpu_comp_task *cct)
 {
-	unsigned long flags_cct;
-	unsigned long flags_cq;
+	unsigned long flags;
 
-	spin_lock_irqsave(&cct->task_lock, flags_cct);
-	spin_lock_irqsave(&__cq->task_lock, flags_cq);
+	spin_lock_irqsave(&cct->task_lock, flags);
+	spin_lock(&__cq->task_lock);
 
 	if (__cq->nr_callbacks == 0) {
 		__cq->nr_callbacks++;
@@ -520,8 +571,8 @@ static void __queue_comp_task(struct ehca_cq *__cq,
 	else
 		__cq->nr_callbacks++;
 
-	spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
-	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+	spin_unlock(&__cq->task_lock);
+	spin_unlock_irqrestore(&cct->task_lock, flags);
 }
 
 static void queue_comp_task(struct ehca_cq *__cq)
@@ -532,69 +583,69 @@ static void queue_comp_task(struct ehca_cq *__cq)
 
 	cpu = get_cpu();
 	cpu_id = find_next_online_cpu(pool);
-
 	BUG_ON(!cpu_online(cpu_id));
 
 	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+	BUG_ON(!cct);
 
 	if (cct->cq_jobs > 0) {
 		cpu_id = find_next_online_cpu(pool);
 		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+		BUG_ON(!cct);
 	}
 
 	__queue_comp_task(__cq, cct);
-
-	put_cpu();
-
-	return;
 }
 
 static void run_comp_task(struct ehca_cpu_comp_task* cct)
 {
 	struct ehca_cq *cq;
-	unsigned long flags_cct;
-	unsigned long flags_cq;
+	unsigned long flags;
 
-	spin_lock_irqsave(&cct->task_lock, flags_cct);
+	spin_lock_irqsave(&cct->task_lock, flags);
 
 	while (!list_empty(&cct->cq_list)) {
 		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
-		spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+		spin_unlock_irqrestore(&cct->task_lock, flags);
 		comp_event_callback(cq);
-		spin_lock_irqsave(&cct->task_lock, flags_cct);
+		spin_lock_irqsave(&cct->task_lock, flags);
 
-		spin_lock_irqsave(&cq->task_lock, flags_cq);
+		spin_lock(&cq->task_lock);
 		cq->nr_callbacks--;
 		if (cq->nr_callbacks == 0) {
 			list_del_init(cct->cq_list.next);
 			cct->cq_jobs--;
 		}
-		spin_unlock_irqrestore(&cq->task_lock, flags_cq);
-
+		spin_unlock(&cq->task_lock);
 	}
 
-	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
-
-	return;
+	spin_unlock_irqrestore(&cct->task_lock, flags);
 }
 
 static int comp_task(void *__cct)
 {
 	struct ehca_cpu_comp_task* cct = __cct;
+	int cql_empty;
 	DECLARE_WAITQUEUE(wait, current);
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	while(!kthread_should_stop()) {
 		add_wait_queue(&cct->wait_queue, &wait);
 
-		if (list_empty(&cct->cq_list))
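+		/* test for work under the lock to avoid racing __queue_comp_task() */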
+		spin_lock_irq(&cct->task_lock);
+		cql_empty = list_empty(&cct->cq_list);
+		spin_unlock_irq(&cct->task_lock);
+		if (cql_empty)
 			schedule();
 		else
 			__set_current_state(TASK_RUNNING);
 
 		remove_wait_queue(&cct->wait_queue, &wait);
 
-		if (!list_empty(&cct->cq_list))
+		spin_lock_irq(&cct->task_lock);
+		cql_empty = list_empty(&cct->cq_list);
+		spin_unlock_irq(&cct->task_lock);
+		if (!cql_empty)
 			run_comp_task(__cct);
 
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -637,8 +688,6 @@ static void destroy_comp_task(struct ehca_comp_pool *pool,
 
 	if (task)
 		kthread_stop(task);
-
-	return;
 }
 
 static void take_over_work(struct ehca_comp_pool *pool,
@@ -654,11 +703,11 @@ static void take_over_work(struct ehca_comp_pool *pool,
 	list_splice_init(&cct->cq_list, &list);
 
 	while(!list_empty(&list)) {
-	       cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
+		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
 
-	       list_del(&cq->entry);
-	       __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
-						 smp_processor_id()));
+		list_del(&cq->entry);
+		__queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
+						  smp_processor_id()));
 	}
 
 	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
@@ -708,14 +757,14 @@ static int comp_pool_callback(struct notifier_block *nfb,
 	return NOTIFY_OK;
 }
 
-#endif
-
 int ehca_create_comp_pool(void)
 {
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
 	int cpu;
 	struct task_struct *task;
 
+	if (!ehca_scaling_code)
+		return 0;
+
 	pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
 	if (pool == NULL)
 		return -ENOMEM;
@@ -740,16 +789,19 @@ int ehca_create_comp_pool(void)
 	comp_pool_callback_nb.notifier_call = comp_pool_callback;
 	comp_pool_callback_nb.priority =0;
 	register_cpu_notifier(&comp_pool_callback_nb);
-#endif
+
+	printk(KERN_INFO "eHCA scaling code enabled\n");
 
 	return 0;
 }
 
 void ehca_destroy_comp_pool(void)
 {
-#ifdef CONFIG_INFINIBAND_EHCA_SCALING
 	int i;
 
+	if (!ehca_scaling_code)
+		return;
+
 	unregister_cpu_notifier(&comp_pool_callback_nb);
 
 	for (i = 0; i < NR_CPUS; i++) {
@@ -758,7 +810,4 @@ void ehca_destroy_comp_pool(void)
 	}
 	free_percpu(pool->cpu_comp_tasks);
 	kfree(pool);
-#endif
-
-	return;
 }
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
index be579cc0adf632f23283aedf74668a5714a0a2c9..6ed06ee033ed2f326a6674d21701eed3479fa2df 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.h
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -56,6 +56,7 @@ void ehca_tasklet_neq(unsigned long data);
 
 irqreturn_t ehca_interrupt_eq(int irq, void *dev_id);
 void ehca_tasklet_eq(unsigned long data);
+void ehca_process_eq(struct ehca_shca *shca, int is_irq);
 
 struct ehca_cpu_comp_task {
 	wait_queue_head_t wait_queue;
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 1155bcf48212ef33da22d188a4530d7545e92661..c1835121a82223612195fce09369ba0b58173e4a 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -52,7 +52,7 @@
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
 MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
-MODULE_VERSION("SVNEHCA_0020");
+MODULE_VERSION("SVNEHCA_0021");
 
 int ehca_open_aqp1     = 0;
 int ehca_debug_level   = 0;
@@ -62,6 +62,7 @@ int ehca_use_hp_mr     = 0;
 int ehca_port_act_time = 30;
 int ehca_poll_all_eqs  = 1;
 int ehca_static_rate   = -1;
+int ehca_scaling_code  = 1;
 
 module_param_named(open_aqp1,     ehca_open_aqp1,     int, 0);
 module_param_named(debug_level,   ehca_debug_level,   int, 0);
@@ -71,6 +72,7 @@ module_param_named(use_hp_mr,     ehca_use_hp_mr,     int, 0);
 module_param_named(port_act_time, ehca_port_act_time, int, 0);
 module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  int, 0);
 module_param_named(static_rate,   ehca_static_rate,   int, 0);
+module_param_named(scaling_code,   ehca_scaling_code,   int, 0);
 
 MODULE_PARM_DESC(open_aqp1,
 		 "AQP1 on startup (0: no (default), 1: yes)");
@@ -91,6 +93,8 @@ MODULE_PARM_DESC(poll_all_eqs,
 		 " (0: no, 1: yes (default))");
 MODULE_PARM_DESC(static_rate,
 		 "set permanent static rate (default: disabled)");
+MODULE_PARM_DESC(scaling_code,
+		 "set scaling code (0: disabled, 1: enabled/default)");
 
 spinlock_t ehca_qp_idr_lock;
 spinlock_t ehca_cq_idr_lock;
@@ -432,8 +436,8 @@ static int ehca_destroy_aqp1(struct ehca_sport *sport)
 
 static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
 {
-	return  snprintf(buf, PAGE_SIZE, "%d\n",
-			 ehca_debug_level);
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			ehca_debug_level);
 }
 
 static ssize_t ehca_store_debug_level(struct device_driver *ddp,
@@ -778,8 +782,24 @@ void ehca_poll_eqs(unsigned long data)
 
 	spin_lock(&shca_list_lock);
 	list_for_each_entry(shca, &shca_list, shca_list) {
-		if (shca->eq.is_initialized)
-			ehca_tasklet_eq((unsigned long)(void*)shca);
+		if (shca->eq.is_initialized) {
+			/* run the deadman proc only if the EQ offset does not change */
+			struct ehca_eq *eq = &shca->eq;
+			int max = 3;
+			volatile u64 q_ofs, q_ofs2;
+			unsigned long flags;
+			spin_lock_irqsave(&eq->spinlock, flags);
+			q_ofs = eq->ipz_queue.current_q_offset;
+			spin_unlock_irqrestore(&eq->spinlock, flags);
+			do {
+				spin_lock_irqsave(&eq->spinlock, flags);
+				q_ofs2 = eq->ipz_queue.current_q_offset;
+				spin_unlock_irqrestore(&eq->spinlock, flags);
+				max--;
+			} while (q_ofs == q_ofs2 && max > 0);
+			if (q_ofs == q_ofs2)
+				ehca_process_eq(shca, 0);
+		}
 	}
 	mod_timer(&poll_eqs_timer, jiffies + HZ);
 	spin_unlock(&shca_list_lock);
@@ -790,7 +810,7 @@ int __init ehca_module_init(void)
 	int ret;
 
 	printk(KERN_INFO "eHCA Infiniband Device Driver "
-	                 "(Rel.: SVNEHCA_0020)\n");
+	       "(Rel.: SVNEHCA_0021)\n");
 	idr_init(&ehca_qp_idr);
 	idr_init(&ehca_cq_idr);
 	spin_lock_init(&ehca_qp_idr_lock);
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
index dc3bda2634b7dfcbd5fd3db3e791249c1d0cbe22..8199c45768a321a9c89d54f18d0d03c759a5a7e0 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@@ -79,7 +79,7 @@ static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset)
 	if (q_offset >= queue->queue_length)
 		return NULL;
 	current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT];
-	return  &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
+	return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
 }
 
 /*
@@ -247,6 +247,15 @@ static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
 	return ret;
 }
 
+static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue)
+{
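+	/* return the current EQE if valid, without advancing the queue */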
+	void *ret = ipz_qeit_get(queue);
+	u32 qe = *(u8 *) ret;
+	if ((qe >> 7) != (queue->toggle_state & 1))
+		return NULL;
+	return ret;
+}
+
 /* returns address (GX) of first queue entry */
 static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
 {
diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c
index 6e0f2b8918ce893ce746fba86726e9140185df32..f6f9490408258da1c8e49b09a54a362e48930ff0 100644
--- a/drivers/infiniband/hw/ipath/ipath_dma.c
+++ b/drivers/infiniband/hw/ipath/ipath_dma.c
@@ -96,8 +96,8 @@ static void ipath_dma_unmap_page(struct ib_device *dev,
 	BUG_ON(!valid_dma_direction(direction));
 }
 
-int ipath_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents,
-		 enum dma_data_direction direction)
+static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents,
+			enum dma_data_direction direction)
 {
 	u64 addr;
 	int i;
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 0b9d053a599d2c3a328913aceccafb5721ed5698..48f7c65e9aedb372336ec0db1e299276f94e01e8 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -175,7 +175,9 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
 		if (!ret) {
 			++chunk->npages;
 
-			if (!coherent && chunk->npages == MTHCA_ICM_CHUNK_LEN) {
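+			/* coherent pages are mapped at allocation; just count the entry */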
+			if (coherent)
+				++chunk->nsg;
+			else if (chunk->npages == MTHCA_ICM_CHUNK_LEN) {
 				chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
 							chunk->npages,
 							PCI_DMA_BIDIRECTIONAL);
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 224c93dd29eb7ecd9c56e8df0044aaa5c6609277..71dc84bd425498cd968a33cfa6e005804861474f 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -573,6 +573,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
 		goto out;
 	}
 
+	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
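+		/* a RESET->RESET transition is a no-op, so report success early */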
+		err = 0;
+		goto out;
+	}
+
 	if ((attr_mask & IB_QP_PKEY_INDEX) &&
 	     attr->pkey_index >= dev->limits.pkey_table_len) {
 		mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 2d483874a58909ca77d359c1d201067dea7cb450..4d59682f7d4a39084e61361925c6b7dc0041047c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -145,7 +145,7 @@ static int ipoib_cm_alloc_rx_skb(struct net_device *dev, int id,
 	for (; i >= 0; --i)
 		ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
 
-	kfree_skb(skb);
+	dev_kfree_skb_any(skb);
 	return -ENOMEM;
 }
 
@@ -1138,7 +1138,7 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
 	return -EINVAL;
 }
 
-static DEVICE_ATTR(mode, S_IWUGO | S_IRUGO, show_mode, set_mode);
+static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode);
 
 int ipoib_cm_add_mode_attr(struct net_device *dev)
 {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index fea737f520fdfeae5a8a7b85765799552406b379..b303ce6bc21ee6664d1471a3a99d2455e11aef2e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -60,14 +60,11 @@ static DEFINE_MUTEX(mcast_mutex);
 /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
 struct ipoib_mcast {
 	struct ib_sa_mcmember_rec mcmember;
+	struct ib_sa_multicast	 *mc;
 	struct ipoib_ah          *ah;
 
 	struct rb_node    rb_node;
 	struct list_head  list;
-	struct completion done;
-
-	int                 query_id;
-	struct ib_sa_query *query;
 
 	unsigned long created;
 	unsigned long backoff;
@@ -299,18 +296,22 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 	return 0;
 }
 
-static void
+static int
 ipoib_mcast_sendonly_join_complete(int status,
-				   struct ib_sa_mcmember_rec *mcmember,
-				   void *mcast_ptr)
+				   struct ib_sa_multicast *multicast)
 {
-	struct ipoib_mcast *mcast = mcast_ptr;
+	struct ipoib_mcast *mcast = multicast->context;
 	struct net_device *dev = mcast->dev;
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
+	/* We trap for port events ourselves. */
+	if (status == -ENETRESET)
+		return 0;
+
 	if (!status)
-		ipoib_mcast_join_finish(mcast, mcmember);
-	else {
+		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
+
+	if (status) {
 		if (mcast->logcount++ < 20)
 			ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for "
 					IPOIB_GID_FMT ", status %d\n",
@@ -325,11 +326,10 @@ ipoib_mcast_sendonly_join_complete(int status,
 		spin_unlock_irq(&priv->tx_lock);
 
 		/* Clear the busy flag so we try again */
-		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-		mcast->query = NULL;
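+		/* a non-zero return tells ib_sa to free the tracking structure */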
+		status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
+					    &mcast->flags);
 	}
-
-	complete(&mcast->done);
+	return status;
 }
 
 static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
@@ -359,35 +359,33 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
 	rec.port_gid = priv->local_gid;
 	rec.pkey     = cpu_to_be16(priv->pkey);
 
-	init_completion(&mcast->done);
-
-	ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, &rec,
-				     IB_SA_MCMEMBER_REC_MGID		|
-				     IB_SA_MCMEMBER_REC_PORT_GID	|
-				     IB_SA_MCMEMBER_REC_PKEY		|
-				     IB_SA_MCMEMBER_REC_JOIN_STATE,
-				     1000, GFP_ATOMIC,
-				     ipoib_mcast_sendonly_join_complete,
-				     mcast, &mcast->query);
-	if (ret < 0) {
-		ipoib_warn(priv, "ib_sa_mcmember_rec_set failed (ret = %d)\n",
+	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
+					 priv->port, &rec,
+					 IB_SA_MCMEMBER_REC_MGID	|
+					 IB_SA_MCMEMBER_REC_PORT_GID	|
+					 IB_SA_MCMEMBER_REC_PKEY	|
+					 IB_SA_MCMEMBER_REC_JOIN_STATE,
+					 GFP_ATOMIC,
+					 ipoib_mcast_sendonly_join_complete,
+					 mcast);
+	if (IS_ERR(mcast->mc)) {
+		ret = PTR_ERR(mcast->mc);
+		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+		ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
 			   ret);
 	} else {
 		ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT
 				", starting join\n",
 				IPOIB_GID_ARG(mcast->mcmember.mgid));
-
-		mcast->query_id = ret;
 	}
 
 	return ret;
 }
 
-static void ipoib_mcast_join_complete(int status,
-				      struct ib_sa_mcmember_rec *mcmember,
-				      void *mcast_ptr)
+static int ipoib_mcast_join_complete(int status,
+				     struct ib_sa_multicast *multicast)
 {
-	struct ipoib_mcast *mcast = mcast_ptr;
+	struct ipoib_mcast *mcast = multicast->context;
 	struct net_device *dev = mcast->dev;
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
@@ -395,24 +393,25 @@ static void ipoib_mcast_join_complete(int status,
 			" (status %d)\n",
 			IPOIB_GID_ARG(mcast->mcmember.mgid), status);
 
-	if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) {
+	/* We trap for port events ourselves. */
+	if (status == -ENETRESET)
+		return 0;
+
+	if (!status)
+		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
+
+	if (!status) {
 		mcast->backoff = 1;
 		mutex_lock(&mcast_mutex);
 		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
 			queue_delayed_work(ipoib_workqueue,
 					   &priv->mcast_task, 0);
 		mutex_unlock(&mcast_mutex);
-		complete(&mcast->done);
-		return;
-	}
-
-	if (status == -EINTR) {
-		complete(&mcast->done);
-		return;
+		return 0;
 	}
 
-	if (status && mcast->logcount++ < 20) {
-		if (status == -ETIMEDOUT || status == -EINTR) {
+	if (mcast->logcount++ < 20) {
+		if (status == -ETIMEDOUT) {
 			ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT
 					", status %d\n",
 					IPOIB_GID_ARG(mcast->mcmember.mgid),
@@ -429,24 +428,18 @@ static void ipoib_mcast_join_complete(int status,
 	if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
 		mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
 
-	mutex_lock(&mcast_mutex);
+	/* Clear the busy flag so we try again */
+	status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
 
+	mutex_lock(&mcast_mutex);
 	spin_lock_irq(&priv->lock);
-	mcast->query = NULL;
-
-	if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) {
-		if (status == -ETIMEDOUT)
-			queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
-					   0);
-		else
-			queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
-					   mcast->backoff * HZ);
-	} else
-		complete(&mcast->done);
+	if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
+		queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
+				   mcast->backoff * HZ);
 	spin_unlock_irq(&priv->lock);
 	mutex_unlock(&mcast_mutex);
 
-	return;
+	return status;
 }
 
 static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
@@ -495,15 +488,14 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
 	}
 
-	init_completion(&mcast->done);
-
-	ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port,
-				     &rec, comp_mask, mcast->backoff * 1000,
-				     GFP_ATOMIC, ipoib_mcast_join_complete,
-				     mcast, &mcast->query);
-
-	if (ret < 0) {
-		ipoib_warn(priv, "ib_sa_mcmember_rec_set failed, status %d\n", ret);
+	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
+					 &rec, comp_mask, GFP_KERNEL,
+					 ipoib_mcast_join_complete, mcast);
+	if (IS_ERR(mcast->mc)) {
+		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+		ret = PTR_ERR(mcast->mc);
+		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
 
 		mcast->backoff *= 2;
 		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
@@ -515,8 +507,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 					   &priv->mcast_task,
 					   mcast->backoff * HZ);
 		mutex_unlock(&mcast_mutex);
-	} else
-		mcast->query_id = ret;
+	}
 }
 
 void ipoib_mcast_join_task(struct work_struct *work)
@@ -541,7 +532,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
 			priv->local_rate = attr.active_speed *
 				ib_width_enum_to_int(attr.active_width);
 		} else
-		ipoib_warn(priv, "ib_query_port failed\n");
+			ipoib_warn(priv, "ib_query_port failed\n");
 	}
 
 	if (!priv->broadcast) {
@@ -568,7 +559,8 @@ void ipoib_mcast_join_task(struct work_struct *work)
 	}
 
 	if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
-		ipoib_mcast_join(dev, priv->broadcast, 0);
+		if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
+			ipoib_mcast_join(dev, priv->broadcast, 0);
 		return;
 	}
 
@@ -625,26 +617,9 @@ int ipoib_mcast_start_thread(struct net_device *dev)
 	return 0;
 }
 
-static void wait_for_mcast_join(struct ipoib_dev_priv *priv,
-				struct ipoib_mcast *mcast)
-{
-	spin_lock_irq(&priv->lock);
-	if (mcast && mcast->query) {
-		ib_sa_cancel_query(mcast->query_id, mcast->query);
-		mcast->query = NULL;
-		spin_unlock_irq(&priv->lock);
-		ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
-				IPOIB_GID_ARG(mcast->mcmember.mgid));
-		wait_for_completion(&mcast->done);
-	}
-	else
-		spin_unlock_irq(&priv->lock);
-}
-
 int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ipoib_mcast *mcast;
 
 	ipoib_dbg_mcast(priv, "stopping multicast thread\n");
 
@@ -660,52 +635,27 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
 	if (flush)
 		flush_workqueue(ipoib_workqueue);
 
-	wait_for_mcast_join(priv, priv->broadcast);
-
-	list_for_each_entry(mcast, &priv->multicast_list, list)
-		wait_for_mcast_join(priv, mcast);
-
 	return 0;
 }
 
 static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ib_sa_mcmember_rec rec = {
-		.join_state = 1
-	};
 	int ret = 0;
 
-	if (!test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags))
-		return 0;
-
-	ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n",
-			IPOIB_GID_ARG(mcast->mcmember.mgid));
-
-	rec.mgid     = mcast->mcmember.mgid;
-	rec.port_gid = priv->local_gid;
-	rec.pkey     = cpu_to_be16(priv->pkey);
+	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
+		ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n",
+				IPOIB_GID_ARG(mcast->mcmember.mgid));
 
-	/* Remove ourselves from the multicast group */
-	ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid),
-				 &mcast->mcmember.mgid);
-	if (ret)
-		ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret);
+		/* Remove ourselves from the multicast group */
+		ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid),
+					 &mcast->mcmember.mgid);
+		if (ret)
+			ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret);
+	}
 
-	/*
-	 * Just make one shot at leaving and don't wait for a reply;
-	 * if we fail, too bad.
-	 */
-	ret = ib_sa_mcmember_rec_delete(&ipoib_sa_client, priv->ca, priv->port, &rec,
-					IB_SA_MCMEMBER_REC_MGID		|
-					IB_SA_MCMEMBER_REC_PORT_GID	|
-					IB_SA_MCMEMBER_REC_PKEY		|
-					IB_SA_MCMEMBER_REC_JOIN_STATE,
-					0, GFP_ATOMIC, NULL,
-					mcast, &mcast->query);
-	if (ret < 0)
-		ipoib_warn(priv, "ib_sa_mcmember_rec_delete failed "
-			   "for leave (result = %d)\n", ret);
+	if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+		ib_sa_free_multicast(mcast->mc);
 
 	return 0;
 }
@@ -758,7 +708,7 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
 			dev_kfree_skb_any(skb);
 		}
 
-		if (mcast->query)
+		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
 			ipoib_dbg_mcast(priv, "no address vector, "
 					"but multicast join already started\n");
 		else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
@@ -916,7 +866,6 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 
 	/* We have to cancel outside of the spinlock */
 	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
-		wait_for_mcast_join(priv, mcast);
 		ipoib_mcast_leave(mcast->dev, mcast);
 		ipoib_mcast_free(mcast);
 	}
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index c094e501286229bbc699bc74534c088235a7a436..c36750ff6ae82caf596aaee36d25de55e9d4316a 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -110,6 +110,12 @@ static inline void ib_addr_set_pkey(struct rdma_dev_addr *dev_addr, u16 pkey)
 	dev_addr->broadcast[9] = (unsigned char) pkey;
 }
 
+static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr,
+				    union ib_gid *gid)
+{
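+	/* the IPoIB broadcast address carries the group MGID at byte offset 4 */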
+	memcpy(gid, dev_addr->broadcast + 4, sizeof *gid);
+}
+
 static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr,
 				    union ib_gid *gid)
 {
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 97715b0c20b69f6e50b75b2c03e24fa50758fdb6..5e26b2f53f86e198f1c8893c72d460d624a4c419 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -285,18 +285,6 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
 		       void *context,
 		       struct ib_sa_query **query);
 
-int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
-			     struct ib_device *device, u8 port_num,
-			     u8 method,
-			     struct ib_sa_mcmember_rec *rec,
-			     ib_sa_comp_mask comp_mask,
-			     int timeout_ms, gfp_t gfp_mask,
-			     void (*callback)(int status,
-					      struct ib_sa_mcmember_rec *resp,
-					      void *context),
-			     void *context,
-			     struct ib_sa_query **query);
-
 int ib_sa_service_rec_query(struct ib_sa_client *client,
 			 struct ib_device *device, u8 port_num,
 			 u8 method,
@@ -309,93 +297,82 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
 			 void *context,
 			 struct ib_sa_query **sa_query);
 
+struct ib_sa_multicast {
+	struct ib_sa_mcmember_rec rec;
+	ib_sa_comp_mask		comp_mask;
+	int			(*callback)(int status,
+					    struct ib_sa_multicast *multicast);
+	void			*context;
+};
+
 /**
- * ib_sa_mcmember_rec_set - Start an MCMember set query
- * @client:SA client
- * @device:device to send query on
- * @port_num: port number to send query on
- * @rec:MCMember Record to send in query
- * @comp_mask:component mask to send in query
- * @timeout_ms:time to wait for response
- * @gfp_mask:GFP mask to use for internal allocations
- * @callback:function called when query completes, times out or is
- * canceled
- * @context:opaque user context passed to callback
- * @sa_query:query context, used to cancel query
+ * ib_sa_join_multicast - Initiates a join request to the specified multicast
+ *   group.
+ * @client: SA client
+ * @device: Device associated with the multicast group.
+ * @port_num: Port on the specified device to associate with the multicast
+ *   group.
+ * @rec: SA multicast member record specifying group attributes.
+ * @comp_mask: Component mask indicating which group attributes of %rec are
+ *   valid.
+ * @gfp_mask: GFP mask for memory allocations.
+ * @callback: User callback invoked once the join operation completes.
+ * @context: User specified context stored with the ib_sa_multicast structure.
  *
- * Send an MCMember Set query to the SA (eg to join a multicast
- * group).  The callback function will be called when the query
- * completes (or fails); status is 0 for a successful response, -EINTR
- * if the query is canceled, -ETIMEDOUT is the query timed out, or
- * -EIO if an error occurred sending the query.  The resp parameter of
- * the callback is only valid if status is 0.
+ * This call initiates a multicast join request with the SA for the specified
+ * multicast group.  If the join operation is started successfully, it returns
+ * an ib_sa_multicast structure that is used to track the multicast operation.
+ * Users must free this structure by calling ib_sa_free_multicast, even if the
+ * join operation later fails (i.e., the callback is invoked with a non-zero
+ * status).
  *
- * If the return value of ib_sa_mcmember_rec_set() is negative, it is
- * an error code.  Otherwise it is a query ID that can be used to
- * cancel the query.
+ * If the join operation fails, status will be non-zero, with the following
+ * failures possible:
+ * -ETIMEDOUT: The request timed out.
+ * -EIO: An error occurred sending the query.
+ * -EINVAL: The MCMemberRecord values differed from the existing group's.
+ * -ENETRESET: Indicates that a fatal error has occurred on the multicast
+ *   group, and the user must rejoin the group to continue using it.
  */
-static inline int
-ib_sa_mcmember_rec_set(struct ib_sa_client *client,
-		       struct ib_device *device, u8 port_num,
-		       struct ib_sa_mcmember_rec *rec,
-		       ib_sa_comp_mask comp_mask,
-		       int timeout_ms, gfp_t gfp_mask,
-		       void (*callback)(int status,
-					struct ib_sa_mcmember_rec *resp,
-					void *context),
-		       void *context,
-		       struct ib_sa_query **query)
-{
-	return ib_sa_mcmember_rec_query(client, device, port_num,
-					IB_MGMT_METHOD_SET,
-					rec, comp_mask,
-					timeout_ms, gfp_mask, callback,
-					context, query);
-}
+struct ib_sa_multicast *ib_sa_join_multicast(struct ib_sa_client *client,
+					     struct ib_device *device, u8 port_num,
+					     struct ib_sa_mcmember_rec *rec,
+					     ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
+					     int (*callback)(int status,
+							     struct ib_sa_multicast
+								    *multicast),
+					     void *context);
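+
+/*
+ * Illustrative call sequence (a sketch only; my_join_callback and my_context
+ * are placeholders, and real callers handle the error and completion paths
+ * as ipoib_mcast_join() does):
+ *
+ *	mc = ib_sa_join_multicast(client, device, port_num, &rec,
+ *				  IB_SA_MCMEMBER_REC_MGID |
+ *				  IB_SA_MCMEMBER_REC_PORT_GID |
+ *				  IB_SA_MCMEMBER_REC_PKEY |
+ *				  IB_SA_MCMEMBER_REC_JOIN_STATE,
+ *				  GFP_KERNEL, my_join_callback, my_context);
+ *	if (IS_ERR(mc))
+ *		return PTR_ERR(mc);
+ *	...
+ *	ib_sa_free_multicast(mc);
+ */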
 
 /**
- * ib_sa_mcmember_rec_delete - Start an MCMember delete query
- * @client:SA client
- * @device:device to send query on
- * @port_num: port number to send query on
- * @rec:MCMember Record to send in query
- * @comp_mask:component mask to send in query
- * @timeout_ms:time to wait for response
- * @gfp_mask:GFP mask to use for internal allocations
- * @callback:function called when query completes, times out or is
- * canceled
- * @context:opaque user context passed to callback
- * @sa_query:query context, used to cancel query
- *
- * Send an MCMember Delete query to the SA (eg to leave a multicast
- * group).  The callback function will be called when the query
- * completes (or fails); status is 0 for a successful response, -EINTR
- * if the query is canceled, -ETIMEDOUT is the query timed out, or
- * -EIO if an error occurred sending the query.  The resp parameter of
- * the callback is only valid if status is 0.
+ * ib_sa_free_multicast - Frees the multicast tracking structure and releases
+ *    any reference on the multicast group.
+ * @multicast: Multicast tracking structure allocated by ib_sa_join_multicast.
  *
- * If the return value of ib_sa_mcmember_rec_delete() is negative, it
- * is an error code.  Otherwise it is a query ID that can be used to
- * cancel the query.
+ * This call blocks until the multicast identifier is destroyed.  It may
+ * not be called from within the multicast callback; however, returning a non-
+ * zero value from the callback will result in destroying the multicast
+ * tracking structure.
+ */
+void ib_sa_free_multicast(struct ib_sa_multicast *multicast);
+
+/**
+ * ib_sa_get_mcmember_rec - Looks up a multicast member record by its MGID and
+ *   returns it if found.
+ * @device: Device associated with the multicast group.
+ * @port_num: Port on the specified device to associate with the multicast
+ *   group.
+ * @mgid: MGID of multicast group.
+ * @rec: Location to copy SA multicast member record.
  */
-static inline int
-ib_sa_mcmember_rec_delete(struct ib_sa_client *client,
-			  struct ib_device *device, u8 port_num,
-			  struct ib_sa_mcmember_rec *rec,
-			  ib_sa_comp_mask comp_mask,
-			  int timeout_ms, gfp_t gfp_mask,
-			  void (*callback)(int status,
-					   struct ib_sa_mcmember_rec *resp,
-					   void *context),
-			  void *context,
-			  struct ib_sa_query **query)
-{
-	return ib_sa_mcmember_rec_query(client, device, port_num,
-					IB_SA_METHOD_DELETE,
-					rec, comp_mask,
-					timeout_ms, gfp_mask, callback,
-					context, query);
-}
+int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
+			   union ib_gid *mgid, struct ib_sa_mcmember_rec *rec);
+
+/**
+ * ib_init_ah_from_mcmember - Initialize address handle attributes based on
+ * an SA multicast member record.
+ */
+int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
+			     struct ib_sa_mcmember_rec *rec,
+			     struct ib_ah_attr *ah_attr);
 
 /**
  * ib_init_ah_from_path - Initialize address handle attributes based on an SA
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 36cd8a8526a06f39b40af1b5106967ec67394f97..2d6a7705eae7fbafbbaab4a8a9d8d6c9fde8ceac 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -52,10 +52,13 @@ enum rdma_cm_event_type {
 	RDMA_CM_EVENT_ESTABLISHED,
 	RDMA_CM_EVENT_DISCONNECTED,
 	RDMA_CM_EVENT_DEVICE_REMOVAL,
+	RDMA_CM_EVENT_MULTICAST_JOIN,
+	RDMA_CM_EVENT_MULTICAST_ERROR
 };
 
 enum rdma_port_space {
 	RDMA_PS_SDP  = 0x0001,
+	RDMA_PS_IPOIB= 0x0002,
 	RDMA_PS_TCP  = 0x0106,
 	RDMA_PS_UDP  = 0x0111,
 	RDMA_PS_SCTP = 0x0183
@@ -294,5 +297,21 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
  */
 int rdma_disconnect(struct rdma_cm_id *id);
 
-#endif /* RDMA_CM_H */
+/**
+ * rdma_join_multicast - Join the multicast group specified by the given
+ *   address.
+ * @id: Communication identifier associated with the request.
+ * @addr: Multicast address identifying the group to join.
+ * @context: User-defined context associated with the join request, returned
+ *   to the user through the private_data pointer in multicast events.
+ */
+int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
+			void *context);
 
+/**
+ * rdma_leave_multicast - Leave the multicast group specified by the given
+ *   address.
+ */
+void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr);
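+
+/*
+ * Illustrative usage (a sketch; mcast_addr and ctx are placeholders): the
+ * join completes asynchronously and is reported to the id's event handler
+ * as RDMA_CM_EVENT_MULTICAST_JOIN on success, or
+ * RDMA_CM_EVENT_MULTICAST_ERROR on failure:
+ *
+ *	ret = rdma_join_multicast(id, (struct sockaddr *) &mcast_addr, ctx);
+ *	...
+ *	rdma_leave_multicast(id, (struct sockaddr *) &mcast_addr);
+ */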
+
+#endif /* RDMA_CM_H */
diff --git a/include/rdma/rdma_cm_ib.h b/include/rdma/rdma_cm_ib.h
index 9b176df1d66706d2eef11daff8409a93464365fa..950424b38f1605b17adc6d41a961e7f629cf0c01 100644
--- a/include/rdma/rdma_cm_ib.h
+++ b/include/rdma/rdma_cm_ib.h
@@ -44,7 +44,7 @@
 int rdma_set_ib_paths(struct rdma_cm_id *id,
 		      struct ib_sa_path_rec *path_rec, int num_paths);
 
-/* Global qkey for UD QPs and multicast groups. */
-#define RDMA_UD_QKEY 0x01234567
+/* Global qkey for UDP QPs and multicast groups. */
+#define RDMA_UDP_QKEY 0x01234567
 
 #endif /* RDMA_CM_IB_H */
diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h
index 9572ab8eeac19e74fbd393c4b34d90fca23dc8ab..f632b0c007c9808eb0f50e60bdab3b30c0775e38 100644
--- a/include/rdma/rdma_user_cm.h
+++ b/include/rdma/rdma_user_cm.h
@@ -38,7 +38,7 @@
 #include <rdma/ib_user_verbs.h>
 #include <rdma/ib_user_sa.h>
 
-#define RDMA_USER_CM_ABI_VERSION	3
+#define RDMA_USER_CM_ABI_VERSION	4
 
 #define RDMA_MAX_PRIVATE_DATA		256
 
@@ -58,7 +58,9 @@ enum {
 	RDMA_USER_CM_CMD_GET_EVENT,
 	RDMA_USER_CM_CMD_GET_OPTION,
 	RDMA_USER_CM_CMD_SET_OPTION,
-	RDMA_USER_CM_CMD_NOTIFY
+	RDMA_USER_CM_CMD_NOTIFY,
+	RDMA_USER_CM_CMD_JOIN_MCAST,
+	RDMA_USER_CM_CMD_LEAVE_MCAST
 };
 
 /*
@@ -188,6 +190,13 @@ struct rdma_ucm_notify {
 	__u32 event;
 };
 
+struct rdma_ucm_join_mcast {
+	__u64 response;		/* rdma_ucm_create_id_resp */
+	__u64 uid;
+	struct sockaddr_in6 addr;
+	__u32 id;
+};
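+
+/*
+ * There is no separate leave structure: RDMA_USER_CM_CMD_LEAVE_MCAST reuses
+ * struct rdma_ucm_destroy_id / rdma_ucm_destroy_id_resp, carrying the
+ * multicast id returned by the join.
+ */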
+
 struct rdma_ucm_get_event {
 	__u64 response;
 };