From: John Feeney <jfeeney@redhat.com> Date: Tue, 21 Apr 2009 19:02:01 -0400 Subject: [misc] I/O AT: update network changes Message-id: 49EE5069.80603@redhat.com O-Subject: [RHEL5.4 PATCH 3/8] Update I/O AT: network changes Bugzilla: 436048 RH-Acked-by: Prarit Bhargava <prarit@redhat.com> RH-Acked-by: John W. Linville <linville@redhat.com> bz436048 Update I/O AT code to upstream This patch changes the network stack to allow for the new dmaengine v3. core/dev.c | 116 +++++++++++++++++++++++++++++++++++++++----------------- core/user_dma.c | 6 +- ipv4/tcp.c | 9 ++-- 3 files changed, 90 insertions(+), 41 deletions(-) Note: Function names were changed because original dmaengine functions changed parameters. The commits to each file and any modifications are listed below. net/ipv4/tcp.c function name change to “_v3”, not upstream dma_pin_iovec_pages_v3() dma_async_memcpy_issue_pending_v3() dma_async_memcpy_complete_v3() dma_unpin_iovec_pages_v3() net/core/user_dma.c function name change to “_v3”, not upstream dma_memcpy_to_iovec_v3() dma_memcpy_pg_to_iovec_v3() plus these commits: de5506e155276d385712c2aa1c2d9a27cd4ed947 Leech 6/18/06 already in rhel5 16a37acaaf4aaa631ba3f83710ed6cdb1a597520 Sosnowski 7/23/08 5de22343b2303b278ab562e5d166ffe306566d30 Sosnowski 03/4/09 net/core/dev.c function name change to “_v3”, not upstream .event_callback_v3 dma_async_memcpy_issue_pending_v3() dma_async_client_register_v3() dma_async_client_chan_request_v3() plus: d379b01e9087a582d58f4b678208a4f8d8376fe7 Williams 7/9/07 diff --git a/net/core/dev.c b/net/core/dev.c index 56829cc..4bda12b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -157,9 +157,24 @@ static struct list_head ptype_base[16]; /* 16 way hashed list */ static struct list_head ptype_all; /* Taps */ #ifdef CONFIG_NET_DMA -static struct dma_client *net_dma_client; -static unsigned int net_dma_count; -static spinlock_t net_dma_event_lock; +struct net_dma { + struct dma_client client; + spinlock_t lock; + cpumask_t 
channel_mask; + struct dma_chan *channels[NR_CPUS]; +}; + +static enum dma_state_client + netdev_dma_event(struct dma_client *client, struct dma_chan *chan, + enum dma_state state); + +static struct net_dma net_dma = { + .client = { + .event_callback_v3 = netdev_dma_event, + }, +}; + + #endif /* @@ -2000,12 +2015,13 @@ out: * There may not be any more sk_buffs coming right now, so push * any pending DMA copies to hardware */ - if (net_dma_client) { - struct dma_chan *chan; - rcu_read_lock(); - list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node) - dma_async_memcpy_issue_pending(chan); - rcu_read_unlock(); + if (!cpus_empty(net_dma.channel_mask)) { + int chan_idx; + for_each_cpu_mask(chan_idx, net_dma.channel_mask) { + struct dma_chan *chan = net_dma.channels[chan_idx]; + if (chan) + dma_async_memcpy_issue_pending_v3(chan); + } } #endif local_irq_enable(); @@ -3472,12 +3488,12 @@ static int dev_cpu_callback(struct notifier_block *nfb, * This is called when the number of channels allocated to the net_dma_client * changes. The net_dma_client tries to have one DMA channel per CPU. */ -static void net_dma_rebalance(void) +static void net_dma_rebalance(struct net_dma *net_dma) { - unsigned int cpu, i, n; + unsigned int cpu, i, n, chan_idx; struct dma_chan *chan; - if (net_dma_count == 0) { + if (cpus_empty(net_dma->channel_mask)) { for_each_online_cpu(cpu) rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); return; @@ -3486,10 +3502,12 @@ static void net_dma_rebalance(void) i = 0; cpu = first_cpu(cpu_online_map); - rcu_read_lock(); - list_for_each_entry(chan, &net_dma_client->channels, client_node) { - n = ((num_online_cpus() / net_dma_count) - + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); + for_each_cpu_mask(chan_idx, net_dma->channel_mask) { + chan = net_dma->channels[chan_idx]; + + n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask)) + + (i < (num_online_cpus() % + cpus_weight(net_dma->channel_mask)) ? 
1 : 0)); while(n) { per_cpu(softnet_data, cpu).net_dma = chan; @@ -3498,7 +3516,6 @@ static void net_dma_rebalance(void) } i++; } - rcu_read_unlock(); } /** @@ -3507,23 +3524,54 @@ static void net_dma_rebalance(void) * @chan: DMA channel for the event * @event: event type */ -static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, - enum dma_event event) -{ - spin_lock(&net_dma_event_lock); - switch (event) { - case DMA_RESOURCE_ADDED: - net_dma_count++; - net_dma_rebalance(); +static enum dma_state_client + netdev_dma_event(struct dma_client *client, struct dma_chan *chan, + enum dma_state state) +{ + int i, found = 0, pos = -1; + struct net_dma *net_dma = + container_of(client, struct net_dma, client); + enum dma_state_client ack = DMA_DUP; /* default: take no action */ + + spin_lock(&net_dma->lock); + switch (state) { + case DMA_STATE_RESOURCE_AVAILABLE: + for (i = 0; i < NR_CPUS; i++) + if (net_dma->channels[i] == chan) { + found = 1; + break; + } else if (net_dma->channels[i] == NULL && pos < 0) + pos = i; + + if (!found && pos >= 0) { + ack = DMA_ACK; + net_dma->channels[pos] = chan; + cpu_set(pos, net_dma->channel_mask); + net_dma_rebalance(net_dma); + } + break; - case DMA_RESOURCE_REMOVED: - net_dma_count--; - net_dma_rebalance(); + case DMA_STATE_RESOURCE_REMOVED: + for (i = 0; i < NR_CPUS; i++) + if (net_dma->channels[i] == chan) { + found = 1; + pos = i; + break; + } + + if (found) { + ack = DMA_ACK; + cpu_clear(pos, net_dma->channel_mask); + net_dma->channels[i] = NULL; + net_dma_rebalance(net_dma); + } break; default: break; } - spin_unlock(&net_dma_event_lock); + spin_unlock(&net_dma->lock); + + return(ack); } /** @@ -3531,12 +3579,10 @@ static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, */ static int __init netdev_dma_register(void) { - spin_lock_init(&net_dma_event_lock); - net_dma_client = dma_async_client_register(netdev_dma_event); - if (net_dma_client == NULL) - return -ENOMEM; - - 
dma_async_client_chan_request(net_dma_client, num_online_cpus()); + spin_lock_init(&net_dma.lock); + dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask); + dma_async_client_register_v3(&net_dma.client); + dma_async_client_chan_request_v3(&net_dma.client); return 0; } diff --git a/net/core/user_dma.c b/net/core/user_dma.c index 248a6b6..b6b6569 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -34,6 +34,7 @@ #define NET_DMA_DEFAULT_COPYBREAK 4096 int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK; +EXPORT_SYMBOL(sysctl_tcp_dma_copybreak); /** * dma_skb_copy_datagram_iovec - Copy a datagram to an iovec. @@ -57,7 +58,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, if (copy > 0) { if (copy > len) copy = len; - cookie = dma_memcpy_to_iovec(chan, to, pinned_list, + cookie = dma_memcpy_to_iovec_v3(chan, to, pinned_list, skb->data + offset, copy); if (cookie < 0) goto fault; @@ -82,7 +83,8 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, if (copy > len) copy = len; - cookie = dma_memcpy_pg_to_iovec(chan, to, pinned_list, page, + cookie = dma_memcpy_pg_to_iovec_v3(chan, to, + pinned_list, page, frag->page_offset + offset - start, copy); if (cookie < 0) goto fault; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index deff959..567fd5d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1147,7 +1147,8 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, __get_cpu_var(softnet_data).net_dma) { preempt_enable_no_resched(); tp->ucopy.pinned_list = - dma_pin_iovec_pages(msg->msg_iov, len); + dma_pin_iovec_pages_v3(msg->msg_iov, + len); } else { preempt_enable_no_resched(); } @@ -1444,9 +1445,9 @@ skip_copy: if (tp->ucopy.dma_chan) { dma_cookie_t done, used; - dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); + dma_async_memcpy_issue_pending_v3(tp->ucopy.dma_chan); - while (dma_async_memcpy_complete(tp->ucopy.dma_chan, + while (dma_async_memcpy_complete_v3(tp->ucopy.dma_chan, tp->ucopy.dma_cookie, &done, &used) == 
DMA_IN_PROGRESS) { /* do partial cleanup of sk_async_wait_queue */ @@ -1464,7 +1465,7 @@ skip_copy: tp->ucopy.dma_chan = NULL; } if (tp->ucopy.pinned_list) { - dma_unpin_iovec_pages(tp->ucopy.pinned_list); + dma_unpin_iovec_pages_v3(tp->ucopy.pinned_list); tp->ucopy.pinned_list = NULL; } #endif