tcp: tcp_sendpages() should call tcp_push() once
[linux-flexiantxendom0-3.2.10.git] / net / ipv4 / tcp.c
index a09fe25..5d54ed3 100644 (file)
  *     TCP_CLOSE               socket is finished
  */
 
+#define pr_fmt(fmt) "TCP: " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/types.h>
@@ -282,11 +284,9 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
 struct percpu_counter tcp_orphan_count;
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
 
-long sysctl_tcp_mem[3] __read_mostly;
 int sysctl_tcp_wmem[3] __read_mostly;
 int sysctl_tcp_rmem[3] __read_mostly;
 
-EXPORT_SYMBOL(sysctl_tcp_mem);
 EXPORT_SYMBOL(sysctl_tcp_rmem);
 EXPORT_SYMBOL(sysctl_tcp_wmem);
 
@@ -860,7 +860,7 @@ wait_for_memory:
        }
 
 out:
-       if (copied)
+       if (copied && !(flags & MSG_SENDPAGE_NOTLAST))
                tcp_push(sk, flags, mss_now, tp->nonagle);
        return copied;
 
@@ -888,9 +888,6 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
 }
 EXPORT_SYMBOL(tcp_sendpage);
 
-#define TCP_PAGE(sk)   (sk->sk_sndmsg_page)
-#define TCP_OFF(sk)    (sk->sk_sndmsg_off)
-
 static inline int select_size(const struct sock *sk, bool sg)
 {
        const struct tcp_sock *tp = tcp_sk(sk);
@@ -1008,13 +1005,13 @@ new_segment:
                        } else {
                                int merge = 0;
                                int i = skb_shinfo(skb)->nr_frags;
-                               struct page *page = TCP_PAGE(sk);
+                               struct page *page = sk->sk_sndmsg_page;
                                int off;
 
                                if (page && page_count(page) == 1)
-                                       TCP_OFF(sk) = 0;
+                                       sk->sk_sndmsg_off = 0;
 
-                               off = TCP_OFF(sk);
+                               off = sk->sk_sndmsg_off;
 
                                if (skb_can_coalesce(skb, i, page, off) &&
                                    off != PAGE_SIZE) {
@@ -1031,7 +1028,7 @@ new_segment:
                                } else if (page) {
                                        if (off == PAGE_SIZE) {
                                                put_page(page);
-                                               TCP_PAGE(sk) = page = NULL;
+                                               sk->sk_sndmsg_page = page = NULL;
                                                off = 0;
                                        }
                                } else
@@ -1057,9 +1054,9 @@ new_segment:
                                        /* If this page was new, give it to the
                                         * socket so it does not get leaked.
                                         */
-                                       if (!TCP_PAGE(sk)) {
-                                               TCP_PAGE(sk) = page;
-                                               TCP_OFF(sk) = 0;
+                                       if (!sk->sk_sndmsg_page) {
+                                               sk->sk_sndmsg_page = page;
+                                               sk->sk_sndmsg_off = 0;
                                        }
                                        goto do_error;
                                }
@@ -1069,15 +1066,15 @@ new_segment:
                                        skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
                                } else {
                                        skb_fill_page_desc(skb, i, page, off, copy);
-                                       if (TCP_PAGE(sk)) {
+                                       if (sk->sk_sndmsg_page) {
                                                get_page(page);
                                        } else if (off + copy < PAGE_SIZE) {
                                                get_page(page);
-                                               TCP_PAGE(sk) = page;
+                                               sk->sk_sndmsg_page = page;
                                        }
                                }
 
-                               TCP_OFF(sk) = off + copy;
+                               sk->sk_sndmsg_off = off + copy;
                        }
 
                        if (!copied)
@@ -1680,7 +1677,8 @@ do_prequeue:
 
                                if (tp->ucopy.dma_cookie < 0) {
 
-                                       printk(KERN_ALERT "dma_cookie < 0\n");
+                                       pr_alert("%s: dma_cookie < 0\n",
+                                                __func__);
 
                                        /* Exception. Bailout! */
                                        if (!copied)
@@ -1881,6 +1879,20 @@ void tcp_shutdown(struct sock *sk, int how)
 }
 EXPORT_SYMBOL(tcp_shutdown);
 
+bool tcp_check_oom(struct sock *sk, int shift) /* Returns true when sk hits either limit below: too many orphans or out of socket memory. */
+{
+       bool too_many_orphans, out_of_socket_memory;
+
+       too_many_orphans = tcp_too_many_orphans(sk, shift); /* shift is forwarded unchanged to the orphan check */
+       out_of_socket_memory = tcp_out_of_memory(sk); /* evaluated unconditionally (no short-circuit) so each condition can be logged separately */
+
+       if (too_many_orphans && net_ratelimit()) /* message is emitted only when net_ratelimit() returns nonzero */
+               pr_info("too many orphaned sockets\n"); /* this file defines pr_fmt as "TCP: " fmt, which supplies the message prefix */
+       if (out_of_socket_memory && net_ratelimit()) /* net_ratelimit() is consulted again, independently of the check above */
+               pr_info("out of memory -- consider tuning tcp_mem\n");
+       return too_many_orphans || out_of_socket_memory; /* true if at least one of the two conditions holds */
+}
+
 void tcp_close(struct sock *sk, long timeout)
 {
        struct sk_buff *skb;
@@ -2020,10 +2032,7 @@ adjudge_to_death:
        }
        if (sk->sk_state != TCP_CLOSE) {
                sk_mem_reclaim(sk);
-               if (tcp_too_many_orphans(sk, 0)) {
-                       if (net_ratelimit())
-                               printk(KERN_INFO "TCP: too many of orphaned "
-                                      "sockets\n");
+               if (tcp_check_oom(sk, 0)) {
                        tcp_set_state(sk, TCP_CLOSE);
                        tcp_send_active_reset(sk, GFP_ATOMIC);
                        NET_INC_STATS_BH(sock_net(sk),
@@ -3221,11 +3230,21 @@ static int __init set_thash_entries(char *str)
 }
 __setup("thash_entries=", set_thash_entries);
 
+void tcp_init_mem(struct net *net) /* Fills net->ipv4.sysctl_tcp_mem[0..2] with defaults derived from nr_free_buffer_pages(). */
+{
+       unsigned long limit = nr_free_buffer_pages() / 8; /* base value: one eighth of what nr_free_buffer_pages() reports */
+       limit = max(limit, 128UL); /* enforce a floor of 128 */
+       net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3; /* three quarters of limit; the division runs first, so the result truncates */
+       net->ipv4.sysctl_tcp_mem[1] = limit; /* middle entry is the base value itself */
+       net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2; /* twice entry [0], i.e. roughly 1.5 * limit */
+}
+
 void __init tcp_init(void)
 {
        struct sk_buff *skb = NULL;
        unsigned long limit;
-       int i, max_share, cnt;
+       int max_share, cnt;
+       unsigned int i;
        unsigned long jiffy = jiffies;
 
        BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
@@ -3268,7 +3287,7 @@ void __init tcp_init(void)
                                        &tcp_hashinfo.bhash_size,
                                        NULL,
                                        64 * 1024);
-       tcp_hashinfo.bhash_size = 1 << tcp_hashinfo.bhash_size;
+       tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
        for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
                spin_lock_init(&tcp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
@@ -3281,14 +3300,10 @@ void __init tcp_init(void)
        sysctl_tcp_max_orphans = cnt / 2;
        sysctl_max_syn_backlog = max(128, cnt / 256);
 
-       limit = nr_free_buffer_pages() / 8;
-       limit = max(limit, 128UL);
-       sysctl_tcp_mem[0] = limit / 4 * 3;
-       sysctl_tcp_mem[1] = limit;
-       sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
-
+       tcp_init_mem(&init_net);
        /* Set per-socket limits to no more than 1/128 the pressure threshold */
-       limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
+       limit = nr_free_buffer_pages() << (PAGE_SHIFT - 10);
+       limit = max(limit, 128UL);
        max_share = min(4UL*1024*1024, limit);
 
        sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
@@ -3299,9 +3314,8 @@ void __init tcp_init(void)
        sysctl_tcp_rmem[1] = 87380;
        sysctl_tcp_rmem[2] = max(87380, max_share);
 
-       printk(KERN_INFO "TCP: Hash tables configured "
-              "(established %u bind %u)\n",
-              tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
+       pr_info("Hash tables configured (established %u bind %u)\n",
+               tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
 
        tcp_register_congestion_control(&tcp_reno);