diff --git a/kernel.spec b/kernel.spec index 457c2bc..a00c5a2 100644 --- a/kernel.spec +++ b/kernel.spec @@ -62,7 +62,7 @@ Summary: The Linux kernel # For non-released -rc kernels, this will be appended after the rcX and # gitX tags, so a 3 here would become part of release "0.rcX.gitX.3" # -%global baserelease 101 +%global baserelease 100 %global fedora_build %{baserelease} # base_sublevel is the kernel version we're starting with and patching @@ -74,7 +74,7 @@ Summary: The Linux kernel %if 0%{?released_kernel} # Do we have a -stable update to apply? -%define stable_update 6 +%define stable_update 7 # Is it a -stable RC? %define stable_rc 0 # Set rpm version accordingly @@ -786,9 +786,6 @@ Patch25133: fix-buslogic.patch #rhbz 1023413 Patch25135: alps-Support-for-Dell-XT2-model.patch -#CVE-2013-4470 rhbz 1023477 1023495 -Patch25136: net_311.mbox - #rhbz 1011621 Patch25137: cifs-Allow-LANMAN-auth-for-unencapsulated-auth-methods.patch @@ -1525,9 +1522,6 @@ ApplyPatch fix-buslogic.patch #rhbz 1023413 ApplyPatch alps-Support-for-Dell-XT2-model.patch -#CVE-2013-4470 rhbz 1023477 1023495 -ApplyPatch net_311.mbox - #rhbz 1011621 ApplyPatch cifs-Allow-LANMAN-auth-for-unencapsulated-auth-methods.patch @@ -2382,6 +2376,9 @@ fi # ||----w | # || || %changelog +* Mon Nov 04 2013 Justin M. Forbes - 3.11.7-100 +- Linux v3.11.7 + * Fri Nov 01 2013 Josh Boyer - 3.11.6-101 - Revert blocking patches causing systemd to crash on resume (rhbz 1010603) - CVE-2013-4348 net: deadloop path in skb_flow_dissect (rhbz 1007939 1025647) diff --git a/net_311.mbox b/net_311.mbox deleted file mode 100644 index d420777..0000000 --- a/net_311.mbox +++ /dev/null @@ -1,3794 +0,0 @@ -From 5444e381f5784d32d741864312909d2a6afe428e Mon Sep 17 00:00:00 2001 -From: Eric Dumazet -Date: Tue, 27 Aug 2013 05:46:32 -0700 -Subject: [PATCH 01/47] tcp: TSO packets automatic sizing - -[ Upstream commits 6d36824e730f247b602c90e8715a792003e3c5a7, - 02cf4ebd82ff0ac7254b88e466820a290ed8289a, and parts of - 7eec4174ff29cd42f2acfae8112f51c228545d40 ] - -After hearing many people over past years complaining against TSO being -bursty or even buggy, we are proud to present automatic sizing of TSO -packets. - -One part of the problem is that tcp_tso_should_defer() uses an heuristic -relying on upcoming ACKS instead of a timer, but more generally, having -big TSO packets makes little sense for low rates, as it tends to create -micro bursts on the network, and general consensus is to reduce the -buffering amount. - -This patch introduces a per socket sk_pacing_rate, that approximates -the current sending rate, and allows us to size the TSO packets so -that we try to send one packet every ms. - -This field could be set by other transports. - -Patch has no impact for high speed flows, where having large TSO packets -makes sense to reach line rate. - -For other flows, this helps better packet scheduling and ACK clocking. - -This patch increases performance of TCP flows in lossy environments. - -A new sysctl (tcp_min_tso_segs) is added, to specify the -minimal size of a TSO packet (default being 2). - -A follow-up patch will provide a new packet scheduler (FQ), using -sk_pacing_rate as an input to perform optional per flow pacing. - -This explains why we chose to set sk_pacing_rate to twice the current -rate, allowing 'slow start' ramp up. - -sk_pacing_rate = 2 * cwnd * mss / srtt - -v2: Neal Cardwell reported a suspect deferring of last two segments on -initial write of 10 MSS, I had to change tcp_tso_should_defer() to take -into account tp->xmit_size_goal_segs - -Signed-off-by: Eric Dumazet -Cc: Neal Cardwell -Cc: Yuchung Cheng -Cc: Van Jacobson -Cc: Tom Herbert -Acked-by: Yuchung Cheng -Acked-by: Neal Cardwell -Signed-off-by: David S. Miller ---- - Documentation/networking/ip-sysctl.txt | 9 +++++++++ - include/net/sock.h | 2 ++ - include/net/tcp.h | 1 + - net/core/sock.c | 1 + - net/ipv4/sysctl_net_ipv4.c | 10 ++++++++++ - net/ipv4/tcp.c | 28 +++++++++++++++++++++++----- - net/ipv4/tcp_input.c | 34 +++++++++++++++++++++++++++++++++- - net/ipv4/tcp_output.c | 2 +- - 8 files changed, 80 insertions(+), 7 deletions(-) - -diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt -index 1074290..b522883 100644 ---- a/Documentation/networking/ip-sysctl.txt -+++ b/Documentation/networking/ip-sysctl.txt -@@ -478,6 +478,15 @@ tcp_syn_retries - INTEGER - tcp_timestamps - BOOLEAN - Enable timestamps as defined in RFC1323. - -+tcp_min_tso_segs - INTEGER -+ Minimal number of segments per TSO frame. -+ Since linux-3.12, TCP does an automatic sizing of TSO frames, -+ depending on flow rate, instead of filling 64Kbytes packets. -+ For specific usages, it's possible to force TCP to build big -+ TSO frames. Note that TCP stack might split too big TSO packets -+ if available window is too small. -+ Default: 2 -+ - tcp_tso_win_divisor - INTEGER - This allows control over what percentage of the congestion window - can be consumed by a single TSO frame. -diff --git a/include/net/sock.h b/include/net/sock.h -index 31d5cfb..04e148f 100644 ---- a/include/net/sock.h -+++ b/include/net/sock.h -@@ -232,6 +232,7 @@ struct cg_proto; - * @sk_napi_id: id of the last napi context to receive data for sk - * @sk_ll_usec: usecs to busypoll when there is no data - * @sk_allocation: allocation mode -+ * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler) - * @sk_sndbuf: size of send buffer in bytes - * @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, - * %SO_OOBINLINE settings, %SO_TIMESTAMPING settings -@@ -361,6 +362,7 @@ struct sock { - kmemcheck_bitfield_end(flags); - int sk_wmem_queued; - gfp_t sk_allocation; -+ u32 sk_pacing_rate; /* bytes per second */ - netdev_features_t sk_route_caps; - netdev_features_t sk_route_nocaps; - int sk_gso_type; -diff --git a/include/net/tcp.h b/include/net/tcp.h -index d198005..46cb8a4 100644 ---- a/include/net/tcp.h -+++ b/include/net/tcp.h -@@ -284,6 +284,7 @@ extern int sysctl_tcp_thin_dupack; - extern int sysctl_tcp_early_retrans; - extern int sysctl_tcp_limit_output_bytes; - extern int sysctl_tcp_challenge_ack_limit; -+extern int sysctl_tcp_min_tso_segs; - - extern atomic_long_t tcp_memory_allocated; - extern struct percpu_counter tcp_sockets_allocated; -diff --git a/net/core/sock.c b/net/core/sock.c -index 2c097c5..8729d91 100644 ---- a/net/core/sock.c -+++ b/net/core/sock.c -@@ -2297,6 +2297,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) - sk->sk_ll_usec = sysctl_net_busy_read; - #endif - -+ sk->sk_pacing_rate = ~0U; - /* - * Before updating sk_refcnt, we must commit prior changes to memory - * (Documentation/RCU/rculist_nulls.txt for details) -diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c -index 610e324..6900b8b 100644 ---- a/net/ipv4/sysctl_net_ipv4.c -+++ b/net/ipv4/sysctl_net_ipv4.c -@@ -29,6 +29,7 @@ - static int zero; - static int one = 1; - static int four = 4; -+static int gso_max_segs = GSO_MAX_SEGS; - static int tcp_retr1_max = 255; - static int ip_local_port_range_min[] = { 1, 1 }; - static int ip_local_port_range_max[] = { 65535, 65535 }; -@@ -754,6 +755,15 @@ static struct ctl_table ipv4_table[] = { - .extra2 = &four, - }, - { -+ .procname = "tcp_min_tso_segs", -+ .data = &sysctl_tcp_min_tso_segs, -+ .maxlen = sizeof(int), -+ .mode = 0644, -+ .proc_handler = proc_dointvec_minmax, -+ .extra1 = &zero, -+ .extra2 = &gso_max_segs, -+ }, -+ { - .procname = "udp_mem", - .data = &sysctl_udp_mem, - .maxlen = sizeof(sysctl_udp_mem), -diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c -index 95544e4..ec586e5 100644 ---- a/net/ipv4/tcp.c -+++ b/net/ipv4/tcp.c -@@ -283,6 +283,8 @@ - - int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; - -+int sysctl_tcp_min_tso_segs __read_mostly = 2; -+ - struct percpu_counter tcp_orphan_count; - EXPORT_SYMBOL_GPL(tcp_orphan_count); - -@@ -789,12 +791,28 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, - xmit_size_goal = mss_now; - - if (large_allowed && sk_can_gso(sk)) { -- xmit_size_goal = ((sk->sk_gso_max_size - 1) - -- inet_csk(sk)->icsk_af_ops->net_header_len - -- inet_csk(sk)->icsk_ext_hdr_len - -- tp->tcp_header_len); -+ u32 gso_size, hlen; -+ -+ /* Maybe we should/could use sk->sk_prot->max_header here ? */ -+ hlen = inet_csk(sk)->icsk_af_ops->net_header_len + -+ inet_csk(sk)->icsk_ext_hdr_len + -+ tp->tcp_header_len; -+ -+ /* Goal is to send at least one packet per ms, -+ * not one big TSO packet every 100 ms. -+ * This preserves ACK clocking and is consistent -+ * with tcp_tso_should_defer() heuristic. -+ */ -+ gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC); -+ gso_size = max_t(u32, gso_size, -+ sysctl_tcp_min_tso_segs * mss_now); -+ -+ xmit_size_goal = min_t(u32, gso_size, -+ sk->sk_gso_max_size - 1 - hlen); - -- /* TSQ : try to have two TSO segments in flight */ -+ /* TSQ : try to have at least two segments in flight -+ * (one in NIC TX ring, another in Qdisc) -+ */ - xmit_size_goal = min_t(u32, xmit_size_goal, - sysctl_tcp_limit_output_bytes >> 1); - -diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index 3ca2139..2f0e94b 100644 ---- a/net/ipv4/tcp_input.c -+++ b/net/ipv4/tcp_input.c -@@ -688,6 +688,34 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) - } - } - -+/* Set the sk_pacing_rate to allow proper sizing of TSO packets. -+ * Note: TCP stack does not yet implement pacing. -+ * FQ packet scheduler can be used to implement cheap but effective -+ * TCP pacing, to smooth the burst on large writes when packets -+ * in flight is significantly lower than cwnd (or rwin) -+ */ -+static void tcp_update_pacing_rate(struct sock *sk) -+{ -+ const struct tcp_sock *tp = tcp_sk(sk); -+ u64 rate; -+ -+ /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */ -+ rate = (u64)tp->mss_cache * 2 * (HZ << 3); -+ -+ rate *= max(tp->snd_cwnd, tp->packets_out); -+ -+ /* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3), -+ * be conservative and assume srtt = 1 (125 us instead of 1.25 ms) -+ * We probably need usec resolution in the future. -+ * Note: This also takes care of possible srtt=0 case, -+ * when tcp_rtt_estimator() was not yet called. -+ */ -+ if (tp->srtt > 8 + 2) -+ do_div(rate, tp->srtt); -+ -+ sk->sk_pacing_rate = min_t(u64, rate, ~0U); -+} -+ - /* Calculate rto without backoff. This is the second half of Van Jacobson's - * routine referred to above. - */ -@@ -3269,7 +3297,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) - u32 ack_seq = TCP_SKB_CB(skb)->seq; - u32 ack = TCP_SKB_CB(skb)->ack_seq; - bool is_dupack = false; -- u32 prior_in_flight; -+ u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt; - u32 prior_fackets; - int prior_packets = tp->packets_out; - const int prior_unsacked = tp->packets_out - tp->sacked_out; -@@ -3375,6 +3403,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) - - if (icsk->icsk_pending == ICSK_TIME_RETRANS) - tcp_schedule_loss_probe(sk); -+ if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd) -+ tcp_update_pacing_rate(sk); - return 1; - - no_queue: -@@ -5671,6 +5701,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, - } else - tcp_init_metrics(sk); - -+ tcp_update_pacing_rate(sk); -+ - /* Prevent spurious tcp_cwnd_restart() on first data packet */ - tp->lsndtime = tcp_time_stamp; - -diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c -index 170737a..7b263c3 100644 ---- a/net/ipv4/tcp_output.c -+++ b/net/ipv4/tcp_output.c -@@ -1628,7 +1628,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) - - /* If a full-sized TSO skb can be sent, do it. */ - if (limit >= min_t(unsigned int, sk->sk_gso_max_size, -- sk->sk_gso_max_segs * tp->mss_cache)) -+ tp->xmit_size_goal_segs * tp->mss_cache)) - goto send_now; - - /* Middle in queue won't get any more data, full sendable already? */ --- -1.7.11.7 - - -From 1b6c7d9979e1db1d42bd0545452a9d204c019582 Mon Sep 17 00:00:00 2001 -From: Eric Dumazet -Date: Fri, 27 Sep 2013 03:28:54 -0700 -Subject: [PATCH 02/47] tcp: TSQ can use a dynamic limit - -[ Upstream commit c9eeec26e32e087359160406f96e0949b3cc6f10 ] - -When TCP Small Queues was added, we used a sysctl to limit amount of -packets queues on Qdisc/device queues for a given TCP flow. - -Problem is this limit is either too big for low rates, or too small -for high rates. - -Now TCP stack has rate estimation in sk->sk_pacing_rate, and TSO -auto sizing, it can better control number of packets in Qdisc/device -queues. - -New limit is two packets or at least 1 to 2 ms worth of packets. - -Low rates flows benefit from this patch by having even smaller -number of packets in queues, allowing for faster recovery, -better RTT estimations. - -High rates flows benefit from this patch by allowing more than 2 packets -in flight as we had reports this was a limiting factor to reach line -rate. [ In particular if TX completion is delayed because of coalescing -parameters ] - -Example for a single flow on 10Gbp link controlled by FQ/pacing - -14 packets in flight instead of 2 - -$ tc -s -d qd -qdisc fq 8001: dev eth0 root refcnt 32 limit 10000p flow_limit 100p -buckets 1024 quantum 3028 initial_quantum 15140 - Sent 1168459366606 bytes 771822841 pkt (dropped 0, overlimits 0 -requeues 6822476) - rate 9346Mbit 771713pps backlog 953820b 14p requeues 6822476 - 2047 flow, 2046 inactive, 1 throttled, delay 15673 ns - 2372 gc, 0 highprio, 0 retrans, 9739249 throttled, 0 flows_plimit - -Note that sk_pacing_rate is currently set to twice the actual rate, but -this might be refined in the future when a flow is in congestion -avoidance. - -Additional change : skb->destructor should be set to tcp_wfree(). - -A future patch (for linux 3.13+) might remove tcp_limit_output_bytes - -Signed-off-by: Eric Dumazet -Cc: Wei Liu -Cc: Cong Wang -Cc: Yuchung Cheng -Cc: Neal Cardwell -Acked-by: Neal Cardwell -Signed-off-by: David S. Miller ---- - net/ipv4/tcp_output.c | 17 +++++++++++------ - 1 file changed, 11 insertions(+), 6 deletions(-) - -diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c -index 7b263c3..fe897ed 100644 ---- a/net/ipv4/tcp_output.c -+++ b/net/ipv4/tcp_output.c -@@ -892,8 +892,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, - - skb_orphan(skb); - skb->sk = sk; -- skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ? -- tcp_wfree : sock_wfree; -+ skb->destructor = tcp_wfree; - atomic_add(skb->truesize, &sk->sk_wmem_alloc); - - /* Build TCP header and checksum it. */ -@@ -1837,7 +1836,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, - while ((skb = tcp_send_head(sk))) { - unsigned int limit; - -- - tso_segs = tcp_init_tso_segs(sk, skb, mss_now); - BUG_ON(!tso_segs); - -@@ -1866,13 +1864,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, - break; - } - -- /* TSQ : sk_wmem_alloc accounts skb truesize, -- * including skb overhead. But thats OK. -+ /* TCP Small Queues : -+ * Control number of packets in qdisc/devices to two packets / or ~1 ms. -+ * This allows for : -+ * - better RTT estimation and ACK scheduling -+ * - faster recovery -+ * - high rates - */ -- if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) { -+ limit = max(skb->truesize, sk->sk_pacing_rate >> 10); -+ -+ if (atomic_read(&sk->sk_wmem_alloc) > limit) { - set_bit(TSQ_THROTTLED, &tp->tsq_flags); - break; - } -+ - limit = mss_now; - if (tso_segs > 1 && !tcp_urg_mode(tp)) - limit = tcp_mss_split_point(sk, skb, mss_now, --- -1.7.11.7 - - -From 4f25abff83e2780265eaa17d437b7659ea543bd5 Mon Sep 17 00:00:00 2001 -From: Eric Dumazet -Date: Tue, 15 Oct 2013 11:54:30 -0700 -Subject: [PATCH 03/47] tcp: must unclone packets before mangling them - -[ Upstream commit c52e2421f7368fd36cbe330d2cf41b10452e39a9 ] - -TCP stack should make sure it owns skbs before mangling them. - -We had various crashes using bnx2x, and it turned out gso_size -was cleared right before bnx2x driver was populating TC descriptor -of the _previous_ packet send. TCP stack can sometime retransmit -packets that are still in Qdisc. - -Of course we could make bnx2x driver more robust (using -ACCESS_ONCE(shinfo->gso_size) for example), but the bug is TCP stack. - -We have identified two points where skb_unclone() was needed. - -This patch adds a WARN_ON_ONCE() to warn us if we missed another -fix of this kind. - -Kudos to Neal for finding the root cause of this bug. Its visible -using small MSS. - -Signed-off-by: Eric Dumazet -Signed-off-by: Neal Cardwell -Cc: Yuchung Cheng -Signed-off-by: David S. Miller ---- - net/ipv4/tcp_output.c | 9 ++++++--- - 1 file changed, 6 insertions(+), 3 deletions(-) - -diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c -index fe897ed..28c0d6a 100644 ---- a/net/ipv4/tcp_output.c -+++ b/net/ipv4/tcp_output.c -@@ -981,6 +981,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) - static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, - unsigned int mss_now) - { -+ /* Make sure we own this skb before messing gso_size/gso_segs */ -+ WARN_ON_ONCE(skb_cloned(skb)); -+ - if (skb->len <= mss_now || !sk_can_gso(sk) || - skb->ip_summed == CHECKSUM_NONE) { - /* Avoid the costly divide in the normal -@@ -1062,9 +1065,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, - if (nsize < 0) - nsize = 0; - -- if (skb_cloned(skb) && -- skb_is_nonlinear(skb) && -- pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) -+ if (skb_unclone(skb, GFP_ATOMIC)) - return -ENOMEM; - - /* Get a new skb... force flag on. */ -@@ -2339,6 +2340,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) - int oldpcount = tcp_skb_pcount(skb); - - if (unlikely(oldpcount > 1)) { -+ if (skb_unclone(skb, GFP_ATOMIC)) -+ return -ENOMEM; - tcp_init_tso_segs(sk, skb, cur_mss); - tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb)); - } --- -1.7.11.7 - - -From 8731e25f7527ca851045eb0715d998d1ac07aadb Mon Sep 17 00:00:00 2001 -From: Eric Dumazet -Date: Fri, 4 Oct 2013 10:31:41 -0700 -Subject: [PATCH 04/47] tcp: do not forget FIN in tcp_shifted_skb() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit 5e8a402f831dbe7ee831340a91439e46f0d38acd ] - -Yuchung found following problem : - - There are bugs in the SACK processing code, merging part in - tcp_shift_skb_data(), that incorrectly resets or ignores the sacked - skbs FIN flag. When a receiver first SACK the FIN sequence, and later - throw away ofo queue (e.g., sack-reneging), the sender will stop - retransmitting the FIN flag, and hangs forever. - -Following packetdrill test can be used to reproduce the bug. - -$ cat sack-merge-bug.pkt -`sysctl -q net.ipv4.tcp_fack=0` - -// Establish a connection and send 10 MSS. -0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 -+.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 -+.000 bind(3, ..., ...) = 0 -+.000 listen(3, 1) = 0 - -+.050 < S 0:0(0) win 32792 -+.000 > S. 0:0(0) ack 1 -+.001 < . 1:1(0) ack 1 win 1024 -+.000 accept(3, ..., ...) = 4 - -+.100 write(4, ..., 12000) = 12000 -+.000 shutdown(4, SHUT_WR) = 0 -+.000 > . 1:10001(10000) ack 1 -+.050 < . 1:1(0) ack 2001 win 257 -+.000 > FP. 10001:12001(2000) ack 1 -+.050 < . 1:1(0) ack 2001 win 257 -+.050 < . 1:1(0) ack 2001 win 257 -// SACK reneg -+.050 < . 1:1(0) ack 12001 win 257 -+0 %{ print "unacked: ",tcpi_unacked }% -+5 %{ print "" }% - -First, a typo inverted left/right of one OR operation, then -code forgot to advance end_seq if the merged skb carried FIN. - -Bug was added in 2.6.29 by commit 832d11c5cd076ab -("tcp: Try to restore large SKBs while SACK processing") - -Signed-off-by: Eric Dumazet -Signed-off-by: Yuchung Cheng -Acked-by: Neal Cardwell -Cc: Ilpo Järvinen -Acked-by: Ilpo Järvinen -Signed-off-by: David S. Miller ---- - net/ipv4/tcp_input.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index 2f0e94b..61e2360 100644 ---- a/net/ipv4/tcp_input.c -+++ b/net/ipv4/tcp_input.c -@@ -1279,7 +1279,10 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, - tp->lost_cnt_hint -= tcp_skb_pcount(prev); - } - -- TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags; -+ TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; -+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) -+ TCP_SKB_CB(prev)->end_seq++; -+ - if (skb == tcp_highest_sack(sk)) - tcp_advance_highest_sack(sk, skb); - --- -1.7.11.7 - - -From bfc0a00d669a4fa0835c417f01c50c18996d1e60 Mon Sep 17 00:00:00 2001 -From: Yuchung Cheng -Date: Sat, 12 Oct 2013 10:16:27 -0700 -Subject: [PATCH 05/47] tcp: fix incorrect ca_state in tail loss probe - -[ Upstream commit 031afe4990a7c9dbff41a3a742c44d3e740ea0a1 ] - -On receiving an ACK that covers the loss probe sequence, TLP -immediately sets the congestion state to Open, even though some packets -are not recovered and retransmisssion are on the way. The later ACks -may trigger a WARN_ON check in step D of tcp_fastretrans_alert(), e.g., -https://bugzilla.redhat.com/show_bug.cgi?id=989251 - -The fix is to follow the similar procedure in recovery by calling -tcp_try_keep_open(). The sender switches to Open state if no packets -are retransmissted. Otherwise it goes to Disorder and let subsequent -ACKs move the state to Recovery or Open. - -Reported-By: Michael Sterrett -Tested-By: Dormando -Signed-off-by: Yuchung Cheng -Acked-by: Neal Cardwell -Signed-off-by: David S. Miller ---- - net/ipv4/tcp_input.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index 61e2360..723951a 100644 ---- a/net/ipv4/tcp_input.c -+++ b/net/ipv4/tcp_input.c -@@ -3284,7 +3284,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) - tcp_init_cwnd_reduction(sk, true); - tcp_set_ca_state(sk, TCP_CA_CWR); - tcp_end_cwnd_reduction(sk); -- tcp_set_ca_state(sk, TCP_CA_Open); -+ tcp_try_keep_open(sk); - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPLOSSPROBERECOVERY); - } --- -1.7.11.7 - - -From 05c9fdfad860abd64136d8ccd88dbf84e40bd5f5 Mon Sep 17 00:00:00 2001 -From: Eric Dumazet -Date: Tue, 1 Oct 2013 21:04:11 -0700 -Subject: [PATCH 06/47] net: do not call sock_put() on TIMEWAIT sockets - -[ Upstream commit 80ad1d61e72d626e30ebe8529a0455e660ca4693 ] - -commit 3ab5aee7fe84 ("net: Convert TCP & DCCP hash tables to use RCU / -hlist_nulls") incorrectly used sock_put() on TIMEWAIT sockets. - -We should instead use inet_twsk_put() - -Signed-off-by: Eric Dumazet -Signed-off-by: David S. Miller ---- - net/ipv4/inet_hashtables.c | 2 +- - net/ipv6/inet6_hashtables.c | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c -index 7bd8983..96da9c7 100644 ---- a/net/ipv4/inet_hashtables.c -+++ b/net/ipv4/inet_hashtables.c -@@ -287,7 +287,7 @@ begintw: - if (unlikely(!INET_TW_MATCH(sk, net, acookie, - saddr, daddr, ports, - dif))) { -- sock_put(sk); -+ inet_twsk_put(inet_twsk(sk)); - goto begintw; - } - goto out; -diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c -index 32b4a16..066640e 100644 ---- a/net/ipv6/inet6_hashtables.c -+++ b/net/ipv6/inet6_hashtables.c -@@ -116,7 +116,7 @@ begintw: - } - if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr, - ports, dif))) { -- sock_put(sk); -+ inet_twsk_put(inet_twsk(sk)); - goto begintw; - } - goto out; --- -1.7.11.7 - - -From bc7fd34d31c17b0e4c100013e77277a2ed7e15cf Mon Sep 17 00:00:00 2001 -From: Matthias Schiffer -Date: Fri, 27 Sep 2013 18:03:39 +0200 -Subject: [PATCH 07/47] batman-adv: set up network coding packet handlers - during module init - -[ Upstream commit 6c519bad7b19a2c14a075b400edabaa630330123 ] - -batman-adv saves its table of packet handlers as a global state, so handlers -must be set up only once (and setting them up a second time will fail). - -The recently-added network coding support tries to set up its handler each time -a new softif is registered, which obviously fails when more that one softif is -used (and in consequence, the softif creation fails). - -Fix this by splitting up batadv_nc_init into batadv_nc_init (which is called -only once) and batadv_nc_mesh_init (which is called for each softif); in -addition batadv_nc_free is renamed to batadv_nc_mesh_free to keep naming -consistent. - -Signed-off-by: Matthias Schiffer -Signed-off-by: Marek Lindner -Signed-off-by: Antonio Quartulli ---- - net/batman-adv/main.c | 5 +++-- - net/batman-adv/network-coding.c | 28 ++++++++++++++++++---------- - net/batman-adv/network-coding.h | 14 ++++++++++---- - 3 files changed, 31 insertions(+), 16 deletions(-) - -diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c -index 08125f3..c8e0671 100644 ---- a/net/batman-adv/main.c -+++ b/net/batman-adv/main.c -@@ -61,6 +61,7 @@ static int __init batadv_init(void) - batadv_recv_handler_init(); - - batadv_iv_init(); -+ batadv_nc_init(); - - batadv_event_workqueue = create_singlethread_workqueue("bat_events"); - -@@ -138,7 +139,7 @@ int batadv_mesh_init(struct net_device *soft_iface) - if (ret < 0) - goto err; - -- ret = batadv_nc_init(bat_priv); -+ ret = batadv_nc_mesh_init(bat_priv); - if (ret < 0) - goto err; - -@@ -163,7 +164,7 @@ void batadv_mesh_free(struct net_device *soft_iface) - batadv_vis_quit(bat_priv); - - batadv_gw_node_purge(bat_priv); -- batadv_nc_free(bat_priv); -+ batadv_nc_mesh_free(bat_priv); - batadv_dat_free(bat_priv); - batadv_bla_free(bat_priv); - -diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c -index a487d46..4ecc0b6 100644 ---- a/net/batman-adv/network-coding.c -+++ b/net/batman-adv/network-coding.c -@@ -35,6 +35,20 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, - struct batadv_hard_iface *recv_if); - - /** -+ * batadv_nc_init - one-time initialization for network coding -+ */ -+int __init batadv_nc_init(void) -+{ -+ int ret; -+ -+ /* Register our packet type */ -+ ret = batadv_recv_handler_register(BATADV_CODED, -+ batadv_nc_recv_coded_packet); -+ -+ return ret; -+} -+ -+/** - * batadv_nc_start_timer - initialise the nc periodic worker - * @bat_priv: the bat priv with all the soft interface information - */ -@@ -45,10 +59,10 @@ static void batadv_nc_start_timer(struct batadv_priv *bat_priv) - } - - /** -- * batadv_nc_init - initialise coding hash table and start house keeping -+ * batadv_nc_mesh_init - initialise coding hash table and start house keeping - * @bat_priv: the bat priv with all the soft interface information - */ --int batadv_nc_init(struct batadv_priv *bat_priv) -+int batadv_nc_mesh_init(struct batadv_priv *bat_priv) - { - bat_priv->nc.timestamp_fwd_flush = jiffies; - bat_priv->nc.timestamp_sniffed_purge = jiffies; -@@ -70,11 +84,6 @@ int batadv_nc_init(struct batadv_priv *bat_priv) - batadv_hash_set_lock_class(bat_priv->nc.coding_hash, - &batadv_nc_decoding_hash_lock_class_key); - -- /* Register our packet type */ -- if (batadv_recv_handler_register(BATADV_CODED, -- batadv_nc_recv_coded_packet) < 0) -- goto err; -- - INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker); - batadv_nc_start_timer(bat_priv); - -@@ -1721,12 +1730,11 @@ free_nc_packet: - } - - /** -- * batadv_nc_free - clean up network coding memory -+ * batadv_nc_mesh_free - clean up network coding memory - * @bat_priv: the bat priv with all the soft interface information - */ --void batadv_nc_free(struct batadv_priv *bat_priv) -+void batadv_nc_mesh_free(struct batadv_priv *bat_priv) - { -- batadv_recv_handler_unregister(BATADV_CODED); - cancel_delayed_work_sync(&bat_priv->nc.work); - - batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL); -diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h -index 85a4ec8..ddfa618 100644 ---- a/net/batman-adv/network-coding.h -+++ b/net/batman-adv/network-coding.h -@@ -22,8 +22,9 @@ - - #ifdef CONFIG_BATMAN_ADV_NC - --int batadv_nc_init(struct batadv_priv *bat_priv); --void batadv_nc_free(struct batadv_priv *bat_priv); -+int batadv_nc_init(void); -+int batadv_nc_mesh_init(struct batadv_priv *bat_priv); -+void batadv_nc_mesh_free(struct batadv_priv *bat_priv); - void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node, - struct batadv_orig_node *orig_neigh_node, -@@ -46,12 +47,17 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv); - - #else /* ifdef CONFIG_BATMAN_ADV_NC */ - --static inline int batadv_nc_init(struct batadv_priv *bat_priv) -+static inline int batadv_nc_init(void) - { - return 0; - } - --static inline void batadv_nc_free(struct batadv_priv *bat_priv) -+static inline int batadv_nc_mesh_init(struct batadv_priv *bat_priv) -+{ -+ return 0; -+} -+ -+static inline void batadv_nc_mesh_free(struct batadv_priv *bat_priv) - { - return; - } --- -1.7.11.7 - - -From 8be4005ed947924104df5850944a20b7f6570137 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Fran=C3=A7ois=20Cachereul?= -Date: Wed, 2 Oct 2013 10:16:02 +0200 -Subject: [PATCH 08/47] l2tp: fix kernel panic when using IPv4-mapped IPv6 - addresses -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit e18503f41f9b12132c95d7c31ca6ee5155e44e5c ] - -IPv4 mapped addresses cause kernel panic. -The patch juste check whether the IPv6 address is an IPv4 mapped -address. If so, use IPv4 API instead of IPv6. - -[ 940.026915] general protection fault: 0000 [#1] -[ 940.026915] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core pppox ppp_generic slhc loop psmouse -[ 940.026915] CPU: 0 PID: 3184 Comm: memcheck-amd64- Not tainted 3.11.0+ #1 -[ 940.026915] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 -[ 940.026915] task: ffff880007130e20 ti: ffff88000737e000 task.ti: ffff88000737e000 -[ 940.026915] RIP: 0010:[] [] ip6_xmit+0x276/0x326 -[ 940.026915] RSP: 0018:ffff88000737fd28 EFLAGS: 00010286 -[ 940.026915] RAX: c748521a75ceff48 RBX: ffff880000c30800 RCX: 0000000000000000 -[ 940.026915] RDX: ffff88000075cc4e RSI: 0000000000000028 RDI: ffff8800060e5a40 -[ 940.026915] RBP: ffff8800060e5a40 R08: 0000000000000000 R09: ffff88000075cc90 -[ 940.026915] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88000737fda0 -[ 940.026915] R13: 0000000000000000 R14: 0000000000002000 R15: ffff880005d3b580 -[ 940.026915] FS: 00007f163dc5e800(0000) GS:ffffffff81623000(0000) knlGS:0000000000000000 -[ 940.026915] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 -[ 940.026915] CR2: 00000004032dc940 CR3: 0000000005c25000 CR4: 00000000000006f0 -[ 940.026915] Stack: -[ 940.026915] ffff88000075cc4e ffffffff81694e90 ffff880000c30b38 0000000000000020 -[ 940.026915] 11000000523c4bac ffff88000737fdb4 0000000000000000 ffff880000c30800 -[ 940.026915] ffff880005d3b580 ffff880000c30b38 ffff8800060e5a40 0000000000000020 -[ 940.026915] Call Trace: -[ 940.026915] [] ? inet6_csk_xmit+0xa4/0xc4 -[ 940.026915] [] ? l2tp_xmit_skb+0x503/0x55a [l2tp_core] -[ 940.026915] [] ? pskb_expand_head+0x161/0x214 -[ 940.026915] [] ? pppol2tp_xmit+0xf2/0x143 [l2tp_ppp] -[ 940.026915] [] ? ppp_channel_push+0x36/0x8b [ppp_generic] -[ 940.026915] [] ? ppp_write+0xaf/0xc5 [ppp_generic] -[ 940.026915] [] ? vfs_write+0xa2/0x106 -[ 940.026915] [] ? SyS_write+0x56/0x8a -[ 940.026915] [] ? system_call_fastpath+0x16/0x1b -[ 940.026915] Code: 00 49 8b 8f d8 00 00 00 66 83 7c 11 02 00 74 60 49 -8b 47 58 48 83 e0 fe 48 8b 80 18 01 00 00 48 85 c0 74 13 48 8b 80 78 02 -00 00 <48> ff 40 28 41 8b 57 68 48 01 50 30 48 8b 54 24 08 49 c7 c1 51 -[ 940.026915] RIP [] ip6_xmit+0x276/0x326 -[ 940.026915] RSP -[ 940.057945] ---[ end trace be8aba9a61c8b7f3 ]--- -[ 940.058583] Kernel panic - not syncing: Fatal exception in interrupt - -Signed-off-by: François CACHEREUL -Signed-off-by: David S. Miller ---- - net/l2tp/l2tp_core.c | 27 +++++++++++++++++++++++---- - net/l2tp/l2tp_core.h | 3 +++ - 2 files changed, 26 insertions(+), 4 deletions(-) - -diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c -index feae495..aedaa2c 100644 ---- a/net/l2tp/l2tp_core.c -+++ b/net/l2tp/l2tp_core.c -@@ -496,6 +496,7 @@ out: - static inline int l2tp_verify_udp_checksum(struct sock *sk, - struct sk_buff *skb) - { -+ struct l2tp_tunnel *tunnel = (struct l2tp_tunnel *)sk->sk_user_data; - struct udphdr *uh = udp_hdr(skb); - u16 ulen = ntohs(uh->len); - __wsum psum; -@@ -504,7 +505,7 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk, - return 0; - - #if IS_ENABLED(CONFIG_IPV6) -- if (sk->sk_family == PF_INET6) { -+ if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) { - if (!uh->check) { - LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n"); - return 1; -@@ -1128,7 +1129,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, - /* Queue the packet to IP for output */ - skb->local_df = 1; - #if IS_ENABLED(CONFIG_IPV6) -- if (skb->sk->sk_family == PF_INET6) -+ if (skb->sk->sk_family == PF_INET6 && !tunnel->v4mapped) - error = inet6_csk_xmit(skb, NULL); - else - #endif -@@ -1255,7 +1256,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len - - /* Calculate UDP checksum if configured to do so */ - #if IS_ENABLED(CONFIG_IPV6) -- if (sk->sk_family == PF_INET6) -+ if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) - l2tp_xmit_ipv6_csum(sk, skb, udp_len); - else - #endif -@@ -1704,6 +1705,24 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 - if (cfg != NULL) - tunnel->debug = cfg->debug; - -+#if IS_ENABLED(CONFIG_IPV6) -+ if (sk->sk_family == PF_INET6) { -+ struct ipv6_pinfo *np = inet6_sk(sk); -+ -+ if (ipv6_addr_v4mapped(&np->saddr) && -+ ipv6_addr_v4mapped(&np->daddr)) { -+ struct inet_sock *inet = inet_sk(sk); -+ -+ tunnel->v4mapped = true; -+ inet->inet_saddr = np->saddr.s6_addr32[3]; -+ inet->inet_rcv_saddr = np->rcv_saddr.s6_addr32[3]; -+ inet->inet_daddr = np->daddr.s6_addr32[3]; -+ } else { -+ tunnel->v4mapped = false; -+ } -+ } -+#endif -+ - /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ - tunnel->encap = encap; - if (encap == L2TP_ENCAPTYPE_UDP) { -@@ -1712,7 +1731,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 - udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; - udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy; - #if IS_ENABLED(CONFIG_IPV6) -- if (sk->sk_family == PF_INET6) -+ if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) - udpv6_encap_enable(); - else - #endif -diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h -index 66a559b..6f251cb 100644 ---- a/net/l2tp/l2tp_core.h -+++ b/net/l2tp/l2tp_core.h -@@ -194,6 +194,9 @@ struct l2tp_tunnel { - struct sock *sock; /* Parent socket */ - int fd; /* Parent fd, if tunnel socket - * was created by userspace */ -+#if IS_ENABLED(CONFIG_IPV6) -+ bool v4mapped; -+#endif - - struct work_struct del_work; - --- -1.7.11.7 - - -From 0ec2b01190b1a2ba020241ab89730bf7e7d77b9c Mon Sep 17 00:00:00 2001 -From: "David S. Miller" -Date: Tue, 8 Oct 2013 15:44:26 -0400 -Subject: [PATCH 09/47] l2tp: Fix build warning with ipv6 disabled. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit 8d8a51e26a6d415e1470759f2cf5f3ee3ee86196 ] - -net/l2tp/l2tp_core.c: In function ‘l2tp_verify_udp_checksum’: -net/l2tp/l2tp_core.c:499:22: warning: unused variable ‘tunnel’ [-Wunused-variable] - -Create a helper "l2tp_tunnel()" to facilitate this, and as a side -effect get rid of a bunch of unnecessary void pointer casts. - -Signed-off-by: David S. Miller ---- - net/l2tp/l2tp_core.c | 13 ++++++++----- - 1 file changed, 8 insertions(+), 5 deletions(-) - -diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c -index aedaa2c..b076e83 100644 ---- a/net/l2tp/l2tp_core.c -+++ b/net/l2tp/l2tp_core.c -@@ -115,6 +115,11 @@ struct l2tp_net { - static void l2tp_session_set_header_len(struct l2tp_session *session, int version); - static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); - -+static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) -+{ -+ return sk->sk_user_data; -+} -+ - static inline struct l2tp_net *l2tp_pernet(struct net *net) - { - BUG_ON(!net); -@@ -496,7 +501,6 @@ out: - static inline int l2tp_verify_udp_checksum(struct sock *sk, - struct sk_buff *skb) - { -- struct l2tp_tunnel *tunnel = (struct l2tp_tunnel *)sk->sk_user_data; - struct udphdr *uh = udp_hdr(skb); - u16 ulen = ntohs(uh->len); - __wsum psum; -@@ -505,7 +509,7 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk, - return 0; - - #if IS_ENABLED(CONFIG_IPV6) -- if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) { -+ if (sk->sk_family == PF_INET6 && !l2tp_tunnel(sk)->v4mapped) { - if (!uh->check) { - LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n"); - return 1; -@@ -1305,10 +1309,9 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb); - */ - static void l2tp_tunnel_destruct(struct sock *sk) - { -- struct l2tp_tunnel *tunnel; -+ struct l2tp_tunnel *tunnel = l2tp_tunnel(sk); - struct l2tp_net *pn; - -- tunnel = sk->sk_user_data; - if (tunnel == NULL) - goto end; - -@@ -1676,7 +1679,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 - } - - /* Check if this socket has already been prepped */ -- tunnel = (struct l2tp_tunnel *)sk->sk_user_data; -+ tunnel = l2tp_tunnel(sk); - if (tunnel != NULL) { - /* This socket has already been prepped */ - err = -EBUSY; --- -1.7.11.7 - - -From 35e64a9e465a85ffacd373439c1caa757e407656 Mon Sep 17 00:00:00 2001 -From: Sebastian Hesselbarth -Date: Wed, 2 Oct 2013 12:57:20 +0200 -Subject: [PATCH 10/47] net: mv643xx_eth: update statistics timer from timer - context only - -[ Upstream commit 041b4ddb84989f06ff1df0ca869b950f1ee3cb1c ] - -Each port driver installs a periodic timer to update port statistics -by calling mib_counters_update. As mib_counters_update is also called -from non-timer context, we should not reschedule the timer there but -rather move it to timer-only context. - -Signed-off-by: Sebastian Hesselbarth -Acked-by: Jason Cooper -Signed-off-by: David S. Miller ---- - drivers/net/ethernet/marvell/mv643xx_eth.c | 4 +--- - 1 file changed, 1 insertion(+), 3 deletions(-) - -diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c -index c35db73..51c138b 100644 ---- a/drivers/net/ethernet/marvell/mv643xx_eth.c -+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c -@@ -1131,15 +1131,13 @@ static void mib_counters_update(struct mv643xx_eth_private *mp) - p->rx_discard += rdlp(mp, RX_DISCARD_FRAME_CNT); - p->rx_overrun += rdlp(mp, RX_OVERRUN_FRAME_CNT); - spin_unlock_bh(&mp->mib_counters_lock); -- -- mod_timer(&mp->mib_counters_timer, jiffies + 30 * HZ); - } - - static void mib_counters_timer_wrapper(unsigned long _mp) - { - struct mv643xx_eth_private *mp = (void *)_mp; -- - mib_counters_update(mp); -+ mod_timer(&mp->mib_counters_timer, jiffies + 30 * HZ); - } - - --- -1.7.11.7 - - -From b6b20d9c54b23ba35c5807e45ff7d9579503bffa Mon Sep 17 00:00:00 2001 -From: Sebastian Hesselbarth -Date: Wed, 2 Oct 2013 12:57:21 +0200 -Subject: [PATCH 11/47] net: mv643xx_eth: fix orphaned statistics timer crash - -[ Upstream commit f564412c935111c583b787bcc18157377b208e2e ] - -The periodic statistics timer gets started at port _probe() time, but -is stopped on _stop() only. In a modular environment, this can cause -the timer to access already deallocated memory, if the module is unloaded -without starting the eth device. To fix this, we add the timer right -before the port is started, instead of at _probe() time. - -Signed-off-by: Sebastian Hesselbarth -Acked-by: Jason Cooper -Signed-off-by: David S. Miller ---- - drivers/net/ethernet/marvell/mv643xx_eth.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c -index 51c138b..39334d4 100644 ---- a/drivers/net/ethernet/marvell/mv643xx_eth.c -+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c -@@ -2235,6 +2235,7 @@ static int mv643xx_eth_open(struct net_device *dev) - mp->int_mask |= INT_TX_END_0 << i; - } - -+ add_timer(&mp->mib_counters_timer); - port_start(mp); - - wrlp(mp, INT_MASK_EXT, INT_EXT_LINK_PHY | INT_EXT_TX); -@@ -2914,7 +2915,6 @@ static int mv643xx_eth_probe(struct platform_device *pdev) - mp->mib_counters_timer.data = (unsigned long)mp; - mp->mib_counters_timer.function = mib_counters_timer_wrapper; - mp->mib_counters_timer.expires = jiffies + 30 * HZ; -- add_timer(&mp->mib_counters_timer); - - spin_lock_init(&mp->mib_counters_lock); - --- -1.7.11.7 - - -From b8baf1c21a214c1b836eef390c9d6e153293fef9 Mon Sep 17 00:00:00 2001 -From: Dan Carpenter -Date: Thu, 3 Oct 2013 00:27:20 +0300 -Subject: [PATCH 12/47] net: heap overflow in __audit_sockaddr() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit 1661bf364ae9c506bc8795fef70d1532931be1e8 ] - -We need to cap ->msg_namelen or it leads to a buffer overflow when we -to the memcpy() in __audit_sockaddr(). It requires CAP_AUDIT_CONTROL to -exploit this bug. - -The call tree is: -___sys_recvmsg() - move_addr_to_user() - audit_sockaddr() - __audit_sockaddr() - -Reported-by: Jüri Aedla -Signed-off-by: Dan Carpenter -Signed-off-by: David S. Miller ---- - net/compat.c | 2 ++ - net/socket.c | 24 ++++++++++++++++++++---- - 2 files changed, 22 insertions(+), 4 deletions(-) - -diff --git a/net/compat.c b/net/compat.c -index f0a1ba6..8903258 100644 ---- a/net/compat.c -+++ b/net/compat.c -@@ -71,6 +71,8 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg) - __get_user(kmsg->msg_controllen, &umsg->msg_controllen) || - __get_user(kmsg->msg_flags, &umsg->msg_flags)) - return -EFAULT; -+ if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) -+ return -EINVAL; - kmsg->msg_name = compat_ptr(tmp1); - kmsg->msg_iov = compat_ptr(tmp2); - kmsg->msg_control = compat_ptr(tmp3); -diff --git a/net/socket.c b/net/socket.c -index b2d7c62..4b94643 100644 ---- a/net/socket.c -+++ b/net/socket.c -@@ -1973,6 +1973,16 @@ struct used_address { - unsigned int name_len; - }; - -+static int copy_msghdr_from_user(struct msghdr *kmsg, -+ struct msghdr __user *umsg) -+{ -+ if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) -+ return -EFAULT; -+ if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) -+ return -EINVAL; -+ return 0; -+} -+ - static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, - struct msghdr *msg_sys, unsigned int flags, - struct used_address *used_address) -@@ -1991,8 +2001,11 @@ static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, - if (MSG_CMSG_COMPAT & flags) { - if (get_compat_msghdr(msg_sys, msg_compat)) - return -EFAULT; -- } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) -- return -EFAULT; -+ } else { -+ err = copy_msghdr_from_user(msg_sys, msg); -+ if (err) -+ return err; -+ } - - if (msg_sys->msg_iovlen > UIO_FASTIOV) { - err = -EMSGSIZE; -@@ -2200,8 +2213,11 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, - if (MSG_CMSG_COMPAT & flags) { - if (get_compat_msghdr(msg_sys, msg_compat)) - return -EFAULT; -- } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) -- return -EFAULT; -+ } else { -+ err = copy_msghdr_from_user(msg_sys, msg); -+ if (err) -+ return err; -+ } - - if (msg_sys->msg_iovlen > UIO_FASTIOV) { - err = -EMSGSIZE; --- -1.7.11.7 - - -From 6e24497ef79e18f5b1ddce66712d55093a6cf3e9 Mon Sep 17 00:00:00 2001 -From: Willem de Bruijn -Date: Tue, 22 Oct 2013 10:59:18 -0400 -Subject: [PATCH 13/47] sit: amend "allow to use rtnl ops on fb tunnel" - -Amend backport to 3.11.y of - - [ Upstream commit 205983c43700ac3a81e7625273a3fa83cd2759b5 ] - -The discussion thread in the upstream commit mentions that in -backports to stable-* branches, the line - - - unregister_netdevice_queue(sitn->fb_tunnel_dev, &list); - -must be omitted if that branch does not have commit 5e6700b3bf98 -("sit: add support of x-netns"). This line has correctly been omitted -in the backport to 3.10, which indeed does not have that commit. - -It was also removed in the backport to 3.11.y, which does have that -commit. - -This causes the following steps to hit a BUG at net/core/dev.c:5039: - - `modprobe sit; rmmod sit` - -The bug demonstrates that it causes a device to be unregistered twice. -The simple fix is to apply the one line in the upstream commit that -was dropped in the backport to 3.11 (3783100374653e2e7fbdf68c710f5). -This brings the logic in line with upstream linux, net and net-next -branches. - -Signed-off-by: Willem de Bruijn -Acked-by: Nicolas Dichtel -Reviewed-by: Veaceslav Falico ---- - net/ipv6/sit.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c -index 86f639b..a51ad07 100644 ---- a/net/ipv6/sit.c -+++ b/net/ipv6/sit.c -@@ -1708,7 +1708,6 @@ static void __net_exit sit_exit_net(struct net *net) - - rtnl_lock(); - sit_destroy_tunnels(sitn, &list); -- unregister_netdevice_queue(sitn->fb_tunnel_dev, &list); - unregister_netdevice_many(&list); - rtnl_unlock(); - } --- -1.7.11.7 - - -From 6c7e3c3382670fe98debedf2ddaff8abf2944bb4 Mon Sep 17 00:00:00 2001 -From: Mathias Krause -Date: Mon, 30 Sep 2013 22:03:06 +0200 -Subject: [PATCH 14/47] proc connector: fix info leaks - -[ Upstream commit e727ca82e0e9616ab4844301e6bae60ca7327682 ] - -Initialize event_data for all possible message types to prevent leaking -kernel stack contents to userland (up to 20 bytes). Also set the flags -member of the connector message to 0 to prevent leaking two more stack -bytes this way. - -Cc: stable@vger.kernel.org # v2.6.15+ -Signed-off-by: Mathias Krause -Signed-off-by: David S. Miller ---- - drivers/connector/cn_proc.c | 18 ++++++++++++++++++ - 1 file changed, 18 insertions(+) - -diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c -index 08ae128..c73fc2b 100644 ---- a/drivers/connector/cn_proc.c -+++ b/drivers/connector/cn_proc.c -@@ -65,6 +65,7 @@ void proc_fork_connector(struct task_struct *task) - - msg = (struct cn_msg *)buffer; - ev = (struct proc_event *)msg->data; -+ memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); - ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); -@@ -80,6 +81,7 @@ void proc_fork_connector(struct task_struct *task) - memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); - msg->ack = 0; /* not used */ - msg->len = sizeof(*ev); -+ msg->flags = 0; /* not used */ - /* If cn_netlink_send() failed, the data is not sent */ - cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); - } -@@ -96,6 +98,7 @@ void proc_exec_connector(struct task_struct *task) - - msg = (struct cn_msg *)buffer; - ev = (struct proc_event *)msg->data; -+ memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); - ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); -@@ -106,6 +109,7 @@ void proc_exec_connector(struct task_struct *task) - memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); - msg->ack = 0; /* not used */ - msg->len = sizeof(*ev); -+ msg->flags = 0; /* not used */ - cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); - } - -@@ -122,6 +126,7 @@ void proc_id_connector(struct task_struct *task, int which_id) - - msg = (struct cn_msg *)buffer; - ev = (struct proc_event *)msg->data; -+ memset(&ev->event_data, 0, sizeof(ev->event_data)); - ev->what = which_id; - ev->event_data.id.process_pid = task->pid; - ev->event_data.id.process_tgid = task->tgid; -@@ -145,6 +150,7 @@ void proc_id_connector(struct task_struct *task, int which_id) - memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); - msg->ack = 0; /* not used */ - msg->len = sizeof(*ev); -+ msg->flags = 0; /* not used */ - cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); - } - -@@ -160,6 +166,7 @@ void proc_sid_connector(struct task_struct *task) - - msg = (struct cn_msg *)buffer; - ev = (struct proc_event *)msg->data; -+ memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); - ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); -@@ -170,6 +177,7 @@ void proc_sid_connector(struct task_struct *task) - memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); - msg->ack = 0; /* not used */ - msg->len = sizeof(*ev); -+ msg->flags = 0; /* not used */ - cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); - } - -@@ -185,6 +193,7 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id) - - msg = (struct cn_msg *)buffer; - ev = (struct proc_event *)msg->data; -+ memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); - ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); -@@ -203,6 +212,7 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id) - memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); - msg->ack = 0; /* not used */ - msg->len = sizeof(*ev); -+ msg->flags = 0; /* not used */ - cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); - } - -@@ -218,6 +228,7 @@ void proc_comm_connector(struct task_struct *task) - - msg = (struct cn_msg *)buffer; - ev = (struct proc_event *)msg->data; -+ memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); - ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); -@@ -229,6 +240,7 @@ void proc_comm_connector(struct task_struct *task) - memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); - msg->ack = 0; /* not used */ - msg->len = sizeof(*ev); -+ msg->flags = 0; /* not used */ - cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); - } - -@@ -244,6 +256,7 @@ void proc_coredump_connector(struct task_struct *task) - - msg = (struct cn_msg *)buffer; - ev = (struct proc_event *)msg->data; -+ memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); - ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); -@@ -254,6 +267,7 @@ void proc_coredump_connector(struct task_struct *task) - memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); - msg->ack = 0; /* not used */ - msg->len = sizeof(*ev); -+ msg->flags = 0; /* not used */ - cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); - } - -@@ -269,6 +283,7 @@ void proc_exit_connector(struct task_struct *task) - - msg = (struct cn_msg *)buffer; - ev = (struct proc_event *)msg->data; -+ memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); - ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); -@@ -281,6 +296,7 @@ void proc_exit_connector(struct task_struct *task) - memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); - msg->ack = 0; /* not used */ - msg->len = sizeof(*ev); -+ msg->flags = 0; /* not used */ - cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); - } - -@@ -304,6 +320,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack) - - msg = (struct cn_msg *)buffer; - ev = (struct proc_event *)msg->data; -+ memset(&ev->event_data, 0, sizeof(ev->event_data)); - msg->seq = rcvd_seq; - ktime_get_ts(&ts); /* get high res monotonic timestamp */ - put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); -@@ -313,6 +330,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack) - memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); - msg->ack = rcvd_ack + 1; - msg->len = sizeof(*ev); -+ msg->flags = 0; /* not used */ - cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); - } - --- -1.7.11.7 - - -From f3d398e2465b3b74987a3a2fc42ea3e8c83d2166 Mon Sep 17 00:00:00 2001 -From: Jiri Benc -Date: Fri, 4 Oct 2013 17:04:48 +0200 -Subject: [PATCH 15/47] ipv4: fix ineffective source address selection - -[ Upstream commit 0a7e22609067ff524fc7bbd45c6951dd08561667 ] - -When sending out multicast messages, the source address in inet->mc_addr is -ignored and rewritten by an autoselected one. This is caused by a typo in -commit 813b3b5db831 ("ipv4: Use caller's on-stack flowi as-is in output -route lookups"). - -Signed-off-by: Jiri Benc -Acked-by: Eric Dumazet -Signed-off-by: David S. Miller ---- - net/ipv4/route.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/net/ipv4/route.c b/net/ipv4/route.c -index a9a54a2..2de16d9 100644 ---- a/net/ipv4/route.c -+++ b/net/ipv4/route.c -@@ -2074,7 +2074,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) - RT_SCOPE_LINK); - goto make_route; - } -- if (fl4->saddr) { -+ if (!fl4->saddr) { - if (ipv4_is_multicast(fl4->daddr)) - fl4->saddr = inet_select_addr(dev_out, 0, - fl4->flowi4_scope); --- -1.7.11.7 - - -From 8fd516716afeb4631cf790a2be7ca30d0a664b01 Mon Sep 17 00:00:00 2001 -From: Marc Kleine-Budde -Date: Sat, 5 Oct 2013 21:25:17 +0200 -Subject: [PATCH 16/47] can: dev: fix nlmsg size calculation in can_get_size() - -[ Upstream commit fe119a05f8ca481623a8d02efcc984332e612528 ] - -This patch fixes the calculation of the nlmsg size, by adding the missing -nla_total_size(). - -Signed-off-by: Marc Kleine-Budde -Signed-off-by: David S. Miller ---- - drivers/net/can/dev.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c -index f9cba41..1870c47 100644 ---- a/drivers/net/can/dev.c -+++ b/drivers/net/can/dev.c -@@ -705,14 +705,14 @@ static size_t can_get_size(const struct net_device *dev) - size_t size; - - size = nla_total_size(sizeof(u32)); /* IFLA_CAN_STATE */ -- size += sizeof(struct can_ctrlmode); /* IFLA_CAN_CTRLMODE */ -+ size += nla_total_size(sizeof(struct can_ctrlmode)); /* IFLA_CAN_CTRLMODE */ - size += nla_total_size(sizeof(u32)); /* IFLA_CAN_RESTART_MS */ -- size += sizeof(struct can_bittiming); /* IFLA_CAN_BITTIMING */ -- size += sizeof(struct can_clock); /* IFLA_CAN_CLOCK */ -+ size += nla_total_size(sizeof(struct can_bittiming)); /* IFLA_CAN_BITTIMING */ -+ size += nla_total_size(sizeof(struct can_clock)); /* IFLA_CAN_CLOCK */ - if (priv->do_get_berr_counter) /* IFLA_CAN_BERR_COUNTER */ -- size += sizeof(struct can_berr_counter); -+ size += nla_total_size(sizeof(struct can_berr_counter)); - if (priv->bittiming_const) /* IFLA_CAN_BITTIMING_CONST */ -- size += sizeof(struct can_bittiming_const); -+ size += nla_total_size(sizeof(struct can_bittiming_const)); - - return size; - } --- -1.7.11.7 - - -From 1b3231ca7e26084580145c904dd10a60cac35c63 Mon Sep 17 00:00:00 2001 -From: Fabio Estevam -Date: Sat, 5 Oct 2013 17:56:59 -0300 -Subject: [PATCH 17/47] net: secure_seq: Fix warning when CONFIG_IPV6 and - CONFIG_INET are not selected - -[ Upstream commit cb03db9d0e964568407fb08ea46cc2b6b7f67587 ] - -net_secret() is only used when CONFIG_IPV6 or CONFIG_INET are selected. - -Building a defconfig with both of these symbols unselected (Using the ARM -at91sam9rl_defconfig, for example) leads to the following build warning: - -$ make at91sam9rl_defconfig -# -# configuration written to .config -# - -$ make net/core/secure_seq.o -scripts/kconfig/conf --silentoldconfig Kconfig - CHK include/config/kernel.release - CHK include/generated/uapi/linux/version.h - CHK include/generated/utsrelease.h -make[1]: `include/generated/mach-types.h' is up to date. - CALL scripts/checksyscalls.sh - CC net/core/secure_seq.o -net/core/secure_seq.c:17:13: warning: 'net_secret_init' defined but not used [-Wunused-function] - -Fix this warning by protecting the definition of net_secret() with these -symbols. - -Reported-by: Olof Johansson -Signed-off-by: Fabio Estevam -Signed-off-by: David S. Miller ---- - net/core/secure_seq.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c -index 3f1ec15..8d9d05e 100644 ---- a/net/core/secure_seq.c -+++ b/net/core/secure_seq.c -@@ -10,6 +10,7 @@ - - #include - -+#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET) - #define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4) - - static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned; -@@ -29,6 +30,7 @@ static void net_secret_init(void) - cmpxchg(&net_secret[--i], 0, tmp); - } - } -+#endif - - #ifdef CONFIG_INET - static u32 seq_scale(u32 seq) --- -1.7.11.7 - - -From 538680b534f30fe6531099f87267bb676c935351 Mon Sep 17 00:00:00 2001 -From: Paul Durrant -Date: Tue, 8 Oct 2013 14:56:44 +0100 -Subject: [PATCH 18/47] xen-netback: Don't destroy the netdev until the vif is - shut down - -[ upstream commit id: 279f438e36c0a70b23b86d2090aeec50155034a9 ] - -Without this patch, if a frontend cycles through states Closing -and Closed (which Windows frontends need to do) then the netdev -will be destroyed and requires re-invocation of hotplug scripts -to restore state before the frontend can move to Connected. Thus -when udev is not in use the backend gets stuck in InitWait. - -With this patch, the netdev is left alone whilst the backend is -still online and is only de-registered and freed just prior to -destroying the vif (which is also nicely symmetrical with the -netdev allocation and registration being done during probe) so -no re-invocation of hotplug scripts is required. - -Signed-off-by: Paul Durrant -Cc: David Vrabel -Cc: Wei Liu -Cc: Ian Campbell ---- - drivers/net/xen-netback/common.h | 1 + - drivers/net/xen-netback/interface.c | 23 +++++++++-------------- - drivers/net/xen-netback/xenbus.c | 17 ++++++++++++----- - 3 files changed, 22 insertions(+), 19 deletions(-) - -diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h -index 8a4d77e..4d9a5e7 100644 ---- a/drivers/net/xen-netback/common.h -+++ b/drivers/net/xen-netback/common.h -@@ -120,6 +120,7 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref, - unsigned long rx_ring_ref, unsigned int tx_evtchn, - unsigned int rx_evtchn); - void xenvif_disconnect(struct xenvif *vif); -+void xenvif_free(struct xenvif *vif); - - void xenvif_get(struct xenvif *vif); - void xenvif_put(struct xenvif *vif); -diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c -index 087d2db..73336c1 100644 ---- a/drivers/net/xen-netback/interface.c -+++ b/drivers/net/xen-netback/interface.c -@@ -326,6 +326,9 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, - } - - netdev_dbg(dev, "Successfully created xenvif\n"); -+ -+ __module_get(THIS_MODULE); -+ - return vif; - } - -@@ -413,12 +416,6 @@ void xenvif_carrier_off(struct xenvif *vif) - - void xenvif_disconnect(struct xenvif *vif) - { -- /* Disconnect funtion might get called by generic framework -- * even before vif connects, so we need to check if we really -- * need to do a module_put. -- */ -- int need_module_put = 0; -- - if (netif_carrier_ok(vif->dev)) - xenvif_carrier_off(vif); - -@@ -432,18 +429,16 @@ void xenvif_disconnect(struct xenvif *vif) - unbind_from_irqhandler(vif->tx_irq, vif); - unbind_from_irqhandler(vif->rx_irq, vif); - } -- /* vif->irq is valid, we had a module_get in -- * xenvif_connect. -- */ -- need_module_put = 1; - } - -- unregister_netdev(vif->dev); -- - xen_netbk_unmap_frontend_rings(vif); -+} -+ -+void xenvif_free(struct xenvif *vif) -+{ -+ unregister_netdev(vif->dev); - - free_netdev(vif->dev); - -- if (need_module_put) -- module_put(THIS_MODULE); -+ module_put(THIS_MODULE); - } -diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c -index 1fe48fe3..a53782e 100644 ---- a/drivers/net/xen-netback/xenbus.c -+++ b/drivers/net/xen-netback/xenbus.c -@@ -42,7 +42,7 @@ static int netback_remove(struct xenbus_device *dev) - if (be->vif) { - kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); - xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status"); -- xenvif_disconnect(be->vif); -+ xenvif_free(be->vif); - be->vif = NULL; - } - kfree(be); -@@ -213,9 +213,18 @@ static void disconnect_backend(struct xenbus_device *dev) - { - struct backend_info *be = dev_get_drvdata(&dev->dev); - -+ if (be->vif) -+ xenvif_disconnect(be->vif); -+} -+ -+static void destroy_backend(struct xenbus_device *dev) -+{ -+ struct backend_info *be = dev_get_drvdata(&dev->dev); -+ - if (be->vif) { -+ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); - xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status"); -- xenvif_disconnect(be->vif); -+ xenvif_free(be->vif); - be->vif = NULL; - } - } -@@ -246,14 +255,11 @@ static void frontend_changed(struct xenbus_device *dev, - case XenbusStateConnected: - if (dev->state == XenbusStateConnected) - break; -- backend_create_xenvif(be); - if (be->vif) - connect(be); - break; - - case XenbusStateClosing: -- if (be->vif) -- kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); - disconnect_backend(dev); - xenbus_switch_state(dev, XenbusStateClosing); - break; -@@ -262,6 +268,7 @@ static void frontend_changed(struct xenbus_device *dev, - xenbus_switch_state(dev, XenbusStateClosed); - if (xenbus_dev_is_online(dev)) - break; -+ destroy_backend(dev); - /* fall through if not online */ - case XenbusStateUnknown: - device_unregister(&dev->dev); --- -1.7.11.7 - - -From 29bb21656d747e62d55b9e1929b23eadcd6be324 Mon Sep 17 00:00:00 2001 -From: Amir Vadai -Date: Mon, 7 Oct 2013 13:38:12 +0200 -Subject: [PATCH 19/47] net/mlx4_en: Rename name of mlx4_en_rx_alloc members - -[ Upstream commit 70fbe0794393829d9acd686428d87c27b6f6984b ] - -Add page prefix to page related members: @size and @offset into -@page_size and @page_offset - -CC: Eric Dumazet -Signed-off-by: Amir Vadai -Acked-by: Eric Dumazet -Signed-off-by: David S. Miller ---- - drivers/net/ethernet/mellanox/mlx4/en_rx.c | 40 ++++++++++++++++------------ - drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 4 +-- - 2 files changed, 25 insertions(+), 19 deletions(-) - -diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c -index dec455c..066fc27 100644 ---- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c -+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c -@@ -70,14 +70,15 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, - put_page(page); - return -ENOMEM; - } -- page_alloc->size = PAGE_SIZE << order; -+ page_alloc->page_size = PAGE_SIZE << order; - page_alloc->page = page; - page_alloc->dma = dma; -- page_alloc->offset = frag_info->frag_align; -+ page_alloc->page_offset = frag_info->frag_align; - /* Not doing get_page() for each frag is a big win - * on asymetric workloads. - */ -- atomic_set(&page->_count, page_alloc->size / frag_info->frag_stride); -+ atomic_set(&page->_count, -+ page_alloc->page_size / frag_info->frag_stride); - return 0; - } - -@@ -96,16 +97,19 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, - for (i = 0; i < priv->num_frags; i++) { - frag_info = &priv->frag_info[i]; - page_alloc[i] = ring_alloc[i]; -- page_alloc[i].offset += frag_info->frag_stride; -- if (page_alloc[i].offset + frag_info->frag_stride <= ring_alloc[i].size) -+ page_alloc[i].page_offset += frag_info->frag_stride; -+ -+ if (page_alloc[i].page_offset + frag_info->frag_stride <= -+ ring_alloc[i].page_size) - continue; -+ - if (mlx4_alloc_pages(priv, &page_alloc[i], frag_info, gfp)) - goto out; - } - - for (i = 0; i < priv->num_frags; i++) { - frags[i] = ring_alloc[i]; -- dma = ring_alloc[i].dma + ring_alloc[i].offset; -+ dma = ring_alloc[i].dma + ring_alloc[i].page_offset; - ring_alloc[i] = page_alloc[i]; - rx_desc->data[i].addr = cpu_to_be64(dma); - } -@@ -117,7 +121,7 @@ out: - frag_info = &priv->frag_info[i]; - if (page_alloc[i].page != ring_alloc[i].page) { - dma_unmap_page(priv->ddev, page_alloc[i].dma, -- page_alloc[i].size, PCI_DMA_FROMDEVICE); -+ page_alloc[i].page_size, PCI_DMA_FROMDEVICE); - page = page_alloc[i].page; - atomic_set(&page->_count, 1); - put_page(page); -@@ -132,9 +136,10 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv, - { - const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; - -- if (frags[i].offset + frag_info->frag_stride > frags[i].size) -- dma_unmap_page(priv->ddev, frags[i].dma, frags[i].size, -- PCI_DMA_FROMDEVICE); -+ if (frags[i].page_offset + frag_info->frag_stride > -+ frags[i].page_size) -+ dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size, -+ PCI_DMA_FROMDEVICE); - - if (frags[i].page) - put_page(frags[i].page); -@@ -161,7 +166,7 @@ out: - - page_alloc = &ring->page_alloc[i]; - dma_unmap_page(priv->ddev, page_alloc->dma, -- page_alloc->size, PCI_DMA_FROMDEVICE); -+ page_alloc->page_size, PCI_DMA_FROMDEVICE); - page = page_alloc->page; - atomic_set(&page->_count, 1); - put_page(page); -@@ -184,10 +189,11 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv, - i, page_count(page_alloc->page)); - - dma_unmap_page(priv->ddev, page_alloc->dma, -- page_alloc->size, PCI_DMA_FROMDEVICE); -- while (page_alloc->offset + frag_info->frag_stride < page_alloc->size) { -+ page_alloc->page_size, PCI_DMA_FROMDEVICE); -+ while (page_alloc->page_offset + frag_info->frag_stride < -+ page_alloc->page_size) { - put_page(page_alloc->page); -- page_alloc->offset += frag_info->frag_stride; -+ page_alloc->page_offset += frag_info->frag_stride; - } - page_alloc->page = NULL; - } -@@ -478,7 +484,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, - /* Save page reference in skb */ - __skb_frag_set_page(&skb_frags_rx[nr], frags[nr].page); - skb_frag_size_set(&skb_frags_rx[nr], frag_info->frag_size); -- skb_frags_rx[nr].page_offset = frags[nr].offset; -+ skb_frags_rx[nr].page_offset = frags[nr].page_offset; - skb->truesize += frag_info->frag_stride; - frags[nr].page = NULL; - } -@@ -517,7 +523,7 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, - - /* Get pointer to first fragment so we could copy the headers into the - * (linear part of the) skb */ -- va = page_address(frags[0].page) + frags[0].offset; -+ va = page_address(frags[0].page) + frags[0].page_offset; - - if (length <= SMALL_PACKET_SIZE) { - /* We are copying all relevant data to the skb - temporarily -@@ -645,7 +651,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud - dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh), - DMA_FROM_DEVICE); - ethh = (struct ethhdr *)(page_address(frags[0].page) + -- frags[0].offset); -+ frags[0].page_offset); - - if (is_multicast_ether_addr(ethh->h_dest)) { - struct mlx4_mac_entry *entry; -diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h -index 5e0aa56..bf06e36 100644 ---- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h -+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h -@@ -237,8 +237,8 @@ struct mlx4_en_tx_desc { - struct mlx4_en_rx_alloc { - struct page *page; - dma_addr_t dma; -- u32 offset; -- u32 size; -+ u32 page_offset; -+ u32 page_size; - }; - - struct mlx4_en_tx_ring { --- -1.7.11.7 - - -From 4bd2cc99115d31513bfe3c2bd7bcfe67fc081ae8 Mon Sep 17 00:00:00 2001 -From: Amir Vadai -Date: Mon, 7 Oct 2013 13:38:13 +0200 -Subject: [PATCH 20/47] net/mlx4_en: Fix pages never dma unmapped on rx - -[ Upstream commit 021f1107ffdae7a82af6c53f4c52654062e365c6 ] - -This patch fixes a bug introduced by commit 51151a16 (mlx4: allow -order-0 memory allocations in RX path). - -dma_unmap_page never reached because condition to detect last fragment -in page is wrong. offset+frag_stride can't be greater than size, need to -make sure no additional frag will fit in page => compare offset + -frag_stride + next_frag_size instead. -next_frag_size is the same as the current one, since page is shared only -with frags of the same size. - -CC: Eric Dumazet -Signed-off-by: Amir Vadai -Acked-by: Eric Dumazet -Signed-off-by: David S. Miller ---- - drivers/net/ethernet/mellanox/mlx4/en_rx.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c -index 066fc27..afe2efa 100644 ---- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c -+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c -@@ -135,9 +135,10 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv, - int i) - { - const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; -+ u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride; - -- if (frags[i].page_offset + frag_info->frag_stride > -- frags[i].page_size) -+ -+ if (next_frag_end > frags[i].page_size) - dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size, - PCI_DMA_FROMDEVICE); - --- -1.7.11.7 - - -From af64f33fff313187ca01ddb7db09b537a89208dd Mon Sep 17 00:00:00 2001 -From: Marc Kleine-Budde -Date: Mon, 7 Oct 2013 23:19:58 +0200 -Subject: [PATCH 21/47] net: vlan: fix nlmsg size calculation in - vlan_get_size() - -[ Upstream commit c33a39c575068c2ea9bffb22fd6de2df19c74b89 ] - -This patch fixes the calculation of the nlmsg size, by adding the missing -nla_total_size(). - -Cc: Patrick McHardy -Signed-off-by: Marc Kleine-Budde -Signed-off-by: David S. Miller ---- - net/8021q/vlan_netlink.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c -index 3091297..c7e634a 100644 ---- a/net/8021q/vlan_netlink.c -+++ b/net/8021q/vlan_netlink.c -@@ -171,7 +171,7 @@ static size_t vlan_get_size(const struct net_device *dev) - - return nla_total_size(2) + /* IFLA_VLAN_PROTOCOL */ - nla_total_size(2) + /* IFLA_VLAN_ID */ -- sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */ -+ nla_total_size(sizeof(struct ifla_vlan_flags)) + /* IFLA_VLAN_FLAGS */ - vlan_qos_map_size(vlan->nr_ingress_mappings) + - vlan_qos_map_size(vlan->nr_egress_mappings); - } --- -1.7.11.7 - - -From 74869292aeb07213144e34b0e21e23f7e3c9f61f Mon Sep 17 00:00:00 2001 -From: Vlad Yasevich -Date: Thu, 10 Oct 2013 15:57:59 -0400 -Subject: [PATCH 22/47] bridge: update mdb expiration timer upon reports. - -[ Upstream commit f144febd93d5ee534fdf23505ab091b2b9088edc ] - -commit 9f00b2e7cf241fa389733d41b615efdaa2cb0f5b - bridge: only expire the mdb entry when query is received -changed the mdb expiration timer to be armed only when QUERY is -received. Howerver, this causes issues in an environment where -the multicast server socket comes and goes very fast while a client -is trying to send traffic to it. - -The root cause is a race where a sequence of LEAVE followed by REPORT -messages can race against QUERY messages generated in response to LEAVE. -The QUERY ends up starting the expiration timer, and that timer can -potentially expire after the new REPORT message has been received signaling -the new join operation. This leads to a significant drop in multicast -traffic and possible complete stall. - -The solution is to have REPORT messages update the expiration timer -on entries that already exist. - -CC: Cong Wang -CC: Herbert Xu -CC: Stephen Hemminger -Signed-off-by: Vlad Yasevich -Acked-by: Herbert Xu -Signed-off-by: David S. Miller ---- - net/bridge/br_multicast.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c -index bbcb435..0e3fea7 100644 ---- a/net/bridge/br_multicast.c -+++ b/net/bridge/br_multicast.c -@@ -610,6 +610,9 @@ rehash: - break; - - default: -+ /* If we have an existing entry, update it's expire timer */ -+ mod_timer(&mp->timer, -+ jiffies + br->multicast_membership_interval); - goto out; - } - -@@ -679,8 +682,12 @@ static int br_multicast_add_group(struct net_bridge *br, - for (pp = &mp->ports; - (p = mlock_dereference(*pp, br)) != NULL; - pp = &p->next) { -- if (p->port == port) -+ if (p->port == port) { -+ /* We already have a portgroup, update the timer. */ -+ mod_timer(&p->timer, -+ jiffies + br->multicast_membership_interval); - goto out; -+ } - if ((unsigned long)p->port < (unsigned long)port) - break; - } --- -1.7.11.7 - - -From d9f02cfe59400677feea276d4b27981f6d91825a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Linus=20L=C3=BCssing?= -Date: Sun, 20 Oct 2013 00:58:57 +0200 -Subject: [PATCH 23/47] Revert "bridge: only expire the mdb entry when query - is received" -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit 454594f3b93a49ef568cd190c5af31376b105a7b ] - -While this commit was a good attempt to fix issues occuring when no -multicast querier is present, this commit still has two more issues: - -1) There are cases where mdb entries do not expire even if there is a -querier present. The bridge will unnecessarily continue flooding -multicast packets on the according ports. - -2) Never removing an mdb entry could be exploited for a Denial of -Service by an attacker on the local link, slowly, but steadily eating up -all memory. - -Actually, this commit became obsolete with -"bridge: disable snooping if there is no querier" (b00589af3b) -which included fixes for a few more cases. - -Therefore reverting the following commits (the commit stated in the -commit message plus three of its follow up fixes): - -==================== -Revert "bridge: update mdb expiration timer upon reports." -This reverts commit f144febd93d5ee534fdf23505ab091b2b9088edc. -Revert "bridge: do not call setup_timer() multiple times" -This reverts commit 1faabf2aab1fdaa1ace4e8c829d1b9cf7bfec2f1. -Revert "bridge: fix some kernel warning in multicast timer" -This reverts commit c7e8e8a8f7a70b343ca1e0f90a31e35ab2d16de1. -Revert "bridge: only expire the mdb entry when query is received" -This reverts commit 9f00b2e7cf241fa389733d41b615efdaa2cb0f5b. -==================== - -CC: Cong Wang -Signed-off-by: Linus Lüssing -Reviewed-by: Vlad Yasevich -Signed-off-by: David S. Miller ---- - net/bridge/br_mdb.c | 2 +- - net/bridge/br_multicast.c | 47 +++++++++++++++++++++++++++-------------------- - net/bridge/br_private.h | 1 - - 3 files changed, 28 insertions(+), 22 deletions(-) - -diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c -index 6319c43..de3a0e7 100644 ---- a/net/bridge/br_mdb.c -+++ b/net/bridge/br_mdb.c -@@ -451,7 +451,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) - call_rcu_bh(&p->rcu, br_multicast_free_pg); - err = 0; - -- if (!mp->ports && !mp->mglist && mp->timer_armed && -+ if (!mp->ports && !mp->mglist && - netif_running(br->dev)) - mod_timer(&mp->timer, jiffies); - break; -diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c -index 0e3fea7..fbad619 100644 ---- a/net/bridge/br_multicast.c -+++ b/net/bridge/br_multicast.c -@@ -271,7 +271,7 @@ static void br_multicast_del_pg(struct net_bridge *br, - del_timer(&p->timer); - call_rcu_bh(&p->rcu, br_multicast_free_pg); - -- if (!mp->ports && !mp->mglist && mp->timer_armed && -+ if (!mp->ports && !mp->mglist && - netif_running(br->dev)) - mod_timer(&mp->timer, jiffies); - -@@ -610,9 +610,6 @@ rehash: - break; - - default: -- /* If we have an existing entry, update it's expire timer */ -- mod_timer(&mp->timer, -- jiffies + br->multicast_membership_interval); - goto out; - } - -@@ -622,7 +619,6 @@ rehash: - - mp->br = br; - mp->addr = *group; -- - setup_timer(&mp->timer, br_multicast_group_expired, - (unsigned long)mp); - -@@ -662,6 +658,7 @@ static int br_multicast_add_group(struct net_bridge *br, - struct net_bridge_mdb_entry *mp; - struct net_bridge_port_group *p; - struct net_bridge_port_group __rcu **pp; -+ unsigned long now = jiffies; - int err; - - spin_lock(&br->multicast_lock); -@@ -676,18 +673,15 @@ static int br_multicast_add_group(struct net_bridge *br, - - if (!port) { - mp->mglist = true; -+ mod_timer(&mp->timer, now + br->multicast_membership_interval); - goto out; - } - - for (pp = &mp->ports; - (p = mlock_dereference(*pp, br)) != NULL; - pp = &p->next) { -- if (p->port == port) { -- /* We already have a portgroup, update the timer. */ -- mod_timer(&p->timer, -- jiffies + br->multicast_membership_interval); -- goto out; -- } -+ if (p->port == port) -+ goto found; - if ((unsigned long)p->port < (unsigned long)port) - break; - } -@@ -698,6 +692,8 @@ static int br_multicast_add_group(struct net_bridge *br, - rcu_assign_pointer(*pp, p); - br_mdb_notify(br->dev, port, group, RTM_NEWMDB); - -+found: -+ mod_timer(&p->timer, now + br->multicast_membership_interval); - out: - err = 0; - -@@ -1197,9 +1193,6 @@ static int br_ip4_multicast_query(struct net_bridge *br, - if (!mp) - goto out; - -- mod_timer(&mp->timer, now + br->multicast_membership_interval); -- mp->timer_armed = true; -- - max_delay *= br->multicast_last_member_count; - - if (mp->mglist && -@@ -1276,9 +1269,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, - if (!mp) - goto out; - -- mod_timer(&mp->timer, now + br->multicast_membership_interval); -- mp->timer_armed = true; -- - max_delay *= br->multicast_last_member_count; - if (mp->mglist && - (timer_pending(&mp->timer) ? -@@ -1364,7 +1354,7 @@ static void br_multicast_leave_group(struct net_bridge *br, - call_rcu_bh(&p->rcu, br_multicast_free_pg); - br_mdb_notify(br->dev, port, group, RTM_DELMDB); - -- if (!mp->ports && !mp->mglist && mp->timer_armed && -+ if (!mp->ports && !mp->mglist && - netif_running(br->dev)) - mod_timer(&mp->timer, jiffies); - } -@@ -1376,12 +1366,30 @@ static void br_multicast_leave_group(struct net_bridge *br, - br->multicast_last_member_interval; - - if (!port) { -- if (mp->mglist && mp->timer_armed && -+ if (mp->mglist && - (timer_pending(&mp->timer) ? - time_after(mp->timer.expires, time) : - try_to_del_timer_sync(&mp->timer) >= 0)) { - mod_timer(&mp->timer, time); - } -+ -+ goto out; -+ } -+ -+ for (p = mlock_dereference(mp->ports, br); -+ p != NULL; -+ p = mlock_dereference(p->next, br)) { -+ if (p->port != port) -+ continue; -+ -+ if (!hlist_unhashed(&p->mglist) && -+ (timer_pending(&p->timer) ? -+ time_after(p->timer.expires, time) : -+ try_to_del_timer_sync(&p->timer) >= 0)) { -+ mod_timer(&p->timer, time); -+ } -+ -+ break; - } - out: - spin_unlock(&br->multicast_lock); -@@ -1798,7 +1806,6 @@ void br_multicast_stop(struct net_bridge *br) - hlist_for_each_entry_safe(mp, n, &mdb->mhash[i], - hlist[ver]) { - del_timer(&mp->timer); -- mp->timer_armed = false; - call_rcu_bh(&mp->rcu, br_multicast_free_group); - } - } -diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h -index cde1eb1..aa05bd8 100644 ---- a/net/bridge/br_private.h -+++ b/net/bridge/br_private.h -@@ -126,7 +126,6 @@ struct net_bridge_mdb_entry - struct timer_list timer; - struct br_ip addr; - bool mglist; -- bool timer_armed; - }; - - struct net_bridge_mdb_htable --- -1.7.11.7 - - -From 40420baad983147cd23e6de95c958c96b96be727 Mon Sep 17 00:00:00 2001 -From: Christophe Gouault -Date: Tue, 8 Oct 2013 17:21:22 +0200 -Subject: [PATCH 24/47] vti: get rid of nf mark rule in prerouting - -[ Upstream commit 7263a5187f9e9de45fcb51349cf0e031142c19a1 ] - -This patch fixes and improves the use of vti interfaces (while -lightly changing the way of configuring them). - -Currently: - -- it is necessary to identify and mark inbound IPsec - packets destined to each vti interface, via netfilter rules in - the mangle table at prerouting hook. - -- the vti module cannot retrieve the right tunnel in input since - commit b9959fd3: vti tunnels all have an i_key, but the tunnel lookup - is done with flag TUNNEL_NO_KEY, so there no chance to retrieve them. - -- the i_key is used by the outbound processing as a mark to lookup - for the right SP and SA bundle. - -This patch uses the o_key to store the vti mark (instead of i_key) and -enables: - -- to avoid the need for previously marking the inbound skbuffs via a - netfilter rule. -- to properly retrieve the right tunnel in input, only based on the IPsec - packet outer addresses. -- to properly perform an inbound policy check (using the tunnel o_key - as a mark). -- to properly perform an outbound SPD and SAD lookup (using the tunnel - o_key as a mark). -- to keep the current mark of the skbuff. The skbuff mark is neither - used nor changed by the vti interface. Only the vti interface o_key - is used. - -SAs have a wildcard mark. -SPs have a mark equal to the vti interface o_key. - -The vti interface must be created as follows (i_key = 0, o_key = mark): - - ip link add vti1 mode vti local 1.1.1.1 remote 2.2.2.2 okey 1 - -The SPs attached to vti1 must be created as follows (mark = vti1 o_key): - - ip xfrm policy add dir out mark 1 tmpl src 1.1.1.1 dst 2.2.2.2 \ - proto esp mode tunnel - ip xfrm policy add dir in mark 1 tmpl src 2.2.2.2 dst 1.1.1.1 \ - proto esp mode tunnel - -The SAs are created with the default wildcard mark. There is no -distinction between global vs. vti SAs. Just their addresses will -possibly link them to a vti interface: - - ip xfrm state add src 1.1.1.1 dst 2.2.2.2 proto esp spi 1000 mode tunnel \ - enc "cbc(aes)" "azertyuiopqsdfgh" - - ip xfrm state add src 2.2.2.2 dst 1.1.1.1 proto esp spi 2000 mode tunnel \ - enc "cbc(aes)" "sqbdhgqsdjqjsdfh" - -To avoid matching "global" (not vti) SPs in vti interfaces, global SPs -should no use the default wildcard mark, but explicitly match mark 0. - -To avoid a double SPD lookup in input and output (in global and vti SPDs), -the NOPOLICY and NOXFRM options should be set on the vti interfaces: - - echo 1 > /proc/sys/net/ipv4/conf/vti1/disable_policy - echo 1 > /proc/sys/net/ipv4/conf/vti1/disable_xfrm - -The outgoing traffic is steered to vti1 by a route via the vti interface: - - ip route add 192.168.0.0/16 dev vti1 - -The incoming IPsec traffic is steered to vti1 because its outer addresses -match the vti1 tunnel configuration. - -Signed-off-by: Christophe Gouault -Signed-off-by: David S. Miller ---- - net/ipv4/ip_vti.c | 14 +++++++++++--- - 1 file changed, 11 insertions(+), 3 deletions(-) - -diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c -index 17cc0ff..0656041 100644 ---- a/net/ipv4/ip_vti.c -+++ b/net/ipv4/ip_vti.c -@@ -285,8 +285,17 @@ static int vti_rcv(struct sk_buff *skb) - tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); - if (tunnel != NULL) { - struct pcpu_tstats *tstats; -+ u32 oldmark = skb->mark; -+ int ret; - -- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) -+ -+ /* temporarily mark the skb with the tunnel o_key, to -+ * only match policies with this mark. -+ */ -+ skb->mark = be32_to_cpu(tunnel->parms.o_key); -+ ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb); -+ skb->mark = oldmark; -+ if (!ret) - return -1; - - tstats = this_cpu_ptr(tunnel->dev->tstats); -@@ -295,7 +304,6 @@ static int vti_rcv(struct sk_buff *skb) - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); - -- skb->mark = 0; - secpath_reset(skb); - skb->dev = tunnel->dev; - return 1; -@@ -327,7 +335,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) - - memset(&fl4, 0, sizeof(fl4)); - flowi4_init_output(&fl4, tunnel->parms.link, -- be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos), -+ be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos), - RT_SCOPE_UNIVERSE, - IPPROTO_IPIP, 0, - dst, tiph->saddr, 0, 0); --- -1.7.11.7 - - -From d74d8a563ec79425464d7a8aeaa1796724fea7bc Mon Sep 17 00:00:00 2001 -From: Eric Dumazet -Date: Thu, 10 Oct 2013 06:30:09 -0700 -Subject: [PATCH 25/47] l2tp: must disable bh before calling l2tp_xmit_skb() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit 455cc32bf128e114455d11ad919321ab89a2c312 ] - -François Cachereul made a very nice bug report and suspected -the bh_lock_sock() / bh_unlok_sock() pair used in l2tp_xmit_skb() from -process context was not good. - -This problem was added by commit 6af88da14ee284aaad6e4326da09a89191ab6165 -("l2tp: Fix locking in l2tp_core.c"). - -l2tp_eth_dev_xmit() runs from BH context, so we must disable BH -from other l2tp_xmit_skb() users. - -[ 452.060011] BUG: soft lockup - CPU#1 stuck for 23s! [accel-pppd:6662] -[ 452.061757] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core pppoe pppox -ppp_generic slhc ipv6 ext3 mbcache jbd virtio_balloon xfs exportfs dm_mod -virtio_blk ata_generic virtio_net floppy ata_piix libata virtio_pci virtio_ring virtio [last unloaded: scsi_wait_scan] -[ 452.064012] CPU 1 -[ 452.080015] BUG: soft lockup - CPU#2 stuck for 23s! [accel-pppd:6643] -[ 452.080015] CPU 2 -[ 452.080015] -[ 452.080015] Pid: 6643, comm: accel-pppd Not tainted 3.2.46.mini #1 Bochs Bochs -[ 452.080015] RIP: 0010:[] [] do_raw_spin_lock+0x17/0x1f -[ 452.080015] RSP: 0018:ffff88007125fc18 EFLAGS: 00000293 -[ 452.080015] RAX: 000000000000aba9 RBX: ffffffff811d0703 RCX: 0000000000000000 -[ 452.080015] RDX: 00000000000000ab RSI: ffff8800711f6896 RDI: ffff8800745c8110 -[ 452.080015] RBP: ffff88007125fc18 R08: 0000000000000020 R09: 0000000000000000 -[ 452.080015] R10: 0000000000000000 R11: 0000000000000280 R12: 0000000000000286 -[ 452.080015] R13: 0000000000000020 R14: 0000000000000240 R15: 0000000000000000 -[ 452.080015] FS: 00007fdc0cc24700(0000) GS:ffff8800b6f00000(0000) knlGS:0000000000000000 -[ 452.080015] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 -[ 452.080015] CR2: 00007fdb054899b8 CR3: 0000000074404000 CR4: 00000000000006a0 -[ 452.080015] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 -[ 452.080015] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 -[ 452.080015] Process accel-pppd (pid: 6643, threadinfo ffff88007125e000, task ffff8800b27e6dd0) -[ 452.080015] Stack: -[ 452.080015] ffff88007125fc28 ffffffff81256559 ffff88007125fc98 ffffffffa01b2bd1 -[ 452.080015] ffff88007125fc58 000000000000000c 00000000029490d0 0000009c71dbe25e -[ 452.080015] 000000000000005c 000000080000000e 0000000000000000 ffff880071170600 -[ 452.080015] Call Trace: -[ 452.080015] [] _raw_spin_lock+0xe/0x10 -[ 452.080015] [] l2tp_xmit_skb+0x189/0x4ac [l2tp_core] -[ 452.080015] [] pppol2tp_sendmsg+0x15e/0x19c [l2tp_ppp] -[ 452.080015] [] __sock_sendmsg_nosec+0x22/0x24 -[ 452.080015] [] sock_sendmsg+0xa1/0xb6 -[ 452.080015] [] ? __schedule+0x5c1/0x616 -[ 452.080015] [] ? __dequeue_signal+0xb7/0x10c -[ 452.080015] [] ? fget_light+0x75/0x89 -[ 452.080015] [] ? sockfd_lookup_light+0x20/0x56 -[ 452.080015] [] sys_sendto+0x10c/0x13b -[ 452.080015] [] system_call_fastpath+0x16/0x1b -[ 452.080015] Code: 81 48 89 e5 72 0c 31 c0 48 81 ff 45 66 25 81 0f 92 c0 5d c3 55 b8 00 01 00 00 48 89 e5 f0 66 0f c1 07 0f b6 d4 38 d0 74 06 f3 90 <8a> 07 eb f6 5d c3 90 90 55 48 89 e5 9c 58 0f 1f 44 00 00 5d c3 -[ 452.080015] Call Trace: -[ 452.080015] [] _raw_spin_lock+0xe/0x10 -[ 452.080015] [] l2tp_xmit_skb+0x189/0x4ac [l2tp_core] -[ 452.080015] [] pppol2tp_sendmsg+0x15e/0x19c [l2tp_ppp] -[ 452.080015] [] __sock_sendmsg_nosec+0x22/0x24 -[ 452.080015] [] sock_sendmsg+0xa1/0xb6 -[ 452.080015] [] ? __schedule+0x5c1/0x616 -[ 452.080015] [] ? __dequeue_signal+0xb7/0x10c -[ 452.080015] [] ? fget_light+0x75/0x89 -[ 452.080015] [] ? sockfd_lookup_light+0x20/0x56 -[ 452.080015] [] sys_sendto+0x10c/0x13b -[ 452.080015] [] system_call_fastpath+0x16/0x1b -[ 452.064012] -[ 452.064012] Pid: 6662, comm: accel-pppd Not tainted 3.2.46.mini #1 Bochs Bochs -[ 452.064012] RIP: 0010:[] [] do_raw_spin_lock+0x19/0x1f -[ 452.064012] RSP: 0018:ffff8800b6e83ba0 EFLAGS: 00000297 -[ 452.064012] RAX: 000000000000aaa9 RBX: ffff8800b6e83b40 RCX: 0000000000000002 -[ 452.064012] RDX: 00000000000000aa RSI: 000000000000000a RDI: ffff8800745c8110 -[ 452.064012] RBP: ffff8800b6e83ba0 R08: 000000000000c802 R09: 000000000000001c -[ 452.064012] R10: ffff880071096c4e R11: 0000000000000006 R12: ffff8800b6e83b18 -[ 452.064012] R13: ffffffff8125d51e R14: ffff8800b6e83ba0 R15: ffff880072a589c0 -[ 452.064012] FS: 00007fdc0b81e700(0000) GS:ffff8800b6e80000(0000) knlGS:0000000000000000 -[ 452.064012] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 -[ 452.064012] CR2: 0000000000625208 CR3: 0000000074404000 CR4: 00000000000006a0 -[ 452.064012] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 -[ 452.064012] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 -[ 452.064012] Process accel-pppd (pid: 6662, threadinfo ffff88007129a000, task ffff8800744f7410) -[ 452.064012] Stack: -[ 452.064012] ffff8800b6e83bb0 ffffffff81256559 ffff8800b6e83bc0 ffffffff8121c64a -[ 452.064012] ffff8800b6e83bf0 ffffffff8121ec7a ffff880072a589c0 ffff880071096c62 -[ 452.064012] 0000000000000011 ffffffff81430024 ffff8800b6e83c80 ffffffff8121f276 -[ 452.064012] Call Trace: -[ 452.064012] -[ 452.064012] [] _raw_spin_lock+0xe/0x10 -[ 452.064012] [] spin_lock+0x9/0xb -[ 452.064012] [] udp_queue_rcv_skb+0x186/0x269 -[ 452.064012] [] __udp4_lib_rcv+0x297/0x4ae -[ 452.064012] [] ? raw_rcv+0xe9/0xf0 -[ 452.064012] [] udp_rcv+0x1a/0x1c -[ 452.064012] [] ip_local_deliver_finish+0x12b/0x1a5 -[ 452.064012] [] ip_local_deliver+0x53/0x84 -[ 452.064012] [] ip_rcv_finish+0x2bc/0x2f3 -[ 452.064012] [] ip_rcv+0x210/0x269 -[ 452.064012] [] ? kvm_clock_get_cycles+0x9/0xb -[ 452.064012] [] __netif_receive_skb+0x3a5/0x3f7 -[ 452.064012] [] netif_receive_skb+0x57/0x5e -[ 452.064012] [] ? __netdev_alloc_skb+0x1f/0x3b -[ 452.064012] [] virtnet_poll+0x4ba/0x5a4 [virtio_net] -[ 452.064012] [] net_rx_action+0x73/0x184 -[ 452.064012] [] ? l2tp_xmit_skb+0x27a/0x4ac [l2tp_core] -[ 452.064012] [] __do_softirq+0xc3/0x1a8 -[ 452.064012] [] ? ack_APIC_irq+0x10/0x12 -[ 452.064012] [] ? _raw_spin_lock+0xe/0x10 -[ 452.064012] [] call_softirq+0x1c/0x26 -[ 452.064012] [] do_softirq+0x45/0x82 -[ 452.064012] [] irq_exit+0x42/0x9c -[ 452.064012] [] do_IRQ+0x8e/0xa5 -[ 452.064012] [] common_interrupt+0x6e/0x6e -[ 452.064012] -[ 452.064012] [] ? kfree+0x8a/0xa3 -[ 452.064012] [] ? l2tp_xmit_skb+0x27a/0x4ac [l2tp_core] -[ 452.064012] [] ? l2tp_xmit_skb+0x1dd/0x4ac [l2tp_core] -[ 452.064012] [] pppol2tp_sendmsg+0x15e/0x19c [l2tp_ppp] -[ 452.064012] [] __sock_sendmsg_nosec+0x22/0x24 -[ 452.064012] [] sock_sendmsg+0xa1/0xb6 -[ 452.064012] [] ? __schedule+0x5c1/0x616 -[ 452.064012] [] ? __dequeue_signal+0xb7/0x10c -[ 452.064012] [] ? fget_light+0x75/0x89 -[ 452.064012] [] ? sockfd_lookup_light+0x20/0x56 -[ 452.064012] [] sys_sendto+0x10c/0x13b -[ 452.064012] [] system_call_fastpath+0x16/0x1b -[ 452.064012] Code: 89 e5 72 0c 31 c0 48 81 ff 45 66 25 81 0f 92 c0 5d c3 55 b8 00 01 00 00 48 89 e5 f0 66 0f c1 07 0f b6 d4 38 d0 74 06 f3 90 8a 07 f6 5d c3 90 90 55 48 89 e5 9c 58 0f 1f 44 00 00 5d c3 55 48 -[ 452.064012] Call Trace: -[ 452.064012] [] _raw_spin_lock+0xe/0x10 -[ 452.064012] [] spin_lock+0x9/0xb -[ 452.064012] [] udp_queue_rcv_skb+0x186/0x269 -[ 452.064012] [] __udp4_lib_rcv+0x297/0x4ae -[ 452.064012] [] ? raw_rcv+0xe9/0xf0 -[ 452.064012] [] udp_rcv+0x1a/0x1c -[ 452.064012] [] ip_local_deliver_finish+0x12b/0x1a5 -[ 452.064012] [] ip_local_deliver+0x53/0x84 -[ 452.064012] [] ip_rcv_finish+0x2bc/0x2f3 -[ 452.064012] [] ip_rcv+0x210/0x269 -[ 452.064012] [] ? kvm_clock_get_cycles+0x9/0xb -[ 452.064012] [] __netif_receive_skb+0x3a5/0x3f7 -[ 452.064012] [] netif_receive_skb+0x57/0x5e -[ 452.064012] [] ? __netdev_alloc_skb+0x1f/0x3b -[ 452.064012] [] virtnet_poll+0x4ba/0x5a4 [virtio_net] -[ 452.064012] [] net_rx_action+0x73/0x184 -[ 452.064012] [] ? l2tp_xmit_skb+0x27a/0x4ac [l2tp_core] -[ 452.064012] [] __do_softirq+0xc3/0x1a8 -[ 452.064012] [] ? ack_APIC_irq+0x10/0x12 -[ 452.064012] [] ? _raw_spin_lock+0xe/0x10 -[ 452.064012] [] call_softirq+0x1c/0x26 -[ 452.064012] [] do_softirq+0x45/0x82 -[ 452.064012] [] irq_exit+0x42/0x9c -[ 452.064012] [] do_IRQ+0x8e/0xa5 -[ 452.064012] [] common_interrupt+0x6e/0x6e -[ 452.064012] [] ? kfree+0x8a/0xa3 -[ 452.064012] [] ? l2tp_xmit_skb+0x27a/0x4ac [l2tp_core] -[ 452.064012] [] ? l2tp_xmit_skb+0x1dd/0x4ac [l2tp_core] -[ 452.064012] [] pppol2tp_sendmsg+0x15e/0x19c [l2tp_ppp] -[ 452.064012] [] __sock_sendmsg_nosec+0x22/0x24 -[ 452.064012] [] sock_sendmsg+0xa1/0xb6 -[ 452.064012] [] ? __schedule+0x5c1/0x616 -[ 452.064012] [] ? __dequeue_signal+0xb7/0x10c -[ 452.064012] [] ? fget_light+0x75/0x89 -[ 452.064012] [] ? sockfd_lookup_light+0x20/0x56 -[ 452.064012] [] sys_sendto+0x10c/0x13b -[ 452.064012] [] system_call_fastpath+0x16/0x1b - -Reported-by: François Cachereul -Tested-by: François Cachereul -Signed-off-by: Eric Dumazet -Cc: James Chapman -Signed-off-by: David S. Miller ---- - net/l2tp/l2tp_ppp.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c -index 5ebee2d..8c46b27 100644 ---- a/net/l2tp/l2tp_ppp.c -+++ b/net/l2tp/l2tp_ppp.c -@@ -353,7 +353,9 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh - goto error_put_sess_tun; - } - -+ local_bh_disable(); - l2tp_xmit_skb(session, skb, session->hdr_len); -+ local_bh_enable(); - - sock_put(ps->tunnel_sock); - sock_put(sk); -@@ -422,7 +424,9 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) - skb->data[0] = ppph[0]; - skb->data[1] = ppph[1]; - -+ local_bh_disable(); - l2tp_xmit_skb(session, skb, session->hdr_len); -+ local_bh_enable(); - - sock_put(sk_tun); - sock_put(sk); --- -1.7.11.7 - - -From 5bf1c228293765ff84e4121cf2f92395403b7e33 Mon Sep 17 00:00:00 2001 -From: stephen hemminger -Date: Sun, 6 Oct 2013 15:15:33 -0700 -Subject: [PATCH 26/47] netem: update backlog after drop -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit 638a52b801e40ed276ceb69b73579ad99365361a ] - -When packet is dropped from rb-tree netem the backlog statistic should -also be updated. - -Reported-by: Сергеев Сергей -Signed-off-by: Stephen Hemminger -Acked-by: Eric Dumazet -Signed-off-by: David S. Miller ---- - net/sched/sch_netem.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c -index 82f6016..7dc79940 100644 ---- a/net/sched/sch_netem.c -+++ b/net/sched/sch_netem.c -@@ -523,6 +523,7 @@ static unsigned int netem_drop(struct Qdisc *sch) - skb->next = NULL; - skb->prev = NULL; - len = qdisc_pkt_len(skb); -+ sch->qstats.backlog -= len; - kfree_skb(skb); - } - } --- -1.7.11.7 - - -From ddc30868db0e31c0c2ab4691131a050f9136f3bf Mon Sep 17 00:00:00 2001 -From: stephen hemminger -Date: Sun, 6 Oct 2013 15:16:49 -0700 -Subject: [PATCH 27/47] netem: free skb's in tree on reset -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit ff704050f2fc0f3382b5a70bba56a51a3feca79d ] - -Netem can leak memory because packets get stored in red-black -tree and it is not cleared on reset. - -Reported by: Сергеев Сергей -Signed-off-by: Stephen Hemminger -Signed-off-by: David S. Miller ---- - net/sched/sch_netem.c | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c -index 7dc79940..3626010 100644 ---- a/net/sched/sch_netem.c -+++ b/net/sched/sch_netem.c -@@ -358,6 +358,21 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche - return PSCHED_NS2TICKS(ticks); - } - -+static void tfifo_reset(struct Qdisc *sch) -+{ -+ struct netem_sched_data *q = qdisc_priv(sch); -+ struct rb_node *p; -+ -+ while ((p = rb_first(&q->t_root))) { -+ struct sk_buff *skb = netem_rb_to_skb(p); -+ -+ rb_erase(p, &q->t_root); -+ skb->next = NULL; -+ skb->prev = NULL; -+ kfree_skb(skb); -+ } -+} -+ - static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) - { - struct netem_sched_data *q = qdisc_priv(sch); -@@ -613,6 +628,7 @@ static void netem_reset(struct Qdisc *sch) - struct netem_sched_data *q = qdisc_priv(sch); - - qdisc_reset_queue(sch); -+ tfifo_reset(sch); - if (q->qdisc) - qdisc_reset(q->qdisc); - qdisc_watchdog_cancel(&q->watchdog); --- -1.7.11.7 - - -From c871c477136615360e283471acdb33df95d70470 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Salva=20Peir=C3=B3?= -Date: Fri, 11 Oct 2013 12:50:03 +0300 -Subject: [PATCH 28/47] farsync: fix info leak in ioctl - -[ Upstream commit 96b340406724d87e4621284ebac5e059d67b2194 ] - -The fst_get_iface() code fails to initialize the two padding bytes of -struct sync_serial_settings after the ->loopback member. Add an explicit -memset(0) before filling the structure to avoid the info leak. - -Signed-off-by: Dan Carpenter -Signed-off-by: David S. Miller ---- - drivers/net/wan/farsync.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c -index 3f0c4f2..bcfff0d 100644 ---- a/drivers/net/wan/farsync.c -+++ b/drivers/net/wan/farsync.c -@@ -1972,6 +1972,7 @@ fst_get_iface(struct fst_card_info *card, struct fst_port_info *port, - } - - i = port->index; -+ memset(&sync, 0, sizeof(sync)); - sync.clock_rate = FST_RDL(card, portConfig[i].lineSpeed); - /* Lucky card and linux use same encoding here */ - sync.clock_type = FST_RDB(card, portConfig[i].internalClock) == --- -1.7.11.7 - - -From e69ccba66791d0edd0d596520de268369aaab610 Mon Sep 17 00:00:00 2001 -From: Mathias Krause -Date: Mon, 30 Sep 2013 22:05:40 +0200 -Subject: [PATCH 29/47] unix_diag: fix info leak - -[ Upstream commit 6865d1e834be84ddd5808d93d5035b492346c64a ] - -When filling the netlink message we miss to wipe the pad field, -therefore leak one byte of heap memory to userland. Fix this by -setting pad to 0. - -Signed-off-by: Mathias Krause -Signed-off-by: David S. Miller ---- - net/unix/diag.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/net/unix/diag.c b/net/unix/diag.c -index d591091..86fa0f3 100644 ---- a/net/unix/diag.c -+++ b/net/unix/diag.c -@@ -124,6 +124,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r - rep->udiag_family = AF_UNIX; - rep->udiag_type = sk->sk_type; - rep->udiag_state = sk->sk_state; -+ rep->pad = 0; - rep->udiag_ino = sk_ino; - sock_diag_save_cookie(sk, rep->udiag_cookie); - --- -1.7.11.7 - - -From 00fa721e6873ccbb36fc008558bb7d23e9e3c21f Mon Sep 17 00:00:00 2001 -From: Mathias Krause -Date: Mon, 30 Sep 2013 22:03:07 +0200 -Subject: [PATCH 30/47] connector: use nlmsg_len() to check message length - -[ Upstream commit 162b2bedc084d2d908a04c93383ba02348b648b0 ] - -The current code tests the length of the whole netlink message to be -at least as long to fit a cn_msg. This is wrong as nlmsg_len includes -the length of the netlink message header. Use nlmsg_len() instead to -fix this "off-by-NLMSG_HDRLEN" size check. - -Cc: stable@vger.kernel.org # v2.6.14+ -Signed-off-by: Mathias Krause -Signed-off-by: David S. Miller ---- - drivers/connector/connector.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c -index 6ecfa75..0daa11e 100644 ---- a/drivers/connector/connector.c -+++ b/drivers/connector/connector.c -@@ -157,17 +157,18 @@ static int cn_call_callback(struct sk_buff *skb) - static void cn_rx_skb(struct sk_buff *__skb) - { - struct nlmsghdr *nlh; -- int err; - struct sk_buff *skb; -+ int len, err; - - skb = skb_get(__skb); - - if (skb->len >= NLMSG_HDRLEN) { - nlh = nlmsg_hdr(skb); -+ len = nlmsg_len(nlh); - -- if (nlh->nlmsg_len < sizeof(struct cn_msg) || -+ if (len < (int)sizeof(struct cn_msg) || - skb->len < nlh->nlmsg_len || -- nlh->nlmsg_len > CONNECTOR_MAX_MSG_SIZE) { -+ len > CONNECTOR_MAX_MSG_SIZE) { - kfree_skb(skb); - return; - } --- -1.7.11.7 - - -From d99d51100021c9f8b335fc1931880618eaa448e3 Mon Sep 17 00:00:00 2001 -From: Eric Dumazet -Date: Sat, 12 Oct 2013 14:08:34 -0700 -Subject: [PATCH 31/47] bnx2x: record rx queue for LRO packets - -[ Upstream commit 60e66fee56b2256dcb1dc2ea1b2ddcb6e273857d ] - -RPS support is kind of broken on bnx2x, because only non LRO packets -get proper rx queue information. This triggers reorders, as it seems -bnx2x like to generate a non LRO packet for segment including TCP PUSH -flag : (this might be pure coincidence, but all the reorders I've -seen involve segments with a PUSH) - -11:13:34.335847 IP A > B: . 415808:447136(31328) ack 1 win 457 -11:13:34.335992 IP A > B: . 447136:448560(1424) ack 1 win 457 -11:13:34.336391 IP A > B: . 448560:479888(31328) ack 1 win 457 -11:13:34.336425 IP A > B: P 511216:512640(1424) ack 1 win 457 -11:13:34.336423 IP A > B: . 479888:511216(31328) ack 1 win 457 -11:13:34.336924 IP A > B: . 512640:543968(31328) ack 1 win 457 -11:13:34.336963 IP A > B: . 543968:575296(31328) ack 1 win 457 - -We must call skb_record_rx_queue() to properly give to RPS (and more -generally for TX queue selection on forward path) the receive queue -information. - -Similar fix is needed for skb_mark_napi_id(), but will be handled -in a separate patch to ease stable backports. - -Signed-off-by: Eric Dumazet -Cc: Willem de Bruijn -Cc: Eilon Greenstein -Acked-by: Dmitry Kravkov -Signed-off-by: David S. Miller ---- - drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c -index 0cc2611..4b0877e 100644 ---- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c -+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c -@@ -676,6 +676,7 @@ static void bnx2x_gro_receive(struct bnx2x *bp, struct bnx2x_fastpath *fp, - } - } - #endif -+ skb_record_rx_queue(skb, fp->rx_queue); - napi_gro_receive(&fp->napi, skb); - } - --- -1.7.11.7 - - -From 3f1db36c01909701d0e34cd2413a1127e144bcc3 Mon Sep 17 00:00:00 2001 -From: Jason Wang -Date: Tue, 15 Oct 2013 11:18:58 +0800 -Subject: [PATCH 32/47] virtio-net: don't respond to cpu hotplug notifier if - we're not ready - -[ Upstream commit 3ab098df35f8b98b6553edc2e40234af512ba877 ] - -We're trying to re-configure the affinity unconditionally in cpu hotplug -callback. This may lead the issue during resuming from s3/s4 since - -- virt queues haven't been allocated at that time. -- it's unnecessary since thaw method will re-configure the affinity. - -Fix this issue by checking the config_enable and do nothing is we're not ready. - -The bug were introduced by commit 8de4b2f3ae90c8fc0f17eeaab87d5a951b66ee17 -(virtio-net: reset virtqueue affinity when doing cpu hotplug). - -Cc: Rusty Russell -Cc: Michael S. Tsirkin -Cc: Wanlong Gao -Acked-by: Michael S. Tsirkin -Reviewed-by: Wanlong Gao -Signed-off-by: Jason Wang -Signed-off-by: David S. Miller ---- - drivers/net/virtio_net.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c -index 3d2a90a..43a71d9 100644 ---- a/drivers/net/virtio_net.c -+++ b/drivers/net/virtio_net.c -@@ -1094,6 +1094,11 @@ static int virtnet_cpu_callback(struct notifier_block *nfb, - { - struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb); - -+ mutex_lock(&vi->config_lock); -+ -+ if (!vi->config_enable) -+ goto done; -+ - switch(action & ~CPU_TASKS_FROZEN) { - case CPU_ONLINE: - case CPU_DOWN_FAILED: -@@ -1106,6 +1111,9 @@ static int virtnet_cpu_callback(struct notifier_block *nfb, - default: - break; - } -+ -+done: -+ mutex_unlock(&vi->config_lock); - return NOTIFY_OK; - } - --- -1.7.11.7 - - -From 24ef3b7cfd16ce5ac263deebfecb661d1c784670 Mon Sep 17 00:00:00 2001 -From: Jason Wang -Date: Tue, 15 Oct 2013 11:18:59 +0800 -Subject: [PATCH 33/47] virtio-net: refill only when device is up during - setting queues - -[ Upstream commit 35ed159bfd96a7547ec277ed8b550c7cbd9841b6 ] - -We used to schedule the refill work unconditionally after changing the -number of queues. This may lead an issue if the device is not -up. Since we only try to cancel the work in ndo_stop(), this may cause -the refill work still work after removing the device. Fix this by only -schedule the work when device is up. - -The bug were introduce by commit 9b9cd8024a2882e896c65222aa421d461354e3f2. -(virtio-net: fix the race between channels setting and refill) - -Cc: Rusty Russell -Cc: Michael S. Tsirkin -Signed-off-by: Jason Wang -Signed-off-by: David S. Miller ---- - drivers/net/virtio_net.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c -index 43a71d9..1d01534 100644 ---- a/drivers/net/virtio_net.c -+++ b/drivers/net/virtio_net.c -@@ -916,7 +916,9 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) - return -EINVAL; - } else { - vi->curr_queue_pairs = queue_pairs; -- schedule_delayed_work(&vi->refill, 0); -+ /* virtnet_open() will refill when device is going to up. */ -+ if (dev->flags & IFF_UP) -+ schedule_delayed_work(&vi->refill, 0); - } - - return 0; -@@ -1714,7 +1716,9 @@ static int virtnet_restore(struct virtio_device *vdev) - vi->config_enable = true; - mutex_unlock(&vi->config_lock); - -+ rtnl_lock(); - virtnet_set_queues(vi, vi->curr_queue_pairs); -+ rtnl_unlock(); - - return 0; - } --- -1.7.11.7 - - -From d616bd8bf902f82ea742462a29bf4080aaa8f497 Mon Sep 17 00:00:00 2001 -From: Vlad Yasevich -Date: Tue, 15 Oct 2013 14:57:45 -0400 -Subject: [PATCH 34/47] bridge: Correctly clamp MAX forward_delay when - enabling STP - -[ Upstream commit 4b6c7879d84ad06a2ac5b964808ed599187a188d ] - -Commit be4f154d5ef0ca147ab6bcd38857a774133f5450 - bridge: Clamp forward_delay when enabling STP -had a typo when attempting to clamp maximum forward delay. - -It is possible to set bridge_forward_delay to be higher then -permitted maximum when STP is off. When turning STP on, the -higher then allowed delay has to be clamed down to max value. - -CC: Herbert Xu -CC: Stephen Hemminger -Signed-off-by: Vlad Yasevich -Reviewed-by: Veaceslav Falico -Acked-by: Herbert Xu -Signed-off-by: David S. Miller ---- - net/bridge/br_stp_if.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c -index 108084a..656a6f3 100644 ---- a/net/bridge/br_stp_if.c -+++ b/net/bridge/br_stp_if.c -@@ -134,7 +134,7 @@ static void br_stp_start(struct net_bridge *br) - - if (br->bridge_forward_delay < BR_MIN_FORWARD_DELAY) - __br_set_forward_delay(br, BR_MIN_FORWARD_DELAY); -- else if (br->bridge_forward_delay < BR_MAX_FORWARD_DELAY) -+ else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY) - __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY); - - if (r == 0) { --- -1.7.11.7 - - -From 803490b7c577add0b976aa08e4bbfdd95f505270 Mon Sep 17 00:00:00 2001 -From: Vlad Yasevich -Date: Tue, 15 Oct 2013 22:01:29 -0400 -Subject: [PATCH 35/47] net: dst: provide accessor function to dst->xfrm - -[ Upstream commit e87b3998d795123b4139bc3f25490dd236f68212 ] - -dst->xfrm is conditionally defined. Provide accessor funtion that -is always available. - -Signed-off-by: Vlad Yasevich -Acked-by: Neil Horman -Signed-off-by: David S. Miller ---- - include/net/dst.h | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/include/net/dst.h b/include/net/dst.h -index 1f8fd10..e0c97f5 100644 ---- a/include/net/dst.h -+++ b/include/net/dst.h -@@ -477,10 +477,22 @@ static inline struct dst_entry *xfrm_lookup(struct net *net, - { - return dst_orig; - } -+ -+static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) -+{ -+ return NULL; -+} -+ - #else - extern struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, - const struct flowi *fl, struct sock *sk, - int flags); -+ -+/* skb attached with this dst needs transformation if dst->xfrm is valid */ -+static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst) -+{ -+ return dst->xfrm; -+} - #endif - - #endif /* _NET_DST_H */ --- -1.7.11.7 - - -From 371a65903ccb75fc71fd42b30a310a28c42e54a3 Mon Sep 17 00:00:00 2001 -From: Fan Du -Date: Tue, 15 Oct 2013 22:01:30 -0400 -Subject: [PATCH 36/47] sctp: Use software crc32 checksum when xfrm transform - will happen. - -[ Upstream commit 27127a82561a2a3ed955ce207048e1b066a80a2a ] - -igb/ixgbe have hardware sctp checksum support, when this feature is enabled -and also IPsec is armed to protect sctp traffic, ugly things happened as -xfrm_output checks CHECKSUM_PARTIAL to do checksum operation(sum every thing -up and pack the 16bits result in the checksum field). The result is fail -establishment of sctp communication. - -Cc: Neil Horman -Cc: Steffen Klassert -Signed-off-by: Fan Du -Signed-off-by: Vlad Yasevich -Acked-by: Neil Horman -Signed-off-by: David S. Miller ---- - net/sctp/output.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/net/sctp/output.c b/net/sctp/output.c -index a46d1eb..a06a9b6 100644 ---- a/net/sctp/output.c -+++ b/net/sctp/output.c -@@ -542,7 +542,8 @@ int sctp_packet_transmit(struct sctp_packet *packet) - * by CRC32-C as described in . - */ - if (!sctp_checksum_disable) { -- if (!(dst->dev->features & NETIF_F_SCTP_CSUM)) { -+ if (!(dst->dev->features & NETIF_F_SCTP_CSUM) || -+ (dst_xfrm(dst) != NULL)) { - __u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); - - /* 3) Put the resultant value into the checksum field in the --- -1.7.11.7 - - -From 9067790bb296fb5818894222d7e85407238e9843 Mon Sep 17 00:00:00 2001 -From: Vlad Yasevich -Date: Tue, 15 Oct 2013 22:01:31 -0400 -Subject: [PATCH 37/47] sctp: Perform software checksum if packet has to be - fragmented. - -[ Upstream commit d2dbbba77e95dff4b4f901fee236fef6d9552072 ] - -IP/IPv6 fragmentation knows how to compute only TCP/UDP checksum. -This causes problems if SCTP packets has to be fragmented and -ipsummed has been set to PARTIAL due to checksum offload support. -This condition can happen when retransmitting after MTU discover, -or when INIT or other control chunks are larger then MTU. -Check for the rare fragmentation condition in SCTP and use software -checksum calculation in this case. - -CC: Fan Du -Signed-off-by: Vlad Yasevich -Acked-by: Neil Horman -Signed-off-by: David S. Miller ---- - net/sctp/output.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/net/sctp/output.c b/net/sctp/output.c -index a06a9b6..013a07d 100644 ---- a/net/sctp/output.c -+++ b/net/sctp/output.c -@@ -543,7 +543,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) - */ - if (!sctp_checksum_disable) { - if (!(dst->dev->features & NETIF_F_SCTP_CSUM) || -- (dst_xfrm(dst) != NULL)) { -+ (dst_xfrm(dst) != NULL) || packet->ipfragok) { - __u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); - - /* 3) Put the resultant value into the checksum field in the --- -1.7.11.7 - - -From 22e825ed8144360271614511563166f37fef9f90 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Salva=20Peir=C3=B3?= -Date: Wed, 16 Oct 2013 12:46:50 +0200 -Subject: [PATCH 38/47] wanxl: fix info leak in ioctl -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -[ Upstream commit 2b13d06c9584b4eb773f1e80bbaedab9a1c344e1 ] - -The wanxl_ioctl() code fails to initialize the two padding bytes of -struct sync_serial_settings after the ->loopback member. Add an explicit -memset(0) before filling the structure to avoid the info leak. - -Signed-off-by: Salva Peiró -Signed-off-by: David S. Miller ---- - drivers/net/wan/wanxl.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c -index 6a24a5a..4c0a697 100644 ---- a/drivers/net/wan/wanxl.c -+++ b/drivers/net/wan/wanxl.c -@@ -355,6 +355,7 @@ static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) - ifr->ifr_settings.size = size; /* data size wanted */ - return -ENOBUFS; - } -+ memset(&line, 0, sizeof(line)); - line.clock_type = get_status(port)->clocking; - line.clock_rate = 0; - line.loopback = 0; --- -1.7.11.7 - - -From b16dd2cff7a4eb3881f43371d71ed242332877dc Mon Sep 17 00:00:00 2001 -From: Vasundhara Volam -Date: Thu, 17 Oct 2013 11:47:14 +0530 -Subject: [PATCH 39/47] be2net: pass if_id for v1 and V2 versions of TX_CREATE - cmd - -[ Upstream commit 0fb88d61bc60779dde88b0fc268da17eb81d0412 ] - -It is a required field for all TX_CREATE cmd versions > 0. -This fixes a driver initialization failure, caused by recent SH-R Firmwares -(versions > 10.0.639.0) failing the TX_CREATE cmd when if_id field is -not passed. - -Signed-off-by: Sathya Perla -Signed-off-by: David S. Miller ---- - drivers/net/ethernet/emulex/benet/be_cmds.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c -index 8ec5d74..13ac104 100644 ---- a/drivers/net/ethernet/emulex/benet/be_cmds.c -+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c -@@ -1150,7 +1150,6 @@ int be_cmd_txq_create(struct be_adapter *adapter, struct be_tx_obj *txo) - - if (lancer_chip(adapter)) { - req->hdr.version = 1; -- req->if_id = cpu_to_le16(adapter->if_handle); - } else if (BEx_chip(adapter)) { - if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) - req->hdr.version = 2; -@@ -1158,6 +1157,8 @@ int be_cmd_txq_create(struct be_adapter *adapter, struct be_tx_obj *txo) - req->hdr.version = 2; - } - -+ if (req->hdr.version > 0) -+ req->if_id = cpu_to_le16(adapter->if_handle); - req->num_pages = PAGES_4K_SPANNED(q_mem->va, q_mem->size); - req->ulp_num = BE_ULP1_NUM; - req->type = BE_ETH_TX_RING_TYPE_STANDARD; --- -1.7.11.7 - - -From 9829aac8208e7a31e4e42e7d2e7e165593c05202 Mon Sep 17 00:00:00 2001 -From: Daniel Borkmann -Date: Thu, 17 Oct 2013 22:51:31 +0200 -Subject: [PATCH 40/47] net: unix: inherit SOCK_PASS{CRED, SEC} flags from - socket to fix race - -[ Upstream commit 90c6bd34f884cd9cee21f1d152baf6c18bcac949 ] - -In the case of credentials passing in unix stream sockets (dgram -sockets seem not affected), we get a rather sparse race after -commit 16e5726 ("af_unix: dont send SCM_CREDENTIALS by default"). - -We have a stream server on receiver side that requests credential -passing from senders (e.g. nc -U). Since we need to set SO_PASSCRED -on each spawned/accepted socket on server side to 1 first (as it's -not inherited), it can happen that in the time between accept() and -setsockopt() we get interrupted, the sender is being scheduled and -continues with passing data to our receiver. At that time SO_PASSCRED -is neither set on sender nor receiver side, hence in cmsg's -SCM_CREDENTIALS we get eventually pid:0, uid:65534, gid:65534 -(== overflow{u,g}id) instead of what we actually would like to see. - -On the sender side, here nc -U, the tests in maybe_add_creds() -invoked through unix_stream_sendmsg() would fail, as at that exact -time, as mentioned, the sender has neither SO_PASSCRED on his side -nor sees it on the server side, and we have a valid 'other' socket -in place. Thus, sender believes it would just look like a normal -connection, not needing/requesting SO_PASSCRED at that time. - -As reverting 16e5726 would not be an option due to the significant -performance regression reported when having creds always passed, -one way/trade-off to prevent that would be to set SO_PASSCRED on -the listener socket and allow inheriting these flags to the spawned -socket on server side in accept(). It seems also logical to do so -if we'd tell the listener socket to pass those flags onwards, and -would fix the race. - -Before, strace: - -recvmsg(4, {msg_name(0)=NULL, msg_iov(1)=[{"blub\n", 4096}], - msg_controllen=32, {cmsg_len=28, cmsg_level=SOL_SOCKET, - cmsg_type=SCM_CREDENTIALS{pid=0, uid=65534, gid=65534}}, - msg_flags=0}, 0) = 5 - -After, strace: - -recvmsg(4, {msg_name(0)=NULL, msg_iov(1)=[{"blub\n", 4096}], - msg_controllen=32, {cmsg_len=28, cmsg_level=SOL_SOCKET, - cmsg_type=SCM_CREDENTIALS{pid=11580, uid=1000, gid=1000}}, - msg_flags=0}, 0) = 5 - -Signed-off-by: Daniel Borkmann -Cc: Eric Dumazet -Cc: Eric W. Biederman -Signed-off-by: David S. Miller ---- - net/unix/af_unix.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c -index c4ce243..e64bbcf 100644 ---- a/net/unix/af_unix.c -+++ b/net/unix/af_unix.c -@@ -1246,6 +1246,15 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb) - return 0; - } - -+static void unix_sock_inherit_flags(const struct socket *old, -+ struct socket *new) -+{ -+ if (test_bit(SOCK_PASSCRED, &old->flags)) -+ set_bit(SOCK_PASSCRED, &new->flags); -+ if (test_bit(SOCK_PASSSEC, &old->flags)) -+ set_bit(SOCK_PASSSEC, &new->flags); -+} -+ - static int unix_accept(struct socket *sock, struct socket *newsock, int flags) - { - struct sock *sk = sock->sk; -@@ -1280,6 +1289,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags) - /* attach accepted sock to socket */ - unix_state_lock(tsk); - newsock->state = SS_CONNECTED; -+ unix_sock_inherit_flags(sock, newsock); - sock_graft(tsk, newsock); - unix_state_unlock(tsk); - return 0; --- -1.7.11.7 - - -From 7b48750febb4c3387db39fd0b547936c53ba7364 Mon Sep 17 00:00:00 2001 -From: Seif Mazareeb -Date: Thu, 17 Oct 2013 20:33:21 -0700 -Subject: [PATCH 41/47] net: fix cipso packet validation when !NETLABEL - -[ Upstream commit f2e5ddcc0d12f9c4c7b254358ad245c9dddce13b ] - -When CONFIG_NETLABEL is disabled, the cipso_v4_validate() function could loop -forever in the main loop if opt[opt_iter +1] == 0, this will causing a kernel -crash in an SMP system, since the CPU executing this function will -stall /not respond to IPIs. - -This problem can be reproduced by running the IP Stack Integrity Checker -(http://isic.sourceforge.net) using the following command on a Linux machine -connected to DUT: - -"icmpsic -s rand -d -r 123456" -wait (1-2 min) - -Signed-off-by: Seif Mazareeb -Acked-by: Paul Moore -Signed-off-by: David S. Miller ---- - include/net/cipso_ipv4.h | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h -index a7a683e..a8c2ef6 100644 ---- a/include/net/cipso_ipv4.h -+++ b/include/net/cipso_ipv4.h -@@ -290,6 +290,7 @@ static inline int cipso_v4_validate(const struct sk_buff *skb, - unsigned char err_offset = 0; - u8 opt_len = opt[1]; - u8 opt_iter; -+ u8 tag_len; - - if (opt_len < 8) { - err_offset = 1; -@@ -302,11 +303,12 @@ static inline int cipso_v4_validate(const struct sk_buff *skb, - } - - for (opt_iter = 6; opt_iter < opt_len;) { -- if (opt[opt_iter + 1] > (opt_len - opt_iter)) { -+ tag_len = opt[opt_iter + 1]; -+ if ((tag_len == 0) || (opt[opt_iter + 1] > (opt_len - opt_iter))) { - err_offset = opt_iter + 1; - goto out; - } -- opt_iter += opt[opt_iter + 1]; -+ opt_iter += tag_len; - } - - out: --- -1.7.11.7 - - -From 27e33640a8905b1aeefe9998242551caf24e84a6 Mon Sep 17 00:00:00 2001 -From: Hannes Frederic Sowa -Date: Tue, 22 Oct 2013 00:07:47 +0200 -Subject: [PATCH 42/47] inet: fix possible memory corruption with UDP_CORK and - UFO - -[ This is a simplified -stable version of a set of upstream commits. ] - -This is a replacement patch only for stable which does fix the problems -handled by the following two commits in -net: - -"ip_output: do skb ufo init for peeked non ufo skb as well" (e93b7d748be887cd7639b113ba7d7ef792a7efb9) -"ip6_output: do skb ufo init for peeked non ufo skb as well" (c547dbf55d5f8cf615ccc0e7265e98db27d3fb8b) - -Three frames are written on a corked udp socket for which the output -netdevice has UFO enabled. If the first and third frame are smaller than -the mtu and the second one is bigger, we enqueue the second frame with -skb_append_datato_frags without initializing the gso fields. This leads -to the third frame appended regulary and thus constructing an invalid skb. - -This fixes the problem by always using skb_append_datato_frags as soon -as the first frag got enqueued to the skb without marking the packet -as SKB_GSO_UDP. - -The problem with only two frames for ipv6 was fixed by "ipv6: udp -packets following an UFO enqueued packet need also be handled by UFO" -(2811ebac2521ceac84f2bdae402455baa6a7fb47). - -Cc: Jiri Pirko -Cc: Eric Dumazet -Cc: David Miller -Signed-off-by: Hannes Frederic Sowa ---- - include/linux/skbuff.h | 5 +++++ - net/ipv4/ip_output.c | 2 +- - net/ipv6/ip6_output.c | 2 +- - 3 files changed, 7 insertions(+), 2 deletions(-) - -diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h -index 3b71a4e..6bd165b 100644 ---- a/include/linux/skbuff.h -+++ b/include/linux/skbuff.h -@@ -1316,6 +1316,11 @@ static inline int skb_pagelen(const struct sk_buff *skb) - return len + skb_headlen(skb); - } - -+static inline bool skb_has_frags(const struct sk_buff *skb) -+{ -+ return skb_shinfo(skb)->nr_frags; -+} -+ - /** - * __skb_fill_page_desc - initialise a paged fragment in an skb - * @skb: buffer containing fragment to be initialised -diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c -index a04d872..7f4ab5d 100644 ---- a/net/ipv4/ip_output.c -+++ b/net/ipv4/ip_output.c -@@ -836,7 +836,7 @@ static int __ip_append_data(struct sock *sk, - csummode = CHECKSUM_PARTIAL; - - cork->length += length; -- if (((length > mtu) || (skb && skb_is_gso(skb))) && -+ if (((length > mtu) || (skb && skb_has_frags(skb))) && - (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) { - err = ip_ufo_append_data(sk, queue, getfrag, from, length, -diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c -index 44df1c9..2e542d0 100644 ---- a/net/ipv6/ip6_output.c -+++ b/net/ipv6/ip6_output.c -@@ -1252,7 +1252,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, - skb = skb_peek_tail(&sk->sk_write_queue); - cork->length += length; - if (((length > mtu) || -- (skb && skb_is_gso(skb))) && -+ (skb && skb_has_frags(skb))) && - (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO)) { - err = ip6_ufo_append_data(sk, getfrag, from, length, --- -1.7.11.7 - - -From 689f77d13532698739438b2288ec8eac2f667584 Mon Sep 17 00:00:00 2001 -From: Julian Anastasov -Date: Sun, 20 Oct 2013 15:43:03 +0300 -Subject: [PATCH 43/47] ipv6: always prefer rt6i_gateway if present - -[ Upstream commit 96dc809514fb2328605198a0602b67554d8cce7b ] - -In v3.9 6fd6ce2056de2709 ("ipv6: Do not depend on rt->n in -ip6_finish_output2()." changed the behaviour of ip6_finish_output2() -such that the recently introduced rt6_nexthop() is used -instead of an assigned neighbor. - -As rt6_nexthop() prefers rt6i_gateway only for gatewayed -routes this causes a problem for users like IPVS, xt_TEE and -RAW(hdrincl) if they want to use different address for routing -compared to the destination address. - -Another case is when redirect can create RTF_DYNAMIC -route without RTF_GATEWAY flag, we ignore the rt6i_gateway -in rt6_nexthop(). - -Fix the above problems by considering the rt6i_gateway if -present, so that traffic routed to address on local subnet is -not wrongly diverted to the destination address. - -Thanks to Simon Horman and Phil Oester for spotting the -problematic commit. - -Thanks to Hannes Frederic Sowa for his review and help in testing. - -Reported-by: Phil Oester -Reported-by: Mark Brooks -Signed-off-by: Julian Anastasov -Acked-by: Hannes Frederic Sowa -Signed-off-by: David S. Miller ---- - include/net/ip6_route.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h -index f667248..0aaf0ec 100644 ---- a/include/net/ip6_route.h -+++ b/include/net/ip6_route.h -@@ -198,7 +198,7 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb) - - static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, struct in6_addr *dest) - { -- if (rt->rt6i_flags & RTF_GATEWAY) -+ if (rt->rt6i_flags & RTF_GATEWAY || !ipv6_addr_any(&rt->rt6i_gateway)) - return &rt->rt6i_gateway; - return dest; - } --- -1.7.11.7 - - -From 471dd605429d6645f990becd29c877740d3b32e7 Mon Sep 17 00:00:00 2001 -From: Julian Anastasov -Date: Sun, 20 Oct 2013 15:43:04 +0300 -Subject: [PATCH 44/47] ipv6: fill rt6i_gateway with nexthop address - -[ Upstream commit 550bab42f83308c9d6ab04a980cc4333cef1c8fa ] - -Make sure rt6i_gateway contains nexthop information in -all routes returned from lookup or when routes are directly -attached to skb for generated ICMP packets. - -The effect of this patch should be a faster version of -rt6_nexthop() and the consideration of local addresses as -nexthop. - -Signed-off-by: Julian Anastasov -Acked-by: Hannes Frederic Sowa -Signed-off-by: David S. Miller ---- - include/net/ip6_route.h | 6 ++---- - net/ipv6/ip6_output.c | 4 ++-- - net/ipv6/route.c | 8 ++++++-- - 3 files changed, 10 insertions(+), 8 deletions(-) - -diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h -index 0aaf0ec..c7b8860 100644 ---- a/include/net/ip6_route.h -+++ b/include/net/ip6_route.h -@@ -196,11 +196,9 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb) - skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); - } - --static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, struct in6_addr *dest) -+static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt) - { -- if (rt->rt6i_flags & RTF_GATEWAY || !ipv6_addr_any(&rt->rt6i_gateway)) -- return &rt->rt6i_gateway; -- return dest; -+ return &rt->rt6i_gateway; - } - - #endif -diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c -index 2e542d0..5b25f85 100644 ---- a/net/ipv6/ip6_output.c -+++ b/net/ipv6/ip6_output.c -@@ -130,7 +130,7 @@ static int ip6_finish_output2(struct sk_buff *skb) - } - - rcu_read_lock_bh(); -- nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); -+ nexthop = rt6_nexthop((struct rt6_info *)dst); - neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); - if (unlikely(!neigh)) - neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); -@@ -899,7 +899,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, - */ - rt = (struct rt6_info *) *dst; - rcu_read_lock_bh(); -- n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); -+ n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); - err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; - rcu_read_unlock_bh(); - -diff --git a/net/ipv6/route.c b/net/ipv6/route.c -index 8d9a93ed..08e6c40 100644 ---- a/net/ipv6/route.c -+++ b/net/ipv6/route.c -@@ -852,7 +852,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, - if (ort->rt6i_dst.plen != 128 && - ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) - rt->rt6i_flags |= RTF_ANYCAST; -- rt->rt6i_gateway = *daddr; - } - - rt->rt6i_flags |= RTF_CACHE; -@@ -1270,6 +1269,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, - rt->dst.flags |= DST_HOST; - rt->dst.output = ip6_output; - atomic_set(&rt->dst.__refcnt, 1); -+ rt->rt6i_gateway = fl6->daddr; - rt->rt6i_dst.addr = fl6->daddr; - rt->rt6i_dst.plen = 128; - rt->rt6i_idev = idev; -@@ -1824,7 +1824,10 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, - in6_dev_hold(rt->rt6i_idev); - rt->dst.lastuse = jiffies; - -- rt->rt6i_gateway = ort->rt6i_gateway; -+ if (ort->rt6i_flags & RTF_GATEWAY) -+ rt->rt6i_gateway = ort->rt6i_gateway; -+ else -+ rt->rt6i_gateway = *dest; - rt->rt6i_flags = ort->rt6i_flags; - if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == - (RTF_DEFAULT | RTF_ADDRCONF)) -@@ -2111,6 +2114,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, - else - rt->rt6i_flags |= RTF_LOCAL; - -+ rt->rt6i_gateway = *addr; - rt->rt6i_dst.addr = *addr; - rt->rt6i_dst.plen = 128; - rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); --- -1.7.11.7 - - -From d01c3be45be54261f56ba63197d94e3d756befdf Mon Sep 17 00:00:00 2001 -From: Julian Anastasov -Date: Sun, 20 Oct 2013 15:43:05 +0300 -Subject: [PATCH 45/47] netfilter: nf_conntrack: fix rt6i_gateway checks for - H.323 helper - -[ Upstream commit 56e42441ed54b092d6c7411138ce60d049e7c731 ] - -Now when rt6_nexthop() can return nexthop address we can use it -for proper nexthop comparison of directly connected destinations. -For more information refer to commit bbb5823cf742a7 -("netfilter: nf_conntrack: fix rt_gateway checks for H.323 helper"). - -Signed-off-by: Julian Anastasov -Acked-by: Hannes Frederic Sowa -Signed-off-by: David S. Miller ---- - net/netfilter/nf_conntrack_h323_main.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c -index bdebd03..70866d1 100644 ---- a/net/netfilter/nf_conntrack_h323_main.c -+++ b/net/netfilter/nf_conntrack_h323_main.c -@@ -778,8 +778,8 @@ static int callforward_do_filter(const union nf_inet_addr *src, - flowi6_to_flowi(&fl1), false)) { - if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, - flowi6_to_flowi(&fl2), false)) { -- if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, -- sizeof(rt1->rt6i_gateway)) && -+ if (ipv6_addr_equal(rt6_nexthop(rt1), -+ rt6_nexthop(rt2)) && - rt1->dst.dev == rt2->dst.dev) - ret = 1; - dst_release(&rt2->dst); --- -1.7.11.7 - - -From 1d98ddb501bedeee62c916d3d6999109f0a22198 Mon Sep 17 00:00:00 2001 -From: Hannes Frederic Sowa -Date: Mon, 21 Oct 2013 06:17:15 +0200 -Subject: [PATCH 46/47] ipv6: probe routes asynchronous in rt6_probe - -[ Upstream commit c2f17e827b419918c856131f592df9521e1a38e3 ] - -Routes need to be probed asynchronous otherwise the call stack gets -exhausted when the kernel attemps to deliver another skb inline, like -e.g. xt_TEE does, and we probe at the same time. - -We update neigh->updated still at once, otherwise we would send to -many probes. - -Cc: Julian Anastasov -Signed-off-by: Hannes Frederic Sowa -Signed-off-by: David S. Miller ---- - net/ipv6/route.c | 38 +++++++++++++++++++++++++++++++------- - 1 file changed, 31 insertions(+), 7 deletions(-) - -diff --git a/net/ipv6/route.c b/net/ipv6/route.c -index 08e6c40..1e32d5c 100644 ---- a/net/ipv6/route.c -+++ b/net/ipv6/route.c -@@ -477,6 +477,24 @@ out: - } - - #ifdef CONFIG_IPV6_ROUTER_PREF -+struct __rt6_probe_work { -+ struct work_struct work; -+ struct in6_addr target; -+ struct net_device *dev; -+}; -+ -+static void rt6_probe_deferred(struct work_struct *w) -+{ -+ struct in6_addr mcaddr; -+ struct __rt6_probe_work *work = -+ container_of(w, struct __rt6_probe_work, work); -+ -+ addrconf_addr_solict_mult(&work->target, &mcaddr); -+ ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL); -+ dev_put(work->dev); -+ kfree(w); -+} -+ - static void rt6_probe(struct rt6_info *rt) - { - struct neighbour *neigh; -@@ -500,17 +518,23 @@ static void rt6_probe(struct rt6_info *rt) - - if (!neigh || - time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { -- struct in6_addr mcaddr; -- struct in6_addr *target; -+ struct __rt6_probe_work *work; -+ -+ work = kmalloc(sizeof(*work), GFP_ATOMIC); - -- if (neigh) { -+ if (neigh && work) - neigh->updated = jiffies; -+ -+ if (neigh) - write_unlock(&neigh->lock); -- } - -- target = (struct in6_addr *)&rt->rt6i_gateway; -- addrconf_addr_solict_mult(target, &mcaddr); -- ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL); -+ if (work) { -+ INIT_WORK(&work->work, rt6_probe_deferred); -+ work->target = rt->rt6i_gateway; -+ dev_hold(rt->dst.dev); -+ work->dev = rt->dst.dev; -+ schedule_work(&work->work); -+ } - } else { - out: - write_unlock(&neigh->lock); --- -1.7.11.7 - - -From d7710f5e65b37ec3ac09dde758141e81fa47315d Mon Sep 17 00:00:00 2001 -From: Mariusz Ceier -Date: Mon, 21 Oct 2013 19:45:04 +0200 -Subject: [PATCH 47/47] davinci_emac.c: Fix IFF_ALLMULTI setup - -[ Upstream commit d69e0f7ea95fef8059251325a79c004bac01f018 ] - -When IFF_ALLMULTI flag is set on interface and IFF_PROMISC isn't, -emac_dev_mcast_set should only enable RX of multicasts and reset -MACHASH registers. - -It does this, but afterwards it either sets up multicast MACs -filtering or disables RX of multicasts and resets MACHASH registers -again, rendering IFF_ALLMULTI flag useless. - -This patch fixes emac_dev_mcast_set, so that multicast MACs filtering and -disabling of RX of multicasts are skipped when IFF_ALLMULTI flag is set. - -Tested with kernel 2.6.37. - -Signed-off-by: Mariusz Ceier -Acked-by: Mugunthan V N -Signed-off-by: David S. Miller ---- - drivers/net/ethernet/ti/davinci_emac.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c -index 1a222bce..45c167f 100644 ---- a/drivers/net/ethernet/ti/davinci_emac.c -+++ b/drivers/net/ethernet/ti/davinci_emac.c -@@ -876,8 +876,7 @@ static void emac_dev_mcast_set(struct net_device *ndev) - netdev_mc_count(ndev) > EMAC_DEF_MAX_MULTICAST_ADDRESSES) { - mbp_enable = (mbp_enable | EMAC_MBP_RXMCAST); - emac_add_mcast(priv, EMAC_ALL_MULTI_SET, NULL); -- } -- if (!netdev_mc_empty(ndev)) { -+ } else if (!netdev_mc_empty(ndev)) { - struct netdev_hw_addr *ha; - - mbp_enable = (mbp_enable | EMAC_MBP_RXMCAST); --- -1.7.11.7 - diff --git a/sources b/sources index 91571da..585450e 100644 --- a/sources +++ b/sources @@ -1,2 +1,2 @@ fea363551ff45fbe4cb88497b863b261 linux-3.11.tar.xz -c44ebb225fe9956b636b79ab6b61aa42 patch-3.11.6.xz +42361474ed56948d8f52e72958b2cdf0 patch-3.11.7.xz