Skip to content

Commit ee99528

Browse files
xemul authored and davem330 committed
tcp: Initial repair mode
This includes (according to the previous description): * TCP_REPAIR sockoption This one just puts the socket in/out of the repair mode. Allowed for CAP_NET_ADMIN and for closed/established sockets only. When repair mode is turned off and the socket happens to be in the established state the window probe is sent to the peer to 'unlock' the connection. * TCP_REPAIR_QUEUE sockoption This one sets the queue which we're about to repair. The 'no-queue' is set by default. * TCP_QUEUE_SEQ sockoption Sets the write_seq/rcv_nxt of a selected repaired queue. Allowed for TCP_CLOSE-d sockets only. When the socket changes its state the other seq-s are changed by the kernel according to the protocol rules (most of the existing code is actually reused). * Ability to forcibly bind a socket to a port The sk->sk_reuse is set to SK_FORCE_REUSE. * Immediate connect modification The connect syscall initializes the connection, then directly jumps to the code which finalizes it. * Silent close modification The close just aborts the connection (similar to SO_LINGER with 0 time) but without sending any FIN/RST-s to peer. Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 370816a commit ee99528

File tree

5 files changed

+111
-8
lines changed

5 files changed

+111
-8
lines changed

include/linux/tcp.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,16 @@ enum {
106106
#define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/
107107
#define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */
108108
#define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */
109+
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
110+
#define TCP_REPAIR_QUEUE 20
111+
#define TCP_QUEUE_SEQ 21
112+
113+
enum { /* Queue selectors stored in tp->repair_queue by the TCP_REPAIR_QUEUE socket option */
114+
TCP_NO_QUEUE, /* no queue selected; assigned when TCP_REPAIR is switched on */
115+
TCP_RECV_QUEUE, /* TCP_QUEUE_SEQ then gets/sets tp->rcv_nxt */
116+
TCP_SEND_QUEUE, /* TCP_QUEUE_SEQ then gets/sets tp->write_seq */
117+
TCP_QUEUES_NR, /* number of selectors; setsockopt returns -EINVAL unless val < TCP_QUEUES_NR */
118+
};
109119

110120
/* for TCP_INFO socket option */
111121
#define TCPI_OPT_TIMESTAMPS 1
@@ -353,7 +363,9 @@ struct tcp_sock {
353363
u8 nonagle : 4,/* Disable Nagle algorithm? */
354364
thin_lto : 1,/* Use linear timeouts for thin streams */
355365
thin_dupack : 1,/* Fast retransmit on first dupack */
356-
unused : 2;
366+
repair : 1,
367+
unused : 1;
368+
u8 repair_queue;
357369

358370
/* RTT measurement */
359371
u32 srtt; /* smoothed round trip time << 3 */

include/net/tcp.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,8 @@ static inline u32 tcp_receive_window(const struct tcp_sock *tp)
612612
*/
613613
extern u32 __tcp_select_window(struct sock *sk);
614614

615+
void tcp_send_window_probe(struct sock *sk);
616+
615617
/* TCP timestamps are only 32-bits, this causes a slight
616618
* complication on 64-bit systems since we store a snapshot
617619
* of jiffies in the buffer control blocks below. We decided

net/ipv4/tcp.c

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1935,7 +1935,9 @@ void tcp_close(struct sock *sk, long timeout)
19351935
* advertise a zero window, then kill -9 the FTP client, wheee...
19361936
* Note: timeout is always zero in such a case.
19371937
*/
1938-
if (data_was_unread) {
1938+
if (unlikely(tcp_sk(sk)->repair)) {
1939+
sk->sk_prot->disconnect(sk, 0);
1940+
} else if (data_was_unread) {
19391941
/* Unread data was tossed, zap the connection. */
19401942
NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
19411943
tcp_set_state(sk, TCP_CLOSE);
@@ -2074,6 +2076,8 @@ int tcp_disconnect(struct sock *sk, int flags)
20742076
/* ABORT function of RFC793 */
20752077
if (old_state == TCP_LISTEN) {
20762078
inet_csk_listen_stop(sk);
2079+
} else if (unlikely(tp->repair)) {
2080+
sk->sk_err = ECONNABORTED;
20772081
} else if (tcp_need_reset(old_state) ||
20782082
(tp->snd_nxt != tp->write_seq &&
20792083
(1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
@@ -2125,6 +2129,12 @@ int tcp_disconnect(struct sock *sk, int flags)
21252129
}
21262130
EXPORT_SYMBOL(tcp_disconnect);
21272131

2132+
static inline int tcp_can_repair_sock(struct sock *sk) /* Permission check for the TCP_REPAIR setsockopt; nonzero means allowed */
2133+
{
2134+
return capable(CAP_NET_ADMIN) && /* the caller must hold CAP_NET_ADMIN ... */
2135+
((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); /* ... and sk must be in TCP_CLOSE or TCP_ESTABLISHED */
2136+
}
2137+
21282138
/*
21292139
* Socket option code for TCP.
21302140
*/
@@ -2297,6 +2307,42 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
22972307
tp->thin_dupack = val;
22982308
break;
22992309

2310+
case TCP_REPAIR:
2311+
if (!tcp_can_repair_sock(sk))
2312+
err = -EPERM;
2313+
else if (val == 1) {
2314+
tp->repair = 1;
2315+
sk->sk_reuse = SK_FORCE_REUSE;
2316+
tp->repair_queue = TCP_NO_QUEUE;
2317+
} else if (val == 0) {
2318+
tp->repair = 0;
2319+
sk->sk_reuse = SK_NO_REUSE;
2320+
tcp_send_window_probe(sk);
2321+
} else
2322+
err = -EINVAL;
2323+
2324+
break;
2325+
2326+
case TCP_REPAIR_QUEUE:
2327+
if (!tp->repair)
2328+
err = -EPERM;
2329+
else if (val < TCP_QUEUES_NR)
2330+
tp->repair_queue = val;
2331+
else
2332+
err = -EINVAL;
2333+
break;
2334+
2335+
case TCP_QUEUE_SEQ:
2336+
if (sk->sk_state != TCP_CLOSE)
2337+
err = -EPERM;
2338+
else if (tp->repair_queue == TCP_SEND_QUEUE)
2339+
tp->write_seq = val;
2340+
else if (tp->repair_queue == TCP_RECV_QUEUE)
2341+
tp->rcv_nxt = val;
2342+
else
2343+
err = -EINVAL;
2344+
break;
2345+
23002346
case TCP_CORK:
23012347
/* When set indicates to always queue non-full frames.
23022348
* Later the user clears this option and we transmit
@@ -2632,6 +2678,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
26322678
val = tp->thin_dupack;
26332679
break;
26342680

2681+
case TCP_REPAIR:
2682+
val = tp->repair;
2683+
break;
2684+
2685+
case TCP_REPAIR_QUEUE:
2686+
if (tp->repair)
2687+
val = tp->repair_queue;
2688+
else
2689+
return -EINVAL;
2690+
break;
2691+
2692+
case TCP_QUEUE_SEQ:
2693+
if (tp->repair_queue == TCP_SEND_QUEUE)
2694+
val = tp->write_seq;
2695+
else if (tp->repair_queue == TCP_RECV_QUEUE)
2696+
val = tp->rcv_nxt;
2697+
else
2698+
return -EINVAL;
2699+
break;
2700+
26352701
case TCP_USER_TIMEOUT:
26362702
val = jiffies_to_msecs(icsk->icsk_user_timeout);
26372703
break;

net/ipv4/tcp_ipv4.c

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,14 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
138138
}
139139
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
140140

141+
static int tcp_repair_connect(struct sock *sk) /* Connect path used by tcp_v4_connect() in place of tcp_connect() when tp->repair is set */
142+
{
143+
tcp_connect_init(sk); /* set up the connection state; with repair set it keeps the preset rcv_nxt instead of zeroing it */
144+
tcp_finish_connect(sk, NULL); /* go straight to connection finalization, passing NULL as the skb argument */
145+
146+
return 0; /* always reports success; the caller treats nonzero as failure */
147+
}
148+
141149
/* This will initiate an outgoing connection. */
142150
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
143151
{
@@ -196,7 +204,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
196204
/* Reset inherited state */
197205
tp->rx_opt.ts_recent = 0;
198206
tp->rx_opt.ts_recent_stamp = 0;
199-
tp->write_seq = 0;
207+
if (likely(!tp->repair))
208+
tp->write_seq = 0;
200209
}
201210

202211
if (tcp_death_row.sysctl_tw_recycle &&
@@ -247,15 +256,19 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
247256
sk->sk_gso_type = SKB_GSO_TCPV4;
248257
sk_setup_caps(sk, &rt->dst);
249258

250-
if (!tp->write_seq)
259+
if (!tp->write_seq && likely(!tp->repair))
251260
tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
252261
inet->inet_daddr,
253262
inet->inet_sport,
254263
usin->sin_port);
255264

256265
inet->inet_id = tp->write_seq ^ jiffies;
257266

258-
err = tcp_connect(sk);
267+
if (likely(!tp->repair))
268+
err = tcp_connect(sk);
269+
else
270+
err = tcp_repair_connect(sk);
271+
259272
rt = NULL;
260273
if (err)
261274
goto failure;

net/ipv4/tcp_output.c

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2617,9 +2617,11 @@ void tcp_connect_init(struct sock *sk)
26172617
tp->snd_sml = tp->write_seq;
26182618
tp->snd_up = tp->write_seq;
26192619
tp->snd_nxt = tp->write_seq;
2620-
tp->rcv_nxt = 0;
2621-
tp->rcv_wup = 0;
2622-
tp->copied_seq = 0;
2620+
2621+
if (likely(!tp->repair))
2622+
tp->rcv_nxt = 0;
2623+
tp->rcv_wup = tp->rcv_nxt;
2624+
tp->copied_seq = tp->rcv_nxt;
26232625

26242626
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
26252627
inet_csk(sk)->icsk_retransmits = 0;
@@ -2790,6 +2792,14 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
27902792
return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
27912793
}
27922794

2795+
void tcp_send_window_probe(struct sock *sk) /* Send one window probe on sk; called when TCP_REPAIR is switched off */
2796+
{
2797+
if (sk->sk_state == TCP_ESTABLISHED) { /* no-op for sockets in any other state */
2798+
tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; /* NOTE(review): presumably rewinds snd_wl1 so the peer's reply is accepted as a window update - confirm */
2799+
tcp_xmit_probe_skb(sk, 0); /* urgent == 0; the return value is ignored */
2800+
}
2801+
}
2802+
27932803
/* Initiate keepalive or window probe from timer. */
27942804
int tcp_write_wakeup(struct sock *sk)
27952805
{

0 commit comments

Comments
 (0)