/* -*- c-file-style: "java"; indent-tabs-mode: nil -*-
 *
 * corked_demo.c -- Demonstrate Linux 2.2 bug relating to corked
 * (TCP_CORK) sockets.
 *
 * Written 2002 by Martin Pool
 *
 * Build this, run it as root with something like "sudo ./corked_demo
 * SOMEHOST 9" to write to the discard port.  (It needs root access to
 * set SO_DEBUG.)  The other machine must be running the discard
 * service to accept the connection and data.
 *
 * Basically all it does is open a corked connection, and then drop it
 * while there is (possibly) data in the SendQ.  The socket gets
 * "stuck" in FIN_WAIT1 and doesn't seem to be able to flush the last
 * bit of data.
 *
 * If you have an affected version of the kernel, most times this is
 * run you will get a socket stuck in FIN_WAIT1 state.  It looks
 * like this:
 *
 * tcp 0 3201 maudlin:1048 maudlin:discard FIN_WAIT1 root 0 - off (0.00/0/0)
 *
 * This happens in 2.2.16, .18, and .21.
 *
 * It seems to me that this *has* to be incorrect, because there is
 * data waiting to go out, but no timer running.  The socket stays
 * stuck, chewing up kernel memory forever.
 *
 * Running a hundred iterations gives 36 stuck in this state.
 *
 * On the server the situation is almost as bad: the sockets end up in
 * ESTABLISHED state, but they'll never receive more data.  Presumably
 * they'll hang around until the server gives up and terminates, or
 * until the TCP 2-hour timeout elapses.
 *
 * Sometimes killing off the server makes the FIN_WAIT1 sockets go
 * away on the client, but it is not reliable.  However, neither side
 * seems to time out of its own accord -- I left the two machines
 * sitting overnight and all the sockets were still
 * FIN_WAIT1/ESTABLISHED in the morning.
 *
 * tcpdump shows that the FIN is not sent when the client program
 * closes the socket.  However, when the server program is killed, its
 * FIN gets things flowing again.
* * I think that on the system where this was originally seen, both the * client and the server used corks, and so killing the server program * and closing its socket didn't send a FIN, and therefore things * stayed jammed indefinitely. * * Since this can be provoked with local unprivileged access, and * since the sockets apparently can't be cleared up without a reboot, * it could be considered a kind of resource exhaustion attack. If it * happens inadvertently, it can cause problems on the server by * causing the remote machine to hang until it is killed off. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include /** * Open a socket to a tcp remote host with the specified port. * * The socket is (if appropriate) corked on return, so that the third * handshake should be sent containing useful data. * * Stolen from rsync via distcc. * * @todo Don't try for too long to connect. **/ int open_socket_out(const char *host, int port, int *p_fd) { int type = SOCK_STREAM; struct sockaddr_in sock_out; int fd; struct hostent *hp; fd = socket(PF_INET, type, 0); if (fd == -1) { printf("failed to create socket: %s\n", strerror(errno)); exit(1); } hp = gethostbyname(host); if (!hp) { fprintf(stderr, "unknown host: \"%s\"\n", host); (void) close(fd); exit(1); } memcpy(&sock_out.sin_addr, hp->h_addr, (size_t) hp->h_length); sock_out.sin_port = htons(port); sock_out.sin_family = PF_INET; if (connect(fd, (struct sockaddr *) &sock_out, (int) sizeof(sock_out))) { fprintf(stderr, "failed to connect to %s port %d: %s\n", host, port, strerror(errno)); (void) close(fd); exit(1); } printf("client got connection to %s port %d on fd%d\n", host, port, fd); *p_fd = fd; return 0; } /** * Stick a TCP cork in the socket. 
**/
/*
 * Sets or clears the TCP_CORK option on FD.
 *
 * fd      -- socket descriptor to change.
 * corked  -- nonzero to cork the socket, zero to uncork it.
 *
 * Returns 0.  If setsockopt() fails, a message is printed to stderr
 * and the process exits with status 1.
 */
int tcp_cork_sock(int fd, int corked)
{
    /* IPPROTO_TCP is the documented option level for TCP socket options. */
    if (setsockopt(fd, IPPROTO_TCP, TCP_CORK, &corked, sizeof corked) == -1) {
        fprintf(stderr, "setsockopt(corked=%d) failed: %s\n",
                corked, strerror(errno));
        exit(1);
    }
    printf("%scorked fd%d\n", corked ? "" : "un", fd);
    return 0;
}


/**
 * Turn the SO_DEBUG socket option on or off.
 *
 * @param fd        Socket descriptor to change.
 * @param debug_on  Nonzero to enable SO_DEBUG, zero to disable it.
 *
 * @return 0.  If setsockopt() fails, a message is printed to stderr
 * and the process exits with status 1.
 **/
int debug_sock(int fd, int debug_on)
{
    if (setsockopt(fd, SOL_SOCKET, SO_DEBUG,
                   &debug_on, sizeof debug_on) == -1) {
        fprintf(stderr, "setsockopt(debug=%d) failed: %s\n",
                debug_on, strerror(errno));
        exit(1);
    }
    printf("%sdebug fd%d\n", debug_on ? "" : "un", fd);
    return 0;
}


/**
 * Write exactly LEN bytes from BUF to FD, looping over short writes.
 *
 * A write() interrupted by a signal (EINTR) is retried rather than
 * being reported as a failure.
 *
 * @param fd   Descriptor to write to.
 * @param buf  Start of the data.
 * @param len  Number of bytes to write.
 *
 * @return 0 once everything has been written; -1 (after printing a
 * message to stderr) if write() fails or returns 0.
 **/
int dcc_writex(int fd, const void *buf, size_t len)
{
    const char *p = buf;

    while (len > 0) {
        ssize_t r = write(fd, p, len);

        if (r == -1) {
            if (errno == EINTR) {
                continue;
            }
            fprintf(stderr, "failed to write: %s\n", strerror(errno));
            return -1;
        }
        if (r == 0) {
            fprintf(stderr, "unexpected eof on fd%d\n", fd);
            return -1;
        }

        p += r;
        len -= (size_t) r;
    }

    return 0;
}


/**
 * Write a fixed 100000-byte block of zero bytes to FD.
 *
 * @return Whatever dcc_writex() returns: 0 on success, -1 on failure.
 **/
int send_junk(int fd)
{
    /* Static storage, so the buffer is zero-filled and off the stack. */
    static char trash[100000];

    return dcc_writex(fd, trash, sizeof trash);
}


/**
 * Connect to HOST on NUMERICPORT, enable SO_DEBUG, cork the socket,
 * write the junk data and exit.
 *
 * @return 0 on success; 1 on a usage error, an invalid port, or a
 * failed write.  (Connection and setsockopt failures exit with status
 * 1 inside the helpers.)
 **/
int main(int argc, char **argv)
{
    int fd;
    long port;
    char *end;

    if (argc != 3) {
        fprintf(stderr, "usage: corked_demo HOST NUMERICPORT\n");
        return 1;
    }

    /* strtol() instead of atoi() so that junk such as "abc" or "9x" is
     * rejected rather than silently turned into some other port. */
    errno = 0;
    port = strtol(argv[2], &end, 10);
    if (errno != 0 || end == argv[2] || *end != '\0'
        || port < 1 || port > 65535) {
        fprintf(stderr, "invalid port: \"%s\"\n", argv[2]);
        fprintf(stderr, "usage: corked_demo HOST NUMERICPORT\n");
        return 1;
    }

    open_socket_out(argv[1], (int) port, &fd);
    debug_sock(fd, 1);
    tcp_cork_sock(fd, 1);

    if (send_junk(fd) != 0) {
        return 1;
    }

    /* Deliberately no uncork and no close(): the point of the demo is
     * to drop the connection while it is still corked and may still
     * have data queued, so the descriptor is simply abandoned at exit. */
    return 0;
}