linux 手册记录-select poll epoll-CSDN博客

本文链接：https://blog.csdn.net/pzqzq/article/details/129021105

本文介绍了Linux系统中的I/O多路复用机制，包括select、poll和epoll三种方法，用于高效处理多个文件描述符的读写事件。select和poll有限制的最大监控数，而epoll具有更好的扩展性，支持边沿触发和水平触发模式。epoll还提供了更细粒度的控制，如EPOLLONESHOT和EPOLLET标志。这些机制常用于服务器程序，如nginx，以及Android系统中。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

I/O 多路复用机制提供订阅多个文件(socket pipe eventfd等)不同 I/O事件的机制, 事件分发由内核统一调度,用户程序处于观察者模式, 能实时高效的消费多个 I/O 事件.

select/pselect: 允许程序监控多个文件描述符，等待一个或多个文件描述符可读/可写（非阻塞条件）就绪事件. 最大监控数由FD_SETSIZE 控制.

poll/ppoll: 功能与select 一样, 但是没有最大监控数的限制

epoll: 功能与poll一样，但是 epoll API 可以用作edge-triggered(ET)/level-triggered(LT) 的接口，并且可以很好地扩展监视大量的文件描述符，使用LT模式监视的 fd 效果与poll等价, 最大监控文件由 /proc/sys/fs/epoll/max_user_watches 文件控制

广泛使用, Android 大量使用多路复用机制(looper init zygote等), 服务程序(nginx)也是基于复用机制

select API:

#include <sys/select.h>

#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

// Used to set up the fd events to monitor: FD_SET adds fd to the set,
// FD_ZERO empties the set, FD_CLR removes fd from the set.
void FD_SET(int fd, fd_set *set);
void FD_ZERO(fd_set *set);
void FD_CLR(int fd, fd_set *set);

// After select() returns, tests whether fd is reported ready in readfds/writefds/exceptfds.
int  FD_ISSET(int fd, fd_set *set);

/*
* Wait for the events subscribed through readfds/writefds/exceptfds.
* nfds:      the highest-numbered fd present in readfds/writefds/exceptfds, PLUS ONE.
* readfds:   fds to watch for readability, added with FD_SET(fd, &readfds); pass NULL if this kind of event is not monitored.
* writefds:  fds to watch for writability, added with FD_SET(fd, &writefds); pass NULL if this kind of event is not monitored.
* exceptfds: fds to watch for exceptional conditions, added with FD_SET(fd, &exceptfds); pass NULL if this kind of event is not monitored.
* timeout:   how long select may block waiting for an event. With tv_sec = tv_usec = 0 select does not block; NULL blocks indefinitely until a signal or an event arrives.
* sigmask:   (pselect only) replaces the calling thread's blocked-signal mask, runs the select logic, then restores the previous mask; pass NULL to leave the mask unchanged.
* return:    number of ready fds on success, 0 on timeout, -1 on error (details via errno and strerror(errno)).
*            On error readfds/writefds/exceptfds are left unmodified and timeout is undefined, so re-initialise it before reusing it.
*/
int select(int nfds, fd_set *readfds, fd_set *writefds,
          fd_set *exceptfds, struct timeval *timeout);
int pselect(int nfds, fd_set *readfds, fd_set *writefds,
           fd_set *exceptfds, const struct timespec *timeout,
           const sigset_t *sigmask);

注意:

每当 select 阻塞被事件唤醒后，readfds/writefds/exceptfds 参数集将被内核改变为哪些 fd 状态就绪，所以再次调用 select 前请重新初始化上述参数集

select 将阻塞线程至: 1.监控的事件集有事件触发 2.超过设置的阻塞时间 3.信号触发

用例:

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <sys/select.h>

/*
 * select() example: wait up to five seconds for stdin to become readable,
 * then report whether input arrived, the wait timed out, or select() failed.
 * Exits with EXIT_FAILURE when select() fails, EXIT_SUCCESS otherwise.
 */
int main(int argc, char *argv[]) {
   fd_set rfds;
   struct timeval tv;
   int retval;
   const int fd = fileno(stdin);

   /* Watch stdin to see when it has input. */
   FD_ZERO(&rfds);
   FD_SET(fd, &rfds);

   /* Wait up to five seconds. */
   tv.tv_sec = 5;
   tv.tv_usec = 0;

   /* nfds is the highest watched fd plus one, not a count of fds. */
   retval = select(fd + 1, &rfds, NULL, NULL, &tv);
   /* Don't rely on the value of tv now! */

   if (retval == -1) {
       /* Report on stderr and signal failure through the exit status. */
       fprintf(stderr, "select() 失败 %d %s\n", errno, strerror(errno));
       exit(EXIT_FAILURE);
   } else if (retval) {
       printf("数据可读.\n");
       /* rfds now holds only the fds that are actually ready. */
       if (FD_ISSET(fd, &rfds)) {
           printf("stdin 数据可读.\n");
       }
   } else {
       printf("超时返回.\n");
   }

   exit(EXIT_SUCCESS);
}

poll API

#include <poll.h>

/* Wait for the events subscribed through fds.
* fds:     array of struct pollfd; initialise the fd and events fields to select what is monitored.
* nfds:    number of entries in the fds array.
* timeout: how long poll may block waiting for an event, in milliseconds.
* return:  number of fds that are ready or in an error state on success, 0 on timeout, -1 on error (details via errno and strerror(errno)).
*/
int poll(struct pollfd *fds, nfds_t nfds, int timeout);

#define _GNU_SOURCE         /* See feature_test_macros(7) */
#include <signal.h>
#include <poll.h>

/* Wait for the events subscribed through fds, atomically setting and restoring the thread's blocked-signal mask.
* fds, nfds and the return value have the same meaning as for poll; tmo_p is the
* timeout expressed as a struct timespec instead of milliseconds.
* sigmask: replaces the calling thread's blocked-signal mask, runs the poll logic, then restores the previous mask; pass NULL to leave the mask unchanged.
*/
int ppoll(struct pollfd *fds, nfds_t nfds,
       const struct timespec *tmo_p, const sigset_t *sigmask);

/* One entry of the array passed to poll()/ppoll(): which fd to watch, for what, and what happened. */
struct pollfd {
   int   fd;         /* input: fd to monitor; if negative, the events field is ignored and revents is set to 0 */
   short events;     /* input: events to monitor on fd (see the list below); if 0, revents can only report POLLHUP, POLLERR and POLLNVAL */
   short revents;    /* output: filled in by the kernel with the events that actually occurred on fd */
};

POLLRDNORM/POLLIN: 有数据可读
POLLPRI: fd 状态异常:1.TCP 套接字上有 out-of-band 数据 2.数据包模式下的伪终端主机已经看到从机上的状态变化 3.cgroup.events 文件已被修改
POLLWRNORM/POLLOUT: fd 可写入数据, 写入的数据如果超过了 socket/pipe 可用空间大小同样会阻塞写操作(O_NONBLOCK 标志可避免此情况)
POLLERR: 错误情况(only returned in revents; ignored in events) , 若 fd 为pipe类型, 当 pipe 读端关闭时, 向写端写入数据会触发此种情况
POLLHUP: (only returned in revents; ignored in events) , 当从 pipe 或 stream socket 等通道读取时，此事件仅表示对端关闭了通道. 只有在通道中所有未完成的数据都被消耗后，从通道的后续读取才会返回 0（EOF）
POLLNVAL: 无效设置: fd 未打开 (only returned in revents; ignored in events) 
POLLRDBAND: Priority band data 可读
POLLWRBAND: Priority band data 可写

用例:

#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <poll.h>

/*
 * poll() example: wait up to five seconds for stdin to become readable,
 * then report whether input arrived, the wait timed out, or poll() failed.
 * Exits with EXIT_FAILURE when poll() fails, EXIT_SUCCESS otherwise.
 */
int main(int argc, char *argv[]) {
   struct pollfd fds[1];
   const nfds_t nfds = sizeof(fds) / sizeof(fds[0]);
   int timeout;
   int retval;

   /* Subscribe to the "readable" event on stdin. */
   fds[0].fd = fileno(stdin);
   fds[0].events = POLLIN;
   fds[0].revents = 0;

   /* Wait up to 5 s (poll takes milliseconds). */
   timeout = 5000;

   retval = poll(fds, nfds, timeout);
   if (retval == -1) {
       fprintf(stderr, "poll() 失败 %d %s\n", errno, strerror(errno));
       exit(EXIT_FAILURE);
   } else if (retval) {
       printf("数据可读.\n");
       /*
        * retval is the NUMBER of entries with a non-zero revents, not an
        * index bound: ready entries may sit anywhere in the array, so the
        * whole array has to be scanned.
        */
       for (nfds_t i = 0; i < nfds; i++) {
           if (fds[i].revents & POLLIN) {
               if (fds[i].fd == fileno(stdin)) {
                   printf("stdin数据可读.\n");
               }
           }
       }
   } else {
       printf("超时返回.\n");
   }
   exit(EXIT_SUCCESS);
}

epoll API

#include <sys/epoll.h>

/* Create an epoll instance. Every fd to be monitored is registered on this instance, and all the epoll calls below take it as an argument.
* size:  must be greater than 0, otherwise an error is returned.
* flags: 0 behaves like epoll_create; EPOLL_CLOEXEC sets close-on-exec on the returned epoll fd, so that in a
*        multi-process program the fd is closed when a child process calls exec, preventing a resource leak.
*/
int epoll_create(int size);
int epoll_create1(int flags);

/*
* Add, modify or remove entries in the list of objects attached to an epoll instance.
* epfd:  the instance created by epoll_create.
* op:    EPOLL_CTL_ADD: add fd to the monitored list; EPOLL_CTL_MOD: change the events monitored for an fd already in the list; EPOLL_CTL_DEL: remove fd from the monitored list.
* fd:    the fd to operate on.
* event: the events to monitor for fd: EPOLLIN, EPOLLOUT, etc.
*/
int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event);

/*
* Block until an event wakes the caller up.
* epfd:      the instance created by epoll_create.
* events:    output array; the kernel fills it with the events that actually occurred on the corresponding fds.
* maxevents: size of the events array.
* timeout:   how long to block waiting for an event, in milliseconds.
* sigmask:   (epoll_pwait only) replaces the calling thread's blocked-signal mask, runs the epoll_wait logic, then restores the previous mask; pass NULL to leave the mask unchanged.
* return:    number of fds that are ready or in an error state on success, 0 on timeout, -1 on error (details via errno and strerror(errno)).
*/
int epoll_wait(int epfd, struct epoll_event *events,
                      int maxevents, int timeout);
int epoll_pwait(int epfd, struct epoll_event *events,
                      int maxevents, int timeout,
                      const sigset_t *sigmask);

/*
 * Per-fd user data carried in struct epoll_event. It is a union, so only one
 * of the members holds a meaningful value at a time; the caller chooses which
 * one to use.
 */
typedef union epoll_data {
   void        *ptr;
   int          fd;
   uint32_t     u32;
   uint64_t     u64;
} epoll_data_t;

/* Describes the events monitored for an fd (epoll_ctl) or reported for it (epoll_wait). */
struct epoll_event {
   uint32_t     events;      /* event bits such as EPOLLIN, EPOLLOUT */
   epoll_data_t data;        /* private data stored per fd; available again for that fd when epoll_wait wakes up */
};
EPOLLIN: 可读
EPOLLOUT: 写入缓冲区可写入数据
EPOLLRDHUP: socket 对端关闭连接或者关闭写入通道(半关闭), 即本端不会再收到对端的数据
EPOLLPRI: fd 状态异常:1.TCP 套接字上有 out-of-band 数据 2.数据包模式下的伪终端主机已经看到从机上的状态变化 3.cgroup.events 文件已被修改
EPOLLERR: 若 fd 为pipe类型, 当 pipe 读端关闭时, 向写端写入数据会触发此种情况, 仅内核设置
EPOLLHUP:  当从 pipe 或 stream socket 等通道读取时，此事件仅表示对端关闭了通道. 只有在通道中所有未完成的数据都被消耗后，从通道的后续读取才会返回 0（EOF）, 仅内核设置
EPOLLET: 设置为 edge-triggered 模式, 有效避免惊群效应, 默认为 level-triggered
EPOLLONESHOT: fd 加入此监控标志表示只监控一次事件, 若想继续监控则通过 epoll_ctl EPOLL_CTL_MOD 重新修改此 fd 监控的事件
EPOLLEXCLUSIVE: 新特性(内核版本 >= 4.5), 如果多个 epoll 实例监控同一个文件且都带有此标志, 则事件触发时只唤醒其中一个或多个(而不是全部) epoll_wait, 有效避免惊群效应, 未带此标志的 epoll 实例仍会全部被唤醒, 此标志只能通过 EPOLL_CTL_ADD 添加, 不能通过 EPOLL_CTL_MOD 修改新增

用例:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/epoll.h>

/*
 * epoll example: register stdin on an epoll instance (level-triggered) and
 * report every time it becomes readable, printing a timeout notice whenever
 * five seconds pass without an event.
 *
 * The pending input is consumed after each notification: in level-triggered
 * mode unread data would make epoll_wait return immediately again, turning
 * the loop into a busy spin. The loop ends on end-of-file, on a read error,
 * or when stdin reports a hang-up/error, after which the epoll fd is closed.
 */
int main(int argc, char *argv[]) {
   #define MAX_EVENTS 10
   struct epoll_event ev, events[MAX_EVENTS];
   int nfds, epollfd, n, timeout;
   int done = 0;
   char buf[4096];

   /* Create the epoll instance. */
   epollfd = epoll_create1(0);
   if (epollfd == -1) {
       fprintf(stderr, "epoll_create1 fail: %d %s\n", errno, strerror(errno));
       exit(EXIT_FAILURE);
   }

   /* Subscribe to the "readable" event on stdin and add it to the instance. */
   memset(&ev, 0, sizeof(ev));   /* data is a union: do not leave unused bytes uninitialised */
   ev.events = EPOLLIN;
   ev.data.fd = fileno(stdin);
   if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fileno(stdin), &ev) == -1) {
       fprintf(stderr, "epoll_ctl add fail: %d %s\n", errno, strerror(errno));
       close(epollfd);
       exit(EXIT_FAILURE);
   }

   while (!done) {
       timeout = 5000;
       nfds = epoll_wait(epollfd, events, MAX_EVENTS, timeout);
       if (nfds == -1) {
           /* A signal interrupting the wait is not an error: just retry. */
           if (errno == EINTR) {
               continue;
           }
           fprintf(stderr, "epoll_wait fail: %d %s\n", errno, strerror(errno));
           close(epollfd);
           exit(EXIT_FAILURE);
       }
       if (nfds == 0) {
           printf("超时返回.\n");
       }

       for (n = 0; n < nfds; ++n) {
           if (events[n].data.fd != fileno(stdin)) {
               continue;
           }
           if (events[n].events & EPOLLIN) {
               ssize_t len;

               printf("stdin数据可读.\n");
               /* Drain what is pending so the same data is not reported again. */
               len = read(events[n].data.fd, buf, sizeof(buf));
               if (len == 0 || (len == -1 && errno != EINTR)) {
                   done = 1;   /* end-of-file or unrecoverable read error */
               }
           } else if (events[n].events & (EPOLLERR | EPOLLHUP)) {
               /* Nothing left to read and the fd is dead: stop instead of spinning. */
               done = 1;
           }
       }
   }

   close(epollfd);
   exit(EXIT_SUCCESS);
}