# This is a BitKeeper generated diff -Nru style patch. # # ChangeSet # 2005/01/04 00:30:58+08:00 acassen@xxxxxxxxxxxx # [IPVS] change to run master/backup sync daemon at a time # # Signed-off-by: Wensong Zhang # # net/ipv4/ipvs/ip_vs_sync.c # 2005/01/04 00:30:47+08:00 acassen@xxxxxxxxxxxx +96 -38 # changed to run master/backup sync daemon at a time. # # net/ipv4/ipvs/ip_vs_ctl.c # 2005/01/04 00:30:47+08:00 acassen@xxxxxxxxxxxx +7 -3 # extends the interface of IPVS # # net/ipv4/ipvs/ip_vs_core.c # 2005/01/04 00:30:47+08:00 acassen@xxxxxxxxxxxx +1 -1 # sync a connection when the flag of sync state is master. # # include/net/ip_vs.h # 2005/01/04 00:30:47+08:00 acassen@xxxxxxxxxxxx +8 -5 # extend the structs for master/backup daemon running at a time. # diff -Nru a/include/net/ip_vs.h b/include/net/ip_vs.h --- a/include/net/ip_vs.h 2005-01-04 00:33:06 +08:00 +++ b/include/net/ip_vs.h 2005-01-04 00:33:06 +08:00 @@ -97,6 +97,7 @@ int state; /* sync daemon state */ char mcast_ifn[IP_VS_IFNAME_MAXLEN]; /* multicast interface name */ + int syncid; /* virtual service options */ u_int16_t protocol; @@ -213,8 +214,9 @@ /* The argument to IP_VS_SO_GET_DAEMON */ struct ip_vs_daemon_user { - int state; /* sync daemon state */ - char mcast_ifn[IP_VS_IFNAME_MAXLEN]; /* multicast interface name */ + int state; /* sync daemon state */ + char mcast_master_ifn[IP_VS_IFNAME_MAXLEN]; /* mcast master interface name */ + char mcast_backup_ifn[IP_VS_IFNAME_MAXLEN]; /* mcast backup interface name */ }; @@ -726,9 +728,10 @@ * (from ip_vs_sync.c) */ extern volatile int ip_vs_sync_state; -extern char ip_vs_mcast_ifn[IP_VS_IFNAME_MAXLEN]; -extern int start_sync_thread(int state, char *mcast_ifn); -extern int stop_sync_thread(void); +extern char ip_vs_mcast_master_ifn[IP_VS_IFNAME_MAXLEN]; +extern char ip_vs_mcast_backup_ifn[IP_VS_IFNAME_MAXLEN]; +extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid); +extern int stop_sync_thread(int state); extern void ip_vs_sync_conn(struct 
ip_vs_conn *cp); diff -Nru a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c --- a/net/ipv4/ipvs/ip_vs_core.c 2005-01-04 00:33:06 +08:00 +++ b/net/ipv4/ipvs/ip_vs_core.c 2005-01-04 00:33:06 +08:00 @@ -1131,7 +1131,7 @@ /* increase its packet counter and check if it is needed to be synchronized */ atomic_inc(&cp->in_pkts); - if (ip_vs_sync_state == IP_VS_STATE_MASTER && + if (ip_vs_sync_state & IP_VS_STATE_MASTER && (cp->protocol != IPPROTO_TCP || cp->state == IP_VS_S_ESTABLISHED) && (atomic_read(&cp->in_pkts) % 50 == sysctl_ip_vs_sync_threshold)) diff -Nru a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c --- a/net/ipv4/ipvs/ip_vs_ctl.c 2005-01-04 00:33:06 +08:00 +++ b/net/ipv4/ipvs/ip_vs_ctl.c 2005-01-04 00:33:06 +08:00 @@ -1731,10 +1731,11 @@ ret = ip_vs_set_timeouts(urule); goto out_unlock; } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { - ret = start_sync_thread(urule->state, urule->mcast_ifn); + ret = start_sync_thread(urule->state, urule->mcast_ifn, + urule->syncid); goto out_unlock; } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { - ret = stop_sync_thread(); + ret = stop_sync_thread(urule->state); goto out_unlock; } else if (cmd == IP_VS_SO_SET_ZERO) { /* if no service address is set, zero counters in all */ @@ -2082,7 +2083,10 @@ goto out; } u.state = ip_vs_sync_state; - strcpy(u.mcast_ifn, ip_vs_mcast_ifn); + if (ip_vs_sync_state & IP_VS_STATE_MASTER) + strcpy(u.mcast_master_ifn, ip_vs_mcast_master_ifn); + if (ip_vs_sync_state & IP_VS_STATE_BACKUP) + strcpy(u.mcast_backup_ifn, ip_vs_mcast_backup_ifn); if (copy_to_user(user, &u, sizeof(u)) != 0) ret = -EFAULT; } diff -Nru a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c --- a/net/ipv4/ipvs/ip_vs_sync.c 2005-01-04 00:33:06 +08:00 +++ b/net/ipv4/ipvs/ip_vs_sync.c 2005-01-04 00:33:06 +08:00 @@ -13,6 +13,9 @@ * through multicast * * Changes: + * Alexandre Cassen : Added master & backup support at a time. + * Alexandre Cassen : Added SyncID support for incoming sync + * messages filtering. 
* Justin Ossevoort : Fix endian problem on sync message size. */ @@ -74,7 +77,7 @@ 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Count Conns | Reserved | Size | + | Count Conns | Sync ID | Size | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | | IPVS Sync Connection (1) | @@ -86,11 +89,16 @@ | | | IPVS Sync Connection (n) | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Count Conns : Number of IPVS sync Connection entries. + Sync ID : IPVS sync group we belong to. + Size : Size of packet. + */ #define SYNC_MESG_MAX_SIZE (24*50+4) struct ip_vs_sync_mesg { __u8 nr_conns; - __u8 reserved; + __u8 syncid; __u16 size; /* ip_vs_sync_conn entries start here */ @@ -116,6 +124,18 @@ static struct ip_vs_sync_buff *curr_sb = NULL; static spinlock_t curr_sb_lock = SPIN_LOCK_UNLOCKED; +/* ipvs sync daemon state */ +volatile int ip_vs_sync_state = IP_VS_STATE_NONE; +volatile int ip_vs_master_syncid = 0; +volatile int ip_vs_backup_syncid = 0; + +/* multicast interface name */ +char ip_vs_mcast_master_ifn[IP_VS_IFNAME_MAXLEN]; +char ip_vs_mcast_backup_ifn[IP_VS_IFNAME_MAXLEN]; + +/* multicast addr */ +static struct sockaddr_in mcast_addr; + static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) { spin_lock(&ip_vs_sync_lock); @@ -153,6 +173,7 @@ return NULL; } sb->mesg->nr_conns = 0; + sb->mesg->syncid = ip_vs_master_syncid; sb->mesg->size = 4; sb->head = (unsigned char *)sb->mesg + 4; sb->end = (unsigned char *)sb->mesg + SYNC_MESG_MAX_SIZE; @@ -265,6 +286,13 @@ return; } + /* SyncID sanity check */ + if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) { + IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n", + m->syncid); + return; + } + p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); for (i=0; i<m->nr_conns; i++) { s = (struct ip_vs_sync_conn *)p; @@ -308,16 +336,6 @@ } -/* ipvs sync daemon state */ -volatile int
ip_vs_sync_state = IP_VS_STATE_NONE; - -/* multicast interface name */ -char ip_vs_mcast_ifn[IP_VS_IFNAME_MAXLEN]; - -/* multicast addr */ -static struct sockaddr_in mcast_addr; - - /* * Setup loopback of outgoing multicasts on a sending socket */ @@ -429,7 +447,7 @@ return NULL; } - if (set_mcast_if(sock->sk, ip_vs_mcast_ifn) < 0) { + if (set_mcast_if(sock->sk, ip_vs_mcast_master_ifn) < 0) { IP_VS_ERR("Error setting outbound mcast interface\n"); goto error; } @@ -437,7 +455,7 @@ set_mcast_loop(sock->sk, 0); set_mcast_ttl(sock->sk, 1); - if (bind_mcastif_addr(sock, ip_vs_mcast_ifn) < 0) { + if (bind_mcastif_addr(sock, ip_vs_mcast_master_ifn) < 0) { IP_VS_ERR("Error binding address of the mcast interface\n"); goto error; } @@ -483,7 +501,7 @@ /* join the multicast group */ if (join_mcast_group(sock->sk, (struct in_addr*)&mcast_addr.sin_addr, - ip_vs_mcast_ifn) < 0) { + ip_vs_mcast_backup_ifn) < 0) { IP_VS_ERR("Error joining to the multicast group\n"); goto error; } @@ -571,10 +589,12 @@ static DECLARE_WAIT_QUEUE_HEAD(sync_wait); -static pid_t sync_pid = 0; +static pid_t sync_master_pid = 0; +static pid_t sync_backup_pid = 0; static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait); -static int stop_sync = 0; +static int stop_master_sync = 0; +static int stop_backup_sync = 0; static void sync_master_loop(void) { @@ -586,6 +606,10 @@ if (!sock) return; + IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, " + "syncid = %d\n", + ip_vs_mcast_master_ifn, ip_vs_master_syncid); + for (;;) { while ((sb=sb_dequeue())) { ip_vs_send_sync_msg(sock, sb->mesg); @@ -598,7 +622,7 @@ ip_vs_sync_buff_release(sb); } - if (stop_sync) + if (stop_master_sync) break; __set_current_state(TASK_INTERRUPTIBLE); @@ -637,6 +661,10 @@ if (!sock) goto out; + IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, " + "syncid = %d\n", + ip_vs_mcast_backup_ifn, ip_vs_backup_syncid); + for (;;) { /* do you have data now? 
*/ while (!skb_queue_empty(&(sock->sk->receive_queue))) { @@ -652,7 +680,7 @@ local_bh_enable(); } - if (stop_sync) + if (stop_backup_sync) break; __set_current_state(TASK_INTERRUPTIBLE); @@ -667,12 +695,31 @@ kfree(buf); } +static void sync_pid_set(int sync_state, pid_t sync_pid) +{ + if (sync_state == IP_VS_STATE_MASTER) + sync_master_pid = sync_pid; + else if (sync_state == IP_VS_STATE_BACKUP) + sync_backup_pid = sync_pid; +} + +static void sync_stop_set(int sync_state, int set) +{ + if (sync_state == IP_VS_STATE_MASTER) + stop_master_sync = set; + else if (sync_state == IP_VS_STATE_BACKUP) + stop_backup_sync = set; + else { + stop_master_sync = set; + stop_backup_sync = set; + } +} static int sync_thread(void *startup) { DECLARE_WAITQUEUE(wait, current); mm_segment_t oldmm; - int state; + int state = IP_VS_STATE_NONE; MOD_INC_USE_COUNT; daemonize(); @@ -680,12 +727,15 @@ oldmm = get_fs(); set_fs(KERNEL_DS); - if (ip_vs_sync_state == IP_VS_STATE_MASTER) + if (ip_vs_sync_state & IP_VS_STATE_MASTER && !sync_master_pid) { + state = IP_VS_STATE_MASTER; sprintf(current->comm, "ipvs_syncmaster"); - else if (ip_vs_sync_state == IP_VS_STATE_BACKUP) + } else if (ip_vs_sync_state & IP_VS_STATE_BACKUP) { + state = IP_VS_STATE_BACKUP; sprintf(current->comm, "ipvs_syncbackup"); - else IP_VS_BUG(); + } else IP_VS_BUG(); + /* Block all signals */ spin_lock_irq(&current->sigmask_lock); siginitsetinv(&current->blocked, 0); recalc_sigpending(current); @@ -698,9 +748,7 @@ add_wait_queue(&sync_wait, &wait); - state = ip_vs_sync_state; - sync_pid = current->pid; - IP_VS_INFO("sync thread started.\n"); + sync_pid_set(state, current->pid); complete((struct completion *)startup); /* processing master/backup loop here */ @@ -713,13 +761,13 @@ remove_wait_queue(&sync_wait, &wait); /* thread exits */ - sync_pid = 0; + sync_pid_set(state, 0); IP_VS_INFO("sync thread stopped!\n"); set_fs(oldmm); MOD_DEC_USE_COUNT; - stop_sync = 0; + sync_stop_set(state, 0); wake_up(&stop_sync_wait); return 0; @@
-745,20 +793,27 @@ } -int start_sync_thread(int state, char *mcast_ifn) +int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) { DECLARE_COMPLETION(startup); pid_t pid; - if (sync_pid) + if ((state == IP_VS_STATE_MASTER && sync_master_pid) || + (state == IP_VS_STATE_BACKUP && sync_backup_pid)) return -EEXIST; IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid); IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %d bytes\n", sizeof(struct ip_vs_sync_conn)); - ip_vs_sync_state = state; - strcpy(ip_vs_mcast_ifn, mcast_ifn); + ip_vs_sync_state |= state; + if (state == IP_VS_STATE_MASTER) { + strcpy(ip_vs_mcast_master_ifn, mcast_ifn); + ip_vs_master_syncid = syncid; + } else { + strcpy(ip_vs_mcast_backup_ifn, mcast_ifn); + ip_vs_backup_syncid = syncid; + } repeat: if ((pid = kernel_thread(fork_sync_thread, &startup, 0)) < 0) { @@ -775,20 +830,22 @@ } -int stop_sync_thread(void) +int stop_sync_thread(int state) { DECLARE_WAITQUEUE(wait, current); - if (!sync_pid) + if ((state == IP_VS_STATE_MASTER && !sync_master_pid) || + (state == IP_VS_STATE_BACKUP && !sync_backup_pid)) return -ESRCH; IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid); - IP_VS_INFO("stopping sync thread %d ...\n", sync_pid); + IP_VS_INFO("stopping sync thread %d ...\n", + (state == IP_VS_STATE_MASTER) ? sync_master_pid : sync_backup_pid); __set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&stop_sync_wait, &wait); - ip_vs_sync_state = IP_VS_STATE_NONE; - stop_sync = 1; + sync_stop_set(state, 1); + ip_vs_sync_state -= state; wake_up(&sync_wait); schedule(); __set_current_state(TASK_RUNNING); @@ -797,7 +854,8 @@ /* Note: no need to reap the sync thread, because its parent process is the init process */ - if (stop_sync) + if ((state == IP_VS_STATE_MASTER && stop_master_sync) || + (state == IP_VS_STATE_BACKUP && stop_backup_sync)) IP_VS_BUG(); return 0;