netdev
[Top] [All Lists]

IPV6 RFC3542 compliance [PATCH]

To: davem@xxxxxxxxxxxxx, yoshfuji@xxxxxxxxxxxxxx
Subject: IPV6 RFC3542 compliance [PATCH]
From: David Stevens <dlstevens@xxxxxxxxxx>
Date: Mon, 6 Jun 2005 13:48:26 -0600
Cc: netdev@xxxxxxxxxxx
Sender: netdev-bounce@xxxxxxxxxxx
I've been looking at RFC 3542 (Advanced Sockets API) compliance,
and found the following:

("x" is one of {PKTINFO, HOPLIMIT, RTHDR, DSTOPTS, TCLASS })
What RFC 3542 says:
1) IPV6_x as socket options specify "sticky" option values;
        getsockopt() returns the current values of the sticky options
        setsockopt() sets the values for future sends
2) IPV6_RECVx are boolean socket options indicating whether the
     particular field will be returned in ancillary data on a recvmsg()
        getsockopt() gets the current value (1 or 0)
        setsockopt() sets or clears the boolean value
3) Ancillary data (send and receive) use IPV6_x for the corresponding
    data item

What current kernel does:
1) IPV6_x are boolean options
2) the sticky versions are not implemented
3) TCLASS is not implemented

The patch below adds sending and receiving of traffic class, the
definitions for IPV6_RECVx and changes the boolean socket options
to their RFC 3542 names. The original names are still there for use
with sticky options in the future (not included here), and as the
ancillary data message types.

The bad news:
This patch changes the argument lists of ip6_append_data() and
datagram_send_ctl(). This is because traffic class is not an extension
header, but part of the IPv6 header. This is analogous to the hop limit,
which is an explicit argument to these functions.

I've tested these pieces, but I have a couple of open questions which
may be relevant (will continue looking myself...):

1) In ipv6_pinfo, there is a "hop_limit" field at the top level and
        another "cork.hop_limit". Why aren't these the same?
2) The (old name) IPV6_RTHDR socket option allows a value of "2",
        used by TCP. Still need to see what that's about for relevance
        to other options (but this code leaves that unchanged, except
        the name).

                                        +-DLS

in-line for view, attached for applying

Signed-off-by: David L Stevens <dlstevens@xxxxxxxxxx>
diff -ruNp linux-2.6.11.10/include/linux/in6.h 
linux-2.6.11.10T2/include/linux/in6.h
--- linux-2.6.11.10/include/linux/in6.h 2005-05-16 10:51:43.000000000 
-0700
+++ linux-2.6.11.10T2/include/linux/in6.h       2005-05-23 
14:12:59.000000000 -0700
@@ -172,6 +172,7 @@ struct in6_flowlabel_req
 #define IPV6_V6ONLY            26
 #define IPV6_JOIN_ANYCAST      27
 #define IPV6_LEAVE_ANYCAST     28
+#define IPV6_TCLASS            30
 
 /* IPV6_MTU_DISCOVER values */
 #define IPV6_PMTUDISC_DONT             0
@@ -184,6 +185,12 @@ struct in6_flowlabel_req
 
 #define IPV6_IPSEC_POLICY      34
 #define IPV6_XFRM_POLICY       35
+#define IPV6_RTHDRDSTOPTS      36
+#define IPV6_RECVPKTINFO       37
+#define IPV6_RECVHOPLIMIT      38
+#define IPV6_RECVRTHDR         39
+#define IPV6_RECVHOPOPTS       40
+#define IPV6_RECVDSTOPTS       41
 
 /*
  * Multicast:
@@ -198,4 +205,6 @@ struct in6_flowlabel_req
  * MCAST_MSFILTER              48
  */
 
+#define IPV6_RECVTCLASS                49
+
 #endif
diff -ruNp linux-2.6.11.10/include/linux/ipv6.h 
linux-2.6.11.10T2/include/linux/ipv6.h
--- linux-2.6.11.10/include/linux/ipv6.h        2005-05-16 
10:51:43.000000000 -0700
+++ linux-2.6.11.10T2/include/linux/ipv6.h      2005-05-24 
13:18:27.000000000 -0700
@@ -221,7 +221,8 @@ struct ipv6_pinfo {
                                rxhlim:1,
                                hopopts:1,
                                dstopts:1,
-                                rxflow:1;
+                                rxflow:1,
+                               rxtclass:1;
                } bits;
                __u8            all;
        } rxopt;
@@ -244,6 +245,7 @@ struct ipv6_pinfo {
                struct ipv6_txoptions *opt;
                struct rt6_info *rt;
                int hop_limit;
+               int tclass;
        } cork;
 };
 
diff -ruNp linux-2.6.11.10/include/net/ipv6.h 
linux-2.6.11.10T2/include/net/ipv6.h
--- linux-2.6.11.10/include/net/ipv6.h  2005-05-16 10:51:49.000000000 
-0700
+++ linux-2.6.11.10T2/include/net/ipv6.h        2005-05-24 
14:57:23.000000000 -0700
@@ -347,6 +347,7 @@ extern int                  ip6_append_data(struct 
sock
                                                int length,
                                                int transhdrlen,
                                                int hlimit,
+                                               int tclass,
                                                struct ipv6_txoptions 
*opt,
                                                struct flowi *fl,
                                                struct rt6_info *rt,
diff -ruNp linux-2.6.11.10/include/net/transp_v6.h 
linux-2.6.11.10T2/include/net/transp_v6.h
--- linux-2.6.11.10/include/net/transp_v6.h     2005-05-16 
10:51:51.000000000 -0700
+++ linux-2.6.11.10T2/include/net/transp_v6.h   2005-05-24 
14:04:11.000000000 -0700
@@ -37,7 +37,7 @@ extern int                    datagram_recv_ctl(struct 
so
 extern int                     datagram_send_ctl(struct msghdr *msg,
                                                  struct flowi *fl,
                                                  struct ipv6_txoptions 
*opt,
-                                                 int *hlimit);
+                                                 int *hlimit, int 
*tclass);
 
 #define                LOOPBACK4_IPV6 __constant_htonl(0x7f000006)
 
diff -ruNp linux-2.6.11.10/net/ipv6/datagram.c 
linux-2.6.11.10T2/net/ipv6/datagram.c
--- linux-2.6.11.10/net/ipv6/datagram.c 2005-05-16 10:52:00.000000000 
-0700
+++ linux-2.6.11.10T2/net/ipv6/datagram.c       2005-05-24 
14:03:56.000000000 -0700
@@ -388,6 +388,11 @@ int datagram_recv_ctl(struct sock *sk, s
                int hlim = skb->nh.ipv6h->hop_limit;
                put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), 
&hlim);
        }
+       if (np->rxopt.bits.rxtclass) {
+               u8 tclass = (skb->nh.ipv6h->priority << 4) |
+                       ((skb->nh.ipv6h->flow_lbl[0]>>4) & 0xf);
+               put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), 
&tclass);
+       }
 
        if (np->rxopt.bits.rxflow && (*(u32*)skb->nh.raw & 
IPV6_FLOWINFO_MASK)) {
                u32 flowinfo = *(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK;
@@ -414,7 +419,7 @@ int datagram_recv_ctl(struct sock *sk, s
 
 int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
                      struct ipv6_txoptions *opt,
-                     int *hlimit)
+                     int *hlimit, int *tclass)
 {
        struct in6_pktinfo *src_info;
        struct cmsghdr *cmsg;
@@ -587,6 +592,15 @@ int datagram_send_ctl(struct msghdr *msg
                        *hlimit = *(int *)CMSG_DATA(cmsg);
                        break;
 
+               case IPV6_TCLASS:
+                       if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
+                               err = -EINVAL;
+                               goto exit_f;
+                       }
+
+                       *tclass = *(int *)CMSG_DATA(cmsg);
+                       break;
+
                default:
                        LIMIT_NETDEBUG(
                                printk(KERN_DEBUG "invalid cmsg type: 
%d\n", cmsg->cmsg_type));
diff -ruNp linux-2.6.11.10/net/ipv6/icmp.c 
linux-2.6.11.10T2/net/ipv6/icmp.c
--- linux-2.6.11.10/net/ipv6/icmp.c     2005-05-16 10:52:00.000000000 
-0700
+++ linux-2.6.11.10T2/net/ipv6/icmp.c   2005-05-24 15:05:14.000000000 
-0700
@@ -287,7 +287,7 @@ void icmpv6_send(struct sk_buff *skb, in
        int iif = 0;
        int addr_type = 0;
        int len;
-       int hlimit;
+       int hlimit, tclass;
        int err = 0;
 
        if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
@@ -381,6 +381,9 @@ void icmpv6_send(struct sk_buff *skb, in
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = dst_metric(dst, RTAX_HOPLIMIT);
+       tclass = np->cork.tclass;
+       if (tclass < 0)
+               tclass = 0;
 
        msg.skb = skb;
        msg.offset = skb->nh.raw - skb->data;
@@ -398,7 +401,7 @@ void icmpv6_send(struct sk_buff *skb, in
        err = ip6_append_data(sk, icmpv6_getfrag, &msg,
                              len + sizeof(struct icmp6hdr),
                              sizeof(struct icmp6hdr),
-                             hlimit, NULL, &fl, (struct rt6_info*)dst,
+                             hlimit, tclass, NULL, &fl, (struct 
rt6_info*)dst,
                              MSG_DONTWAIT);
        if (err) {
                ip6_flush_pending_frames(sk);
@@ -432,6 +435,7 @@ static void icmpv6_echo_reply(struct sk_
        struct dst_entry *dst;
        int err = 0;
        int hlimit;
+       int tclass;
 
        saddr = &skb->nh.ipv6h->daddr;
 
@@ -467,15 +471,18 @@ static void icmpv6_echo_reply(struct sk_
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = dst_metric(dst, RTAX_HOPLIMIT);
+       tclass = np->cork.tclass;
+       if (tclass < 0)
+               tclass = 0;
 
        idev = in6_dev_get(skb->dev);
 
        msg.skb = skb;
        msg.offset = 0;
 
-       err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + 
sizeof(struct icmp6hdr),
-                               sizeof(struct icmp6hdr), hlimit, NULL, 
&fl,
-                               (struct rt6_info*)dst, MSG_DONTWAIT);
+       err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len +
+               sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit,
+               tclass, NULL, &fl, (struct rt6_info*)dst, MSG_DONTWAIT);
 
        if (err) {
                ip6_flush_pending_frames(sk);
diff -ruNp linux-2.6.11.10/net/ipv6/ip6_flowlabel.c 
linux-2.6.11.10T2/net/ipv6/ip6_flowlabel.c
--- linux-2.6.11.10/net/ipv6/ip6_flowlabel.c    2005-05-16 
10:52:00.000000000 -0700
+++ linux-2.6.11.10T2/net/ipv6/ip6_flowlabel.c  2005-05-24 
14:04:28.000000000 -0700
@@ -311,7 +311,7 @@ fl_create(struct in6_flowlabel_req *freq
                msg.msg_control = (void*)(fl->opt+1);
                flowi.oif = 0;
 
-               err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk);
+               err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, 
&junk);
                if (err)
                        goto done;
                err = -EINVAL;
diff -ruNp linux-2.6.11.10/net/ipv6/ip6_output.c 
linux-2.6.11.10T2/net/ipv6/ip6_output.c
--- linux-2.6.11.10/net/ipv6/ip6_output.c       2005-05-16 
10:52:00.000000000 -0700
+++ linux-2.6.11.10T2/net/ipv6/ip6_output.c     2005-05-24 
14:58:51.000000000 -0700
@@ -211,7 +211,7 @@ int ip6_xmit(struct sock *sk, struct sk_
        struct ipv6hdr *hdr;
        u8  proto = fl->proto;
        int seg_len = skb->len;
-       int hlimit;
+       int hlimit, tclass;
        u32 mtu;
 
        if (opt) {
@@ -253,6 +253,13 @@ int ip6_xmit(struct sock *sk, struct sk_
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = dst_metric(dst, RTAX_HOPLIMIT);
+       tclass = -1;
+       if (np)
+               tclass = np->cork.tclass;
+       if (tclass < 0)
+               tclass = 0;
+       hdr->priority = (np->cork.tclass>>4) &0xf;
+       hdr->flow_lbl[0] |= (np->cork.tclass & 0xf)<<4;
 
        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
@@ -806,10 +813,11 @@ out_err_release:
        return err;
 }
 
-int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, 
int offset, int len, int odd, struct sk_buff *skb),
-                   void *from, int length, int transhdrlen,
-                   int hlimit, struct ipv6_txoptions *opt, struct flowi 
*fl, struct rt6_info *rt,
-                   unsigned int flags)
+int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
+       int offset, int len, int odd, struct sk_buff *skb),
+       void *from, int length, int transhdrlen,
+       int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi 
*fl,
+       struct rt6_info *rt, unsigned int flags)
 {
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
@@ -847,6 +855,7 @@ int ip6_append_data(struct sock *sk, int
                np->cork.rt = rt;
                inet->cork.fl = *fl;
                np->cork.hop_limit = hlimit;
+               np->cork.tclass = tclass;
                inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
                inet->cork.length = 0;
                sk->sk_sndmsg_page = NULL;
@@ -1130,6 +1139,10 @@ int ip6_push_pending_frames(struct sock 
 
        *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
 
+       /* traffic class */
+       hdr->priority = (np->cork.tclass>>4) & 0xf;
+       hdr->flow_lbl[0] |= (np->cork.tclass & 0xf)<<4;
+
        if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
                hdr->payload_len = htons(skb->len - sizeof(struct 
ipv6hdr));
        else
diff -ruNp linux-2.6.11.10/net/ipv6/ipv6_sockglue.c 
linux-2.6.11.10T2/net/ipv6/ipv6_sockglue.c
--- linux-2.6.11.10/net/ipv6/ipv6_sockglue.c    2005-05-16 
10:52:00.000000000 -0700
+++ linux-2.6.11.10T2/net/ipv6/ipv6_sockglue.c  2005-06-06 
11:52:15.000000000 -0700
@@ -208,33 +208,38 @@ int ipv6_setsockopt(struct sock *sk, int
                retv = 0;
                break;
 
-       case IPV6_PKTINFO:
+       case IPV6_RECVPKTINFO:
                np->rxopt.bits.rxinfo = valbool;
                retv = 0;
                break;
 
-       case IPV6_HOPLIMIT:
+       case IPV6_RECVHOPLIMIT:
                np->rxopt.bits.rxhlim = valbool;
                retv = 0;
                break;
 
-       case IPV6_RTHDR:
+       case IPV6_RECVRTHDR:
                if (val < 0 || val > 2)
                        goto e_inval;
                np->rxopt.bits.srcrt = val;
                retv = 0;
                break;
 
-       case IPV6_HOPOPTS:
+       case IPV6_RECVHOPOPTS:
                np->rxopt.bits.hopopts = valbool;
                retv = 0;
                break;
 
-       case IPV6_DSTOPTS:
+       case IPV6_RECVDSTOPTS:
                np->rxopt.bits.dstopts = valbool;
                retv = 0;
                break;
 
+       case IPV6_RECVTCLASS:
+               np->rxopt.bits.rxtclass = valbool;
+               retv = 0;
+               break;
+
        case IPV6_FLOWINFO:
                np->rxopt.bits.rxflow = valbool;
                retv = 0;
@@ -274,7 +279,7 @@ int ipv6_setsockopt(struct sock *sk, int
                msg.msg_controllen = optlen;
                msg.msg_control = (void*)(opt+1);
 
-               retv = datagram_send_ctl(&msg, &fl, opt, &junk);
+               retv = datagram_send_ctl(&msg, &fl, opt, &junk, &junk);
                if (retv)
                        goto done;
 update:
@@ -620,26 +625,30 @@ int ipv6_getsockopt(struct sock *sk, int
                val = np->ipv6only;
                break;
 
-       case IPV6_PKTINFO:
+       case IPV6_RECVPKTINFO:
                val = np->rxopt.bits.rxinfo;
                break;
 
-       case IPV6_HOPLIMIT:
+       case IPV6_RECVHOPLIMIT:
                val = np->rxopt.bits.rxhlim;
                break;
 
-       case IPV6_RTHDR:
+       case IPV6_RECVRTHDR:
                val = np->rxopt.bits.srcrt;
                break;
 
-       case IPV6_HOPOPTS:
+       case IPV6_RECVHOPOPTS:
                val = np->rxopt.bits.hopopts;
                break;
 
-       case IPV6_DSTOPTS:
+       case IPV6_RECVDSTOPTS:
                val = np->rxopt.bits.dstopts;
                break;
 
+       case IPV6_RECVTCLASS:
+               val = np->rxopt.bits.rxtclass;
+               break;
+
        case IPV6_FLOWINFO:
                val = np->rxopt.bits.rxflow;
                break;
diff -ruNp linux-2.6.11.10/net/ipv6/raw.c linux-2.6.11.10T2/net/ipv6/raw.c
--- linux-2.6.11.10/net/ipv6/raw.c      2005-05-16 10:52:00.000000000 
-0700
+++ linux-2.6.11.10T2/net/ipv6/raw.c    2005-05-24 15:09:42.000000000 
-0700
@@ -617,6 +617,7 @@ static int rawv6_sendmsg(struct kiocb *i
        struct flowi fl;
        int addr_len = msg->msg_namelen;
        int hlimit = -1;
+       int tclass = -1;
        u16 proto;
        int err;
 
@@ -702,7 +703,7 @@ static int rawv6_sendmsg(struct kiocb *i
                memset(opt, 0, sizeof(struct ipv6_txoptions));
                opt->tot_len = sizeof(struct ipv6_txoptions);
 
-               err = datagram_send_ctl(msg, &fl, opt, &hlimit);
+               err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass);
                if (err < 0) {
                        fl6_sock_release(flowlabel);
                        return err;
@@ -758,6 +759,12 @@ static int rawv6_sendmsg(struct kiocb *i
                        hlimit = dst_metric(dst, RTAX_HOPLIMIT);
        }
 
+       if (tclass < 0) {
+               tclass = np->cork.tclass;
+               if (tclass < 0)
+                       tclass = 0;
+       }
+
        if (msg->msg_flags&MSG_CONFIRM)
                goto do_confirm;
 
@@ -766,8 +773,9 @@ back_from_confirm:
                err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, 
(struct rt6_info*)dst, msg->msg_flags);
        } else {
                lock_sock(sk);
-               err = ip6_append_data(sk, ip_generic_getfrag, 
msg->msg_iov, len, 0,
-                                       hlimit, opt, &fl, (struct 
rt6_info*)dst, msg->msg_flags);
+               err = ip6_append_data(sk, ip_generic_getfrag, 
msg->msg_iov,
+                       len, 0, hlimit, tclass, opt, &fl, (struct 
rt6_info*)dst,
+                       msg->msg_flags);
 
                if (err)
                        ip6_flush_pending_frames(sk);
diff -ruNp linux-2.6.11.10/net/ipv6/udp.c linux-2.6.11.10T2/net/ipv6/udp.c
--- linux-2.6.11.10/net/ipv6/udp.c      2005-05-16 10:52:00.000000000 
-0700
+++ linux-2.6.11.10T2/net/ipv6/udp.c    2005-05-24 15:11:58.000000000 
-0700
@@ -637,6 +637,7 @@ static int udpv6_sendmsg(struct kiocb *i
        int addr_len = msg->msg_namelen;
        int ulen = len;
        int hlimit = -1;
+       int tclass = -1;
        int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
        int err;
 
@@ -758,7 +759,7 @@ do_udp_sendmsg:
                memset(opt, 0, sizeof(struct ipv6_txoptions));
                opt->tot_len = sizeof(*opt);
 
-               err = datagram_send_ctl(msg, fl, opt, &hlimit);
+               err = datagram_send_ctl(msg, fl, opt, &hlimit, &tclass);
                if (err < 0) {
                        fl6_sock_release(flowlabel);
                        return err;
@@ -812,6 +813,11 @@ do_udp_sendmsg:
                if (hlimit < 0)
                        hlimit = dst_metric(dst, RTAX_HOPLIMIT);
        }
+       if (tclass < 0) {
+               tclass = np->cork.tclass;
+               if (tclass < 0)
+                       tclass = 0;
+       }
 
        if (msg->msg_flags&MSG_CONFIRM)
                goto do_confirm;
@@ -832,9 +838,10 @@ back_from_confirm:
 
 do_append_data:
        up->len += ulen;
-       err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, 
sizeof(struct udphdr),
-                             hlimit, opt, fl, (struct rt6_info*)dst,
-                             corkreq ? msg->msg_flags|MSG_MORE : 
msg->msg_flags);
+       err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen,
+               sizeof(struct udphdr), hlimit, tclass, opt, fl,
+               (struct rt6_info*)dst,
+               corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
        if (err)
                udp_v6_flush_pending_frames(sk);
        else if (!corkreq)

Attachment: rfc3542.patch
Description: Binary data

<Prev in Thread] Current Thread [Next in Thread>