Keyboard shortcuts

Press ← or → to navigate between chapters

Press S or / to search in the book

Press ? to show this help

Press Esc to hide this help

N5.3 Adding tables, chains, rules (userspace)

Netlink messages are used to set up tables, chains, etc. from userspace.

Let us dissect the netlink message that is sent by nft by running strace. Some parts of the netlink message that are not parsed by strace have also been decoded and formatted here by hand.

$ strace -f nft add table T2

nft creates a netlink socket and first sends a request to get rule-set generation NFT_MSG_GETGEN. In response, we receive the generation, which in this case is 3.

socket(AF_NETLINK, SOCK_RAW, NETLINK_NETFILTER) = 3
sendto(3,
	{
        struct nlmsghdr {
            nlmsg_len=20,
            nlmsg_type=NFNL_SUBSYS_NFTABLES << 8 | NFT_MSG_GETGEN, //<- 0xa10
            nlmsg_flags=NLM_F_REQUEST,
            nlmsg_seq=0, nlmsg_pid=0
        },
        struct nfgenmsg{ // <- "\x00\x00\x00\x00"
            nfgen_family=AF_UNSPEC,
            version = NFNETLINK_V0,
            res_id = 0,
        }
        
    },20,
    0,{sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000},12) = 20
    
recvmsg(3,
   {
       msg_name={
           sa_family=AF_NETLINK,
           nl_pid=0,
           nl_groups=00000000
       },
       msg_namelen=12,
       msg_iov=[
           {
               iov_base={
                   struct nlmsghdr {
                       nlmsg_len=44,
                       nlmsg_type=NFNL_SUBSYS_NFTABLES << 8 | NFT_MSG_NEWGEN, //<- 0xa0f
                       nlmsg_flags=0,
                       nlmsg_seq=0,
                       nlmsg_pid=214676 // pid
                   },
                   struct nfgenmsg{ // <-  "\x00\x00\x00\x03"
                            nfgen_family=AF_UNSPEC,
                            version = NFNETLINK_V0,
                            (be16) res_id = 0x0003, // nft_base_seq(net)
                   },
                   struct nlattr {
                       nla_len = 8, //<- \x08\x00
                       nla_type = NFTA_GEN_ID, // <- \x01\x00
                   },
                   data = htonl(nft_net->base_seq), // <- "\x00\x00\x00\x03"
                   padding = "",
                   struct nlattr {
                       nla_len = 8, //<- \x08\x00
                       nla_type = NFTA_GEN_PROC_PID, // <- "\x02\x00"
                   },
                   data = htonl(pid), // <- htonl(214676) = "\x00\x03\x46\x94",
                   padding = "",
                   struct nlattr {
                       nla_len = 8, //<- \x08\x00
                       nla_type = NFTA_GEN_PROC_NAME, // <- "\x03\x00"
                   },
                   data="nft\0", // <-"\x6e\x66\x74\x00"
                   padding = "",
               },
               iov_len=69631
           }
       ],
       msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 44

netfilter stores the generation number in struct nftables_pernet, which holds data that is maintained per network namespace.

struct nftables_pernet *nft_net = nft_pernet(net);

struct nftables_pernet {
    struct list_head	tables; // list of tables
    struct list_head    commit_list; // list of transactions that are pending
    ...
    struct mutex        commit_mutex;
    unsigned int        base_seq; // generation number
};

Next, nft constructs and sends a netlink message to set up the table. Netlink messages are made up of sections, each with a netlink message header and its attributes. nlmsghdr->nlmsg_type lets the kernel handle each section accordingly. In the message below we have the following sections:

  1. NFNL_MSG_BATCH_BEGIN: begin a batch of changes to the ruleset
  2. NFT_MSG_NEWTABLE : create a new table, with attributes:
    1. NFTA_TABLE_NAME: "T2\0"
    2. NFTA_TABLE_FLAGS: 0
  3. NFNL_MSG_BATCH_END: end of batch. Now commit these changes (which happens atomically as we will see in a later section).
sendmsg(3,
    {
        msg_name={
            sa_family=AF_NETLINK,
            nl_pid=0,
            nl_groups=00000000
        },
        msg_namelen=12,
        msg_iov=[
            {
                iov_base=[
                    {
                        struct nlmsghdr {
                            nlmsg_len=20,
                            nlmsg_type=NFNL_MSG_BATCH_BEGIN, //<- 0x10
                            nlmsg_flags=NLM_F_REQUEST,
                            nlmsg_seq=0, 
                            nlmsg_pid=0
                        },
                        struct nfgenmsg{ // <- "\x00\x00\x0a\x00"
                            nfgen_family=AF_UNSPEC,
                            version = NFNETLINK_V0,
                            res_id = NFNL_SUBSYS_NFTABLES, /*resource id */
                        }
                    },
                    {
                        struct nlmsghdr {
                            nlmsg_len=36,
                            nlmsg_type=NFNL_SUBSYS_NFTABLES << 8 | NFT_MSG_NEWTABLE, // <- 0xa00
                            nlmsg_flags=NLM_F_REQUEST,
                            nlmsg_seq=1,
                            nlmsg_pid=0
                        },
                         struct nfgenmsg{ // <- "\x02\x00\x00\x00"
                            nfgen_family=AF_INET,
                            version = NFNETLINK_V0,
                            res_id = 0, /*resource id */
                        }
                        struct nlattr {
                            nla_len = 7, // <- "\x07\x00"
                            nla_type = NFTA_TABLE_NAME, //<- "\x01\x00"
                        },
                        data = "T2\0",  // <- "\x54\x32\x00"
                        padding = "\x00", // 4 byte alignment
                        
                        struct nlattr {
                            nla_len = 8, // <- "\x08\x00"
                            nla_type = NFTA_TABLE_FLAGS //  "\x02\x00"
                        },
                        (u32) flags = 0, // <- "\x00\x00\x00\x00"
                    },
                    {
                        struct nlmsghdr {
                            nlmsg_len=20,
                            nlmsg_type=NFNL_MSG_BATCH_END,
                            nlmsg_flags=NLM_F_REQUEST, 
                            nlmsg_seq=2, 
                            nlmsg_pid=0,
                        }, 
                        struct nfgenmsg{ // <- "\x00\x00\x0a\x00"
                            nfgen_family=AF_UNSPEC,
                            version = NFNETLINK_V0,
                            res_id = NFNL_SUBSYS_NFTABLES, /*resource id */
                        }
                    }
                ], 
                iov_len=76
            }
        ], 
        msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 76

Though I won't show the strace output, setting up a chain uses a netlink message with NFT_MSG_NEWCHAIN and attributes NFTA_CHAIN_TABLE, NFTA_CHAIN_NAME, NFTA_CHAIN_HOOK and NFTA_CHAIN_POLICY.

To set up a rule, nft sends a message with NFT_MSG_NEWRULE and attributes NFTA_RULE_TABLE, NFTA_RULE_CHAIN and, most importantly, NFTA_RULE_EXPRESSIONS, which is shown below.

Note: use --string-limit so strace dumps the complete msg data.

strace -f --string-limit=1024  -o one.txt nft add rule T C ip daddr 127.0.0.9 counter
sendmsg(3,
   {msg_name=
       {sa_family=AF_NETLINK,
        nl_pid=0,
        nl_groups=00000000},
    msg_namelen=12,
    msg_iov=[
      {iov_base=[
        {
            struct nlmsghdr {
                nlmsg_len=20,
                nlmsg_type=NFNL_MSG_BATCH_BEGIN, // <- 0x10,
                nlmsg_flags=NLM_F_REQUEST,
                nlmsg_seq=0,
                nlmsg_pid=0},
            struct nfgenmsg { // <- "\x00\x00\x0a\x00"
                nfgen_family=AF_UNSPEC,
                version = NFNETLINK_V0,
                res_id = NFNL_SUBSYS_NFTABLES, /*resource id */
            }
        },
        {
            struct nlmsghdr {
                nlmsg_len=156,
                nlmsg_type= NFNL_SUBSYS_NFTABLES << 8 | NFT_MSG_NEWRULE, // <- 0xa06
                nlmsg_flags=NLM_F_REQUEST|0xc00, // 0xc00 = NLM_F_CREATE|NLM_F_APPEND
                nlmsg_seq=1,
                nlmsg_pid=0
            },
            struct nfgenmsg { // <- "\x02\x00\x00\x00"
                nfgen_family=AF_INET,
                version = NFNETLINK_V0,
                res_id = 0,
            },
            struct nlattr {
                nla_len = 6,       // "\x06\x00"
                nla_type = NFTA_RULE_TABLE, // <- "\x01\x00"
            },                       
            data =  "T\0", //   "\x54\x00"
            padding = "\x00\x00",
            struct nlattr {
                nla_len = 6, // <- \x06\x00
                nla_type = NFTA_RULE_CHAIN, // <- "\x02\x00"
            },
            data = "C\0", // \x43\x00
            padding = "\x00\x00",
            struct nlattr {
                nla_len = 120, //<-  \x78\x00,
                nla_type = NLA_F_NESTED | NFTA_RULE_EXPRESSIONS, //<-\x04\x80
            }

            {
                // 1. payload load 4b @ network header + 16 => reg 1
                struct nlattr {
                    nla_len = 52, //<- "\x34\x00"
                    nla_type = NLA_F_NESTED | NFTA_LIST_ELEM,  //<- \x01\x80
                }
                struct nlattr {
                    nla_len = 12, //<- \x0c\x00
                    nla_type = NFTA_EXPR_NAME, // <- \x01\x00
                }
                data = "payload\0",  // <- "\x70\x61\x79\x6c\x6f\x61\x64\x00",
                padding = "",
                struct nlattr {
                    nla_len = 36, // \x24\x00
                    nla_type = NLA_F_NESTED | NFTA_EXPR_DATA, //<- "\x02\x80"
                }
                struct nlattr {
                    nla_len = 8, // \x08\x00
                    nla_type = NFTA_PAYLOAD_DREG // <- \x01\x00
                }
                data = 1, //<-  "\x00\x00\x00\x01"
                padding = "",
                struct nlattr {
                    nla_len = 8, //<- \x08\x00
                    nla_type = NFTA_PAYLOAD_BASE,//<- \x02\x00
                }
                data = NFT_PAYLOAD_NETWORK_HEADER, //<-\x00\x00\x00\x01
                padding = "",
                struct nlattr {
                    nla_len = 8, //\x08\x00
                    nla_type = NFTA_PAYLOAD_OFFSET ,//\x03\x00
                }
                data = 16, // \x00\x00\x00\x10,
                padding = "",
                struct nlattr {
                    nla_len = 8, //\x08\x00
                    nla_type = NFTA_PAYLOAD_LEN // \x04\x00
                }
                data = 4, //"\x00\x00\x00\x04"
                padding = "",
            }
            {
                //2. cmp eq reg 1 0x0900007f
                struct nlattr {
                    nla_len = 44, // <-\x2c\x00
                    nla_type = NLA_F_NESTED | NFTA_LIST_ELEM, //<- \x01\x80
                }
                struct nlattr {
                    nla_len = 8, // \x08\x00
                    nla_type = NFTA_EXPR_NAME //\x01\x00
                }
                data = "cmp\0", //"\x63\x6d\x70\x00"
                padding = "",
                struct nlattr {
                    nla_len = 32, // <-\x20\x00
                    nla_type = NLA_F_NESTED | NFTA_EXPR_DATA // \x02\x80
                }
                struct nlattr {
                    nla_len = 8, //\x08\x00
                    nla_type = NFTA_CMP_SREG // \x01\x00
                }
                data = 1, // \x00\x00\x00\x01
                padding = "",
                struct nlattr {
                    nla_len = 8, //\x08\x00,
                    nla_type = NFTA_CMP_OP, //\x02\x00
                }
                data =NFT_CMP_EQ, // = 0 <- "\x00\x00\x00\x00",
                padding = "",
                struct nlattr {
                    nla_len = 12, //\x0c\x00
                    nla_type = NLA_F_NESTED | NFTA_CMP_DATA, //\x03\x80
                }
                struct nlattr {
                    nla_len = 8, //\x08\x00
                    nla_type = NFTA_DATA_VALUE, // \x01\x00
                }
                data = 0x0900007f, //"\x7f\x00\x00\x09"
                padding = "",

            }
            {
                // 3. counter pkts 0 bytes 0 
                struct nlattr {
                    nla_len = 20, //\x14\x00
                    nla_type = NLA_F_NESTED | NFTA_LIST_ELEM, //\x01\x80
                }
                struct nlattr {
                    nla_len = 12, //\x0c\x00
                    nla_type = NFTA_EXPR_NAME, // \x01\x00
                }
                data = "counter\0", // "\x63\x6f\x75\x6e\x74\x65\x72\x00"
                padding = "",
                struct nlattr {
                    nla_len = 4 , // \x04\x00
                    nla_type = NLA_F_NESTED | NFTA_EXPR_DATA, //\x02\x80
                    // nested but len is 4 => no additional data 
                }
            }
        },
        {
            struct nlmsghdr {
                nlmsg_len=20,
                nlmsg_type=NFNL_MSG_BATCH_END, // <- 0x11,
                nlmsg_flags=NLM_F_REQUEST,
                nlmsg_seq=2,
                nlmsg_pid=0},
            struct nfgenmsg { // <- "\x00\x00\x0a\x00"
                nfgen_family=AF_UNSPEC,
                version = NFNETLINK_V0,
                res_id = NFNL_SUBSYS_NFTABLES, /*resource id */
            }
        }
    ], iov_len=196}
  ], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 196