5.3 Adding tables, chains, rules (userspace)
Netlink messages are used to set up tables, chains, etc. from userspace.
Let us dissect the netlink message that is sent by nft by running strace. Some parts of the netlink message that strace does not parse have also been decoded and formatted here.
$ strace -f nft add table T2
nft creates a netlink socket and first sends a request to get rule-set generation NFT_MSG_GETGEN. In response, we receive the generation, which in this case is 3.
socket(AF_NETLINK, SOCK_RAW, NETLINK_NETFILTER) = 3
sendto(3,
{
struct nlmsghdr {
nlmsg_len=20,
nlmsg_type=NFNL_SUBSYS_NFTABLES << 8 | NFT_MSG_GETGEN, //<- 0xa10
nlmsg_flags=NLM_F_REQUEST,
nlmsg_seq=0, nlmsg_pid=0
},
struct nfgenmsg{ // <- "\x00\x00\x00\x00"
nfgen_family=AF_UNSPEC,
version = NFNETLINK_V0,
res_id = 0,
}
},20,
0,{sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000},12) = 20
recvmsg(3,
{
msg_name={
sa_family=AF_NETLINK,
nl_pid=0,
nl_groups=00000000
},
msg_namelen=12,
msg_iov=[
{
iov_base={
struct nlmsghdr {
nlmsg_len=44,
nlmsg_type=NFNL_SUBSYS_NFTABLES << 8 | NFT_MSG_NEWGEN //<- 0xa0f
nlmsg_flags=0,
nlmsg_seq=0,
nlmsg_pid=214676 // pid
},
struct nfgenmsg{ // <- "\x00\x00\x00\x03"
nfgen_family=AF_UNSPEC,
version = NFNETLINK_V0,
(be16) res_id = 0x0003, // nft_base_seq(net)
},
struct nlattr {
nla_len = 8, //<- \x08\x00
nla_type = NFTA_GEN_ID, // <- \x01\x00
},
data = htonl(nft_net->base_seq) "\x00\x00\x00\x03",
padding = "",
struct nlattr {
nla_len = 8, //<- \x08\x00
nla_type = NFTA_GEN_PROC_PID, // <- "\x02\x00"
},
data = htonl(pid), // <- htonl(214676) = "\x00\x03\x46\x94",
padding = "",
struct nlattr {
nla_len = 8, //<- \x08\x00
nla_type = NFTA_GEN_PROC_NAME, // <- "\x03\x00"
},
data="nft\0", // <-"\x6e\x66\x74\x00"
padding = "",
},
iov_len=69631
}
],
msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 44
netfilter stores the generation number in struct nftables_pernet, which holds data that is maintained per network namespace.
struct nftables_pernet *nft_net = nft_pernet(net);
struct nftables_pernet {
struct list_head tables; // list of tables
struct list_head commit_list; // list of transactions that are pending
...
struct mutex commit_mutex;
unsigned int base_seq; // generation number
};
Next, nft constructs and sends a netlink message to set up the table. A netlink message is made up of sections, each with a netlink message header and its attributes. nlmsghdr->nlmsg_type tells the kernel how to handle each section. The message below has the following sections:
NFNL_MSG_BATCH_BEGIN: begin a batch of changes to the ruleset.
NFT_MSG_NEWTABLE: create a new table, with attributes:
NFTA_TABLE_NAME: "T2\0"
NFTA_TABLE_FLAGS: 0
NFNL_MSG_BATCH_END: end of batch. Now commit these changes (which happens atomically as we will see in a later section).
sendmsg(3,
{
msg_name={
sa_family=AF_NETLINK,
nl_pid=0,
nl_groups=00000000
},
msg_namelen=12,
msg_iov=[
{
iov_base=[
{
struct nlmsghdr {
nlmsg_len=20,
nlmsg_type=NFNL_MSG_BATCH_BEGIN, //<- 0x10
nlmsg_flags=NLM_F_REQUEST,
nlmsg_seq=0,
nlmsg_pid=0
},
struct nfgenmsg{ // <- "\x00\x00\x0a\x00"
nfgen_family=AF_UNSPEC,
version = NFNETLINK_V0,
res_id = NFNL_SUBSYS_NFTABLES, /*resource id */
}
},
{
struct nlmsghdr {
nlmsg_len=36,
nlmsg_type=NFNL_SUBSYS_NFTABLES << 8 | NFT_MSG_NEWTABLE, // <- 0xa00
nlmsg_flags=NLM_F_REQUEST,
nlmsg_seq=1,
nlmsg_pid=0
},
struct nfgenmsg{ // <- "\x02\x00\x00\x00"
nfgen_family=AF_INET,
version = NFNETLINK_V0,
res_id = 0, /*resource id */
}
struct nlattr {
nla_len = 7, // <- "\x07\x00"
nla_type = NFTA_TABLE_NAME, //<- "\x01\x00"
},
data = "T2\0", // <- "\x54\x32\x00"
padding = "\x00", // 4 byte alignment
struct nlattr {
nla_len = 8, // <- "\x08\x00"
nla_type = NFTA_TABLE_FLAGS // "\x02\x00"
},
(u32) flags = 0, // <- "\x00\x00\x00\x00"
},
{
struct nlmsghdr {
nlmsg_len=20,
nlmsg_type=NFNL_MSG_BATCH_END,
nlmsg_flags=NLM_F_REQUEST,
nlmsg_seq=2,
nlmsg_pid=0,
},
struct nfgenmsg{ // <- "\x00\x00\x0a\x00"
nfgen_family=AF_UNSPEC,
version = NFNETLINK_V0,
res_id = NFNL_SUBSYS_NFTABLES, /*resource id */
}
}
],
iov_len=76
}
],
msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 76
Though I won't show the strace output, setting up a chain uses a netlink message with NFT_MSG_NEWCHAIN and the attributes NFTA_CHAIN_TABLE, NFTA_CHAIN_NAME, NFTA_CHAIN_HOOK and NFTA_CHAIN_POLICY.
To set up a rule, nft sends a message with NFT_MSG_NEWRULE and the attributes NFTA_RULE_TABLE, NFTA_RULE_CHAIN and, most importantly, NFTA_RULE_EXPRESSIONS, which is shown below.
Note: use --string-limit so strace dumps the complete msg data.
strace -f --string-limit=1024 -o one.txt nft add rule T C ip daddr 127.0.0.9 counter
sendmsg(3,
{msg_name=
{sa_family=AF_NETLINK,
nl_pid=0,
nl_groups=00000000},
msg_namelen=12,
msg_iov=[
{iov_base=[
{
struct nlmsghdr {
nlmsg_len=20,
nlmsg_type=NFNL_MSG_BATCH_BEGIN, // <- 0x10,
nlmsg_flags=NLM_F_REQUEST,
nlmsg_seq=0,
nlmsg_pid=0},
struct nfgenmsg { // <- "\x00\x00\x0a\x00"
nfgen_family=AF_UNSPEC,
version = NFNETLINK_V0,
res_id = NFNL_SUBSYS_NFTABLES, /*resource id */
}
},
{
struct nlmsghdr {
nlmsg_len=156,
nlmsg_type= NFNL_SUBSYS_NFTABLES << 8 | NFT_MSG_NEWRULE, // <- 0xa06
nlmsg_flags=NLM_F_REQUEST|0xc00, // 0xc00 = NLM_F_CREATE (0x400) | NLM_F_APPEND (0x800)
nlmsg_seq=1,
nlmsg_pid=0
},
struct nfgenmsg { // <- "\x02\x00\x00\x00"
nfgen_family=AF_INET,
version = NFNETLINK_V0,
res_id = 0,
},
struct nlattr {
nla_len = 6, // "\x06\x00"
nla_type = NFTA_RULE_TABLE, // <- "\x01\x00"
},
data = "T\0", // "\x54\x00
padding = "\x00\x00",
struct nlattr {
nla_len = 6, // <- \x06\x00
nla_type = NFTA_RULE_CHAIN, // <- "\x02\x00"
},
data = "C\0", // \x43\x00
padding = "\x00\x00",
struct nlattr {
nla_len = 120, //<- \x78\x00,
nla_type = NLA_F_NESTED | NFTA_RULE_EXPRESSIONS, //<-\x04\x80
}
{
// 1. payload load 4b @ network header + 16 => reg 1
struct nlattr {
nla_len = 52, //<- "\x34\x00"
nla_type = NLA_F_NESTED | NFTA_LIST_ELEM, //<- \x01\x80
}
struct nlattr {
nla_len = 12, //<- \x0c\x00
nla_type = NFTA_EXPR_NAME, // <- \x01\x00
}
data = "payload\0", // <- "\x70\x61\x79\x6c\x6f\x61\x64\x00",
padding = "",
struct nlattr {
nla_len = 36, // \x24\x00
nla_type = NLA_F_NESTED | NFTA_EXPR_DATA, //<- "\x02\x80"
}
struct nlattr {
nla_len = 8, // \x08\x00
nla_type = NFTA_PAYLOAD_DREG // <- \x01\x00
}
data = 1, //<- "\x00\x00\x00\x01"
padding = "",
struct nlattr {
nla_len = 8, //<- \x08\x00
nla_type = NFTA_PAYLOAD_BASE,//<- \x02\x00
}
data = NFT_PAYLOAD_NETWORK_HEADER, //<-\x00\x00\x00\x01
padding = "",
struct nlattr {
nla_len = 8, //\x08\x00
nla_type = NFTA_PAYLOAD_OFFSET ,//\x03\x00
}
data = 16, // \x00\x00\x00\x10,
padding = "",
struct nlattr {
nla_len = 8, //\x08\x00
nla_type = NFTA_PAYLOAD_LEN // \x04\x00
}
data = 4, //"\x00\x00\x00\x04"
padding = "",
}
{
//2. cmp eq reg 1 0x0900007f
struct nlattr {
nla_len = 44, // <-\x2c\x00
nla_type = NLA_F_NESTED | NFTA_LIST_ELEM, //<- \x01\x80
}
struct nlattr {
nla_len = 8, // \x08\x00
nla_type = NFTA_EXPR_NAME //\x01\x00
}
data = "cmp\0", //"\x63\x6d\x70\x00"
padding = "",
struct nlattr {
nla_len = 32, // <-\x20\x00
nla_type = NLA_F_NESTED | NFTA_EXPR_DATA // \x02\x80
}
struct nlattr {
nla_len = 8, //\x08\x00
nla_type = NFTA_CMP_SREG // \x01\x00
}
data = 1, // \x00\x00\x00\x01
padding = "",
struct nlattr {
nla_len = 8, //\x08\x00,
nla_type = NFTA_CMP_OP, //\x02\x00
}
data =NFT_CMP_EQ, // = 0 <- "\x00\x00\x00\x00",
padding = "",
struct nlattr {
nla_len = 12, //\x0c\x00
nla_type = NLA_F_NESTED | NFTA_CMP_DATA, //\x03\x80
}
struct nlattr {
nla_len = 8, //\x08\x00
nla_type = NFTA_DATA_VALUE, // \x01\x00
}
data = 0x0900007f, //"\x7f\x00\x00\x09"
padding = "",
}
{
// 3. counter pkts 0 bytes 0
struct nlattr {
nla_len = 20, //\x14\x00
nla_type = NLA_F_NESTED | NFTA_LIST_ELEM, //\x01\x80
}
struct nlattr {
nla_len = 12, //\x0c\x00
nla_type = NFTA_EXPR_NAME, // \x01\x00
}
data = "counter\0", // "\x63\x6f\x75\x6e\x74\x65\x72\x00"
padding = "",
struct nlattr {
nla_len = 4 , // \x04\x00
nla_type = NLA_F_NESTED | NFTA_EXPR_DATA, //\x02\x80
// nested but len is 4 => no additional data
}
}
},
{
struct nlmsghdr {
nlmsg_len=20,
nlmsg_type=NFNL_MSG_BATCH_END, // <- 0x11,
nlmsg_flags=NLM_F_REQUEST,
nlmsg_seq=2,
nlmsg_pid=0},
struct nfgenmsg { // <- "\x00\x00\x0a\x00"
nfgen_family=AF_UNSPEC,
version = NFNETLINK_V0,
res_id = NFNL_SUBSYS_NFTABLES, /*resource id */
}
}
], iov_len=196}
], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 196