Commit 2164cae
Merge branch 'bpf-next/master' into for-next
Signed-off-by: Alexei Starovoitov <[email protected]>
Alexei Starovoitov committed Oct 24, 2024
2 parents 81a132b + c6fb803 commit 2164cae
Showing 25 changed files with 1,015 additions and 95 deletions.
include/linux/bpf.h (25 additions, 0 deletions)
@@ -203,6 +203,7 @@ enum btf_field_type {
 	BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD,
 	BPF_REFCOUNT = (1 << 9),
 	BPF_WORKQUEUE = (1 << 10),
+	BPF_UPTR = (1 << 11),
 };
 
 typedef void (*btf_dtor_kfunc_t)(void *);
@@ -322,6 +323,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
 		return "kptr";
 	case BPF_KPTR_PERCPU:
 		return "percpu_kptr";
+	case BPF_UPTR:
+		return "uptr";
 	case BPF_LIST_HEAD:
 		return "bpf_list_head";
 	case BPF_LIST_NODE:
@@ -350,6 +353,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
 	case BPF_KPTR_UNREF:
 	case BPF_KPTR_REF:
 	case BPF_KPTR_PERCPU:
+	case BPF_UPTR:
 		return sizeof(u64);
 	case BPF_LIST_HEAD:
 		return sizeof(struct bpf_list_head);
@@ -379,6 +383,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
 	case BPF_KPTR_UNREF:
 	case BPF_KPTR_REF:
 	case BPF_KPTR_PERCPU:
+	case BPF_UPTR:
 		return __alignof__(u64);
 	case BPF_LIST_HEAD:
 		return __alignof__(struct bpf_list_head);
@@ -419,6 +424,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
 	case BPF_KPTR_UNREF:
 	case BPF_KPTR_REF:
 	case BPF_KPTR_PERCPU:
+	case BPF_UPTR:
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -507,6 +513,25 @@ static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src
 	bpf_obj_memcpy(map->record, dst, src, map->value_size, true);
 }
 
+static inline void bpf_obj_swap_uptrs(const struct btf_record *rec, void *dst, void *src)
+{
+	unsigned long *src_uptr, *dst_uptr;
+	const struct btf_field *field;
+	int i;
+
+	if (!btf_record_has_field(rec, BPF_UPTR))
+		return;
+
+	for (i = 0, field = rec->fields; i < rec->cnt; i++, field++) {
+		if (field->type != BPF_UPTR)
+			continue;
+
+		src_uptr = src + field->offset;
+		dst_uptr = dst + field->offset;
+		swap(*src_uptr, *dst_uptr);
+	}
+}
+
 static inline void bpf_obj_memzero(struct btf_record *rec, void *dst, u32 size)
 {
 	u32 curr_off = 0;
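The new BPF_UPTR field type tags map-value members that point into user memory. Note that bpf_obj_swap_uptrs() swaps rather than copies the pointer slots: after copy_map_value() has duplicated the raw bits, swapping moves ownership of the user pointers into the destination while the source is left holding the destination's old (zero-initialized) slots, so exactly one owner remains and nothing can be unpinned twice. Below is a hedged sketch of how such a field might be declared on the BPF program side; the __uptr macro mirrors the btf_type_tag("uptr") convention used by this series' selftests, and the struct and map names are illustrative, not taken from this diff:

	/* Sketch (assumptions: __uptr macro, struct and map names). */
	#define __uptr __attribute__((btf_type_tag("uptr")))

	struct user_data {
		int a;
		int b;
	};

	struct value_type {
		struct user_data __uptr *udata;	/* recorded as a BPF_UPTR field in BTF */
	};

	struct {
		__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
		__uint(map_flags, BPF_F_NO_PREALLOC);
		__type(key, int);
		__type(value, struct value_type);
	} datamap SEC(".maps");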
include/linux/bpf_local_storage.h (9 additions, 3 deletions)
@@ -77,7 +77,13 @@ struct bpf_local_storage_elem {
 	struct hlist_node map_node;	/* Linked to bpf_local_storage_map */
 	struct hlist_node snode;	/* Linked to bpf_local_storage */
 	struct bpf_local_storage __rcu *local_storage;
-	struct rcu_head rcu;
+	union {
+		struct rcu_head rcu;
+		struct hlist_node free_node;	/* used to postpone
+						 * bpf_selem_free
+						 * after raw_spin_unlock
+						 */
+	};
 	/* 8 bytes hole */
 	/* The data is stored in another cacheline to minimize
 	 * the number of cachelines access during a cache hit.
@@ -181,7 +187,7 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap,
 
 struct bpf_local_storage_elem *
 bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
-		bool charge_mem, gfp_t gfp_flags);
+		bool charge_mem, bool swap_uptrs, gfp_t gfp_flags);
 
 void bpf_selem_free(struct bpf_local_storage_elem *selem,
 		    struct bpf_local_storage_map *smap,
@@ -195,7 +201,7 @@ bpf_local_storage_alloc(void *owner,
 
 struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
-			 void *value, u64 map_flags, gfp_t gfp_flags);
+			 void *value, u64 map_flags, bool swap_uptrs, gfp_t gfp_flags);
 
 u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map);
 
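Union-izing free_node with the rcu head works because the two uses never overlap in time: a selem that has just been unlinked under the local_storage raw spinlock is parked on a caller-provided hlist through free_node, and only after the unlock does bpf_selem_free() reuse the same memory as the rcu head when the free is deferred to an RCU callback. A minimal sketch of the resulting collect-under-lock, free-after-unlock pattern, condensed from bpf_selem_unlink_storage() in the kernel/bpf/bpf_local_storage.c hunks below (variable names as in the diff):

	HLIST_HEAD(selem_free_list);
	unsigned long flags;

	raw_spin_lock_irqsave(&local_storage->lock, flags);
	/* unlink only; never free or unpin under the raw spinlock */
	hlist_add_head(&selem->free_node, &selem_free_list);
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);

	/* now safe to unpin uptrs or queue RCU callbacks */
	bpf_selem_free_list(&selem_free_list, false);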
kernel/bpf/bpf_cgrp_storage.c (2 additions, 2 deletions)
@@ -107,7 +107,7 @@ static long bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key,
 
 	bpf_cgrp_storage_lock();
 	sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map,
-					 value, map_flags, GFP_ATOMIC);
+					 value, map_flags, false, GFP_ATOMIC);
 	bpf_cgrp_storage_unlock();
 	cgroup_put(cgroup);
 	return PTR_ERR_OR_ZERO(sdata);
@@ -181,7 +181,7 @@ BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup,
 	if (!percpu_ref_is_dying(&cgroup->self.refcnt) &&
 	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
 		sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map,
-						 value, BPF_NOEXIST, gfp_flags);
+						 value, BPF_NOEXIST, false, gfp_flags);
 
 unlock:
 	bpf_cgrp_storage_unlock();
kernel/bpf/bpf_inode_storage.c (2 additions, 2 deletions)
@@ -100,7 +100,7 @@ static long bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
 
 	sdata = bpf_local_storage_update(file_inode(fd_file(f)),
 					 (struct bpf_local_storage_map *)map,
-					 value, map_flags, GFP_ATOMIC);
+					 value, map_flags, false, GFP_ATOMIC);
 	return PTR_ERR_OR_ZERO(sdata);
 }
 
@@ -154,7 +154,7 @@ BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
 	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
 		sdata = bpf_local_storage_update(
 			inode, (struct bpf_local_storage_map *)map, value,
-			BPF_NOEXIST, gfp_flags);
+			BPF_NOEXIST, false, gfp_flags);
 		return IS_ERR(sdata) ? (unsigned long)NULL :
 			(unsigned long)sdata->data;
 	}
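All of the storage flavors now thread the new swap_uptrs argument through the shared helper; the cgroup and inode call sites above pass false, as does bpf_task_storage_get() further below, and only the task-storage update path passes true (consistent with the comment in bpf_selem_free() that only task storage has uptrs). For reference, the updated signature from include/linux/bpf_local_storage.h:

	struct bpf_local_storage_data *
	bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
				 void *value, u64 map_flags, bool swap_uptrs,
				 gfp_t gfp_flags);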
kernel/bpf/bpf_local_storage.c (63 additions, 16 deletions)
@@ -73,7 +73,7 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
 
 struct bpf_local_storage_elem *
 bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
-		void *value, bool charge_mem, gfp_t gfp_flags)
+		void *value, bool charge_mem, bool swap_uptrs, gfp_t gfp_flags)
 {
 	struct bpf_local_storage_elem *selem;
 
@@ -99,9 +99,12 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
 	}
 
 	if (selem) {
-		if (value)
+		if (value) {
+			/* No need to call check_and_init_map_value as memory is zero init */
 			copy_map_value(&smap->map, SDATA(selem)->data, value);
-		/* No need to call check_and_init_map_value as memory is zero init */
+			if (swap_uptrs)
+				bpf_obj_swap_uptrs(smap->map.record, SDATA(selem)->data, value);
+		}
 		return selem;
 	}
 
@@ -209,8 +212,12 @@ static void __bpf_selem_free(struct bpf_local_storage_elem *selem,
 static void bpf_selem_free_rcu(struct rcu_head *rcu)
 {
 	struct bpf_local_storage_elem *selem;
+	struct bpf_local_storage_map *smap;
 
 	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
+	/* The bpf_local_storage_map_free will wait for rcu_barrier */
+	smap = rcu_dereference_check(SDATA(selem)->smap, 1);
+	bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
 	bpf_mem_cache_raw_free(selem);
 }
 
@@ -226,23 +233,52 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem,
 		    struct bpf_local_storage_map *smap,
 		    bool reuse_now)
 {
-	bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
-
 	if (!smap->bpf_ma) {
+		/* Only task storage has uptrs and task storage
+		 * has moved to bpf_mem_alloc. Meaning smap->bpf_ma == true
+		 * for task storage, so this bpf_obj_free_fields() won't unpin
+		 * any uptr.
+		 */
+		bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
 		__bpf_selem_free(selem, reuse_now);
 		return;
 	}
 
-	if (!reuse_now) {
-		call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu);
-	} else {
+	if (reuse_now) {
+		/* reuse_now == true only happens when the storage owner
+		 * (e.g. task_struct) is being destructed or the map itself
+		 * is being destructed (ie map_free). In both cases,
+		 * no bpf prog can have a hold on the selem. It is
+		 * safe to unpin the uptrs and free the selem now.
+		 */
+		bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
 		/* Instead of using the vanilla call_rcu(),
 		 * bpf_mem_cache_free will be able to reuse selem
 		 * immediately.
 		 */
		migrate_disable();
 		bpf_mem_cache_free(&smap->selem_ma, selem);
 		migrate_enable();
+		return;
 	}
+
+	call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu);
 }
 
+static void bpf_selem_free_list(struct hlist_head *list, bool reuse_now)
+{
+	struct bpf_local_storage_elem *selem;
+	struct bpf_local_storage_map *smap;
+	struct hlist_node *n;
+
+	/* The "_safe" iteration is needed.
+	 * The loop is not removing the selem from the list
+	 * but bpf_selem_free will use the selem->rcu_head
+	 * which is union-ized with the selem->free_node.
+	 */
+	hlist_for_each_entry_safe(selem, n, list, free_node) {
+		smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
+		bpf_selem_free(selem, smap, reuse_now);
+	}
+}
+
@@ -252,7 +288,7 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem,
  */
 static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
 					    struct bpf_local_storage_elem *selem,
-					    bool uncharge_mem, bool reuse_now)
+					    bool uncharge_mem, struct hlist_head *free_selem_list)
 {
 	struct bpf_local_storage_map *smap;
 	bool free_local_storage;
@@ -296,7 +332,7 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
 		    SDATA(selem))
 		RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
 
-	bpf_selem_free(selem, smap, reuse_now);
+	hlist_add_head(&selem->free_node, free_selem_list);
 
 	if (rcu_access_pointer(local_storage->smap) == smap)
 		RCU_INIT_POINTER(local_storage->smap, NULL);
@@ -345,6 +381,7 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
 	struct bpf_local_storage_map *storage_smap;
 	struct bpf_local_storage *local_storage;
 	bool bpf_ma, free_local_storage = false;
+	HLIST_HEAD(selem_free_list);
 	unsigned long flags;
 
 	if (unlikely(!selem_linked_to_storage_lockless(selem)))
@@ -360,9 +397,11 @@ static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
 	raw_spin_lock_irqsave(&local_storage->lock, flags);
 	if (likely(selem_linked_to_storage(selem)))
 		free_local_storage = bpf_selem_unlink_storage_nolock(
-			local_storage, selem, true, reuse_now);
+			local_storage, selem, true, &selem_free_list);
 	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 
+	bpf_selem_free_list(&selem_free_list, reuse_now);
+
 	if (free_local_storage)
 		bpf_local_storage_free(local_storage, storage_smap, bpf_ma, reuse_now);
 }
@@ -524,11 +563,12 @@ int bpf_local_storage_alloc(void *owner,
  */
 struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
-			 void *value, u64 map_flags, gfp_t gfp_flags)
+			 void *value, u64 map_flags, bool swap_uptrs, gfp_t gfp_flags)
 {
 	struct bpf_local_storage_data *old_sdata = NULL;
 	struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
 	struct bpf_local_storage *local_storage;
+	HLIST_HEAD(old_selem_free_list);
 	unsigned long flags;
 	int err;
 
@@ -550,7 +590,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 		if (err)
 			return ERR_PTR(err);
 
-		selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
+		selem = bpf_selem_alloc(smap, owner, value, true, swap_uptrs, gfp_flags);
 		if (!selem)
 			return ERR_PTR(-ENOMEM);
 
@@ -584,7 +624,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 	/* A lookup has just been done before and concluded a new selem is
 	 * needed. The chance of an unnecessary alloc is unlikely.
 	 */
-	alloc_selem = selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
+	alloc_selem = selem = bpf_selem_alloc(smap, owner, value, true, swap_uptrs, gfp_flags);
 	if (!alloc_selem)
 		return ERR_PTR(-ENOMEM);
 
@@ -624,11 +664,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 	if (old_sdata) {
 		bpf_selem_unlink_map(SELEM(old_sdata));
 		bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
-						true, false);
+						true, &old_selem_free_list);
 	}
 
 unlock:
 	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+	bpf_selem_free_list(&old_selem_free_list, false);
 	if (alloc_selem) {
 		mem_uncharge(smap, owner, smap->elem_size);
 		bpf_selem_free(alloc_selem, smap, true);
@@ -706,6 +747,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
 	struct bpf_local_storage_map *storage_smap;
 	struct bpf_local_storage_elem *selem;
 	bool bpf_ma, free_storage = false;
+	HLIST_HEAD(free_selem_list);
 	struct hlist_node *n;
 	unsigned long flags;
 
@@ -734,10 +776,12 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
 		 * of the loop will set the free_cgroup_storage to true.
 		 */
 		free_storage = bpf_selem_unlink_storage_nolock(
-			local_storage, selem, true, true);
+			local_storage, selem, true, &free_selem_list);
 	}
 	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 
+	bpf_selem_free_list(&free_selem_list, true);
+
 	if (free_storage)
 		bpf_local_storage_free(local_storage, storage_smap, bpf_ma, true);
 }
@@ -883,6 +927,9 @@ void bpf_local_storage_map_free(struct bpf_map *map,
 	synchronize_rcu();
 
 	if (smap->bpf_ma) {
+		rcu_barrier_tasks_trace();
+		if (!rcu_trace_implies_rcu_gp())
+			rcu_barrier();
 		bpf_mem_alloc_destroy(&smap->selem_ma);
 		bpf_mem_alloc_destroy(&smap->storage_ma);
 	}
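Because unpinning (bpf_obj_free_fields()) is now postponed to the RCU callbacks queued by bpf_selem_free(), map teardown must flush those callbacks before destroying the bpf_mem_alloc caches that back the selems. A condensed, commented sketch of the resulting ordering in bpf_local_storage_map_free() (the real code is in the hunk above):

	synchronize_rcu();			/* no lookup can still reach the map */

	if (smap->bpf_ma) {
		/* flush pending bpf_selem_free_trace_rcu() callbacks, which
		 * run bpf_obj_free_fields() before freeing the selems
		 */
		rcu_barrier_tasks_trace();
		if (!rcu_trace_implies_rcu_gp())
			rcu_barrier();		/* also flush chained call_rcu() callbacks */
		bpf_mem_alloc_destroy(&smap->selem_ma);
		bpf_mem_alloc_destroy(&smap->storage_ma);
	}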
kernel/bpf/bpf_task_storage.c (5 additions, 2 deletions)
@@ -129,6 +129,9 @@ static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
 	struct pid *pid;
 	int fd, err;
 
+	if ((map_flags & BPF_F_LOCK) && btf_record_has_field(map->record, BPF_UPTR))
+		return -EOPNOTSUPP;
+
 	fd = *(int *)key;
 	pid = pidfd_get_pid(fd, &f_flags);
 	if (IS_ERR(pid))
@@ -147,7 +150,7 @@ static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
 	bpf_task_storage_lock();
 	sdata = bpf_local_storage_update(
 		task, (struct bpf_local_storage_map *)map, value, map_flags,
-		GFP_ATOMIC);
+		true, GFP_ATOMIC);
 	bpf_task_storage_unlock();
 
 	err = PTR_ERR_OR_ZERO(sdata);
@@ -219,7 +222,7 @@ static void *__bpf_task_storage_get(struct bpf_map *map,
 	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) {
 		sdata = bpf_local_storage_update(
 			task, (struct bpf_local_storage_map *)map, value,
-			BPF_NOEXIST, gfp_flags);
+			BPF_NOEXIST, false, gfp_flags);
 		return IS_ERR(sdata) ? NULL : sdata->data;
 	}
 
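Putting the task-storage pieces together, here is a hedged user-space sketch of an update against a map with a uptr field (map_fd, task_pidfd and the value layout from the earlier BPF-side sketch are assumptions; the actual pinning of the user page happens in the syscall update path of this series, outside the files shown):

	struct user_data udata = { .a = 1, .b = 2 };	/* must remain valid while stored */
	struct value_type value = { .udata = &udata };
	int err;

	/* Plain update: bpf_pid_task_storage_update_elem() calls
	 * bpf_local_storage_update(..., swap_uptrs == true, ...), which moves
	 * the user pointer into the new selem via bpf_obj_swap_uptrs().
	 */
	err = bpf_map_update_elem(map_fd, &task_pidfd, &value, 0);

	/* BPF_F_LOCK is rejected for maps with uptr fields (new check above):
	 * the update fails with EOPNOTSUPP.
	 */
	err = bpf_map_update_elem(map_fd, &task_pidfd, &value, BPF_F_LOCK);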
[Diffs for the remaining changed files are not shown here.]