Merge pull request #5346 from edwintorok/private/edvint/default
fix(NUMA): 'default' is a keyword in some SDK languages
edwintorok authored Jan 10, 2024
2 parents 02a1214 + e719b06 commit 62de133
Showing 10 changed files with 42 additions and 28 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/main.yml
@@ -91,6 +91,10 @@ jobs:
opam exec -- make install DESTDIR=$(mktemp -d)
opam exec -- make install DESTDIR=$(mktemp -d) BUILD_PY2=NO
- name: Sanity test SDK
run: |
opam exec -- make sdksanity
- name: Uninstall unversioned packages and remove pins
# This should purge them from the cache, unversioned packages have
# 'master' as their version
7 changes: 7 additions & 0 deletions Makefile
@@ -66,6 +66,7 @@ sdk:
@ocaml/sdk-gen/csharp/generate \
@ocaml/sdk-gen/java/generate \
@ocaml/sdk-gen/powershell/generate
rm -rf $(XAPISDK)
mkdir -p $(XAPISDK)/c
mkdir -p $(XAPISDK)/csharp
mkdir -p $(XAPISDK)/java
@@ -79,6 +80,12 @@ sdk:
sh ocaml/sdk-gen/windows-line-endings.sh $(XAPISDK)/csharp
sh ocaml/sdk-gen/windows-line-endings.sh $(XAPISDK)/powershell

# workaround for no .resx generation, just for compilation testing
sdksanity: sdk
sed -i 's/FriendlyErrorNames.ResourceManager/null/g' ./_build/install/default/xapi/sdk/csharp/src/Failure.cs
cd _build/install/default/xapi/sdk/csharp/src && dotnet add package Newtonsoft.Json && dotnet build -f netstandard2.0


python:
$(MAKE) -C scripts/examples/python build

20 changes: 10 additions & 10 deletions doc/content/toolstack/features/NUMA/index.md
@@ -85,7 +85,7 @@ An optimal allocation would require knowing what VMs you would start/create in t
Overall we want to balance the VMs across NUMA nodes, such that we use all NUMA nodes to take advantage of the maximum memory bandwidth available on the system.
For now this proposed balancing will be done only by balancing memory usage: always heuristically allocating VMs on the NUMA node that has the most available memory.
Note that this allocation has a race condition for now when multiple VMs are booted in parallel, because we don't wait until Xen has constructed the domain for each one (that'd serialize domain construction, which is currently parallel).
This may be improved in the future by having an API to query Xen where it has allocated the memory, and to explicitly ask it to place memory on a given NUMA node (instead of best-effort).
This may be improved in the future by having an API to query Xen where it has allocated the memory, and to explicitly ask it to place memory on a given NUMA node (instead of best_effort).
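
A minimal sketch (not from this patch) of the "most free memory wins" heuristic described above, assuming each NUMA node is known only by its index and the amount of memory currently free on it:

type node = {index : int; free_mem : int64}

(* Pick the node with the most free memory; None if the host reports no
   NUMA nodes. Ties keep the earlier node. *)
let pick_node (nodes : node list) : node option =
  List.fold_left
    (fun best n ->
      match best with
      | Some b when b.free_mem >= n.free_mem -> best
      | _ -> Some n)
    None nodes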

If a VM doesn't fit into a single node then it is not so clear what the best approach is.
One criterion to consider is minimizing the NUMA distance between the nodes chosen for the VM.
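
When a VM has to span several nodes, that distance criterion could be scored roughly as in the sketch below (again an illustration, not from this patch), assuming distances.(i).(j) holds the Xen-reported distance between nodes i and j; a lower score means the chosen nodes are closer together, so cross-node memory accesses pay a smaller latency penalty:

(* Maximum pairwise NUMA distance over a candidate set of node indices. *)
let max_distance (distances : int array array) (nodes : int list) : int =
  List.fold_left
    (fun acc i ->
      List.fold_left (fun acc j -> max acc distances.(i).(j)) acc nodes)
    0 nodes
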
@@ -128,23 +128,23 @@ See page 13 in [^AMD_numa] for a diagram of an AMD Opteron 6272 system.
## XAPI datamodel design

* New API field: `Host.numa_affinity_policy`.
* Choices: `default`, `any`, `best-effort`.
* On upgrade the field is set to `default`
* Choices: `default_policy`, `any`, `best_effort`.
* On upgrade the field is set to `default_policy`
* Changes in the field only affect newly (re)booted VMs; for changes to take effect on existing VMs, a host evacuation or reboot is needed

There may be more choices in the future (e.g. `strict`, which requires both Xen and toolstack changes).

Meaning of the policy:
* `any`: the Xen default where it allocates memory by striping across NUMA nodes
* `best-effort`: the algorithm described in this document, where soft pinning is used to achieve better balancing and lower latency
* `default`: when the admin hasn't expressed a preference
* `best_effort`: the algorithm described in this document, where soft pinning is used to achieve better balancing and lower latency
* `default_policy`: when the admin hasn't expressed a preference

* Currently `default` is treated as `any`, but the admin can change it, and then the system will remember that change across upgrades.
If we didn't have a `default` then changing the "default" policy on an upgrade would be tricky: we either risk overriding an explicit choice of the admin, or existing installs cannot take advantage of the improved performance from `best-effort`
* Future XAPI versions may change `default` to mean `best-effort`.
* Currently `default_policy` is treated as `any`, but the admin can change it, and then the system will remember that change across upgrades.
If we didn't have a `default_policy` then changing the "default" policy on an upgrade would be tricky: we either risk overriding an explicit choice of the admin, or existing installs cannot take advantage of the improved performance from `best_effort`
* Future XAPI versions may change `default_policy` to mean `best_effort`.
Admins can still override it to `any` if they wish on a host by host basis.

It is not expected that users would have to change `best-effort`, unless they run very specific workloads, so a pool level control is not provided at this moment.
It is not expected that users would have to change `best_effort`, unless they run very specific workloads, so a pool level control is not provided at this moment.

There is also no separate feature flag: this host flag acts as a feature flag that can be set through the API without restarting the toolstack.
Although obviously only new VMs will benefit.
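
As an illustration of how a client could opt a host into the new policy (not part of this commit; the exact generated binding name and labelled arguments are assumptions based on how other Host setters are exposed in the OCaml XenAPI client):

(* Assumes an authenticated session; only VMs (re)booted after the call are
   placed with the new policy. *)
let enable_best_effort ~rpc ~session_id ~host =
  Client.Host.set_numa_affinity_policy ~rpc ~session_id ~self:host
    ~value:`best_effort
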
@@ -173,7 +173,7 @@ Tests are in [test_topology.ml] which checks balancing properties and whether th

## Future work

* enable 'best-effort' mode by default once more testing has been done
* enable 'best_effort' mode by default once more testing has been done
* an API to query Xen where it has actually allocated the VM's memory.
Currently only an `xl debug-keys` interface exists which is not supported in production as it can result in killing the host via the watchdog, and is not a proper API, but a textual debug output with no stability guarantees.
* more host policies (e.g. `strict`).
2 changes: 1 addition & 1 deletion ocaml/idl/datamodel_common.ml
@@ -10,7 +10,7 @@ open Datamodel_roles
to leave a gap for potential hotfixes needing to increment the schema version.*)
let schema_major_vsn = 5

let schema_minor_vsn = 770
let schema_minor_vsn = 771

(* Historical schema versions just in case this is useful later *)
let rio_schema_major_vsn = 5
7 changes: 4 additions & 3 deletions ocaml/idl/datamodel_host.ml
@@ -1673,12 +1673,12 @@ let host_numa_affinity_policy =
( "host_numa_affinity_policy"
, [
("any", "VMs are spread across all available NUMA nodes")
; ( "best-effort"
; ( "best_effort"
, "VMs are placed on the smallest number of NUMA nodes that they fit \
using soft-pinning, but the policy doesn't guarantee a balanced \
placement, falling back to the 'any' policy."
)
; ( "default"
; ( "default_policy"
, "Use the NUMA affinity policy that is the default for the current \
version"
)
@@ -2196,7 +2196,8 @@ let t =
"Default as 'unknown', 'yes' if the host is up to date with \
updates synced from remote CDN, otherwise 'no'"
; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:host_numa_affinity_policy
"numa_affinity_policy" ~default_value:(Some (VEnum "default"))
"numa_affinity_policy"
~default_value:(Some (VEnum "default_policy"))
"NUMA-aware VM memory and vCPU placement policy"
]
)
2 changes: 1 addition & 1 deletion ocaml/idl/schematest.ml
@@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex
(* BEWARE: if this changes, check that schema has been bumped accordingly in
ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *)

let last_known_schema_hash = "2ec2aeef2405d6bd73ab6beb44185532"
let last_known_schema_hash = "29a820cd4c81617f07b43c9b4f934317"

let current_schema_hash : string =
let open Datamodel_types in
2 changes: 1 addition & 1 deletion ocaml/tests/common/test_common.ml
@@ -208,7 +208,7 @@ let make_host2 ~__context ?(ref = Ref.make ()) ?(uuid = make_uuid ())
~display:`enabled ~virtual_hardware_platform_versions:[]
~control_domain:Ref.null ~updates_requiring_reboot:[] ~iscsi_iqn:""
~multipathing:false ~uefi_certificates:"" ~editions:[] ~pending_guidances:[]
~tls_verification_enabled ~numa_affinity_policy:`default
~tls_verification_enabled ~numa_affinity_policy:`default_policy
~last_software_update:(Xapi_host.get_servertime ~__context ~host:ref)
~recommended_guidances:[] ~latest_synced_updates_applied:`unknown ;
ref
20 changes: 11 additions & 9 deletions ocaml/xapi-cli-server/record_util.ml
@@ -709,21 +709,23 @@ let host_sched_gran_to_string = function
let host_numa_affinity_policy_to_string = function
| `any ->
"any"
| `besteffort ->
"best-effort"
| `default ->
"default"
| `best_effort ->
"best_effort"
| `default_policy ->
"default_policy"

let host_numa_affinity_policy_of_string = function
| "any" ->
`any
| "best-effort" ->
`besteffort
| "default" ->
`default
| "best_effort" ->
`best_effort
| "default_policy" ->
`default_policy
| s ->
raise
(Record_failure ("Expected 'any', 'best-effort' or 'default', got " ^ s))
(Record_failure
("Expected 'any', 'best_effort' or 'default_policy', got " ^ s)
)

let pgpu_dom0_access_to_string x = host_display_to_string x

2 changes: 1 addition & 1 deletion ocaml/xapi/xapi_host.ml
@@ -1039,7 +1039,7 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address
~cpu_configuration:[] (* !!! FIXME hard coding *)
~cpu_info:[] ~chipset_info ~memory_overhead:0L
~sched_policy:"credit" (* !!! FIXME hard coding *)
~numa_affinity_policy:`default
~numa_affinity_policy:`default_policy
~supported_bootloaders:(List.map fst Xapi_globs.supported_bootloaders)
~suspend_image_sr:Ref.null ~crash_dump_sr:Ref.null ~logging:[] ~hostname
~address ~metrics ~license_params ~boot_free_mem:0L ~ha_statefiles:[]
4 changes: 2 additions & 2 deletions ocaml/xapi/xapi_xenops.ml
@@ -3125,9 +3125,9 @@ let set_numa_affinity_policy ~__context ~value =
match value with
| `any ->
Any
| `besteffort ->
| `best_effort ->
Best_effort
| `default ->
| `default_policy ->
Any
in
Client.HOST.set_numa_affinity_policy dbg value
