Skip to content

Commit

Permalink
Merge pull request kubernetes#7481 from jackfrancis/vmss-proactive-deleting
Browse files Browse the repository at this point in the history

azure: StrictCacheUpdates to disable proactive vmss cache updates
  • Loading branch information
k8s-ci-robot authored Nov 11, 2024
2 parents adba590 + 1e5ed18 commit 93f74c0
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 15 deletions.
7 changes: 7 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/azure_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ type Config struct {

// (DEPRECATED, DO NOT USE) GetVmssSizeRefreshPeriod (seconds) defines how frequently to call GET VMSS API to fetch VMSS info per nodegroup instance
GetVmssSizeRefreshPeriod int `json:"getVmssSizeRefreshPeriod,omitempty" yaml:"getVmssSizeRefreshPeriod,omitempty"`

// StrictCacheUpdates updates cache values only after positive validation from Azure APIs
StrictCacheUpdates bool `json:"strictCacheUpdates,omitempty" yaml:"strictCacheUpdates,omitempty"`
}

// These are only here for backward compatibility. Their equivalent exists in providerazure.Config with a different name.
Expand Down Expand Up @@ -122,6 +125,7 @@ func BuildAzureConfig(configReader io.Reader) (*Config, error) {
cfg.CloudProviderBackoffJitter = providerazureconsts.BackoffJitterDefault
cfg.VMType = providerazureconsts.VMTypeVMSS
cfg.MaxDeploymentsCount = int64(defaultMaxDeploymentsCount)
cfg.StrictCacheUpdates = false

// Config file overrides defaults
if configReader != nil {
Expand Down Expand Up @@ -247,6 +251,9 @@ func BuildAzureConfig(configReader io.Reader) (*Config, error) {
if _, err = assignBoolFromEnvIfExists(&cfg.EnableForceDelete, "AZURE_ENABLE_FORCE_DELETE"); err != nil {
return nil, err
}
if _, err = assignBoolFromEnvIfExists(&cfg.StrictCacheUpdates, "AZURE_STRICT_CACHE_UPDATES"); err != nil {
return nil, err
}
if _, err = assignBoolFromEnvIfExists(&cfg.EnableDynamicInstanceList, "AZURE_ENABLE_DYNAMIC_INSTANCE_LIST"); err != nil {
return nil, err
}
Expand Down
40 changes: 25 additions & 15 deletions cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -530,19 +530,22 @@ func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef, hasUnregistered
return rerr.Error()
}

// Proactively decrement scale set size so that we don't
// go below minimum node count if cache data is stale
// only do it for non-unregistered nodes
if !hasUnregisteredNodes {
scaleSet.sizeMutex.Lock()
scaleSet.curSize -= int64(len(instanceIDs))
scaleSet.lastSizeRefresh = time.Now()
scaleSet.sizeMutex.Unlock()
}
if !scaleSet.manager.config.StrictCacheUpdates {
// Proactively decrement scale set size so that we don't
// go below minimum node count if cache data is stale
// only do it for non-unregistered nodes

if !hasUnregisteredNodes {
scaleSet.sizeMutex.Lock()
scaleSet.curSize -= int64(len(instanceIDs))
scaleSet.lastSizeRefresh = time.Now()
scaleSet.sizeMutex.Unlock()
}

// Proactively set the status of the instances to be deleted in cache
for _, instance := range instancesToDelete {
scaleSet.setInstanceStatusByProviderID(instance.Name, cloudprovider.InstanceStatus{State: cloudprovider.InstanceDeleting})
// Proactively set the status of the instances to be deleted in cache
for _, instance := range instancesToDelete {
scaleSet.setInstanceStatusByProviderID(instance.Name, cloudprovider.InstanceStatus{State: cloudprovider.InstanceDeleting})
}
}

go scaleSet.waitForDeleteInstances(future, requiredIds)
Expand All @@ -558,11 +561,18 @@ func (scaleSet *ScaleSet) waitForDeleteInstances(future *azure.Future, requiredI
isSuccess, err := isSuccessHTTPResponse(httpResponse, err)
if isSuccess {
klog.V(3).Infof(".WaitForDeleteInstancesResult(%v) for %s success", requiredIds.InstanceIds, scaleSet.Name)
// When StrictCacheUpdates is disabled, no need to invalidateInstanceCache because instanceStates were proactively set to "deleting"
if scaleSet.manager.config.StrictCacheUpdates {
if err := scaleSet.manager.forceRefresh(); err != nil {
klog.Errorf("forceRefresh failed with error: %v", err)
}
scaleSet.invalidateInstanceCache()
}
return
}
// On failure, invalidate the instanceCache - cannot have instances in deletingState
scaleSet.invalidateInstanceCache()
if !scaleSet.manager.config.StrictCacheUpdates {
// On failure, invalidate the instanceCache - cannot have instances in deletingState
scaleSet.invalidateInstanceCache()
}
klog.Errorf("WaitForDeleteInstancesResult(%v) for %s failed with error: %v", requiredIds.InstanceIds, scaleSet.Name, err)
}

Expand Down

0 comments on commit 93f74c0

Please sign in to comment.