diff --git a/pkg/scheduler/actions/allocate/allocate.go b/pkg/scheduler/actions/allocate/allocate.go index de9f8fd528..676281e761 100644 --- a/pkg/scheduler/actions/allocate/allocate.go +++ b/pkg/scheduler/actions/allocate/allocate.go @@ -17,7 +17,6 @@ package allocate import ( - "sort" "time" "k8s.io/klog/v2" @@ -34,7 +33,6 @@ type Action struct { session *framework.Session // configured flag for error cache enablePredicateErrorCache bool - hyperNodesTiers []int // hyperNodeScoresByJob stores job total score for all available hyperNodes, this is used for accumulate // all nodes' scores in each available hyperNode only when job has hard network topology constrains @@ -45,7 +43,6 @@ type Action struct { func New() *Action { return &Action{ enablePredicateErrorCache: true, // default to enable it - hyperNodesTiers: []int{}, hyperNodeScoresByJob: make(map[string]map[string]float64), } } @@ -61,26 +58,11 @@ func (alloc *Action) parseArguments(ssn *framework.Session) { arguments.GetBool(&alloc.enablePredicateErrorCache, conf.EnablePredicateErrCacheKey) } -func (alloc *Action) parseHyperNodesTiers(ssn *framework.Session) { - if ssn.HyperNodesSetByTier == nil || len(ssn.HyperNodesSetByTier) == 0 { - return - } - - // sort to guarantee the traverse order is from down to top. - var tiers []int - for tier := range ssn.HyperNodesSetByTier { - tiers = append(tiers, tier) - } - sort.Ints(tiers) - alloc.hyperNodesTiers = tiers -} - func (alloc *Action) Execute(ssn *framework.Session) { klog.V(5).Infof("Enter Allocate ...") defer klog.V(5).Infof("Leaving Allocate ...") alloc.parseArguments(ssn) - alloc.parseHyperNodesTiers(ssn) // the allocation for pod may have many stages // 1. 
pick a queue named Q (using ssn.QueueOrderFn) @@ -241,7 +223,7 @@ func (alloc *Action) allocateResourceForTasksWithTopology(tasks *util.PriorityQu jobAllocatedHyperNode := job.PodGroup.Annotations[api.JobAllocatedHyperNode] // Find a suitable hyperNode in one tier from down to top everytime to ensure that the selected hyperNode spans the least tier. - for _, tier := range alloc.hyperNodesTiers { + for _, tier := range ssn.HyperNodesTiers { if tier > highestAllowedTier { klog.V(4).ErrorS(nil, "Skip search for higher tier cause highest allowed tier reached", "jobName", job.UID, "highestAllowedTier", highestAllowedTier, "tier", tier) break diff --git a/pkg/scheduler/actions/allocate/allocate_test.go b/pkg/scheduler/actions/allocate/allocate_test.go index a04a54345a..e5ed2a506d 100644 --- a/pkg/scheduler/actions/allocate/allocate_test.go +++ b/pkg/scheduler/actions/allocate/allocate_test.go @@ -620,6 +620,132 @@ func TestAllocateWithNetWorkTopologies(t *testing.T) { ExpectBindsNum: 1, MinimalBindCheck: true, }, + { + Name: "hard network topology constrain and tasks in job rescheduled, can allocate job when highestTierAllowed not reached and hyperNodesInfo has three tier", + PodGroups: []*schedulingv1.PodGroup{ + util.BuildPodGroupWithNetWorkTopologies("pg1", "c1", "s3", "q1", 2, nil, schedulingv1.PodGroupInqueue, "hard", 2), + }, + Pods: []*v1.Pod{ + // should use different role, because allocate actions default to enable the role caches when predicate + util.BuildPod("c1", "p1", "s3-n1", v1.PodRunning, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "master"}, nil), + util.BuildPod("c1", "p2", "s3-n2", v1.PodRunning, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil), + util.BuildPod("c1", "p3", "", v1.PodPending, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil), + }, + Nodes: []*v1.Node{ + util.BuildNode("s3-n1", 
api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s3-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s4-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s4-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s5-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s5-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s6-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s6-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + }, + HyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("s3", "s4", "s5", "s6"), + 2: sets.New[string]("s1", "s2"), + 3: sets.New[string]("s0")}, + HyperNodesMap: map[string]*api.HyperNodeInfo{ + "s0": api.NewHyperNodeInfo(api.BuildHyperNode("s0", 3, []api.MemberConfig{ + { + Name: "s1", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s2", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s1": api.NewHyperNodeInfo(api.BuildHyperNode("s1", 2, []api.MemberConfig{ + { + Name: "s3", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s4", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s2": api.NewHyperNodeInfo(api.BuildHyperNode("s2", 2, []api.MemberConfig{ + { + Name: "s5", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s6", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s3": 
api.NewHyperNodeInfo(api.BuildHyperNode("s3", 1, []api.MemberConfig{ + { + Name: "s3-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s3-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s4": api.NewHyperNodeInfo(api.BuildHyperNode("s4", 1, []api.MemberConfig{ + { + Name: "s4-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s4-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s5": api.NewHyperNodeInfo(api.BuildHyperNode("s5", 1, []api.MemberConfig{ + { + Name: "s5-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s5-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s6": api.NewHyperNodeInfo(api.BuildHyperNode("s6", 1, []api.MemberConfig{ + { + Name: "s6-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s6-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + }, + HyperNodes: map[string]sets.Set[string]{ + "s0": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2", "s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s1": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2"), + "s2": sets.New[string]("s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s3": sets.New[string]("s3-n1", "s3-n2"), + "s4": sets.New[string]("s4-n1", "s4-n2"), + "s5": sets.New[string]("s5-n1", "s5-n2"), + "s6": sets.New[string]("s6-n1", "s6-n2"), + }, + Queues: []*schedulingv1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + ExpectBindsNum: 1, + MinimalBindCheck: true, + }, { Name: "hard network topology constrain and tasks in job rescheduled, can not allocate job when cross highestTierAllowed tier", PodGroups: []*schedulingv1.PodGroup{ @@ -687,6 +813,132 @@ func TestAllocateWithNetWorkTopologies(t *testing.T) { ExpectBindsNum: 0, MinimalBindCheck: true, }, + { + Name: "hard network topology constrain and tasks in job rescheduled, can not allocate job when 
cross highestTierAllowed tier and hyperNodesInfo has three tier", + PodGroups: []*schedulingv1.PodGroup{ + util.BuildPodGroupWithNetWorkTopologies("pg1", "c1", "s3", "q1", 2, nil, schedulingv1.PodGroupInqueue, "hard", 1), + }, + Pods: []*v1.Pod{ + // should use different role, because allocate actions default to enable the role caches when predicate + util.BuildPod("c1", "p1", "s3-n1", v1.PodRunning, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "master"}, nil), + util.BuildPod("c1", "p2", "s3-n2", v1.PodRunning, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil), + util.BuildPod("c1", "p3", "", v1.PodPending, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil), + }, + Nodes: []*v1.Node{ + util.BuildNode("s3-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s3-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s4-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s4-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s5-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s5-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s6-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s6-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + }, + HyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("s3", "s4", "s5", "s6"), + 2: sets.New[string]("s1", "s2"), + 3: sets.New[string]("s0")}, + HyperNodesMap: 
map[string]*api.HyperNodeInfo{ + "s0": api.NewHyperNodeInfo(api.BuildHyperNode("s0", 3, []api.MemberConfig{ + { + Name: "s1", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s2", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s1": api.NewHyperNodeInfo(api.BuildHyperNode("s1", 2, []api.MemberConfig{ + { + Name: "s3", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s4", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s2": api.NewHyperNodeInfo(api.BuildHyperNode("s2", 2, []api.MemberConfig{ + { + Name: "s5", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s6", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s3": api.NewHyperNodeInfo(api.BuildHyperNode("s3", 1, []api.MemberConfig{ + { + Name: "s3-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s3-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s4": api.NewHyperNodeInfo(api.BuildHyperNode("s4", 1, []api.MemberConfig{ + { + Name: "s4-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s4-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s5": api.NewHyperNodeInfo(api.BuildHyperNode("s5", 1, []api.MemberConfig{ + { + Name: "s5-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s5-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s6": api.NewHyperNodeInfo(api.BuildHyperNode("s6", 1, []api.MemberConfig{ + { + Name: "s6-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s6-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + }, + HyperNodes: map[string]sets.Set[string]{ + "s0": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2", "s5-n1", "s5-n2", "s6-n1", 
"s6-n2"), + "s1": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2"), + "s2": sets.New[string]("s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s3": sets.New[string]("s3-n1", "s3-n2"), + "s4": sets.New[string]("s4-n1", "s4-n2"), + "s5": sets.New[string]("s5-n1", "s5-n2"), + "s6": sets.New[string]("s6-n1", "s6-n2"), + }, + Queues: []*schedulingv1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + ExpectBindsNum: 0, + MinimalBindCheck: true, + }, { Name: "hard network topology constrain and tasks in job rescheduled, can not allocate job when LCAHyperNode is empty", PodGroups: []*schedulingv1.PodGroup{ diff --git a/pkg/scheduler/conf/scheduler_conf.go b/pkg/scheduler/conf/scheduler_conf.go index e911e0b69e..182501ed8f 100644 --- a/pkg/scheduler/conf/scheduler_conf.go +++ b/pkg/scheduler/conf/scheduler_conf.go @@ -87,6 +87,8 @@ type PluginOption struct { EnabledOverused *bool `yaml:"enabledOverused"` // EnabledAllocatable defines whether allocatable is enabled EnabledAllocatable *bool `yaml:"enabledAllocatable"` + // EnabledNetworkTopology defines whether network topology is enabled + EnabledNetworkTopology *bool `yaml:"enabledNetworkTopology"` // Arguments defines the different arguments that can be given to different plugins Arguments map[string]interface{} `yaml:"arguments"` } diff --git a/pkg/scheduler/framework/session.go b/pkg/scheduler/framework/session.go index 338b29d88f..58c771f0ac 100644 --- a/pkg/scheduler/framework/session.go +++ b/pkg/scheduler/framework/session.go @@ -19,6 +19,7 @@ package framework import ( "context" "fmt" + "sort" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" @@ -82,6 +83,7 @@ type Session struct { // have the same topology domain, e.g., nodes under the same switch or tor, jobs allocated in the same // hyperNode can gain a better performance, the lower the tier of hyperNode, the better performance. 
HyperNodesSetByTier map[int]sets.Set[string]
+	HyperNodesTiers     []int
 	// RealNodesList maps hyperNode Name -> nodes under the hyperNode.
 	RealNodesList             map[string][]*api.NodeInfo
 	HyperNodesReadyToSchedule bool
@@ -202,6 +204,7 @@ func openSession(cache cache.Cache) *Session {
 	ssn.NodeList = util.GetNodeList(snapshot.Nodes, snapshot.NodeList)
 	ssn.HyperNodes = snapshot.HyperNodes
 	ssn.HyperNodesSetByTier = snapshot.HyperNodesSetByTier
+	parseHyperNodesTiers(ssn)
 	ssn.RealNodesList = util.GetRealNodesListByHyperNode(snapshot.RealNodesSet, snapshot.Nodes)
 	ssn.HyperNodesReadyToSchedule = snapshot.HyperNodesReadyToSchedule
 	ssn.Nodes = snapshot.Nodes
@@ -220,6 +223,22 @@ func openSession(cache cache.Cache) *Session {
 	return ssn
 }
 
+// parseHyperNodesTiers stores the tiers present in ssn.HyperNodesSetByTier, sorted in
+// ascending order, in ssn.HyperNodesTiers. It leaves the field untouched when no tier exists.
+func parseHyperNodesTiers(ssn *Session) {
+	if len(ssn.HyperNodesSetByTier) == 0 {
+		return
+	}
+
+	// sort to guarantee the traverse order is from down to top.
+	tiers := make([]int, 0, len(ssn.HyperNodesSetByTier))
+	for tier := range ssn.HyperNodesSetByTier {
+		tiers = append(tiers, tier)
+	}
+	sort.Ints(tiers)
+	ssn.HyperNodesTiers = tiers
+}
+
 // updateQueueStatus updates allocated field in queue status on session close.
 func updateQueueStatus(ssn *Session) {
 	rootQueue := api.QueueID("root")
diff --git a/pkg/scheduler/plugins/networktopologyaware/networktopologyaware.go b/pkg/scheduler/plugins/networktopologyaware/networktopologyaware.go
new file mode 100644
index 0000000000..aabc21ed36
--- /dev/null
+++ b/pkg/scheduler/plugins/networktopologyaware/networktopologyaware.go
@@ -0,0 +1,247 @@
+/*
+Copyright 2019 The Volcano Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package networktopologyaware
+
+import (
+	"k8s.io/klog/v2"
+
+	"volcano.sh/volcano/pkg/scheduler/api"
+	"volcano.sh/volcano/pkg/scheduler/framework"
+	"volcano.sh/volcano/pkg/scheduler/util"
+)
+
+const (
+	// PluginName indicates name of volcano scheduler plugin.
+	PluginName = "networktopologyaware"
+	// BaseScore is the unweighted score of the hyperNode the job is already allocated to;
+	// other hyperNodes receive a fraction of it based on the tier of their LCA hyperNode.
+	BaseScore = 100.0
+	// TaskSumBaseScore is the upper bound of the unweighted tie-break bonus derived from the
+	// share of the job's tasks counted for a hyperNode.
+	TaskSumBaseScore = 10.0
+	// ZeroScore is the score used when no topology preference can be expressed.
+	ZeroScore = 0.0
+	// NetworkTopologyWeight is the plugin argument key holding the integer score multiplier.
+	NetworkTopologyWeight = "weight"
+)
+
+type networkTopologyAwarePlugin struct {
+	// Arguments given for the plugin
+	pluginArguments framework.Arguments
+	*hyperNodesTier
+}
+
+// hyperNodesTier holds the lowest and highest hyperNode tiers known to the session.
+type hyperNodesTier struct {
+	maxTier int
+	minTier int
+}
+
+// init records the tier bounds from hyperNodesSetByTier, which is expected to be sorted in
+// ascending order. An empty slice resets both bounds so stale values are never reused.
+func (h *hyperNodesTier) init(hyperNodesSetByTier []int) {
+	if len(hyperNodesSetByTier) == 0 {
+		h.minTier, h.maxTier = 0, 0
+		return
+	}
+	h.minTier = hyperNodesSetByTier[0]
+	h.maxTier = hyperNodesSetByTier[len(hyperNodesSetByTier)-1]
+}
+
+// maxScoreTracker tracks the highest score observed so far and how many candidates share it.
+// Its zero value starts at ZeroScore with no candidates.
+type maxScoreTracker struct {
+	max   float64
+	count int
+}
+
+// observe folds one candidate score into the tracker.
+func (t *maxScoreTracker) observe(score float64) {
+	switch {
+	case score > t.max:
+		t.max, t.count = score, 1
+	case score == t.max:
+		t.count++
+	}
+}
+
+// needsTieBreak reports whether several candidates share a non-zero highest score.
+func (t *maxScoreTracker) needsTieBreak() bool {
+	return t.max != ZeroScore && t.count > 1
+}
+
+// New returns a networkTopologyAware plugin configured with the given arguments.
+func New(arguments framework.Arguments) framework.Plugin {
+	return &networkTopologyAwarePlugin{
+		pluginArguments: arguments,
+		hyperNodesTier:  &hyperNodesTier{},
+	}
+}
+
+// Name returns the name of the plugin.
+func (nta *networkTopologyAwarePlugin) Name() string {
+	return PluginName
+}
+
+// calculateWeight reads the score multiplier from the plugin arguments, defaulting to 1.
+func calculateWeight(args framework.Arguments) int {
+	weight := 1
+	args.GetInt(&weight, NetworkTopologyWeight)
+	return weight
+}
+
+// allocatedHyperNode returns the hyperNode recorded in the job's PodGroup annotations, or ""
+// when the job or its PodGroup is missing or nothing has been recorded yet.
+func allocatedHyperNode(job *api.JobInfo) string {
+	if job == nil || job.PodGroup == nil {
+		return ""
+	}
+	return job.PodGroup.GetAnnotations()[api.JobAllocatedHyperNode]
+}
+
+// OnSessionOpen registers the hyperNode and batch node order functions, which score
+// candidates by their topological distance to the hyperNode the job is already allocated to.
+func (nta *networkTopologyAwarePlugin) OnSessionOpen(ssn *framework.Session) {
+	klog.V(5).Infof("Enter networkTopologyAwarePlugin plugin ...")
+	defer func() {
+		klog.V(5).Infof("Leaving networkTopologyAware plugin ...")
+	}()
+
+	weight := float64(calculateWeight(nta.pluginArguments))
+	nta.hyperNodesTier.init(ssn.HyperNodesTiers)
+
+	hyperNodeFn := func(job *api.JobInfo, hyperNodes map[string][]*api.NodeInfo) (map[string]float64, error) {
+		hyperNodeScores := make(map[string]float64, len(hyperNodes))
+
+		jobAllocatedHyperNode := allocatedHyperNode(job)
+		if jobAllocatedHyperNode == "" {
+			for hyperNode := range hyperNodes {
+				hyperNodeScores[hyperNode] = ZeroScore
+			}
+			return hyperNodeScores, nil
+		}
+		// The job still has remaining tasks to be scheduled, calculate score based on LCAHyperNode tier of the hyperNode and jobAllocatedHyperNode.
+		var best maxScoreTracker
+		for hyperNode := range hyperNodes {
+			score := weight * nta.networkTopologyAwareScore(hyperNode, jobAllocatedHyperNode, ssn.HyperNodes)
+			hyperNodeScores[hyperNode] = score
+			best.observe(score)
+		}
+		// When more than one hyperNode shares the max score, break the tie with a score based on task num.
+		if best.needsTieBreak() {
+			for hyperNode, score := range hyperNodeScores {
+				if score == best.max {
+					hyperNodeScores[hyperNode] += weight * nta.networkTopologyAwareScoreWithTaskNum(hyperNode, job, ssn.RealNodesList)
+				}
+			}
+		}
+
+		klog.V(1).Infof("networkTopologyAware score is: %v", hyperNodeScores)
+		return hyperNodeScores, nil
+	}
+
+	nodeFn := func(task *api.TaskInfo, nodes []*api.NodeInfo) (map[string]float64, error) {
+		nodeScores := make(map[string]float64, len(nodes))
+
+		// A task whose job is unknown to the session yields a nil job, which is scored like a job without an allocated hyperNode.
+		taskJob := ssn.Jobs[task.Job]
+		jobAllocatedHyperNode := allocatedHyperNode(taskJob)
+		if jobAllocatedHyperNode == "" {
+			for _, node := range nodes {
+				nodeScores[node.Name] = ZeroScore
+			}
+			return nodeScores, nil
+		}
+		// The job still has remaining tasks to be scheduled, calculate score based on LCAHyperNode tier.
+		// Each node's hyperNode is resolved once and reused by the tie-break below.
+		nodeHyperNodes := make(map[string]string, len(nodes))
+		var best maxScoreTracker
+		for _, node := range nodes {
+			hyperNode := util.FindHyperNodeOfNode(node.Name, ssn.RealNodesList, ssn.HyperNodesTiers, ssn.HyperNodesSetByTier)
+			nodeHyperNodes[node.Name] = hyperNode
+			score := weight * nta.networkTopologyAwareScore(hyperNode, jobAllocatedHyperNode, ssn.HyperNodes)
+			nodeScores[node.Name] = score
+			best.observe(score)
+		}
+		// When more than one node shares the max score, break the tie with a score based on task num of the node's hyperNode.
+		if best.needsTieBreak() {
+			for node, score := range nodeScores {
+				if score == best.max {
+					nodeScores[node] += weight * nta.networkTopologyAwareScoreWithTaskNum(nodeHyperNodes[node], taskJob, ssn.RealNodesList)
+				}
+			}
+		}
+
+		klog.V(1).Infof("networkTopologyAware score is: %v", nodeScores)
+		return nodeScores, nil
+	}
+
+	ssn.AddHyperNodeOrederFn(nta.Name(), hyperNodeFn)
+	ssn.AddBatchNodeOrderFn(nta.Name(), nodeFn)
+}
+
+// OnSessionClose is intentionally empty.
+func (nta *networkTopologyAwarePlugin) OnSessionClose(ssn *framework.Session) {
+}
+
+// networkTopologyAwareScore uses the best-fit policy during scheduling.
+//
+// Goals:
+//   - The tier of LCAHyperNode of the hyperNode and the job allocatedHyperNode should be as low as possible.
+//
+// It returns BaseScore when hyperNodeName is the job's allocated hyperNode, ZeroScore when the
+// LCA hyperNode is not present in hyperNodeMap, and otherwise BaseScore scaled by the LCA tier.
+func (nta *networkTopologyAwarePlugin) networkTopologyAwareScore(hyperNodeName, jobAllocatedHyperNode string, hyperNodeMap api.HyperNodeInfoMap) float64 {
+	if hyperNodeName == jobAllocatedHyperNode {
+		return BaseScore
+	}
+	lcaHyperNode := hyperNodeMap.GetLCAHyperNode(hyperNodeName, jobAllocatedHyperNode)
+	hyperNodeInfo, ok := hyperNodeMap[lcaHyperNode]
+	if !ok {
+		return ZeroScore
+	}
+	// Calculate score: (maxTier - LCAhyperNode.tier)/(maxTier - minTier)
+	return BaseScore * scoreHyperNodeWithTier(hyperNodeInfo.Tier(), nta.minTier, nta.maxTier)
+}
+
+// networkTopologyAwareScoreWithTaskNum scores a hyperNode by the share of the job's tasks counted for it.
+//
+// Goals:
+//   - Tasks under a job should be scheduled to one hyperNode as much as possible.
+func (nta *networkTopologyAwarePlugin) networkTopologyAwareScoreWithTaskNum(hyperNodeName string, job *api.JobInfo, realNodesList map[string][]*api.NodeInfo) float64 {
+	taskNum := util.FindJobTaskNumOfHyperNode(hyperNodeName, job, realNodesList)
+	// Calculate score: taskNum/allTaskNum; scoreHyperNodeWithTaskNum yields ZeroScore for a job without tasks.
+	return TaskSumBaseScore * scoreHyperNodeWithTaskNum(taskNum, len(job.Tasks))
+}
+
+// scoreHyperNodeWithTier maps tier to the range between 0 and 1, where minTier maps to 1 and
+// maxTier maps to 0. It returns ZeroScore when minTier equals maxTier.
+func scoreHyperNodeWithTier(tier int, minTier int, maxTier int) float64 {
+	if minTier == maxTier {
+		return ZeroScore
+	}
+	return float64(maxTier-tier) / float64(maxTier-minTier)
+}
+
+// scoreHyperNodeWithTaskNum returns the task distribution rate taskNum/allTaskNum, a value
+// between 0 and 1 when 0 <= taskNum <= allTaskNum. It returns ZeroScore when allTaskNum is 0.
+func scoreHyperNodeWithTaskNum(taskNum int, allTaskNum int) float64 {
+	if allTaskNum == 0 {
+		return ZeroScore
+	}
+	return float64(taskNum) / float64(allTaskNum)
+}
diff --git a/pkg/scheduler/plugins/networktopologyaware/networktopologyaware_test.go b/pkg/scheduler/plugins/networktopologyaware/networktopologyaware_test.go
new file mode 100644
index 0000000000..8040348d09
--- /dev/null
+++ b/pkg/scheduler/plugins/networktopologyaware/networktopologyaware_test.go
@@ -0,0 +1,1811 @@
+/*
+Copyright 2019 The Volcano Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package networktopologyaware
+
+import (
+	"math"
+	"testing"
+
+	corev1 "k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/util/sets"
+	"volcano.sh/volcano/pkg/scheduler/api"
+	"volcano.sh/volcano/pkg/scheduler/conf"
+	"volcano.sh/volcano/pkg/scheduler/framework"
+	"volcano.sh/volcano/pkg/scheduler/uthelper"
+	"volcano.sh/volcano/pkg/scheduler/util"
+
+	schedulingv1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1"
+	topologyv1alpha1 "volcano.sh/apis/pkg/apis/topology/v1alpha1"
+)
+
+const (
+	eps = 1e-1
+)
+
+func TestArguments(t *testing.T) {
+	framework.RegisterPluginBuilder(PluginName, New)
+	defer framework.CleanupPluginBuilders()
+
+	arguments := framework.Arguments{
+		"weight": 2,
+	}
+
+	builder, ok := framework.GetPluginBuilder(PluginName)
+	if !ok {
+		t.Fatalf("should have plugin named %s", PluginName)
+	}
+
+	plugin := builder(arguments)
+	networkTopologyAware, ok := plugin.(*networkTopologyAwarePlugin)
+	if !ok {
+		t.Fatalf("plugin should be %T, but not 
%T", networkTopologyAware, plugin) + } + weight := calculateWeight(networkTopologyAware.pluginArguments) + if weight != 2 { + t.Errorf("weight should be 2, but get %v", weight) + } +} + +func TestNetworkTopologyAwareHyperNodeScore(t *testing.T) { + tests := []struct { + name string + uthelper.TestCommonStruct + arguments framework.Arguments + scoreHyperNodes map[string][]*api.NodeInfo + tasks map[string]string + expected map[string]float64 + }{ + { + name: "Tasks in job first scheduler, score all hyperNodes zero", + TestCommonStruct: uthelper.TestCommonStruct{ + PodGroups: []*schedulingv1.PodGroup{ + util.BuildPodGroupWithNetWorkTopologies("pg1", "c1", "", "q1", 1, nil, schedulingv1.PodGroupInqueue, "hard", 3), + }, + Plugins: map[string]framework.PluginBuilder{PluginName: New}, + HyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("s3", "s4", "s5", "s6"), + 2: sets.New[string]("s1", "s2"), + 3: sets.New[string]("s0")}, + HyperNodesMap: map[string]*api.HyperNodeInfo{ + "s0": api.NewHyperNodeInfo(api.BuildHyperNode("s0", 3, []api.MemberConfig{ + { + Name: "s1", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s2", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s1": api.NewHyperNodeInfo(api.BuildHyperNode("s1", 2, []api.MemberConfig{ + { + Name: "s3", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s4", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s2": api.NewHyperNodeInfo(api.BuildHyperNode("s2", 2, []api.MemberConfig{ + { + Name: "s5", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s6", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s3": api.NewHyperNodeInfo(api.BuildHyperNode("s3", 1, []api.MemberConfig{ + { + Name: "s3-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s3-n2", + Type: 
topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s4": api.NewHyperNodeInfo(api.BuildHyperNode("s4", 1, []api.MemberConfig{ + { + Name: "s4-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s4-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s5": api.NewHyperNodeInfo(api.BuildHyperNode("s5", 1, []api.MemberConfig{ + { + Name: "s5-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s5-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s6": api.NewHyperNodeInfo(api.BuildHyperNode("s6", 1, []api.MemberConfig{ + { + Name: "s6-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s6-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + }, + HyperNodes: map[string]sets.Set[string]{ + "s0": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2", "s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s1": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2"), + "s2": sets.New[string]("s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s3": sets.New[string]("s3-n1", "s3-n2"), + "s4": sets.New[string]("s4-n1", "s4-n2"), + "s5": sets.New[string]("s5-n1", "s5-n2"), + "s6": sets.New[string]("s6-n1", "s6-n2"), + }, + Queues: []*schedulingv1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + }, + arguments: framework.Arguments{ + "weight": 1, + }, + scoreHyperNodes: map[string][]*api.NodeInfo{ + "s3": nil, + "s4": nil, + "s5": nil, + }, + expected: map[string]float64{ + "s3": 0.0, + "s4": 0.0, + "s5": 0.0, + }, + }, + { + name: "Tasks in job rescheduled, score zero when the hyperNode has empty LCA hyperNode with jobAllocatedHyperNode", + TestCommonStruct: uthelper.TestCommonStruct{ + PodGroups: []*schedulingv1.PodGroup{ + util.BuildPodGroupWithNetWorkTopologies("pg1", "c1", "s3", "q1", 1, nil, schedulingv1.PodGroupInqueue, "hard", 3), + }, + Plugins: map[string]framework.PluginBuilder{PluginName: New}, + 
HyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("s3", "s4", "s5", "s6"), + 2: sets.New[string]("s1", "s2"), + 3: sets.New[string]("s0")}, + HyperNodesMap: map[string]*api.HyperNodeInfo{ + "s0": api.NewHyperNodeInfo(api.BuildHyperNode("s0", 3, []api.MemberConfig{ + { + Name: "s1", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s1": api.NewHyperNodeInfo(api.BuildHyperNode("s1", 2, []api.MemberConfig{ + { + Name: "s3", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s4", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s2": api.NewHyperNodeInfo(api.BuildHyperNode("s2", 2, []api.MemberConfig{ + { + Name: "s5", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s6", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s3": api.NewHyperNodeInfo(api.BuildHyperNode("s3", 1, []api.MemberConfig{ + { + Name: "s3-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s3-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s4": api.NewHyperNodeInfo(api.BuildHyperNode("s4", 1, []api.MemberConfig{ + { + Name: "s4-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s4-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s5": api.NewHyperNodeInfo(api.BuildHyperNode("s5", 1, []api.MemberConfig{ + { + Name: "s5-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s5-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s6": api.NewHyperNodeInfo(api.BuildHyperNode("s6", 1, []api.MemberConfig{ + { + Name: "s6-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s6-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + }, + HyperNodes: map[string]sets.Set[string]{ + 
"s0": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2", "s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s1": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2"), + "s2": sets.New[string]("s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s3": sets.New[string]("s3-n1", "s3-n2"), + "s4": sets.New[string]("s4-n1", "s4-n2"), + "s5": sets.New[string]("s5-n1", "s5-n2"), + "s6": sets.New[string]("s6-n1", "s6-n2"), + }, + Queues: []*schedulingv1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + }, + arguments: framework.Arguments{ + "weight": 1, + }, + scoreHyperNodes: map[string][]*api.NodeInfo{ + "s5": nil, + "s6": nil, + }, + expected: map[string]float64{ + "s5": 0.0, + "s6": 0.0, + }, + }, + { + name: "Tasks in job rescheduled, score hyperNodes according to LCA hyperNode tier of the hyperNode and jobAllocatedHyperNode", + TestCommonStruct: uthelper.TestCommonStruct{ + Plugins: map[string]framework.PluginBuilder{PluginName: New}, + PodGroups: []*schedulingv1.PodGroup{ + util.BuildPodGroupWithNetWorkTopologies("pg1", "c1", "s3", "q1", 1, nil, schedulingv1.PodGroupInqueue, "hard", 3), + }, + HyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("s3", "s4", "s5", "s6"), + 2: sets.New[string]("s1", "s2"), + 3: sets.New[string]("s0")}, + HyperNodesMap: map[string]*api.HyperNodeInfo{ + "s0": api.NewHyperNodeInfo(api.BuildHyperNode("s0", 3, []api.MemberConfig{ + { + Name: "s1", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s2", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s1": api.NewHyperNodeInfo(api.BuildHyperNode("s1", 2, []api.MemberConfig{ + { + Name: "s3", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s4", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s2": api.NewHyperNodeInfo(api.BuildHyperNode("s2", 2, []api.MemberConfig{ + { + Name: "s5", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: 
"s6", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s3": api.NewHyperNodeInfo(api.BuildHyperNode("s3", 1, []api.MemberConfig{ + { + Name: "s3-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s3-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s4": api.NewHyperNodeInfo(api.BuildHyperNode("s4", 1, []api.MemberConfig{ + { + Name: "s4-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s4-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s5": api.NewHyperNodeInfo(api.BuildHyperNode("s5", 1, []api.MemberConfig{ + { + Name: "s5-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s5-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s6": api.NewHyperNodeInfo(api.BuildHyperNode("s6", 1, []api.MemberConfig{ + { + Name: "s6-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s6-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + }, + HyperNodes: map[string]sets.Set[string]{ + "s0": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2", "s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s1": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2"), + "s2": sets.New[string]("s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s3": sets.New[string]("s3-n1", "s3-n2"), + "s4": sets.New[string]("s4-n1", "s4-n2"), + "s5": sets.New[string]("s5-n1", "s5-n2"), + "s6": sets.New[string]("s6-n1", "s6-n2"), + }, + Queues: []*schedulingv1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + }, + arguments: framework.Arguments{ + "weight": 1, + }, + scoreHyperNodes: map[string][]*api.NodeInfo{ + "s3": nil, + "s4": nil, + "s5": nil, + }, + expected: map[string]float64{ + "s3": 100.0, + "s4": 50.0, + "s5": 0.0, + }, + }, + { + name: "Tasks in job rescheduled, score hyperNodes according to LCA hyperNode tier of the hyperNode and 
jobAllocatedHyperNode when hyperNodesInfo has two tier", + TestCommonStruct: uthelper.TestCommonStruct{ + Plugins: map[string]framework.PluginBuilder{PluginName: New}, + PodGroups: []*schedulingv1.PodGroup{ + util.BuildPodGroupWithNetWorkTopologies("pg1", "c1", "s1", "q1", 1, nil, schedulingv1.PodGroupInqueue, "hard", 2), + }, + HyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("s1", "s2"), + 2: sets.New[string]("s0")}, + HyperNodesMap: map[string]*api.HyperNodeInfo{ + "s0": api.NewHyperNodeInfo(api.BuildHyperNode("s0", 2, []api.MemberConfig{ + { + Name: "s1", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s2", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s1": api.NewHyperNodeInfo(api.BuildHyperNode("s1", 1, []api.MemberConfig{ + { + Name: "s1-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s1-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s2": api.NewHyperNodeInfo(api.BuildHyperNode("s2", 1, []api.MemberConfig{ + { + Name: "s2-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s2-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + }, + HyperNodes: map[string]sets.Set[string]{ + "s0": sets.New[string]("s1-n1", "s1-n2", "s2-n1", "s2-n2"), + "s1": sets.New[string]("s1-n1", "s1-n2"), + "s2": sets.New[string]("s2-n1", "s2-n2"), + }, + Queues: []*schedulingv1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + }, + arguments: framework.Arguments{ + "weight": 1, + }, + scoreHyperNodes: map[string][]*api.NodeInfo{ + "s1": nil, + "s2": nil, + }, + expected: map[string]float64{ + "s1": 100.0, + "s2": 0.0, + }, + }, + { + name: "Tasks in job rescheduled, score hyperNodes according to LCA hyperNode tier of the hyperNode and jobAllocatedHyperNode when hyperNodesInfo has one tier", + TestCommonStruct: uthelper.TestCommonStruct{ + Plugins: 
map[string]framework.PluginBuilder{PluginName: New}, + PodGroups: []*schedulingv1.PodGroup{ + util.BuildPodGroupWithNetWorkTopologies("pg1", "c1", "s1", "q1", 1, nil, schedulingv1.PodGroupInqueue, "hard", 1), + }, + HyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("s1", "s2"), + }, + HyperNodesMap: map[string]*api.HyperNodeInfo{ + "s1": api.NewHyperNodeInfo(api.BuildHyperNode("s1", 1, []api.MemberConfig{ + { + Name: "s1-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s1-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s2": api.NewHyperNodeInfo(api.BuildHyperNode("s2", 1, []api.MemberConfig{ + { + Name: "s2-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s2-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + }, + HyperNodes: map[string]sets.Set[string]{ + "s1": sets.New[string]("s1-n1", "s1-n2"), + "s2": sets.New[string]("s2-n1", "s2-n2"), + }, + Queues: []*schedulingv1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + }, + arguments: framework.Arguments{ + "weight": 1, + }, + scoreHyperNodes: map[string][]*api.NodeInfo{ + "s1": nil, + "s2": nil, + }, + expected: map[string]float64{ + "s1": 100.0, + "s2": 0.0, + }, + }, + { + name: "Tasks in job rescheduled, score hyperNodes according to LCA hyperNode tier of the hyperNode and jobAllocatedHyperNode with plugin weight 2", + TestCommonStruct: uthelper.TestCommonStruct{ + Plugins: map[string]framework.PluginBuilder{PluginName: New}, + PodGroups: []*schedulingv1.PodGroup{ + util.BuildPodGroupWithNetWorkTopologies("pg1", "c1", "s3", "q1", 1, nil, schedulingv1.PodGroupInqueue, "hard", 3), + }, + HyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("s3", "s4", "s5", "s6"), + 2: sets.New[string]("s1", "s2"), + 3: sets.New[string]("s0")}, + HyperNodesMap: map[string]*api.HyperNodeInfo{ + "s0": api.NewHyperNodeInfo(api.BuildHyperNode("s0", 3, 
[]api.MemberConfig{ + { + Name: "s1", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s2", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s1": api.NewHyperNodeInfo(api.BuildHyperNode("s1", 2, []api.MemberConfig{ + { + Name: "s3", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s4", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s2": api.NewHyperNodeInfo(api.BuildHyperNode("s2", 2, []api.MemberConfig{ + { + Name: "s5", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s6", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s3": api.NewHyperNodeInfo(api.BuildHyperNode("s3", 1, []api.MemberConfig{ + { + Name: "s3-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s3-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s4": api.NewHyperNodeInfo(api.BuildHyperNode("s4", 1, []api.MemberConfig{ + { + Name: "s4-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s4-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s5": api.NewHyperNodeInfo(api.BuildHyperNode("s5", 1, []api.MemberConfig{ + { + Name: "s5-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s5-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s6": api.NewHyperNodeInfo(api.BuildHyperNode("s6", 1, []api.MemberConfig{ + { + Name: "s6-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s6-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + }, + HyperNodes: map[string]sets.Set[string]{ + "s0": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2", "s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s1": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2"), + "s2": 
sets.New[string]("s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s3": sets.New[string]("s3-n1", "s3-n2"), + "s4": sets.New[string]("s4-n1", "s4-n2"), + "s5": sets.New[string]("s5-n1", "s5-n2"), + "s6": sets.New[string]("s6-n1", "s6-n2"), + }, + Queues: []*schedulingv1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + }, + arguments: framework.Arguments{ + "weight": 2, + }, + scoreHyperNodes: map[string][]*api.NodeInfo{ + "s3": nil, + "s4": nil, + "s5": nil, + }, + expected: map[string]float64{ + "s3": 200.0, + "s4": 100.0, + "s5": 0.0, + }, + }, + { + name: "Tasks in job rescheduled, score hyperNodes according to LCA hyperNode tier and task num of the hyperNode when there are at least two hyperNodes have max hyperNode tier score", + TestCommonStruct: uthelper.TestCommonStruct{ + Plugins: map[string]framework.PluginBuilder{PluginName: New}, + PodGroups: []*schedulingv1.PodGroup{ + util.BuildPodGroupWithNetWorkTopologies("pg1", "c1", "s1", "q1", 3, nil, schedulingv1.PodGroupInqueue, "hard", 3), + }, + Pods: []*v1.Pod{ + util.BuildPod("c1", "p1", "s3-n1", v1.PodRunning, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "master"}, nil), + util.BuildPod("c1", "p2", "s3-n2", v1.PodRunning, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil), + util.BuildPod("c1", "p3", "s4-n1", v1.PodRunning, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil), + util.BuildPod("c1", "p4", "", v1.PodPending, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil), + }, + HyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("s3", "s4", "s5", "s6"), + 2: sets.New[string]("s1", "s2"), + 3: sets.New[string]("s0")}, + HyperNodesMap: map[string]*api.HyperNodeInfo{ + "s0": api.NewHyperNodeInfo(api.BuildHyperNode("s0", 3, []api.MemberConfig{ + { + Name: "s1", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", 
+ }, + { + Name: "s2", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s1": api.NewHyperNodeInfo(api.BuildHyperNode("s1", 2, []api.MemberConfig{ + { + Name: "s3", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s4", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s2": api.NewHyperNodeInfo(api.BuildHyperNode("s2", 2, []api.MemberConfig{ + { + Name: "s5", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s6", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s3": api.NewHyperNodeInfo(api.BuildHyperNode("s3", 1, []api.MemberConfig{ + { + Name: "s3-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s3-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s4": api.NewHyperNodeInfo(api.BuildHyperNode("s4", 1, []api.MemberConfig{ + { + Name: "s4-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s4-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s5": api.NewHyperNodeInfo(api.BuildHyperNode("s5", 1, []api.MemberConfig{ + { + Name: "s5-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s5-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s6": api.NewHyperNodeInfo(api.BuildHyperNode("s6", 1, []api.MemberConfig{ + { + Name: "s6-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s6-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + }, + HyperNodes: map[string]sets.Set[string]{ + "s0": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2", "s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s1": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2"), + "s2": sets.New[string]("s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s3": sets.New[string]("s3-n1", "s3-n2"), + "s4": 
sets.New[string]("s4-n1", "s4-n2"), + "s5": sets.New[string]("s5-n1", "s5-n2"), + "s6": sets.New[string]("s6-n1", "s6-n2"), + }, + Nodes: []*corev1.Node{ + util.BuildNode("s3-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s3-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s4-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s4-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s5-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s5-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s6-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s6-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + }, + Queues: []*schedulingv1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + }, + arguments: framework.Arguments{ + "weight": 1, + }, + scoreHyperNodes: map[string][]*api.NodeInfo{ + "s3": nil, + "s4": nil, + "s5": nil, + }, + tasks: map[string]string{ + "task1": "s3-n1", + "task2": "s3-n2", + "task3": "s4-n1", + "test4": "", + }, + expected: map[string]float64{ + "s3": 55.0, + "s4": 52.5, + "s5": 0.0, + }, + }, + } + trueValue := true + plugins := map[string]framework.PluginBuilder{ + PluginName: New, + } + + for i, test := range tests { + test.Plugins = plugins + tiers := []conf.Tier{ + { + Plugins: []conf.PluginOption{ + { + Name: PluginName, + EnabledNetworkTopology: &trueValue, + Arguments: test.arguments, + }, + }, + }, + } + // create session + ssn := test.RegisterSession(tiers, nil) + defer test.Close() + + scores, err := 
ssn.HyperNodeOrderMapFn(parseJob(ssn.Jobs), test.scoreHyperNodes) + if err != nil { + t.Errorf("case%d: task %s has err %v", i, test.Name, err) + continue + } + hyperNodesScore := scores[PluginName] + for hypernode, expected := range test.expected { + if math.Abs(hyperNodesScore[hypernode]-expected) > eps { + t.Errorf("case%d: task %s on hypernode %s expect have score %v, but get %v", i+1, test.name, hypernode, expected, hyperNodesScore[hypernode]) + } + } + } +} + +func parseJob(jobInfoMap map[api.JobID]*api.JobInfo) *api.JobInfo { + for _, job := range jobInfoMap { + return job + } + return nil +} + +func TestNetworkTopologyAwareNodeScore(t *testing.T) { + tests := []struct { + name string + uthelper.TestCommonStruct + arguments framework.Arguments + scoreNodes []*api.NodeInfo + tasks map[string]string + expected map[string]float64 + }{ + { + name: "Tasks in job first scheduler, score all nodes zero", + TestCommonStruct: uthelper.TestCommonStruct{ + PodGroups: []*schedulingv1.PodGroup{ + util.BuildPodGroupWithNetWorkTopologies("pg1", "c1", "", "q1", 1, nil, schedulingv1.PodGroupInqueue, "hard", 3), + }, + Pods: []*v1.Pod{ + util.BuildPod("c1", "p4", "", v1.PodPending, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil), + }, + Nodes: []*v1.Node{ + util.BuildNode("s3-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s3-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s4-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s4-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s5-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s5-n2", api.BuildResourceList("2", "4Gi", 
[]api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s6-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s6-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + }, + Plugins: map[string]framework.PluginBuilder{PluginName: New}, + HyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("s3", "s4", "s5", "s6"), + 2: sets.New[string]("s1", "s2"), + 3: sets.New[string]("s0")}, + HyperNodesMap: map[string]*api.HyperNodeInfo{ + "s0": api.NewHyperNodeInfo(api.BuildHyperNode("s0", 3, []api.MemberConfig{ + { + Name: "s1", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s2", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s1": api.NewHyperNodeInfo(api.BuildHyperNode("s1", 2, []api.MemberConfig{ + { + Name: "s3", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s4", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s2": api.NewHyperNodeInfo(api.BuildHyperNode("s2", 2, []api.MemberConfig{ + { + Name: "s5", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s6", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s3": api.NewHyperNodeInfo(api.BuildHyperNode("s3", 1, []api.MemberConfig{ + { + Name: "s3-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s3-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s4": api.NewHyperNodeInfo(api.BuildHyperNode("s4", 1, []api.MemberConfig{ + { + Name: "s4-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s4-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s5": api.NewHyperNodeInfo(api.BuildHyperNode("s5", 1, []api.MemberConfig{ + { + Name: "s5-n1", + Type: 
topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s5-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s6": api.NewHyperNodeInfo(api.BuildHyperNode("s6", 1, []api.MemberConfig{ + { + Name: "s6-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s6-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + }, + HyperNodes: map[string]sets.Set[string]{ + "s0": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2", "s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s1": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2"), + "s2": sets.New[string]("s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s3": sets.New[string]("s3-n1", "s3-n2"), + "s4": sets.New[string]("s4-n1", "s4-n2"), + "s5": sets.New[string]("s5-n1", "s5-n2"), + "s6": sets.New[string]("s6-n1", "s6-n2"), + }, + Queues: []*schedulingv1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + }, + arguments: framework.Arguments{ + "weight": 1, + }, + scoreNodes: []*api.NodeInfo{ + { + Name: "s3-n1", + }, + { + Name: "s4-n1", + }, + { + Name: "s5-n1", + }, + }, + expected: map[string]float64{ + "s3-n1": 0.0, + "s4-n1": 0.0, + "s5-n1": 0.0, + }, + }, + { + name: "Tasks in job rescheduled, score zero when the hyperNode of node has empty LCA hyperNode with jobAllocatedHyperNode", + TestCommonStruct: uthelper.TestCommonStruct{ + PodGroups: []*schedulingv1.PodGroup{ + util.BuildPodGroupWithNetWorkTopologies("pg1", "c1", "s3", "q1", 1, nil, schedulingv1.PodGroupInqueue, "hard", 3), + }, + Pods: []*v1.Pod{ + util.BuildPod("c1", "p1", "s3-n1", v1.PodRunning, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "master"}, nil), + util.BuildPod("c1", "p2", "s3-n2", v1.PodRunning, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil), + util.BuildPod("c1", "p3", "", v1.PodPending, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": 
"worker"}, nil), + }, + Nodes: []*v1.Node{ + util.BuildNode("s3-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s3-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s4-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s4-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s5-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s5-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s6-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s6-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + }, + Plugins: map[string]framework.PluginBuilder{PluginName: New}, + HyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("s3", "s4", "s5", "s6"), + 2: sets.New[string]("s1", "s2"), + 3: sets.New[string]("s0")}, + HyperNodesMap: map[string]*api.HyperNodeInfo{ + "s0": api.NewHyperNodeInfo(api.BuildHyperNode("s0", 3, []api.MemberConfig{ + { + Name: "s1", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s1": api.NewHyperNodeInfo(api.BuildHyperNode("s1", 2, []api.MemberConfig{ + { + Name: "s3", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s4", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s2": api.NewHyperNodeInfo(api.BuildHyperNode("s2", 2, []api.MemberConfig{ + { + Name: "s5", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s6", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s3": 
api.NewHyperNodeInfo(api.BuildHyperNode("s3", 1, []api.MemberConfig{ + { + Name: "s3-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s3-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s4": api.NewHyperNodeInfo(api.BuildHyperNode("s4", 1, []api.MemberConfig{ + { + Name: "s4-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s4-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s5": api.NewHyperNodeInfo(api.BuildHyperNode("s5", 1, []api.MemberConfig{ + { + Name: "s5-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s5-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s6": api.NewHyperNodeInfo(api.BuildHyperNode("s6", 1, []api.MemberConfig{ + { + Name: "s6-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s6-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + }, + HyperNodes: map[string]sets.Set[string]{ + "s0": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2", "s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s1": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2"), + "s2": sets.New[string]("s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s3": sets.New[string]("s3-n1", "s3-n2"), + "s4": sets.New[string]("s4-n1", "s4-n2"), + "s5": sets.New[string]("s5-n1", "s5-n2"), + "s6": sets.New[string]("s6-n1", "s6-n2"), + }, + Queues: []*schedulingv1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + }, + arguments: framework.Arguments{ + "weight": 1, + }, + scoreNodes: []*api.NodeInfo{ + { + Name: "s5-n1", + }, + { + Name: "s6-n1", + }, + }, + expected: map[string]float64{ + "s5-n1": 0.0, + "s6-n1": 0.0, + }, + }, + { + name: "Tasks in job rescheduled, score nodes according to node hypernode LCA hyperNode tier", + TestCommonStruct: uthelper.TestCommonStruct{ + Plugins: map[string]framework.PluginBuilder{PluginName: New}, + PodGroups: 
[]*schedulingv1.PodGroup{ + util.BuildPodGroupWithNetWorkTopologies("pg1", "c1", "s3", "q1", 1, nil, schedulingv1.PodGroupInqueue, "hard", 3), + }, + Pods: []*v1.Pod{ + util.BuildPod("c1", "p1", "s3-n1", v1.PodRunning, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "master"}, nil), + util.BuildPod("c1", "p2", "s3-n2", v1.PodRunning, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil), + util.BuildPod("c1", "p3", "", v1.PodPending, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil), + }, + Nodes: []*v1.Node{ + util.BuildNode("s3-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s3-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s4-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s4-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s5-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s5-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s6-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s6-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + }, + HyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("s3", "s4", "s5", "s6"), + 2: sets.New[string]("s1", "s2"), + 3: sets.New[string]("s0")}, + HyperNodesMap: map[string]*api.HyperNodeInfo{ + "s0": api.NewHyperNodeInfo(api.BuildHyperNode("s0", 3, []api.MemberConfig{ + { + Name: "s1", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + 
Name: "s2", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s1": api.NewHyperNodeInfo(api.BuildHyperNode("s1", 2, []api.MemberConfig{ + { + Name: "s3", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s4", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s2": api.NewHyperNodeInfo(api.BuildHyperNode("s2", 2, []api.MemberConfig{ + { + Name: "s5", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s6", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s3": api.NewHyperNodeInfo(api.BuildHyperNode("s3", 1, []api.MemberConfig{ + { + Name: "s3-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s3-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s4": api.NewHyperNodeInfo(api.BuildHyperNode("s4", 1, []api.MemberConfig{ + { + Name: "s4-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s4-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s5": api.NewHyperNodeInfo(api.BuildHyperNode("s5", 1, []api.MemberConfig{ + { + Name: "s5-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s5-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s6": api.NewHyperNodeInfo(api.BuildHyperNode("s6", 1, []api.MemberConfig{ + { + Name: "s6-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s6-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + }, + HyperNodes: map[string]sets.Set[string]{ + "s0": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2", "s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s1": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2"), + "s2": sets.New[string]("s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s3": sets.New[string]("s3-n1", "s3-n2"), + "s4": 
sets.New[string]("s4-n1", "s4-n2"), + "s5": sets.New[string]("s5-n1", "s5-n2"), + "s6": sets.New[string]("s6-n1", "s6-n2"), + }, + Queues: []*schedulingv1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + }, + arguments: framework.Arguments{ + "weight": 1, + }, + scoreNodes: []*api.NodeInfo{ + { + Name: "s3-n1", + }, + { + Name: "s4-n1", + }, + { + Name: "s5-n1", + }, + }, + expected: map[string]float64{ + "s3-n1": 100.0, + "s4-n1": 50.0, + "s5-n1": 0.0, + }, + }, + { + name: "Tasks in job rescheduled, score hyperNodes according to node LCA hyperNode tier of the hyperNode and jobAllocatedHyperNode when hyperNodesInfo has two tier", + TestCommonStruct: uthelper.TestCommonStruct{ + Plugins: map[string]framework.PluginBuilder{PluginName: New}, + PodGroups: []*schedulingv1.PodGroup{ + util.BuildPodGroupWithNetWorkTopologies("pg1", "c1", "s1", "q1", 1, nil, schedulingv1.PodGroupInqueue, "hard", 2), + }, + Pods: []*v1.Pod{ + util.BuildPod("c1", "p1", "s1-n1", v1.PodRunning, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "master"}, nil), + util.BuildPod("c1", "p2", "", v1.PodPending, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil), + }, + Nodes: []*v1.Node{ + util.BuildNode("s1-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s1-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s2-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s2-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + }, + HyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("s1", "s2"), + 2: sets.New[string]("s0")}, + HyperNodesMap: map[string]*api.HyperNodeInfo{ + "s0": api.NewHyperNodeInfo(api.BuildHyperNode("s0", 2, []api.MemberConfig{ + { + Name: "s1", 
+ Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s2", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s1": api.NewHyperNodeInfo(api.BuildHyperNode("s1", 1, []api.MemberConfig{ + { + Name: "s1-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s1-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s2": api.NewHyperNodeInfo(api.BuildHyperNode("s2", 1, []api.MemberConfig{ + { + Name: "s2-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s2-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + }, + HyperNodes: map[string]sets.Set[string]{ + "s0": sets.New[string]("s1-n1", "s1-n2", "s2-n1", "s2-n2"), + "s1": sets.New[string]("s1-n1", "s1-n2"), + "s2": sets.New[string]("s2-n1", "s2-n2"), + }, + Queues: []*schedulingv1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + }, + arguments: framework.Arguments{ + "weight": 1, + }, + scoreNodes: []*api.NodeInfo{ + { + Name: "s1-n1", + }, + { + Name: "s2-n1", + }, + }, + expected: map[string]float64{ + "s1-n1": 100.0, + "s2-n1": 0.0, + }, + }, + { + name: "Tasks in job rescheduled, score hyperNodes according to node LCA hyperNode tier of the hyperNode and jobAllocatedHyperNode when hyperNodesInfo has one tier", + TestCommonStruct: uthelper.TestCommonStruct{ + Plugins: map[string]framework.PluginBuilder{PluginName: New}, + PodGroups: []*schedulingv1.PodGroup{ + util.BuildPodGroupWithNetWorkTopologies("pg1", "c1", "s1", "q1", 1, nil, schedulingv1.PodGroupInqueue, "hard", 1), + }, + Pods: []*v1.Pod{ + util.BuildPod("c1", "p1", "s1-n1", v1.PodRunning, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "master"}, nil), + util.BuildPod("c1", "p2", "", v1.PodPending, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil), + }, + Nodes: []*v1.Node{ + util.BuildNode("s1-n1", 
api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s1-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s2-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s2-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + }, + HyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("s1", "s2"), + }, + HyperNodesMap: map[string]*api.HyperNodeInfo{ + "s1": api.NewHyperNodeInfo(api.BuildHyperNode("s1", 1, []api.MemberConfig{ + { + Name: "s1-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s1-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s2": api.NewHyperNodeInfo(api.BuildHyperNode("s2", 1, []api.MemberConfig{ + { + Name: "s2-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s2-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + }, + HyperNodes: map[string]sets.Set[string]{ + "s1": sets.New[string]("s1-n1", "s1-n2"), + "s2": sets.New[string]("s2-n1", "s2-n2"), + }, + Queues: []*schedulingv1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + }, + arguments: framework.Arguments{ + "weight": 1, + }, + scoreNodes: []*api.NodeInfo{ + { + Name: "s1-n1", + }, + { + Name: "s2-n1", + }, + }, + expected: map[string]float64{ + "s1-n1": 100.0, + "s2-n1": 0.0, + }, + }, + { + name: "Tasks in job rescheduled, score hyperNodes according to node LCA hyperNode tier of the hyperNode and jobAllocatedHyperNode with plugin weight 2", + TestCommonStruct: uthelper.TestCommonStruct{ + Plugins: map[string]framework.PluginBuilder{PluginName: New}, + PodGroups: []*schedulingv1.PodGroup{ + util.BuildPodGroupWithNetWorkTopologies("pg1", "c1", "s3", "q1", 1, nil, schedulingv1.PodGroupInqueue, "hard", 3), + }, + Pods: 
[]*v1.Pod{ + util.BuildPod("c1", "p1", "s3-n1", v1.PodRunning, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "master"}, nil), + util.BuildPod("c1", "p2", "s3-n2", v1.PodRunning, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil), + util.BuildPod("c1", "p3", "", v1.PodPending, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil), + }, + Nodes: []*v1.Node{ + util.BuildNode("s3-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s3-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s4-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s4-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s5-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s5-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s6-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s6-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + }, + HyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("s3", "s4", "s5", "s6"), + 2: sets.New[string]("s1", "s2"), + 3: sets.New[string]("s0")}, + HyperNodesMap: map[string]*api.HyperNodeInfo{ + "s0": api.NewHyperNodeInfo(api.BuildHyperNode("s0", 3, []api.MemberConfig{ + { + Name: "s1", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s2", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s1": api.NewHyperNodeInfo(api.BuildHyperNode("s1", 2, 
[]api.MemberConfig{ + { + Name: "s3", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s4", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s2": api.NewHyperNodeInfo(api.BuildHyperNode("s2", 2, []api.MemberConfig{ + { + Name: "s5", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s6", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s3": api.NewHyperNodeInfo(api.BuildHyperNode("s3", 1, []api.MemberConfig{ + { + Name: "s3-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s3-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s4": api.NewHyperNodeInfo(api.BuildHyperNode("s4", 1, []api.MemberConfig{ + { + Name: "s4-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s4-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s5": api.NewHyperNodeInfo(api.BuildHyperNode("s5", 1, []api.MemberConfig{ + { + Name: "s5-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s5-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s6": api.NewHyperNodeInfo(api.BuildHyperNode("s6", 1, []api.MemberConfig{ + { + Name: "s6-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s6-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + }, + HyperNodes: map[string]sets.Set[string]{ + "s0": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2", "s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s1": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2"), + "s2": sets.New[string]("s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s3": sets.New[string]("s3-n1", "s3-n2"), + "s4": sets.New[string]("s4-n1", "s4-n2"), + "s5": sets.New[string]("s5-n1", "s5-n2"), + "s6": sets.New[string]("s6-n1", "s6-n2"), + }, + Queues: []*schedulingv1.Queue{ + 
util.BuildQueue("q1", 1, nil), + }, + }, + arguments: framework.Arguments{ + "weight": 2, + }, + scoreNodes: []*api.NodeInfo{ + { + Name: "s3-n1", + }, + { + Name: "s4-n1", + }, + { + Name: "s5-n1", + }, + }, + expected: map[string]float64{ + "s3-n1": 200.0, + "s4-n1": 100.0, + "s5-n1": 0.0, + }, + }, + { + name: "Tasks in job rescheduled, score hyperNodes according to node LCA hyperNode tier and task num of the hyperNode when there are at least two hyperNodes have max hyperNode tier score", + TestCommonStruct: uthelper.TestCommonStruct{ + Plugins: map[string]framework.PluginBuilder{PluginName: New}, + PodGroups: []*schedulingv1.PodGroup{ + util.BuildPodGroupWithNetWorkTopologies("pg1", "c1", "s1", "q1", 3, nil, schedulingv1.PodGroupInqueue, "hard", 3), + }, + Pods: []*v1.Pod{ + util.BuildPod("c1", "p1", "s3-n1", v1.PodRunning, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "master"}, nil), + util.BuildPod("c1", "p2", "s3-n2", v1.PodRunning, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil), + util.BuildPod("c1", "p3", "s4-n1", v1.PodRunning, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil), + util.BuildPod("c1", "p4", "", v1.PodPending, api.BuildResourceList("2", "4G"), "pg1", map[string]string{"volcano.sh/task-spec": "worker"}, nil), + }, + Nodes: []*v1.Node{ + util.BuildNode("s3-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s3-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s4-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s4-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s5-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", 
Value: "10"}}...), nil), + util.BuildNode("s5-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s6-n1", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + util.BuildNode("s6-n2", api.BuildResourceList("2", "4Gi", []api.ScalarResource{{Name: "pods", Value: "10"}}...), nil), + }, + HyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("s3", "s4", "s5", "s6"), + 2: sets.New[string]("s1", "s2"), + 3: sets.New[string]("s0")}, + HyperNodesMap: map[string]*api.HyperNodeInfo{ + "s0": api.NewHyperNodeInfo(api.BuildHyperNode("s0", 3, []api.MemberConfig{ + { + Name: "s1", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s2", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s1": api.NewHyperNodeInfo(api.BuildHyperNode("s1", 2, []api.MemberConfig{ + { + Name: "s3", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s4", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s2": api.NewHyperNodeInfo(api.BuildHyperNode("s2", 2, []api.MemberConfig{ + { + Name: "s5", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + { + Name: "s6", + Type: topologyv1alpha1.MemberTypeHyperNode, + Selector: "exact", + }, + })), + "s3": api.NewHyperNodeInfo(api.BuildHyperNode("s3", 1, []api.MemberConfig{ + { + Name: "s3-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s3-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s4": api.NewHyperNodeInfo(api.BuildHyperNode("s4", 1, []api.MemberConfig{ + { + Name: "s4-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s4-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s5": api.NewHyperNodeInfo(api.BuildHyperNode("s5", 1, []api.MemberConfig{ + { + 
Name: "s5-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s5-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + "s6": api.NewHyperNodeInfo(api.BuildHyperNode("s6", 1, []api.MemberConfig{ + { + Name: "s6-n1", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + { + Name: "s6-n2", + Type: topologyv1alpha1.MemberTypeNode, + Selector: "exact", + }, + })), + }, + HyperNodes: map[string]sets.Set[string]{ + "s0": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2", "s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s1": sets.New[string]("s3-n1", "s3-n2", "s4-n1", "s4-n2"), + "s2": sets.New[string]("s5-n1", "s5-n2", "s6-n1", "s6-n2"), + "s3": sets.New[string]("s3-n1", "s3-n2"), + "s4": sets.New[string]("s4-n1", "s4-n2"), + "s5": sets.New[string]("s5-n1", "s5-n2"), + "s6": sets.New[string]("s6-n1", "s6-n2"), + }, + Queues: []*schedulingv1.Queue{ + util.BuildQueue("q1", 1, nil), + }, + }, + arguments: framework.Arguments{ + "weight": 1, + }, + scoreNodes: []*api.NodeInfo{ + { + Name: "s3-n1", + }, + { + Name: "s4-n1", + }, + { + Name: "s5-n1", + }, + }, + tasks: map[string]string{ + "task1": "s3-n1", + "task2": "s3-n2", + "task3": "s4-n1", + "test4": "", + }, + expected: map[string]float64{ + "s3-n1": 55.0, + "s4-n1": 52.5, + "s5-n1": 0.0, + }, + }, + } + trueValue := true + plugins := map[string]framework.PluginBuilder{ + PluginName: New, + } + + for i, test := range tests { + test.Plugins = plugins + tiers := []conf.Tier{ + { + Plugins: []conf.PluginOption{ + { + Name: PluginName, + EnabledNetworkTopology: &trueValue, + EnabledNodeOrder: &trueValue, + Arguments: test.arguments, + }, + }, + }, + } + // create session + ssn := test.RegisterSession(tiers, nil) + defer test.Close() + + nodeScores, err := ssn.BatchNodeOrderFn(parseTask(ssn.Jobs), test.scoreNodes) + if err != nil { + t.Errorf("case%d: task %s has err %v", i, test.Name, err) + continue + } + for node, expected := range test.expected { + if 
math.Abs(nodeScores[node]-expected) > eps { + t.Errorf("case%d: task %s on node %s expect have score %v, but get %v", i+1, test.name, node, expected, nodeScores[node]) + } + } + } +} + +func parseTask(jobInfoMap map[api.JobID]*api.JobInfo) *api.TaskInfo { + var job *api.JobInfo + for _, jobInfo := range jobInfoMap { + job = jobInfo + } + if job == nil { + return nil + } + for _, task := range job.Tasks { + if task.Pod.Status.Phase == v1.PodPending { + return task + } + } + return nil +} diff --git a/pkg/scheduler/uthelper/helper.go b/pkg/scheduler/uthelper/helper.go index f49f08fb2e..be1d15e703 100644 --- a/pkg/scheduler/uthelper/helper.go +++ b/pkg/scheduler/uthelper/helper.go @@ -56,6 +56,7 @@ type TestCommonStruct struct { HyperNodesSetByTier map[int]sets.Set[string] HyperNodes map[string]sets.Set[string] HyperNodesMap map[string]*api.HyperNodeInfo + RealNodesList map[string][]*api.NodeInfo HyperNodesReadyToSchedule bool PodGroups []*vcapisv1.PodGroup Queues []*vcapisv1.Queue diff --git a/pkg/scheduler/util/scheduler_helper.go b/pkg/scheduler/util/scheduler_helper.go index af0bac1fd6..7b09f909c8 100644 --- a/pkg/scheduler/util/scheduler_helper.go +++ b/pkg/scheduler/util/scheduler_helper.go @@ -295,3 +295,35 @@ func ConvertRes2ResList(res *api.Resource) v1.ResourceList { } return rl } + +// Find the hyperNode to which the node belongs. +func FindHyperNodeOfNode(nodeName string, hyperNodes map[string][]*api.NodeInfo, hyperNodesTiers []int, hyperNodesSetByTier map[int]sets.Set[string]) string { + if len(hyperNodesTiers) == 0 { + return "" + } + nodeTypeHyperNodes := hyperNodesSetByTier[hyperNodesTiers[0]] + for hyperNode := range nodeTypeHyperNodes { + nodes := hyperNodes[hyperNode] + for _, node := range nodes { + if node.Name == nodeName { + return hyperNode + } + } + } + return "" +} + +// FindJobTaskNumOfHyperNode find out the number of tasks in the job that belong to the hyperNode. 
+func FindJobTaskNumOfHyperNode(hyperNodeName string, job *api.JobInfo, hyperNodes map[string][]*api.NodeInfo) int { + nodes := hyperNodes[hyperNodeName] + taskCount := 0 + for _, task := range job.Tasks { + for _, node := range nodes { + if node.Name == task.NodeName { + taskCount++ + break + } + } + } + return taskCount +} diff --git a/pkg/scheduler/util/scheduler_helper_test.go b/pkg/scheduler/util/scheduler_helper_test.go index 1bd19788f3..8fb913e1c4 100644 --- a/pkg/scheduler/util/scheduler_helper_test.go +++ b/pkg/scheduler/util/scheduler_helper_test.go @@ -239,3 +239,221 @@ func TestGetHyperNodeList(t *testing.T) { }) } } + +func TestFindJobTaskNumOfHyperNode(t *testing.T) { + testCases := []struct { + name string + hyperNodeName string + tasks map[string]string + hyperNodes map[string][]*api.NodeInfo + expectedRes int + }{ + { + name: "Normal case with matching tasks", + hyperNodeName: "hyperNode1", + tasks: map[string]string{ + "task1": "node1", + "task2": "node2", + }, + hyperNodes: map[string][]*api.NodeInfo{ + "hyperNode1": { + {Name: "node1"}, + {Name: "node3"}, + }, + }, + expectedRes: 1, + }, + { + name: "No matching tasks case", + hyperNodeName: "hyperNode1", + tasks: map[string]string{ + "task1": "node4", + "task2": "node5", + }, + hyperNodes: map[string][]*api.NodeInfo{ + "hyperNode1": { + {Name: "node1"}, + {Name: "node3"}, + }, + }, + expectedRes: 0, + }, + { + name: "Empty job tasks map case", + hyperNodeName: "hyperNode1", + tasks: map[string]string{}, + hyperNodes: map[string][]*api.NodeInfo{ + "hyperNode1": { + {Name: "node1"}, + {Name: "node3"}, + }, + }, + expectedRes: 0, + }, + { + name: "Empty nodes list for hyperNode case", + hyperNodeName: "hyperNode2", + tasks: map[string]string{ + "task1": "node1", + "task2": "node2", + }, + hyperNodes: map[string][]*api.NodeInfo{ + "hyperNode2": {}, + }, + expectedRes: 0, + }, + { + name: "Tasks with duplicate match in multiple hyperNodes", + hyperNodeName: "hyperNode1", + tasks: map[string]string{ 
+ "task1": "node1", + }, + hyperNodes: map[string][]*api.NodeInfo{ + "hyperNode1": { + {Name: "node1"}, + }, + "hyperNode2": { + {Name: "node1"}, + }, + }, + expectedRes: 1, + }, + } + + job := &api.JobInfo{ + Name: "test-job", + PodGroup: &api.PodGroup{}, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + job.Tasks = make(map[api.TaskID]*api.TaskInfo) + for name, node := range tc.tasks { + taskInfo := &api.TaskInfo{ + UID: api.TaskID(name), + Name: name, + Job: job.UID, + } + taskInfo.NodeName = node + job.Tasks[taskInfo.UID] = taskInfo + } + result := FindJobTaskNumOfHyperNode(tc.hyperNodeName, job, tc.hyperNodes) + if result != tc.expectedRes { + t.Errorf("Test case '%s' failed. Expected result: %d, but got: %d", + tc.name, tc.expectedRes, result) + } + }) + } +} + +func TestFindHyperNodeOfNode(t *testing.T) { + testCases := []struct { + name string + nodeName string + hyperNodes map[string][]*api.NodeInfo + hyperNodesTiers []int + hyperNodesSetByTier map[int]sets.Set[string] + expectedRes string + }{ + { + name: "Normal case with matching node", + nodeName: "node1", + hyperNodes: map[string][]*api.NodeInfo{ + "hyperNode1": { + {Name: "node1"}, + {Name: "node2"}, + }, + "hyperNode2": { + {Name: "node3"}, + {Name: "node4"}, + }, + }, + hyperNodesTiers: []int{1, 2}, + hyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("hyperNode1", "hyperNode2"), + 2: sets.New[string]("hyperNode3", "hyperNode4"), + }, + expectedRes: "hyperNode1", + }, + { + name: "No matching node case", + nodeName: "node5", + hyperNodes: map[string][]*api.NodeInfo{ + "hyperNode1": { + {Name: "node1"}, + {Name: "node2"}, + }, + "hyperNode2": { + {Name: "node3"}, + {Name: "node4"}, + }, + }, + hyperNodesTiers: []int{1, 2}, + hyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("hyperNode1", "hyperNode2"), + 2: sets.New[string]("hyperNode3", "hyperNode4"), + }, + expectedRes: "", + }, + { + name: "Empty hyperNodes map case", + nodeName: 
"node1", + hyperNodes: map[string][]*api.NodeInfo{}, + hyperNodesTiers: []int{1, 2}, + hyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("hyperNode1", "hyperNode2"), + 2: sets.New[string]("hyperNode3", "hyperNode4"), + }, + expectedRes: "", + }, + { + name: "Empty hyperNodesTiers case", + nodeName: "node1", + hyperNodes: map[string][]*api.NodeInfo{ + "hyperNode1": { + {Name: "node1"}, + {Name: "node2"}, + }, + "hyperNode2": { + {Name: "node3"}, + {Name: "node4"}, + }, + }, + hyperNodesTiers: []int{}, + hyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("hyperNode1", "hyperNode2"), + 2: sets.New[string]("hyperNode3", "hyperNode4"), + }, + expectedRes: "", + }, + { + name: "hyperNodesSetByTier does not contain the tier", + nodeName: "node1", + hyperNodes: map[string][]*api.NodeInfo{ + "hyperNode1": { + {Name: "node1"}, + {Name: "node2"}, + }, + "hyperNode2": { + {Name: "node3"}, + {Name: "node4"}, + }, + }, + hyperNodesTiers: []int{3, 4}, + hyperNodesSetByTier: map[int]sets.Set[string]{ + 1: sets.New[string]("hyperNode1", "hyperNode2"), + 2: sets.New[string]("hyperNode3", "hyperNode4"), + }, + expectedRes: "", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := FindHyperNodeOfNode(tc.nodeName, tc.hyperNodes, tc.hyperNodesTiers, tc.hyperNodesSetByTier) + if result != tc.expectedRes { + t.Errorf("Test case '%s' failed. Expected result: %s, but got: %s", + tc.name, tc.expectedRes, result) + } + }) + } +}