diff --git a/bcs-services/bcs-cluster-manager/internal/cloudprovider/google/tasks/updateDesiredNodes.go b/bcs-services/bcs-cluster-manager/internal/cloudprovider/google/tasks/updateDesiredNodes.go index 953030a590..c9e67ea109 100644 --- a/bcs-services/bcs-cluster-manager/internal/cloudprovider/google/tasks/updateDesiredNodes.go +++ b/bcs-services/bcs-cluster-manager/internal/cloudprovider/google/tasks/updateDesiredNodes.go @@ -251,14 +251,25 @@ func recordClusterInstanceToDB(ctx context.Context, state *cloudprovider.TaskSta state.Task.CommonParams = make(map[string]string) } - if len(instancesNames) > 0 { - state.Task.CommonParams[cloudprovider.SuccessNodeIDsKey.String()] = strings.Join(instancesNames, ",") - state.Task.CommonParams[cloudprovider.NodeNamesKey.String()] = strings.Join(instancesNames, ",") - state.Task.CommonParams[cloudprovider.NodeIDsKey.String()] = strings.Join(instancesNames, ",") + successIns, failureIns, err := checkInstance(client, instancesNames) + if err != nil { + blog.Errorf("recordClusterInstanceToDB[%s] checkInstance failed, %s", taskID, err.Error()) + } + + if len(successIns) > 0 { + state.Task.CommonParams[cloudprovider.SuccessNodeIDsKey.String()] = strings.Join(successIns, ",") + state.Task.CommonParams[cloudprovider.NodeNamesKey.String()] = strings.Join(successIns, ",") + state.Task.CommonParams[cloudprovider.NodeIDsKey.String()] = strings.Join(successIns, ",") + } + + if len(failureIns) > 0 { + blog.Infof("recordClusterInstanceToDB[%s] returnGkeInstancesAndCleanNodes %+v", taskID, failureIns) + _ = returnGkeInstancesAndCleanNodes(ctx, info, failureIns) + state.Task.CommonParams[cloudprovider.FailedClusterNodeIDsKey.String()] = strings.Join(failureIns, ",") } // record successNodes to cluster manager DB - nodeIPs, err := transInstancesToNode(ctx, instancesNames, info) + nodeIPs, err := transInstancesToNode(ctx, successIns, info) if err != nil { blog.Errorf("recordClusterInstanceToDB[%s] failed: %v", taskID, err) } @@ -271,14 +282,15 @@ func recordClusterInstanceToDB(ctx context.Context, state *cloudprovider.TaskSta return nil } -func checkInstance(client *api.ComputeServiceClient, ids []string) error { +func checkInstance(client *api.ComputeServiceClient, ids []string) ([]string, []string, error) { + successIns, failureIns := make([]string, 0), make([]string, 0) timeCtx, cancel := context.WithTimeout(context.TODO(), 5*time.Minute) defer cancel() err := loop.LoopDoFunc(timeCtx, func() error { insList, err := client.ListZoneInstanceWithFilter(context.Background(), api.InstanceNameFilter(ids)) if err != nil { blog.Errorf("checkInstance ListZoneInstanceWithFilter failed, %s", err.Error()) - return err + return nil } // check response data @@ -294,15 +306,25 @@ func checkInstance(client *api.ComputeServiceClient, ids []string) error { blog.Warnf("checkInstance[%s] IP is still not distributed", in.Name) return nil } + if !utils.StringInSlice(in.Name, successIns) { + successIns = append(successIns, in.Name) + } } return loop.EndLoop }) + + for _, n := range ids { + if !utils.StringInSlice(n, successIns) { + failureIns = append(failureIns, n) + } + } + if err != nil { - return err + return successIns, failureIns, err } - return nil + return successIns, failureIns, nil } // transInstancesToNode record success nodes to cm DB @@ -314,17 +336,6 @@ func transInstancesToNode(ctx context.Context, instanceNames []string, info *clo nodeIPs = make([]string, 0) err error ) - client, err := api.NewComputeServiceClient(info.CmOption) - if err != nil { - blog.Errorf("transInstanceIDsToNodes create ComputeServiceClient failed, %s", err.Error()) - return nil, err - } - - err = checkInstance(client, instanceNames) - if err != nil { - blog.Errorf("transInstanceIDsToNodes checkInstance failed, %s", err.Error()) - return nil, err - } taskID := cloudprovider.GetTaskIDFromContext(ctx) err = retry.Do(func() error { @@ -535,7 +546,9 @@ func CheckClusterNodesStatusTask(taskID string, stepName string) error { state.Task.CommonParams[cloudprovider.SuccessClusterNodeIDsKey.String()] = strings.Join(successInstances, ",") } if len(failureInstances) > 0 { - state.Task.CommonParams[cloudprovider.FailedClusterNodeIDsKey.String()] = strings.Join(failureInstances, ",") + preFailureIns := state.Task.CommonParams[cloudprovider.FailedClusterNodeIDsKey.String()] + state.Task.CommonParams[cloudprovider.FailedClusterNodeIDsKey.String()] = + strings.Join(failureInstances, ",") + "," + preFailureIns } // successInstance ip list