clusterStart: sequential and ordered node starts & nodeCreate: do not copy status

pull/467/head
iwilltry42 4 years ago
parent b2162b1618
commit 1c61130fdf
No known key found for this signature in database
GPG Key ID: 7BA57AD1CFF16110
  1. cmd/node/nodeCreate.go (2 lines changed)
  2. pkg/client/cluster.go (139 lines changed)
  3. pkg/client/node.go (24 lines changed)
  4. pkg/types/types.go (7 lines changed)

@ -50,7 +50,7 @@ func NewCmdNodeCreate() *cobra.Command {
nodes, cluster := parseCreateNodeCmd(cmd, args)
if err := k3dc.NodeAddToClusterMulti(cmd.Context(), runtimes.SelectedRuntime, nodes, cluster, createNodeOpts); err != nil {
log.Errorf("Failed to add nodes to cluster '%s'", cluster.Name)
log.Errorln(err)
log.Fatalln(err)
}
},
}

@ -802,97 +802,106 @@ func generateNodeName(cluster string, role k3d.Role, suffix int) string {
func ClusterStart(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Cluster, startClusterOpts types.ClusterStartOpts) error {
log.Infof("Starting cluster '%s'", cluster.Name)
start := time.Now()
if startClusterOpts.Timeout > 0*time.Second {
var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ctx, startClusterOpts.Timeout)
defer cancel()
}
/*
* Init Node
*/
// sort the nodes into categories
var initNode *k3d.Node
var servers []*k3d.Node
var agents []*k3d.Node
var aux []*k3d.Node
for _, n := range cluster.Nodes {
if n.Role == k3d.ServerRole && n.ServerOpts.IsInit {
if err := NodeStart(ctx, runtime, n, k3d.NodeStartOpts{
Wait: true, // always wait for the init node
NodeHooks: startClusterOpts.NodeHooks,
}); err != nil {
return fmt.Errorf("Failed to start initializing server node: %+v", err)
if n.Role == k3d.ServerRole {
if n.ServerOpts.IsInit {
initNode = n
continue
}
break
servers = append(servers, n)
} else if n.Role == k3d.AgentRole {
agents = append(agents, n)
} else {
aux = append(aux, n)
}
}
log.Infoln("Servers before sort:")
for i, n := range servers {
log.Infof("Server %d - %s", i, n.Name)
}
sort.Slice(servers, func(i, j int) bool {
return servers[i].Name < servers[j].Name
})
log.Infoln("Servers after sort:")
for i, n := range servers {
log.Infof("Server %d - %s", i, n.Name)
}
/*
* Other Nodes
* Init Node
*/
failed := 0
var serverlb *k3d.Node
for _, node := range cluster.Nodes {
// skip the LB, because we want to start it last
if node.Role == k3d.LoadBalancerRole {
serverlb = node
continue
if initNode != nil {
log.Infoln("Starting the initializing server...")
if err := NodeStart(ctx, runtime, initNode, k3d.NodeStartOpts{
Wait: true, // always wait for the init node
NodeHooks: startClusterOpts.NodeHooks,
ReadyLogMessage: "Running kube-apiserver", // initNode means, that we're using etcd -> this will need quorum, so "k3s is up and running" won't happen right now
}); err != nil {
return fmt.Errorf("Failed to start initializing server node: %+v", err)
}
}
// skip init node here, as it should be running already
if node == cluster.InitNode || node.ServerOpts.IsInit {
continue
/*
* Server Nodes
*/
log.Infoln("Starting servers...")
nodeStartOpts := k3d.NodeStartOpts{
Wait: true,
NodeHooks: startClusterOpts.NodeHooks,
}
for _, serverNode := range servers {
if err := NodeStart(ctx, runtime, serverNode, nodeStartOpts); err != nil {
return fmt.Errorf("Failed to start server %s: %+v", serverNode.Name, err)
}
}
// check if node is running already to avoid waiting forever when checking for the node log message
if !node.State.Running {
nodeStartOpts := k3d.NodeStartOpts{
NodeHooks: startClusterOpts.NodeHooks,
}
if node.Role == k3d.ServerRole && startClusterOpts.WaitForServer {
nodeStartOpts.Wait = true
}
/*
* Agent Nodes
*/
// start node
if err := NodeStart(ctx, runtime, node, nodeStartOpts); err != nil {
log.Warningf("Failed to start node '%s': Try to start it manually", node.Name)
failed++
continue
}
failedAgents := 0
} else {
log.Infof("Node '%s' already running", node.Name)
log.Infoln("Starting agents...")
for _, agentNode := range agents {
if err := NodeStart(ctx, runtime, agentNode, nodeStartOpts); err != nil {
log.Warnf("Failed to start agent %s: %+v", agentNode.Name, err)
failedAgents++
}
}
// start serverlb
if serverlb != nil {
if !serverlb.State.Running {
log.Debugln("Starting serverlb...")
if err := runtime.StartNode(ctx, serverlb); err != nil { // FIXME: we could run into a nullpointer exception here
log.Warningf("Failed to start serverlb '%s' (try to start it manually): %+v", serverlb.Name, err)
failed++
}
// TODO: avoid `level=fatal msg="starting kubernetes: preparing server: post join: a configuration change is already in progress (5)"`
// ... by scanning for this line in logs and restarting the container in case it appears
log.Debugf("Starting to wait for loadbalancer node '%s'", serverlb.Name)
readyLogMessage := k3d.ReadyLogMessageByRole[k3d.LoadBalancerRole]
if readyLogMessage != "" {
if err := NodeWaitForLogMessage(ctx, runtime, serverlb, readyLogMessage, start); err != nil {
return fmt.Errorf("Loadbalancer '%s' failed to get ready: %+v", serverlb.Name, err)
}
} else {
log.Warnf("ClusterStart: Set to wait for node %s to be ready, but there's no target log message defined", serverlb.Name)
}
} else {
log.Infof("Serverlb '%s' already running", serverlb.Name)
/*
* Auxiliary/Helper Nodes
*/
log.Infoln("Starting helpers...")
failedHelpers := 0
for _, helperNode := range aux {
nodeStartOpts := k3d.NodeStartOpts{}
if helperNode.Role == k3d.LoadBalancerRole {
nodeStartOpts.Wait = true
}
if err := NodeStart(ctx, runtime, helperNode, nodeStartOpts); err != nil {
log.Warnf("Failed to start helper %s: %+v", helperNode.Name, err)
failedHelpers++
}
}
if failed > 0 {
return fmt.Errorf("Failed to start %d nodes: Try to start them manually", failed)
if failedAgents+failedHelpers > 0 {
log.Warnf("%d non-critical (agent or helper) nodes failed to start. You may want to start them manually.", failedAgents+failedHelpers)
}
return nil
}

@ -129,6 +129,10 @@ func NodeAddToCluster(ctx context.Context, runtime runtimes.Runtime, node *k3d.N
}
}
// clear status fields
node.State.Running = false
node.State.Status = ""
if err := NodeRun(ctx, runtime, node, k3d.NodeCreateOpts{}); err != nil {
return err
}
@ -233,6 +237,14 @@ func NodeRun(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, node
// NodeStart starts an existing node
func NodeStart(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, nodeStartOpts k3d.NodeStartOpts) error {
// return early, if the node is already running
if node.State.Running {
log.Infof("Node %s is already running", node.Name)
return nil
}
// execute lifecycle hook actions
for _, hook := range nodeStartOpts.NodeHooks {
if hook.Stage == k3d.LifecycleStagePreStart {
log.Tracef("Node %s: Executing preStartAction '%s'", node.Name, reflect.TypeOf(hook))
@ -241,6 +253,8 @@ func NodeStart(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, no
}
}
}
// start the node
log.Tracef("Starting node '%s'", node.Name)
startTime := time.Now()
@ -250,10 +264,12 @@ func NodeStart(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, no
}
if nodeStartOpts.Wait {
log.Debugf("Waiting for node %s to get ready", node.Name)
readyLogMessage := k3d.ReadyLogMessageByRole[node.Role]
if readyLogMessage != "" {
if err := NodeWaitForLogMessage(ctx, runtime, node, readyLogMessage, startTime); err != nil {
if nodeStartOpts.ReadyLogMessage == "" {
nodeStartOpts.ReadyLogMessage = k3d.ReadyLogMessageByRole[node.Role]
}
if nodeStartOpts.ReadyLogMessage != "" {
log.Debugf("Waiting for node %s to get ready (Log: '%s')", node.Name, nodeStartOpts.ReadyLogMessage)
if err := NodeWaitForLogMessage(ctx, runtime, node, nodeStartOpts.ReadyLogMessage, startTime); err != nil {
return fmt.Errorf("Node %s failed to get ready: %+v", node.Name, err)
}
} else {

@ -224,9 +224,10 @@ type NodeCreateOpts struct {
// NodeStartOpts describes a set of options one can set when (re-)starting a node
type NodeStartOpts struct {
Wait bool
Timeout time.Duration
NodeHooks []NodeHook `yaml:"nodeHooks,omitempty" json:"nodeHooks,omitempty"`
Wait bool
Timeout time.Duration
NodeHooks []NodeHook `yaml:"nodeHooks,omitempty" json:"nodeHooks,omitempty"`
ReadyLogMessage string
}
// NodeDeleteOpts describes a set of options one can set when deleting a node

Loading…
Cancel
Save