properly handle initNode and normalize waiting for log messages to determine successful node starts

Branch: pull/471/head
Author: iwilltry42, 4 years ago
Parent: bad77f0027
Commit: e7c43df434
Changed files:
1. pkg/client/cluster.go (70 lines changed)
2. pkg/client/node.go (35 lines changed)
3. pkg/runtimes/docker/translate.go (26 lines changed)
4. pkg/types/types.go (1 line changed)
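The unifying change: instead of callers hand-rolling their own log-polling loops, waiting for a node is normalized around a per-role "ready" log message (k3d.ReadyLogMessageByRole), and every wait site now guards against roles that have no such message defined. A minimal self-contained sketch of that pattern (role names and messages below are illustrative stand-ins, not k3d's actual values):

```go
package main

import "fmt"

// Role mirrors the idea of k3d's node roles; names and messages below are
// illustrative stand-ins, not k3d's actual values.
type Role string

const (
	ServerRole       Role = "server"
	AgentRole        Role = "agent"
	LoadBalancerRole Role = "loadbalancer"
)

// readyLogMessageByRole plays the part of k3d.ReadyLogMessageByRole:
// the log line that signals that a node of a given role is ready.
var readyLogMessageByRole = map[Role]string{
	ServerRole:       "k3s is up and running",
	LoadBalancerRole: "start worker processes",
	// AgentRole intentionally missing to exercise the guard below.
}

func main() {
	for _, role := range []Role{ServerRole, AgentRole, LoadBalancerRole} {
		// The guard added throughout this commit: only wait if a target
		// log message is actually defined for the role.
		if msg := readyLogMessageByRole[role]; msg != "" {
			fmt.Printf("role %s: would wait for log line %q\n", role, msg)
		} else {
			fmt.Printf("role %s: no ready log message defined, not waiting\n", role)
		}
	}
}
```

The empty-string guard works because Go map lookups return the zero value for missing keys; the diff applies exactly this check in ClusterStart, NodeAddToClusterMulti, NodeCreateMulti and NodeStart.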

pkg/client/cluster.go

@@ -22,7 +22,6 @@ THE SOFTWARE.
 package client

 import (
-	"bytes"
 	"context"
 	"errors"
 	"fmt"
@@ -392,6 +391,7 @@ ClusterCreatOpts:
 		// the cluster has an init server node, but its not this one, so connect it to the init node
 		if cluster.InitNode != nil && !node.ServerOpts.IsInit {
 			node.Env = append(node.Env, fmt.Sprintf("K3S_URL=%s", connectionURL))
+			node.Labels[k3d.LabelServerIsInit] = "false" // set label, that this server node is not the init server
 		}
 	} else if node.Role == k3d.AgentRole {
@@ -425,6 +425,10 @@ ClusterCreatOpts:
 	if cluster.InitNode != nil {
 		log.Infoln("Creating initializing server node")
 		cluster.InitNode.Args = append(cluster.InitNode.Args, "--cluster-init")
+		if cluster.InitNode.Labels == nil {
+			cluster.InitNode.Labels = map[string]string{}
+		}
+		cluster.InitNode.Labels[k3d.LabelServerIsInit] = "true" // set label, that this server node is the init server

 		// in case the LoadBalancer was disabled, expose the API Port on the initializing server node
 		if clusterCreateOpts.DisableLoadBalancer {
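Note the nil check before the map write: in Go, assigning into a nil map panics at runtime, so an init node whose Labels were never initialized would otherwise crash here. A tiny sketch of the guard in isolation (the Node type below is a stripped-down stand-in):

```go
package main

import "fmt"

// Node is a stripped-down stand-in for k3d's node type.
type Node struct {
	Labels map[string]string
}

// setLabel shows the guard from the hunk above: writing to a nil map
// panics in Go, so the map must be initialized before the first assignment.
func setLabel(n *Node, key, value string) {
	if n.Labels == nil {
		n.Labels = map[string]string{}
	}
	n.Labels[key] = value
}

func main() {
	n := &Node{} // Labels is nil at this point
	setLabel(n, "k3d.server.init", "true")
	fmt.Println(n.Labels) // map[k3d.server.init:true]
}
```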
@@ -797,39 +801,11 @@ func ClusterStart(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Clust
 	for _, n := range cluster.Nodes {
 		if n.Role == k3d.ServerRole && n.ServerOpts.IsInit {
 			if err := NodeStart(ctx, runtime, n, k3d.NodeStartOpts{
+				Wait:      true, // always wait for the init node
 				NodeHooks: startClusterOpts.NodeHooks,
 			}); err != nil {
 				return fmt.Errorf("Failed to start initializing server node: %+v", err)
 			}
-			// wait for the initnode to come up before doing anything else
-			for {
-				select {
-				case <-ctx.Done():
-					log.Errorln("Failed to bring up initializing server node in time")
-					return fmt.Errorf(">>> %w", ctx.Err())
-				default:
-				}
-				log.Debugln("Waiting for initializing server node...")
-				logreader, err := runtime.GetNodeLogs(ctx, cluster.InitNode, time.Time{})
-				if err != nil {
-					if logreader != nil {
-						logreader.Close()
-					}
-					log.Errorln(err)
-					log.Errorln("Failed to get logs from the initializig server node.. waiting for 3 seconds instead")
-					time.Sleep(3 * time.Second)
-					break
-				}
-				defer logreader.Close()
-				buf := new(bytes.Buffer)
-				nRead, _ := buf.ReadFrom(logreader)
-				logreader.Close()
-				if nRead > 0 && strings.Contains(buf.String(), k3d.ReadyLogMessageByRole[k3d.ServerRole]) {
-					log.Debugln("Initializing server node is up... continuing")
-					break
-				}
-				time.Sleep(time.Second)
-			}
 			break
 		}
 	}
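The deleted block is the hand-rolled wait that this commit normalizes away: it polled runtime.GetNodeLogs in a loop, buffered the output, and scanned for the ready message. Two details worth noting: the `defer logreader.Close()` inside the loop would have stacked up deferred closes until the function returned (the explicit `logreader.Close()` right after reading is what actually mattered), and the whole loop is now replaced by the single `Wait: true` option handled inside NodeStart. A simplified sketch of what such a normalized wait helper does (an illustration, not k3d's actual NodeWaitForLogMessage):

```go
package main

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"strings"
	"time"
)

// waitForLogMessage is a simplified illustration of a normalized
// NodeWaitForLogMessage-style helper: poll a log source until the target
// line appears or the context is cancelled. getLogs stands in for
// runtime.GetNodeLogs.
func waitForLogMessage(ctx context.Context, getLogs func(context.Context) (io.ReadCloser, error), message string) error {
	for {
		select {
		case <-ctx.Done():
			return fmt.Errorf("context cancelled while waiting for %q: %w", message, ctx.Err())
		default:
		}

		logreader, err := getLogs(ctx)
		if err != nil {
			time.Sleep(time.Second) // transient error: retry after a pause
			continue
		}

		buf := new(bytes.Buffer)
		_, _ = buf.ReadFrom(logreader)
		logreader.Close() // close per iteration; defer in a loop would pile up

		if strings.Contains(buf.String(), message) {
			return nil
		}
		time.Sleep(time.Second)
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Fake log source that is "ready" immediately.
	getLogs := func(context.Context) (io.ReadCloser, error) {
		return io.NopCloser(strings.NewReader("starting...\nk3s is up and running\n")), nil
	}

	if err := waitForLogMessage(ctx, getLogs, "k3s is up and running"); err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println("node is ready")
}
```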
@@ -847,24 +823,27 @@ func ClusterStart(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Clust
 			continue
 		}

 		// skip init node here, as it should be running already
 		if node == cluster.InitNode || node.ServerOpts.IsInit {
 			continue
 		}

 		// check if node is running already to avoid waiting forever when checking for the node log message
 		if !node.State.Running {
-			// start node
-			if err := NodeStart(ctx, runtime, node, k3d.NodeStartOpts{
+			nodeStartOpts := k3d.NodeStartOpts{
 				NodeHooks: startClusterOpts.NodeHooks,
-			}); err != nil {
-				log.Warningf("Failed to start node '%s': Try to start it manually", node.Name)
-				failed++
-				continue
 			}

 			// wait for this server node to be ready (by checking the logs for a specific log message)
 			if node.Role == k3d.ServerRole && startClusterOpts.WaitForServer {
 				log.Debugf("Waiting for server node '%s' to get ready", node.Name)
-				if err := NodeWaitForLogMessage(ctx, runtime, node, k3d.ReadyLogMessageByRole[k3d.ServerRole], start); err != nil {
-					return fmt.Errorf("Server node '%s' failed to get ready: %+v", node.Name, err)
-				}
+				nodeStartOpts.Wait = true
 			}

+			// start node
+			if err := NodeStart(ctx, runtime, node, nodeStartOpts); err != nil {
+				log.Warningf("Failed to start node '%s': Try to start it manually", node.Name)
+				failed++
+				continue
+			}
 		} else {
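The refactor pattern in this hunk: build the options struct first, toggle Wait based on role and flags, and keep a single NodeStart call site instead of starting and then separately waiting. Sketch (types and field names mimic the diff but are illustrative):

```go
package main

import "fmt"

// StartOpts mimics k3d.NodeStartOpts as used in the diff (fields illustrative).
type StartOpts struct {
	Wait      bool
	NodeHooks []string
}

// start stands in for NodeStart: one call site, behavior driven by opts.
func start(name string, opts StartOpts) {
	fmt.Printf("starting %s (wait=%v, hooks=%d)\n", name, opts.Wait, len(opts.NodeHooks))
}

func main() {
	opts := StartOpts{NodeHooks: []string{"preStart hook (illustrative)"}}

	// Toggle the wait behavior up front instead of waiting separately afterwards.
	isServer, waitForServer := true, true
	if isServer && waitForServer {
		opts.Wait = true
	}

	start("k3d-demo-server-0", opts)
}
```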
@@ -883,8 +862,13 @@ func ClusterStart(ctx context.Context, runtime k3drt.Runtime, cluster *k3d.Clust
 		// TODO: avoid `level=fatal msg="starting kubernetes: preparing server: post join: a configuration change is already in progress (5)"`
 		// ... by scanning for this line in logs and restarting the container in case it appears
 		log.Debugf("Starting to wait for loadbalancer node '%s'", serverlb.Name)
-		if err := NodeWaitForLogMessage(ctx, runtime, serverlb, k3d.ReadyLogMessageByRole[k3d.LoadBalancerRole], start); err != nil {
-			return fmt.Errorf("Loadbalancer '%s' failed to get ready: %+v", serverlb.Name, err)
+		readyLogMessage := k3d.ReadyLogMessageByRole[k3d.LoadBalancerRole]
+		if readyLogMessage != "" {
+			if err := NodeWaitForLogMessage(ctx, runtime, serverlb, readyLogMessage, start); err != nil {
+				return fmt.Errorf("Loadbalancer '%s' failed to get ready: %+v", serverlb.Name, err)
+			}
+		} else {
+			log.Warnf("ClusterStart: Set to wait for node %s to be ready, but there's no target log message defined", serverlb.Name)
 		}
 	} else {
 		log.Infof("Serverlb '%s' already running", serverlb.Name)

pkg/client/node.go

@@ -161,7 +161,12 @@ func NodeAddToClusterMulti(ctx context.Context, runtime runtimes.Runtime, nodes
 		currentNode := node
 		nodeWaitGroup.Go(func() error {
 			log.Debugf("Starting to wait for node '%s'", currentNode.Name)
-			return NodeWaitForLogMessage(ctx, runtime, currentNode, k3d.ReadyLogMessageByRole[currentNode.Role], time.Time{})
+			readyLogMessage := k3d.ReadyLogMessageByRole[currentNode.Role]
+			if readyLogMessage != "" {
+				return NodeWaitForLogMessage(ctx, runtime, currentNode, readyLogMessage, time.Time{})
+			}
+			log.Warnf("NodeAddToClusterMulti: Set to wait for node %s to get ready, but there's no target log message defined", currentNode.Name)
+			return nil
 		})
 	}
 }
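`nodeWaitGroup.Go(...)` suggests an errgroup-style group (presumably golang.org/x/sync/errgroup, though the import isn't shown in this diff): each node's wait runs concurrently and the first error cancels the shared context. A self-contained sketch of that setup, including the `currentNode := node` loop-variable capture visible in the diff (required before Go 1.22):

```go
package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/sync/errgroup"
)

// waitForNode stands in for the per-node log-message wait.
func waitForNode(ctx context.Context, name string) error {
	select {
	case <-time.After(100 * time.Millisecond): // pretend the ready line appeared
		fmt.Println(name, "ready")
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

func main() {
	nodeWaitGroup, ctx := errgroup.WithContext(context.Background())
	for _, node := range []string{"server-0", "agent-0", "agent-1"} {
		currentNode := node // capture the loop variable, as in the diff (pre-Go-1.22 idiom)
		nodeWaitGroup.Go(func() error {
			return waitForNode(ctx, currentNode)
		})
	}
	// Wait returns the first non-nil error from any of the waits.
	if err := nodeWaitGroup.Wait(); err != nil {
		fmt.Println("error:", err)
	}
}
```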
@@ -191,7 +196,12 @@ func NodeCreateMulti(ctx context.Context, runtime runtimes.Runtime, nodes []*k3d
 		currentNode := node
 		nodeWaitGroup.Go(func() error {
 			log.Debugf("Starting to wait for node '%s'", currentNode.Name)
-			return NodeWaitForLogMessage(ctx, runtime, currentNode, k3d.ReadyLogMessageByRole[currentNode.Role], time.Time{})
+			readyLogMessage := k3d.ReadyLogMessageByRole[currentNode.Role]
+			if readyLogMessage != "" {
+				return NodeWaitForLogMessage(ctx, runtime, currentNode, readyLogMessage, time.Time{})
+			}
+			log.Warnf("NodeCreateMulti: Set to wait for node %s to get ready, but there's no target log message defined", currentNode.Name)
+			return nil
 		})
 	}
 }
@@ -226,17 +236,32 @@ func NodeRun(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, node
 func NodeStart(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, nodeStartOpts k3d.NodeStartOpts) error {
 	for _, hook := range nodeStartOpts.NodeHooks {
 		if hook.Stage == k3d.LifecycleStagePreStart {
-			log.Tracef("Executing preStartAction '%s'", reflect.TypeOf(hook))
+			log.Tracef("Node %s: Executing preStartAction '%s'", node.Name, reflect.TypeOf(hook))
 			if err := hook.Action.Run(ctx, node); err != nil {
-				log.Errorf("Failed executing preStartAction '%+v': %+v", hook, err)
+				log.Errorf("Node %s: Failed executing preStartAction '%+v': %+v", node.Name, hook, err)
 			}
 		}
 	}

 	log.Tracef("Starting node '%s'", node.Name)
+	startTime := time.Now()
 	if err := runtime.StartNode(ctx, node); err != nil {
-		log.Errorf("Failed to start node *'%s'", node.Name)
+		log.Errorf("Failed to start node '%s'", node.Name)
 		return err
 	}

+	if nodeStartOpts.Wait {
+		log.Debugf("Waiting for node %s to get ready", node.Name)
+		readyLogMessage := k3d.ReadyLogMessageByRole[node.Role]
+		if readyLogMessage != "" {
+			if err := NodeWaitForLogMessage(ctx, runtime, node, readyLogMessage, startTime); err != nil {
+				return fmt.Errorf("Node %s failed to get ready: %+v", node.Name, err)
+			}
+		} else {
+			log.Warnf("NodeStart: Set to wait for node %s to be ready, but there's no target log message defined", node.Name)
+		}
+	}

 	return nil
 }
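Why `startTime := time.Now()` is recorded just before runtime.StartNode: the since-timestamp passed to NodeWaitForLogMessage ensures only log lines emitted after this start are considered, so a ready message left over from a previous run of the same container can't produce a false positive. An illustrative sketch of that since-filter (not k3d's implementation):

```go
package main

import (
	"fmt"
	"strings"
	"time"
)

// logEntry mimics a timestamped container log line.
type logEntry struct {
	ts   time.Time
	line string
}

// containsSince scans only entries newer than `since`. This mirrors why
// NodeStart records startTime before runtime.StartNode: a ready message left
// over from a previous run of the container must not count for this start.
func containsSince(logs []logEntry, msg string, since time.Time) bool {
	for _, e := range logs {
		if e.ts.After(since) && strings.Contains(e.line, msg) {
			return true
		}
	}
	return false
}

func main() {
	now := time.Now()
	logs := []logEntry{
		{now.Add(-time.Hour), "k3s is up and running"}, // stale line from an earlier boot
		{now.Add(time.Second), "starting k3s..."},
	}
	// false: the stale ready line is ignored, the fresh logs aren't ready yet
	fmt.Println(containsSince(logs, "k3s is up and running", now))
}
```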

pkg/runtimes/docker/translate.go

@@ -144,7 +144,7 @@ func TranslateContainerDetailsToNode(containerDetails types.ContainerJSON) (*k3d
 	// first, make sure, that it's actually a k3d managed container by checking if it has all the default labels
 	for k, v := range k3d.DefaultObjectLabels {
-		log.Tracef("TranslateContainerDetailsToNode: Checking for default object label %s=%s", k, v)
+		log.Tracef("TranslateContainerDetailsToNode: Checking for default object label %s=%s on container %s", k, v, containerDetails.Name)
 		found := false
 		for lk, lv := range containerDetails.Config.Labels {
 			if lk == k && lv == v {
@@ -176,8 +176,30 @@ func TranslateContainerDetailsToNode(containerDetails types.ContainerJSON) (*k3d
 	}
 	orderedNetworks = append(orderedNetworks, otherNetworks...)

-	// serverOpts
+	/**
+	 * ServerOpts
+	 */
+
+	// IsInit
+	serverOpts := k3d.ServerOpts{IsInit: false}
+	clusterInitFlagSet := false
+	for _, arg := range containerDetails.Args {
+		if strings.Contains(arg, "--cluster-init") {
+			clusterInitFlagSet = true
+			break
+		}
+	}
+	if serverIsInitLabel, ok := containerDetails.Config.Labels[k3d.LabelServerIsInit]; ok {
+		if serverIsInitLabel == "true" {
+			if !clusterInitFlagSet {
+				log.Errorf("Container %s has label %s=true, but the args do not contain the --cluster-init flag", containerDetails.Name, k3d.LabelServerIsInit)
+			} else {
+				serverOpts.IsInit = true
+			}
+		}
+	}
+
+	// Kube API
+	serverOpts.KubeAPI = &k3d.ExposureOpts{}
 	for k, v := range containerDetails.Config.Labels {
 		if k == k3d.LabelServerAPIHostIP {
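The reconstruction logic treats the container's args as the source of truth for --cluster-init and uses the new label only as a cross-check, logging an error when the two disagree rather than silently trusting the label. A condensed sketch of the same decision flow (stand-in types, illustrative values):

```go
package main

import (
	"fmt"
	"strings"
)

// hasClusterInitFlag mirrors the scan in TranslateContainerDetailsToNode:
// the container's args are the source of truth for --cluster-init.
func hasClusterInitFlag(args []string) bool {
	for _, arg := range args {
		if strings.Contains(arg, "--cluster-init") {
			return true
		}
	}
	return false
}

func main() {
	// Illustrative values, not real k3d container data.
	args := []string{"server", "--tls-san", "0.0.0.0"} // note: no --cluster-init
	labels := map[string]string{"k3d.server.init": "true"}

	isInit := false
	if labels["k3d.server.init"] == "true" {
		if !hasClusterInitFlag(args) {
			// label and args disagree: report instead of trusting the label
			fmt.Println("inconsistent: label says init, but args lack --cluster-init")
		} else {
			isInit = true
		}
	}
	fmt.Println("IsInit:", isInit)
}
```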

pkg/types/types.go

@@ -121,6 +121,7 @@ const (
 	LabelServerAPIPort        string = "k3d.server.api.port"
 	LabelServerAPIHost        string = "k3d.server.api.host"
 	LabelServerAPIHostIP      string = "k3d.server.api.hostIP"
+	LabelServerIsInit         string = "k3d.server.init"
 	LabelRegistryHost         string = "k3d.registry.host"
 	LabelRegistryHostIP       string = "k3d.registry.hostIP"
 	LabelRegistryPortExternal string = "k3s.registry.port.external"
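With the constant in place, the init server can be identified from container metadata alone, e.g. after a restart, without re-parsing command args. A hypothetical lookup using the new label (the container type and names below are illustrative):

```go
package main

import "fmt"

// LabelServerIsInit mirrors the constant added in types.go.
const LabelServerIsInit = "k3d.server.init"

// container is a minimal stand-in for the runtime's container info.
type container struct {
	Name   string
	Labels map[string]string
}

// findInitNode shows the kind of lookup the label enables: identifying the
// init server from metadata alone, without re-parsing command args.
func findInitNode(containers []container) *container {
	for i := range containers {
		if containers[i].Labels[LabelServerIsInit] == "true" {
			return &containers[i]
		}
	}
	return nil
}

func main() {
	cs := []container{
		{"k3d-demo-server-0", map[string]string{LabelServerIsInit: "true"}},
		{"k3d-demo-server-1", map[string]string{LabelServerIsInit: "false"}},
	}
	if init := findInitNode(cs); init != nil {
		fmt.Println("init node:", init.Name)
	}
}
```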
