From 99f5a6884a84e0df7f202f4a1e4a9189d4a7d833 Mon Sep 17 00:00:00 2001
From: iwilltry42
Date: Fri, 29 May 2020 11:21:12 +0200
Subject: [PATCH] getNodeLogs: accept time.Time parameter

- fixes startCluster's --wait/--timeout functionality
  --> now doesn't check all the logs (e.g. from previous run)
  ----> no more false positives when checking for a specific log message only in the current run
---
 pkg/cluster/cluster.go          | 26 +++++++++++++++++---------
 pkg/cluster/node.go             |  4 ++--
 pkg/runtimes/containerd/node.go |  3 ++-
 pkg/runtimes/docker/node.go     |  8 ++++++--
 pkg/runtimes/runtime.go         |  3 ++-
 tests/test_full_lifecycle.sh    |  5 +----
 6 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go
index d9125878..1461a525 100644
--- a/pkg/cluster/cluster.go
+++ b/pkg/cluster/cluster.go
@@ -192,7 +192,7 @@ func CreateCluster(ctx context.Context, cluster *k3d.Cluster, runtime k3drt.Runt
 		default:
 		}
 		log.Debugln("Waiting for initializing master node...")
-		logreader, err := runtime.GetNodeLogs(cluster.InitNode)
+		logreader, err := runtime.GetNodeLogs(cluster.InitNode, time.Time{})
 		if err != nil {
 			if logreader != nil {
 				logreader.Close()
@@ -254,7 +254,7 @@ func CreateCluster(ctx context.Context, cluster *k3d.Cluster, runtime k3drt.Runt
 				// TODO: avoid `level=fatal msg="starting kubernetes: preparing server: post join: a configuration change is already in progress (5)"`
 				// ... by scanning for this line in logs and restarting the container in case it appears
 				log.Debugf("Starting to wait for master node '%s'", masterNode.Name)
-				return WaitForNodeLogMessage(ctx, runtime, masterNode, "Wrote kubeconfig")
+				return WaitForNodeLogMessage(ctx, runtime, masterNode, "Wrote kubeconfig", time.Time{})
 			})
 		}
 	}
@@ -475,6 +475,8 @@ func generateNodeName(cluster string, role k3d.Role, suffix int) string {
 func StartCluster(ctx context.Context, cluster *k3d.Cluster, runtime k3drt.Runtime, startClusterOpts types.StartClusterOpts) error {
 	log.Infof("Starting cluster '%s'", cluster.Name)
 
+	start := time.Now()
+
 	if startClusterOpts.Timeout > 0*time.Second {
 		var cancel context.CancelFunc
 		ctx, cancel = context.WithTimeout(ctx, startClusterOpts.Timeout)
@@ -508,17 +510,11 @@ func StartCluster(ctx context.Context, cluster *k3d.Cluster, runtime k3drt.Runti
 				// TODO: avoid `level=fatal msg="starting kubernetes: preparing server: post join: a configuration change is already in progress (5)"`
 				// ... by scanning for this line in logs and restarting the container in case it appears
 				log.Debugf("Starting to wait for master node '%s'", masterNode.Name)
-				return WaitForNodeLogMessage(ctx, runtime, masterNode, "Wrote kubeconfig")
+				return WaitForNodeLogMessage(ctx, runtime, masterNode, "Wrote kubeconfig", start)
 			})
 		}
 	}
 
-	if err := waitForMasterWaitgroup.Wait(); err != nil {
-		log.Errorln("Failed to bring up all master nodes in time. Check the logs:")
-		log.Errorln(">>> ", err)
-		return fmt.Errorf("Failed to bring up cluster")
-	}
-
 	// start masterlb
 	if masterlb != nil {
 		log.Debugln("Starting masterlb...")
@@ -526,6 +522,18 @@ func StartCluster(ctx context.Context, cluster *k3d.Cluster, runtime k3drt.Runti
 			log.Warningf("Failed to start masterlb '%s': Try to start it manually", masterlb.Name)
 			failed++
 		}
+		waitForMasterWaitgroup.Go(func() error {
+			// TODO: avoid `level=fatal msg="starting kubernetes: preparing server: post join: a configuration change is already in progress (5)"`
+			// ... by scanning for this line in logs and restarting the container in case it appears
+			log.Debugf("Starting to wait for loadbalancer node '%s'", masterlb.Name)
+			return WaitForNodeLogMessage(ctx, runtime, masterlb, "start worker processes", start)
+		})
+	}
+
+	if err := waitForMasterWaitgroup.Wait(); err != nil {
+		log.Errorln("Failed to bring up all nodes in time. Check the logs:")
+		log.Errorln(">>> ", err)
+		return fmt.Errorf("Failed to bring up cluster")
 	}
 
 	if failed > 0 {
diff --git a/pkg/cluster/node.go b/pkg/cluster/node.go
index 7f93aea0..beade981 100644
--- a/pkg/cluster/node.go
+++ b/pkg/cluster/node.go
@@ -233,7 +233,7 @@ func GetNode(node *k3d.Node, runtime runtimes.Runtime) (*k3d.Node, error) {
 }
 
 // WaitForNodeLogMessage follows the logs of a node container and returns if it finds a specific line in there (or timeout is reached)
-func WaitForNodeLogMessage(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, message string) error {
+func WaitForNodeLogMessage(ctx context.Context, runtime runtimes.Runtime, node *k3d.Node, message string, since time.Time) error {
 	for {
 		select {
 		case <-ctx.Done():
@@ -242,7 +242,7 @@ func WaitForNodeLogMessage(ctx context.Context, runtime runtimes.Runtime, node *
 		}
 
 		// read the logs
-		out, err := runtime.GetNodeLogs(node)
+		out, err := runtime.GetNodeLogs(node, since)
 		if err != nil {
 			if out != nil {
 				out.Close()
diff --git a/pkg/runtimes/containerd/node.go b/pkg/runtimes/containerd/node.go
index 1547ac07..259f16e4 100644
--- a/pkg/runtimes/containerd/node.go
+++ b/pkg/runtimes/containerd/node.go
@@ -25,6 +25,7 @@ package containerd
 import (
 	"context"
 	"io"
+	"time"
 
 	"github.com/containerd/containerd"
 	"github.com/containerd/containerd/containers"
@@ -119,7 +120,7 @@ func (d Containerd) GetNode(node *k3d.Node) (*k3d.Node, error) {
 }
 
 // GetNodeLogs returns the logs from a given node
-func (d Containerd) GetNodeLogs(node *k3d.Node) (io.ReadCloser, error) {
+func (d Containerd) GetNodeLogs(node *k3d.Node, since time.Time) (io.ReadCloser, error) {
 	return nil, nil
 }
 
diff --git a/pkg/runtimes/docker/node.go b/pkg/runtimes/docker/node.go
index 8dfd5662..bfc17a45 100644
--- a/pkg/runtimes/docker/node.go
+++ b/pkg/runtimes/docker/node.go
@@ -218,7 +218,7 @@ func (d Docker) GetNode(node *k3d.Node) (*k3d.Node, error) {
 }
 
 // GetNodeLogs returns the logs from a given node
-func (d Docker) GetNodeLogs(node *k3d.Node) (io.ReadCloser, error) {
+func (d Docker) GetNodeLogs(node *k3d.Node, since time.Time) (io.ReadCloser, error) {
 	// get the container for the given node
 	container, err := getNodeContainer(node)
 	if err != nil {
@@ -244,7 +244,11 @@ func (d Docker) GetNodeLogs(node *k3d.Node) (io.ReadCloser, error) {
 		return nil, fmt.Errorf("Node '%s' (container '%s') not running", node.Name, containerInspectResponse.ID)
 	}
 
-	logreader, err := docker.ContainerLogs(ctx, container.ID, types.ContainerLogsOptions{ShowStdout: true, ShowStderr: true})
+	sinceStr := ""
+	if !since.IsZero() {
+		sinceStr = since.Format("2006-01-02T15:04:05")
+	}
+	logreader, err := docker.ContainerLogs(ctx, container.ID, types.ContainerLogsOptions{ShowStdout: true, ShowStderr: true, Since: sinceStr})
 	if err != nil {
 		log.Errorf("Failed to get logs from node '%s' (container '%s')", node.Name, container.ID)
 		return nil, err
diff --git a/pkg/runtimes/runtime.go b/pkg/runtimes/runtime.go
index 428e0c61..fc05c465 100644
--- a/pkg/runtimes/runtime.go
+++ b/pkg/runtimes/runtime.go
@@ -24,6 +24,7 @@ package runtimes
 import (
 	"fmt"
 	"io"
+	"time"
 
 	"github.com/rancher/k3d/pkg/runtimes/containerd"
 	"github.com/rancher/k3d/pkg/runtimes/docker"
@@ -56,7 +57,7 @@ type Runtime interface {
 	GetRuntimePath() string // returns e.g. '/var/run/docker.sock' for a default docker setup
 	ExecInNode(*k3d.Node, []string) error
 	// DeleteContainer() error
-	GetNodeLogs(*k3d.Node) (io.ReadCloser, error)
+	GetNodeLogs(*k3d.Node, time.Time) (io.ReadCloser, error)
 }
 
 // GetRuntime checks, if a given name is represented by an implemented k3d runtime and returns it
diff --git a/tests/test_full_lifecycle.sh b/tests/test_full_lifecycle.sh
index 12968ace..e73ebb01 100755
--- a/tests/test_full_lifecycle.sh
+++ b/tests/test_full_lifecycle.sh
@@ -30,10 +30,7 @@ check_clusters "$clustername" && failed "cluster was not stopped, since we still
 
 # 3. start the cluster
 info "Starting cluster..."
-$EXE start cluster "$clustername"
-
-info "Sleeping for 5 seconds to give the cluster enough time to get ready..."
-sleep 5
+$EXE start cluster "$clustername" --wait --timeout 360s || failed "cluster didn't come back in time"
 
 info "Checking that we have access to the cluster..."
 check_clusters "$clustername" || failed "error checking cluster"