cluster [CLUSTERNAME] # default cluster name is 'k3s-default'
create
-a, --agents # specify how many agent nodes you want to create (integer, default: 0)
--api-port # specify the port on which the cluster will be accessible (format '[HOST:]HOSTPORT', default: random)
-c, --config # use a config file (format 'PATH')
-e, --env # add environment variables to the nodes (quoted string, format: 'KEY[=VALUE][@NODEFILTER[;NODEFILTER...]]', use flag multiple times)
--gpus # [from docker CLI] add GPU devices to the node containers (string, e.g. 'all')
-i, --image # specify which k3s image should be used for the nodes (string, default: 'docker.io/rancher/k3s:v1.20.0-k3s2', tag changes per build)
--k3s-agent-arg # add additional arguments to the k3s agent (quoted string, use flag multiple times) (see https://rancher.com/docs/k3s/latest/en/installation/install-options/agent-config/#k3s-agent-cli-help)
--k3s-server-arg # add additional arguments to the k3s server (quoted string, use flag multiple times) (see https://rancher.com/docs/k3s/latest/en/installation/install-options/server-config/#k3s-server-cli-help)
--kubeconfig-switch-context # (implies --kubeconfig-update-default) automatically sets the current-context of your default kubeconfig to the new cluster's context (default: true)
--kubeconfig-update-default # enable the automated update of the default kubeconfig with the details of the newly created cluster (also sets '--wait=true') (default: true)
-l, --label # add (docker) labels to the node containers (format: 'KEY[=VALUE][@NODEFILTER[;NODEFILTER...]]', use flag multiple times)
--network # specify an existing (docker) network you want to connect to (string)
--no-hostip # disable the automatic injection of the Host IP as 'host.k3d.internal' into the containers and CoreDNS (default: false)
--no-image-volume # disable the creation of a volume for storing images (used for the 'k3d image import' command) (default: false)
--no-lb # disable the creation of a load balancer in front of the server nodes (default: false)
--no-rollback # disable the automatic rollback actions, if anything goes wrong (default: false)
-p, --port # add some more port mappings (format: '[HOST:][HOSTPORT:]CONTAINERPORT[/PROTOCOL][@NODEFILTER]', use flag multiple times)
--registry-create # create a new (docker) registry dedicated for this cluster (default: false)
--registry-use # use an existing local (docker) registry with this cluster (string, use multiple times)
-s, --servers # specify how many server nodes you want to create (integer, default: 1)
--token # specify a cluster token (string, default: auto-generated)
--timeout # specify a timeout, after which the cluster creation will be interrupted and changes rolled back (duration, e.g. '10s')
-v, --volume # specify additional bind-mounts (format: '[SOURCE:]DEST[@NODEFILTER[;NODEFILTER...]]', use flag multiple times)
--wait # enable waiting for all server nodes to be ready before returning (default: true)
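For example, a single invocation can combine several of these flags (cluster name, ports, and paths below are placeholders):

```bash
# 1 server + 2 agents, fixed API port, loadbalancer port 80 mapped to host port 8080,
# and a local manifests directory mounted into the server node
k3d cluster create dev --servers 1 --agents 2 \
  --api-port 127.0.0.1:6445 \
  -p "8080:80@loadbalancer" \
  -v "$(pwd)/manifests:/var/lib/rancher/k3s/server/manifests@server[0]"
```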
start CLUSTERNAME # start a (stopped) cluster
-a, --all # start all clusters (default: false)
--wait # wait for all servers and server-loadbalancer to be up before returning (default: true)
--timeout # maximum waiting time for '--wait' before canceling/returning (duration, e.g. '10s')
stop CLUSTERNAME # stop a cluster
-a, --all # stop all clusters (default: false)
delete CLUSTERNAME # delete an existing cluster
-a, --all # delete all existing clusters (default: false)
list [CLUSTERNAME [CLUSTERNAME ...]]
--no-headers # do not print headers (default: false)
--token # show column with cluster tokens (default: false)
-o, --output # format the output (format: 'json|yaml')
completion [bash | zsh | fish | (psh | powershell)] # generate completion scripts for common shells
config
init # write a default k3d config (as a starting point)
-f, --force # force overwrite target file (default: false)
-o, --output # file to write to (string, default "k3d-default.yaml")
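For example (the file name is arbitrary):

```bash
# write a default config file, then create a cluster from it
k3d config init -o my-cluster.yaml
k3d cluster create --config my-cluster.yaml
```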
help [COMMAND] # show help text for any command
image
import [IMAGE | ARCHIVE [IMAGE | ARCHIVE ...]] # Load one or more images from the local runtime environment or tar-archives into k3d clusters
-c, --cluster # clusters to load the image into (string, use flag multiple times, default: k3s-default)
-k, --keep-tarball # do not delete the image tarball from the shared volume after completion (default: false)
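For example, to make a locally built image available inside a cluster (image and cluster names are placeholders):

```bash
docker build -t my-app:local .
k3d image import my-app:local -c k3s-default
```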
kubeconfig
get (CLUSTERNAME [CLUSTERNAME ...] | --all) # get kubeconfig from cluster(s) and write it to stdout
-a, --all # get kubeconfigs from all clusters (default: false)
merge | write (CLUSTERNAME [CLUSTERNAME ...] | --all) # get kubeconfig from cluster(s) and merge it/them into a (kubeconfig-)file
-a, --all # get kubeconfigs from all clusters (default: false)
-s, --kubeconfig-switch-context # switch current-context in kubeconfig to the new context (default: true)
-d, --kubeconfig-merge-default # update the default kubeconfig (usually $KUBECONFIG or $HOME/.kube/config)
-o, --output # specify the output file where the kubeconfig should be written to (string)
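For example (cluster name is a placeholder):

```bash
# print the kubeconfig of one cluster to stdout
k3d kubeconfig get mycluster
# merge it into your default kubeconfig and switch the current context to it
k3d kubeconfig merge mycluster --kubeconfig-merge-default --kubeconfig-switch-context
```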
node
create NODENAME # Create new nodes (and add them to existing clusters)
-c, --cluster # specify the cluster that the node shall connect to (string, default: k3s-default)
-i, --image # specify which k3s image should be used for the node(s) (string, default: 'docker.io/rancher/k3s:v1.20.0-k3s2', tag changes per build)
--replicas # specify how many replicas you want to create with this spec (integer, default: 1)
--role # specify the node role (string, format: 'agent|server', default: agent)
--timeout # specify a timeout duration, after which the node creation will be interrupted, if not done yet (duration, e.g. '10s')
--wait # wait for the node to be up and running before returning (default: true)
start NODENAME # start a (stopped) node
stop NODENAME # stop a node
delete NODENAME # delete an existing node
-a, --all # delete all existing nodes (default: false)
list NODENAME
--no-headers # do not print headers (default: false)
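For example, to add two more agent nodes to an existing cluster (names are placeholders):

```bash
k3d node create extra-agent --cluster mycluster --role agent --replicas 2
```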
registry
create REGISTRYNAME
-i, --image # specify image used for the registry (string, default: "docker.io/library/registry:2")
-p, --port # select host port to map to (format: '[HOST:]HOSTPORT', default: 'random')
delete REGISTRYNAME
-a, --all # delete all existing registries (default: false)
If you want to run CUDA workloads on the K3S container you need to customize the container.
CUDA workloads require the NVIDIA Container Runtime, so containerd needs to be configured to use this runtime.
The K3S container itself also needs to run with this runtime. If you are using Docker you can install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html).
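With the toolkit installed, a quick smoke test (the CUDA image tag here is just an example) confirms that Docker can hand GPUs to containers:

```bash
docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi
```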
## Building a customized K3S image

To get the NVIDIA container runtime in the K3S image you need to build your own K3S image. The native K3S image is based on Alpine but the NVIDIA container runtime is not supported on Alpine yet. To get around this we need to build the image with a supported base image.

### Adapt the Dockerfile
```Dockerfile
# ... (earlier Dockerfile content omitted; see cuda/Dockerfile for the complete file)
ENV PATH="$PATH:/bin/aux"

ENTRYPOINT ["/bin/k3s"]
CMD ["agent"]
```
This [Dockerfile](cuda/Dockerfile) is based on the [K3S Dockerfile](https://github.com/rancher/k3s/blob/master/package/Dockerfile).
The following changes are applied:

1. Change the base images to Ubuntu 18.04 so the NVIDIA Container Runtime can be installed
2. Add a custom containerd `config.toml` template to add the NVIDIA Container Runtime. This replaces the default `runc` runtime
3. Add a manifest for the NVIDIA driver plugin for Kubernetes

### Configure containerd
Next, containerd needs to be configured to use the NVIDIA Container Runtime, which means customizing the config.toml it uses at startup. K3S provides a way to do this using a [config.toml.tmpl](cuda/config.toml.tmpl) file. More information can be found on the [K3S site](https://rancher.com/docs/k3s/latest/en/advanced/#configuring-containerd).
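The full template is in the linked file; the essential change (shown below as an illustrative excerpt, not the complete template) replaces the default `runc` runtime with the NVIDIA Container Runtime:

```go
[plugins.cri.containerd.runtimes.runc]
  # ---- changed from 'io.containerd.runc.v2' for GPU support
  runtime_type = "io.containerd.runtime.v1.linux"

[plugins.linux]
  runtime = "nvidia-container-runtime"
```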
### The NVIDIA device plugin

To enable NVIDIA GPU support on Kubernetes you also need to install the [NVIDIA device plugin](https://github.com/NVIDIA/k8s-device-plugin). The device plugin is a daemonset that allows you to automatically:

* Expose the number of GPUs on each node of your cluster
* Keep track of the health of your GPUs
* Run GPU-enabled containers in your Kubernetes cluster.
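The manifest shipped as [gpu.yaml](cuda/gpu.yaml) deploys the plugin as a DaemonSet. A minimal sketch based on the upstream example manifest (the image tag is an assumption, check the linked file for the exact version):

```yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin-daemonset
  namespace: kube-system
spec:
  selector:
    matchLabels:
      name: nvidia-device-plugin-ds
  template:
    metadata:
      labels:
        name: nvidia-device-plugin-ds
    spec:
      tolerations:
        # schedule on GPU nodes even if they are tainted
        - key: nvidia.com/gpu
          operator: Exists
          effect: NoSchedule
      containers:
        - name: nvidia-device-plugin-ctr
          image: nvidia/k8s-device-plugin:1.0.0-beta6  # assumption: pick the tag matching your setup
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
      volumes:
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
```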
### Build the K3S image

To build the custom image, we first need to build K3S itself, because the Dockerfile needs the generated build output.

Put the following files in a directory:

* [Dockerfile](cuda/Dockerfile)
* [config.toml.tmpl](cuda/config.toml.tmpl)
* [gpu.yaml](cuda/gpu.yaml)
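The build steps themselves are omitted above; a rough sketch (the K3S tag and build procedure are assumptions, adjust them to the release you target):

```bash
# build K3S from source to obtain the generated output used by the Dockerfile
git clone --depth 1 --branch v1.18.10+k3s1 https://github.com/rancher/k3s.git
(cd k3s && make)

# build the customized image from the directory containing the files listed above
docker build -t k3s-gpu:v1.18.10-k3s1 .
```

Once built, the image can be used with k3d: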
```bash
k3d cluster create --no-lb --image k3s-gpu:v1.18.10-k3s1 --gpus all
```
Deploy a [test pod](cuda/cuda-vector-add.yaml):

```bash
kubectl apply -f cuda-vector-add.yaml
kubectl logs cuda-vector-add
```
## Known issues

* This approach does not work on WSL2 yet. The NVIDIA driver plugin and container runtime rely on the NVIDIA Management Library (NVML) which is not yet supported. See the [CUDA on WSL User Guide](https://docs.nvidia.com/cuda/wsl-user-guide/index.html#known-limitations).

## Acknowledgements

Most of the information in this article was obtained from various sources:

* [Add NVIDIA GPU support to k3s with containerd](https://dev.to/mweibel/add-nvidia-gpu-support-to-k3s-with-containerd-4j17)
In this example, we will deploy a simple nginx webserver deployment and make it accessible via ingress.
Therefore, we have to create the cluster in a way that the internal port 80 (on which the `traefik` ingress controller is listening) is exposed on the host system.
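1. Create a cluster, mapping the loadbalancer's port 80 to a port on the host, for example (the host ports 8081 and 6550 are arbitrary choices):

   ```bash
   k3d cluster create --api-port 6550 -p "8081:80@loadbalancer" --agents 2
   ```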
   - the `loadbalancer` nodefilter matches only the `serverlb` that's deployed in front of a cluster's server nodes
   - all ports exposed on the `serverlb` will be proxied to the same ports on all server nodes in the cluster

2. Get the kubeconfig file (redundant, as `k3d cluster create` already merges it into your default kubeconfig file)
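The intermediate steps (deploying the webserver and the ingress) are omitted above; a minimal sketch, assuming the cluster from step 1 (the manifest file name is hypothetical):

```bash
# deploy nginx and expose it cluster-internally
kubectl create deployment nginx --image=nginx
kubectl create service clusterip nginx --tcp=80:80

# create an ingress object routing to the nginx service
# (the traefik ingress controller shipped with k3s picks it up)
kubectl apply -f nginx-ingress.yaml   # hypothetical manifest file

# curl it via the host port mapped at cluster creation
curl localhost:8081/
```

Alternatively, a `NodePort` service can be exposed by mapping a host port onto a node's port at cluster creation time; the following notes apply to that approach: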
- **Note**: Kubernetes' default NodePort range is [`30000-32767`](https://kubernetes.io/docs/concepts/services-networking/service/#nodeport)
- **Note**: You may as well expose the whole NodePort range from the very beginning, e.g. via `k3d cluster create mycluster --agents 3 -p "30000-32767:30000-32767@server[0]"` (See [this video from @portainer](https://www.youtube.com/watch?v=5HaU6338lAk))
  - **Warning**: Docker creates iptables entries and a new proxy process per port-mapping, so this may take a very long time or even freeze your system!
## Using a local registry

### Using k3d-managed registries

!!! info "Ported in k3d v4.0.0"
    The k3d-managed registry is available again as of k3d v4.0.0 (January 2021)
#### Create a dedicated registry together with your cluster
1. `#!bash k3d cluster create mycluster --registry-create`: This creates your cluster `mycluster` together with a registry container called `k3d-mycluster-registry`
- k3d sets everything up in the cluster for containerd to be able to pull images from that registry (using the `registries.yaml` file)
- the port on which the registry is listening will be mapped to a random port on your host system
2. Check the k3d command output or `#!bash docker ps -f name=k3d-mycluster-registry` to find the exposed port (let's use `12345` here)
3. Pull some image (optional) `#!bash docker pull alpine:latest`, re-tag it to reference your newly created registry `#!bash docker tag alpine:latest k3d-mycluster-registry:12345/testimage:local` and push it `#!bash docker push k3d-mycluster-registry:12345/testimage:local`
4. Use kubectl to create a new pod in your cluster using that image to see if the cluster can pull from the new registry: `#!bash kubectl run --image k3d-mycluster-registry:12345/testimage:local testimage --command -- tail -f /dev/null` (creates a container that will not do anything but keep on running)
#### Create a customized k3d-managed registry
1. `#!bash k3d registry create myregistry.localhost --port 5111` creates a new registry called `myregistry.localhost` (could be used with automatic resolution of `*.localhost`, see next section)
2. `#!bash k3d cluster create newcluster --registry-use k3d-myregistry.localhost:5111` (make sure you use the `k3d-` prefix here) creates a new cluster set up to use that registry
3. Continue with steps 3 and 4 from the previous section to test the new registry
### Using your own (not k3d-managed) local registry
You can start your own local registry with some `docker` commands, like:
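```bash
# minimal sketch -- container name and host port are assumptions chosen to match
# the 'registry.localhost:5000' examples below
docker volume create local_registry
docker container run -d --name registry.localhost \
  -v local_registry:/var/lib/registry --restart always \
  -p 5000:5000 registry:2
```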
You should test that you can

- push to your registry from your local development machine.
- use images from that registry in `Deployments` in your k3d cluster.
We will verify these two things for a local registry (located at `registry.localhost:5000`) running in your development machine. Things would be basically the same for checking an external registry, but some additional configuration could be necessary in your local machine when using an authenticated or secure registry (please refer to Docker's documentation for this).
First, we can download some image (like `nginx`) and push it to our local registry with:

```bash
docker pull nginx:latest
docker tag nginx:latest registry.localhost:5000/nginx:latest
docker push registry.localhost:5000/nginx:latest
```
## Handling multiple clusters

`k3d kubeconfig merge` lets you specify one or more clusters via arguments _or_ all via `--all`.
All kubeconfigs will then be merged into a single file if `--kubeconfig-merge-default` or `--output` is specified.
If none of those two flags was specified, a new file will be created per cluster and the merged path (e.g. `$HOME/.k3d/kubeconfig-cluster1.yaml:$HOME/.k3d/kubeconfig-cluster2.yaml`) will be returned.
Note that with multiple clusters specified, the `--kubeconfig-switch-context` flag will change the current context to the cluster which was last in the list.
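For example, to collect the kubeconfigs of two clusters into one file (names and path are placeholders):

```bash
k3d kubeconfig merge cluster1 cluster2 --output ~/.k3d/combined-kubeconfig.yaml
```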