@ -1,47 +0,0 @@
# Builds a k3s node image on top of an NVIDIA CUDA base so that containerd can
# start GPU workloads through nvidia-container-runtime.

# Tag of the upstream k3s image whose root filesystem is overlaid further down.
ARG K3S_TAG="v1.21.2-k3s1"
FROM rancher/k3s:$K3S_TAG AS k3s

FROM nvidia/cuda:11.2.0-base-ubuntu18.04

# Optional exact apt version of nvidia-container-runtime (--build-arg).
# When empty, the newest version offered by the repository is installed.
ARG NVIDIA_CONTAINER_RUNTIME_VERSION=""

RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections

# Install NVIDIA Container Runtime
# Everything happens in one layer so that `apt-get update` is never cached
# separately from the installs that rely on it, and the package lists are
# removed again before the layer is committed. The key and the source list are
# downloaded to files (curl -f) instead of being piped, so a failed download
# aborts the build instead of being masked by the right-hand side of a pipe.
RUN apt-get update && \
    apt-get -y install gnupg2 curl && \
    curl -fsSL https://nvidia.github.io/nvidia-container-runtime/gpgkey \
        -o /tmp/nvidia-container-runtime.gpgkey && \
    apt-key add /tmp/nvidia-container-runtime.gpgkey && \
    rm -f /tmp/nvidia-container-runtime.gpgkey && \
    curl -fsSL https://nvidia.github.io/nvidia-container-runtime/ubuntu18.04/nvidia-container-runtime.list \
        -o /etc/apt/sources.list.d/nvidia-container-runtime.list && \
    apt-get update && \
    apt-get -y install "nvidia-container-runtime${NVIDIA_CONTAINER_RUNTIME_VERSION:+=${NVIDIA_CONTAINER_RUNTIME_VERSION}}" && \
    rm -rf /var/lib/apt/lists/*

# Overlay the complete k3s root filesystem onto the CUDA base image.
COPY --from=k3s / /

# Write /etc/nsswitch.conf and make /tmp world-writable with the sticky bit.
RUN mkdir -p /etc && \
    echo 'hosts: files dns' > /etc/nsswitch.conf && \
    chmod 1777 /tmp

# Provide custom containerd configuration to configure the nvidia-container-runtime
# (COPY creates any missing directories in the destination path).
COPY config.toml.tmpl /var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl

# Deploy the nvidia driver plugin on startup
COPY device-plugin-daemonset.yaml /var/lib/rancher/k3s/server/manifests/nvidia-device-plugin-daemonset.yaml

# Declared after the paths above are populated so the copied files are kept.
VOLUME /var/lib/kubelet
VOLUME /var/lib/rancher/k3s
VOLUME /var/lib/cni
VOLUME /var/log

ENV PATH="$PATH:/bin/aux"

ENTRYPOINT ["/bin/k3s"]
CMD ["agent"]

@ -1,21 +0,0 @@
# Builds the CUDA-enabled k3s image from the Dockerfile in the current
# directory and pushes it.
#
# Environment:
#   K3S_TAG                            k3s image tag to build from (default: v1.21.2-k3s1)
#   IMAGE                              required; image reference to tag and push
#   NVIDIA_CONTAINER_RUNTIME_VERSION   optional; forwarded as a build arg when set
set -euxo pipefail

K3S_TAG=${K3S_TAG:="v1.21.2-k3s1"} # replace + with -, if needed

# Fail early with a readable message; under `set -u` an unset IMAGE would
# otherwise abort the script with a bare "unbound variable" error.
: "${IMAGE:?IMAGE must be set to the image reference to build and push (e.g. registry/repository:tag)}"

build_args=(--build-arg "K3S_TAG=${K3S_TAG}")
if [ -n "${NVIDIA_CONTAINER_RUNTIME_VERSION:-}" ]; then
  build_args+=(--build-arg "NVIDIA_CONTAINER_RUNTIME_VERSION=${NVIDIA_CONTAINER_RUNTIME_VERSION}")
fi

# due to some unknown reason, copying symlinks fails with buildkit enabled
DOCKER_BUILDKIT=0 docker build \
  "${build_args[@]}" \
  -t "$IMAGE" .
docker push "$IMAGE"
echo "Done!"

@ -1,55 +0,0 @@
# containerd configuration template for the k3s agent with NVIDIA GPU support.
# The template actions are filled in from the node configuration when the file
# is rendered. Each key lives under its table header and every if/range action
# is closed by a matching end.
[plugins.opt]
  path = "{{ .NodeConfig.Containerd.Opt }}"

[plugins.cri]
  # The CRI streaming server listens on loopback only.
  stream_server_address = "127.0.0.1"
  stream_server_port = "10010"

{{- if .IsRunningInUserNS }}
  disable_cgroup = true
  disable_apparmor = true
  restrict_oom_score_adj = true
{{end}}

{{- if .NodeConfig.AgentConfig.PauseImage }}
  sandbox_image = "{{ .NodeConfig.AgentConfig.PauseImage }}"
{{end}}

{{- if not .NodeConfig.NoFlannel }}
[plugins.cri.cni]
  bin_dir = "{{ .NodeConfig.AgentConfig.CNIBinDir }}"
  conf_dir = "{{ .NodeConfig.AgentConfig.CNIConfDir }}"
{{end}}

[plugins.cri.containerd.runtimes.runc]
  # ---- changed from 'io.containerd.runc.v2' for GPU support
  runtime_type = "io.containerd.runtime.v1.linux"

# ---- added for GPU support
[plugins.linux]
  runtime = "nvidia-container-runtime"

{{ if .PrivateRegistryConfig }}
{{ if .PrivateRegistryConfig.Mirrors }}
[plugins.cri.registry.mirrors]{{end}}
{{range $k, $v := .PrivateRegistryConfig.Mirrors }}
[plugins.cri.registry.mirrors."{{$k}}"]
  endpoint = [{{range $i, $j := $v.Endpoints}}{{if $i}}, {{end}}{{printf "%q" .}}{{end}}]
{{end}}

{{range $k, $v := .PrivateRegistryConfig.Configs }}
{{ if $v.Auth }}
[plugins.cri.registry.configs."{{$k}}".auth]
  {{ if $v.Auth.Username }}username = "{{ $v.Auth.Username }}"{{end}}
  {{ if $v.Auth.Password }}password = "{{ $v.Auth.Password }}"{{end}}
  {{ if $v.Auth.Auth }}auth = "{{ $v.Auth.Auth }}"{{end}}
  {{ if $v.Auth.IdentityToken }}identitytoken = "{{ $v.Auth.IdentityToken }}"{{end}}
{{end}}
{{ if $v.TLS }}
[plugins.cri.registry.configs."{{$k}}".tls]
  {{ if $v.TLS.CAFile }}ca_file = "{{ $v.TLS.CAFile }}"{{end}}
  {{ if $v.TLS.CertFile }}cert_file = "{{ $v.TLS.CertFile }}"{{end}}
  {{ if $v.TLS.KeyFile }}key_file = "{{ $v.TLS.KeyFile }}"{{end}}
{{end}}
{{end}}
{{end}}

@ -1,12 +0,0 @@
# Test pod that requests one NVIDIA GPU and runs the CUDA vector-add sample.
# With restartPolicy OnFailure the container is restarted only when it exits
# with an error.
apiVersion: v1
kind: Pod
metadata:
  name: cuda-vector-add
spec:
  restartPolicy: OnFailure
  containers:
    - name: cuda-vector-add
      image: "k8s.gcr.io/cuda-vector-add:v0.1"
      resources:
        limits:
          # GPUs are requested through the extended resource name.
          nvidia.com/gpu: 1

@ -1,41 +0,0 @@
# DaemonSet that runs the NVIDIA device plugin container in kube-system and
# gives it access to the kubelet's device-plugins directory on the host.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin-daemonset
  namespace: kube-system
spec:
  # The selector must match the labels set on the pod template below.
  selector:
    matchLabels:
      name: nvidia-device-plugin-ds
  template:
    metadata:
      # Mark this pod as a critical add-on; when enabled, the critical add-on scheduler
      # reserves resources for critical add-on pods so that they can be rescheduled after
      # a failure. This annotation works in tandem with the toleration below.
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ""
      labels:
        name: nvidia-device-plugin-ds
    spec:
      tolerations:
        # Allow this pod to be rescheduled while the node is in "critical add-ons only" mode.
        # This, along with the annotation above marks this pod as a critical add-on.
        - key: CriticalAddonsOnly
          operator: Exists
      containers:
        - env:
            - name: DP_DISABLE_HEALTHCHECKS
              value: xids
          image: nvidia/k8s-device-plugin:1.11
          name: nvidia-device-plugin-ctr
          securityContext:
            allowPrivilegeEscalation: true
            capabilities:
              drop: ["ALL"]
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
      volumes:
        # Host directory mounted into the container at the same path.
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins

