Little helper to run CNCF's k3s in Docker
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
k3d/v5.4.6/usage/advanced/cuda/index.html

1698 lines
68 KiB

<!doctype html>
<html lang="en" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="description" content="Little helper to run Rancher Labs' k3s in Docker">
<link rel="canonical" href="https://k3d.io/v5.4.6/usage/advanced/cuda/">
<link rel="icon" href="../../../static/img/favicons_black_blue/favicon.png">
<meta name="generator" content="mkdocs-1.3.1, mkdocs-material-8.3.9">
<title>Running CUDA workloads - k3d</title>
<link rel="stylesheet" href="../../../assets/stylesheets/main.1d29e8d0.min.css">
<link rel="stylesheet" href="../../../assets/stylesheets/palette.cbb835fc.min.css">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
<link rel="stylesheet" href="../../../static/css/asciinema-player.css">
<link rel="stylesheet" href="../../../static/css/extra.css">
<script>__md_scope=new URL("../../..",location),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
</head>
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="black" data-md-color-accent="grey">
<script>var palette=__md_get("__palette");if(palette&&"object"==typeof palette.color)for(var key of Object.keys(palette.color))document.body.setAttribute("data-md-color-"+key,palette.color[key])</script>
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<div data-md-component="skip">
<a href="#running-cuda-workloads" class="md-skip">
Skip to content
</a>
</div>
<div data-md-component="announce">
</div>
<div data-md-component="outdated" hidden>
<aside class="md-banner md-banner--warning">
</aside>
</div>
<header class="md-header" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="../../.." title="k3d" class="md-header__button md-logo" aria-label="k3d" data-md-component="logo">
<img src="../../../static/img/k3d_logo_black_green.svg" alt="logo">
</a>
<label class="md-header__button md-icon" for="__drawer">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3V6m0 5h18v2H3v-2m0 5h18v2H3v-2Z"/></svg>
</label>
<div class="md-header__title" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
k3d
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
Running CUDA workloads
</span>
</div>
</div>
</div>
<form class="md-header__option" data-md-component="palette">
<input class="md-option" data-md-color-media="(prefers-color-scheme: light)" data-md-color-scheme="default" data-md-color-primary="black" data-md-color-accent="grey" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_1">
<label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_2" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M17 6H7c-3.31 0-6 2.69-6 6s2.69 6 6 6h10c3.31 0 6-2.69 6-6s-2.69-6-6-6zm0 10H7c-2.21 0-4-1.79-4-4s1.79-4 4-4h10c2.21 0 4 1.79 4 4s-1.79 4-4 4zM7 9c-1.66 0-3 1.34-3 3s1.34 3 3 3 3-1.34 3-3-1.34-3-3-3z"/></svg>
</label>
<input class="md-option" data-md-color-media="(prefers-color-scheme: dark)" data-md-color-scheme="slate" data-md-color-primary="light-blue" data-md-color-accent="" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_2">
<label class="md-header__button md-icon" title="Switch to light mode" for="__palette_1" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M17 7H7a5 5 0 0 0-5 5 5 5 0 0 0 5 5h10a5 5 0 0 0 5-5 5 5 0 0 0-5-5m0 8a3 3 0 0 1-3-3 3 3 0 0 1 3-3 3 3 0 0 1 3 3 3 3 0 0 1-3 3Z"/></svg>
</label>
</form>
<label class="md-header__button md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
</label>
<div class="md-search" data-md-component="search" role="dialog">
<label class="md-search__overlay" for="__search"></label>
<div class="md-search__inner" role="search">
<form class="md-search__form" name="search">
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
<label class="md-search__icon md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
</label>
<nav class="md-search__options" aria-label="Search">
<button type="reset" class="md-search__icon md-icon" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41Z"/></svg>
</button>
</nav>
<div class="md-search__suggest" data-md-component="search-suggest"></div>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" data-md-scrollfix>
<div class="md-search-result" data-md-component="search-result">
<div class="md-search-result__meta">
Initializing search
</div>
<ol class="md-search-result__list"></ol>
</div>
</div>
</div>
</div>
</div>
<div class="md-header__source">
<a href="https://github.com/k3d-io/k3d" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.1.1 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2022 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg>
</div>
<div class="md-source__repository">
k3d-io/k3d
</div>
</a>
</div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
<div class="md-tabs__inner md-grid">
<ul class="md-tabs__list">
<li class="md-tabs__item">
<a href="../../.." class="md-tabs__link">
Overview
</a>
</li>
<li class="md-tabs__item">
<a href="../../configfile/" class="md-tabs__link md-tabs__link--active">
Guides
</a>
</li>
<li class="md-tabs__item">
<a href="../../../design/project/" class="md-tabs__link">
Design
</a>
</li>
<li class="md-tabs__item">
<a href="../../../faq/faq/" class="md-tabs__link">
FAQ
</a>
</li>
</ul>
</div>
</nav>
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../../.." title="k3d" class="md-nav__button md-logo" aria-label="k3d" data-md-component="logo">
<img src="../../../static/img/k3d_logo_black_green.svg" alt="logo">
</a>
k3d
</label>
<div class="md-nav__source">
<a href="https://github.com/k3d-io/k3d" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.1.1 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2022 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"/></svg>
</div>
<div class="md-source__repository">
k3d-io/k3d
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../.." class="md-nav__link">
Overview
</a>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-nav__toggle md-toggle" data-md-toggle="__nav_2" type="checkbox" id="__nav_2" checked>
<label class="md-nav__link" for="__nav_2">
Guides
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" aria-label="Guides" data-md-level="1">
<label class="md-nav__title" for="__nav_2">
<span class="md-nav__icon md-icon"></span>
Guides
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../configfile/" class="md-nav__link">
Using Config Files
</a>
</li>
<li class="md-nav__item">
<a href="../../kubeconfig/" class="md-nav__link">
Handling Kubeconfigs
</a>
</li>
<li class="md-nav__item">
<a href="../../multiserver/" class="md-nav__link">
Creating multi-server clusters
</a>
</li>
<li class="md-nav__item">
<a href="../../registries/" class="md-nav__link">
Using Image Registries
</a>
</li>
<li class="md-nav__item">
<a href="../../exposing_services/" class="md-nav__link">
Exposing Services
</a>
</li>
<li class="md-nav__item">
<a href="../../importing_images/" class="md-nav__link">
Importing modes
</a>
</li>
<li class="md-nav__item">
<a href="../../k3s/" class="md-nav__link">
K3s Features in k3d
</a>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
<input class="md-nav__toggle md-toggle" data-md-toggle="__nav_2_8" type="checkbox" id="__nav_2_8" checked>
<label class="md-nav__link" for="__nav_2_8">
Advanced Guides
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" aria-label="Advanced Guides" data-md-level="2">
<label class="md-nav__title" for="__nav_2_8">
<span class="md-nav__icon md-icon"></span>
Advanced Guides
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../calico/" class="md-nav__link">
Use Calico instead of Flannel
</a>
</li>
<li class="md-nav__item md-nav__item--active">
<input class="md-nav__toggle md-toggle" data-md-toggle="toc" type="checkbox" id="__toc">
<label class="md-nav__link md-nav__link--active" for="__toc">
Running CUDA workloads
<span class="md-nav__icon md-icon"></span>
</label>
<a href="./" class="md-nav__link md-nav__link--active">
Running CUDA workloads
</a>
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#building-a-customized-k3s-image" class="md-nav__link">
Building a customized K3s image
</a>
<nav class="md-nav" aria-label="Building a customized K3s image">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#dockerfile" class="md-nav__link">
Dockerfile
</a>
</li>
<li class="md-nav__item">
<a href="#configure-containerd" class="md-nav__link">
Configure containerd
</a>
</li>
<li class="md-nav__item">
<a href="#the-nvidia-device-plugin" class="md-nav__link">
The NVIDIA device plugin
</a>
</li>
<li class="md-nav__item">
<a href="#build-the-k3s-image" class="md-nav__link">
Build the K3s image
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#run-and-test-the-custom-image-with-k3d" class="md-nav__link">
Run and test the custom image with k3d
</a>
</li>
<li class="md-nav__item">
<a href="#known-issues" class="md-nav__link">
Known issues
</a>
</li>
<li class="md-nav__item">
<a href="#acknowledgements" class="md-nav__link">
Acknowledgements
</a>
</li>
<li class="md-nav__item">
<a href="#authors" class="md-nav__link">
Authors
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../podman/" class="md-nav__link">
Using Podman instead of Docker
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" data-md-toggle="__nav_2_9" type="checkbox" id="__nav_2_9" checked>
<label class="md-nav__link" for="__nav_2_9">
Commands
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" aria-label="Commands" data-md-level="2">
<label class="md-nav__title" for="__nav_2_9">
<span class="md-nav__icon md-icon"></span>
Commands
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../commands/k3d/" class="md-nav__link">
K3d
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_cluster/" class="md-nav__link">
K3d cluster
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_cluster_create/" class="md-nav__link">
K3d cluster create
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_cluster_delete/" class="md-nav__link">
K3d cluster delete
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_cluster_edit/" class="md-nav__link">
K3d cluster edit
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_cluster_list/" class="md-nav__link">
K3d cluster list
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_cluster_start/" class="md-nav__link">
K3d cluster start
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_cluster_stop/" class="md-nav__link">
K3d cluster stop
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_completion/" class="md-nav__link">
K3d completion
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_config/" class="md-nav__link">
K3d config
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_config_init/" class="md-nav__link">
K3d config init
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_config_migrate/" class="md-nav__link">
K3d config migrate
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_image/" class="md-nav__link">
K3d image
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_image_import/" class="md-nav__link">
K3d image import
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_kubeconfig/" class="md-nav__link">
K3d kubeconfig
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_kubeconfig_get/" class="md-nav__link">
K3d kubeconfig get
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_kubeconfig_merge/" class="md-nav__link">
K3d kubeconfig merge
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_node/" class="md-nav__link">
K3d node
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_node_create/" class="md-nav__link">
K3d node create
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_node_delete/" class="md-nav__link">
K3d node delete
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_node_edit/" class="md-nav__link">
K3d node edit
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_node_list/" class="md-nav__link">
K3d node list
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_node_start/" class="md-nav__link">
K3d node start
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_node_stop/" class="md-nav__link">
K3d node stop
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_registry/" class="md-nav__link">
K3d registry
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_registry_create/" class="md-nav__link">
K3d registry create
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_registry_delete/" class="md-nav__link">
K3d registry delete
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_registry_list/" class="md-nav__link">
K3d registry list
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_version/" class="md-nav__link">
K3d version
</a>
</li>
<li class="md-nav__item">
<a href="../../commands/k3d_version_list/" class="md-nav__link">
K3d version list
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" data-md-toggle="__nav_3" type="checkbox" id="__nav_3" checked>
<label class="md-nav__link" for="__nav_3">
Design
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" aria-label="Design" data-md-level="1">
<label class="md-nav__title" for="__nav_3">
<span class="md-nav__icon md-icon"></span>
Design
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../design/project/" class="md-nav__link">
Project Overview
</a>
</li>
<li class="md-nav__item">
<a href="../../../design/concepts/" class="md-nav__link">
Concepts
</a>
</li>
<li class="md-nav__item">
<a href="../../../design/defaults/" class="md-nav__link">
Defaults
</a>
</li>
<li class="md-nav__item">
<a href="../../../design/networking/" class="md-nav__link">
Networking
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" data-md-toggle="__nav_4" type="checkbox" id="__nav_4" checked>
<label class="md-nav__link" for="__nav_4">
FAQ
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" aria-label="FAQ" data-md-level="1">
<label class="md-nav__title" for="__nav_4">
<span class="md-nav__icon md-icon"></span>
FAQ
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../../faq/faq/" class="md-nav__link">
FAQ
</a>
</li>
<li class="md-nav__item">
<a href="../../../faq/compatibility/" class="md-nav__link">
Compatibility
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#building-a-customized-k3s-image" class="md-nav__link">
Building a customized K3s image
</a>
<nav class="md-nav" aria-label="Building a customized K3s image">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#dockerfile" class="md-nav__link">
Dockerfile
</a>
</li>
<li class="md-nav__item">
<a href="#configure-containerd" class="md-nav__link">
Configure containerd
</a>
</li>
<li class="md-nav__item">
<a href="#the-nvidia-device-plugin" class="md-nav__link">
The NVIDIA device plugin
</a>
</li>
<li class="md-nav__item">
<a href="#build-the-k3s-image" class="md-nav__link">
Build the K3s image
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#run-and-test-the-custom-image-with-k3d" class="md-nav__link">
Run and test the custom image with k3d
</a>
</li>
<li class="md-nav__item">
<a href="#known-issues" class="md-nav__link">
Known issues
</a>
</li>
<li class="md-nav__item">
<a href="#acknowledgements" class="md-nav__link">
Acknowledgements
</a>
</li>
<li class="md-nav__item">
<a href="#authors" class="md-nav__link">
Authors
</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<a href="https://github.com/k3d-io/k3d/edit/main/docs/usage/advanced/cuda.md" title="Edit this page" class="md-content__button md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20.71 7.04c.39-.39.39-1.04 0-1.41l-2.34-2.34c-.37-.39-1.02-.39-1.41 0l-1.84 1.83 3.75 3.75M3 17.25V21h3.75L17.81 9.93l-3.75-3.75L3 17.25Z"/></svg>
</a>
<h1 id="running-cuda-workloads">Running CUDA workloads<a class="headerlink" href="#running-cuda-workloads" title="Permanent link">&para;</a></h1>
<p>If you want to run CUDA workloads on the K3s container you need to customize the container.<br />
CUDA workloads require the NVIDIA Container Runtime, so containerd needs to be configured to use this runtime.<br />
The K3s container itself also needs to run with this runtime.<br />
If you are using Docker you can install the <a href="https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html">NVIDIA Container Toolkit</a>.</p>
<h2 id="building-a-customized-k3s-image">Building a customized K3s image<a class="headerlink" href="#building-a-customized-k3s-image" title="Permanent link">&para;</a></h2>
<p>To get the NVIDIA container runtime in the K3s image you need to build your own K3s image.<br />
The native K3s image is based on Alpine but the NVIDIA container runtime is not supported on Alpine yet.<br />
To get around this we need to build the image with a supported base image.</p>
<h3 id="dockerfile">Dockerfile<a class="headerlink" href="#dockerfile" title="Permanent link">&para;</a></h3>
<p><a href="cuda/Dockerfile">Dockerfile</a>: </p>
<div class="highlight"><pre><span></span><code><span class="k">ARG</span><span class="w"> </span><span class="nv">K3S_TAG</span><span class="o">=</span><span class="s2">&quot;v1.21.2-k3s1&quot;</span>
<span class="k">FROM</span><span class="w"> </span><span class="s">rancher/k3s:$K3S_TAG</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="s">k3s</span>
<span class="k">FROM</span><span class="w"> </span><span class="s">nvidia/cuda:11.2.0-base-ubuntu18.04</span>
<span class="k">ARG</span><span class="w"> </span>NVIDIA_CONTAINER_RUNTIME_VERSION
<span class="k">ENV</span><span class="w"> </span><span class="nv">NVIDIA_CONTAINER_RUNTIME_VERSION</span><span class="o">=</span><span class="nv">$NVIDIA_CONTAINER_RUNTIME_VERSION</span>
<span class="k">RUN</span><span class="w"> </span><span class="nb">echo</span> <span class="s1">&#39;debconf debconf/frontend select Noninteractive&#39;</span> <span class="p">|</span> debconf-set-selections
<span class="k">RUN</span><span class="w"> </span>apt-get update <span class="o">&amp;&amp;</span> <span class="se">\</span>
apt-get -y install gnupg2 curl
<span class="c"># Install NVIDIA Container Runtime</span>
<span class="k">RUN</span><span class="w"> </span>curl -s -L https://nvidia.github.io/nvidia-container-runtime/gpgkey <span class="p">|</span> apt-key add -
<span class="k">RUN</span><span class="w"> </span>curl -s -L https://nvidia.github.io/nvidia-container-runtime/ubuntu18.04/nvidia-container-runtime.list <span class="p">|</span> tee /etc/apt/sources.list.d/nvidia-container-runtime.list
<span class="k">RUN</span><span class="w"> </span>apt-get update <span class="o">&amp;&amp;</span> <span class="se">\</span>
apt-get -y install nvidia-container-runtime<span class="o">=</span><span class="si">${</span><span class="nv">NVIDIA_CONTAINER_RUNTIME_VERSION</span><span class="si">}</span>
<span class="k">COPY</span><span class="w"> </span>--from<span class="o">=</span>k3s / /
<span class="k">RUN</span><span class="w"> </span>mkdir -p /etc <span class="o">&amp;&amp;</span> <span class="se">\</span>
<span class="nb">echo</span> <span class="s1">&#39;hosts: files dns&#39;</span> &gt; /etc/nsswitch.conf
<span class="k">RUN</span><span class="w"> </span>chmod <span class="m">1777</span> /tmp
<span class="c"># Provide custom containerd configuration to configure the nvidia-container-runtime</span>
<span class="k">RUN</span><span class="w"> </span>mkdir -p /var/lib/rancher/k3s/agent/etc/containerd/
<span class="k">COPY</span><span class="w"> </span>config.toml.tmpl /var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl
<span class="c"># Deploy the nvidia driver plugin on startup</span>
<span class="k">RUN</span><span class="w"> </span>mkdir -p /var/lib/rancher/k3s/server/manifests
<span class="k">COPY</span><span class="w"> </span>device-plugin-daemonset.yaml /var/lib/rancher/k3s/server/manifests/nvidia-device-plugin-daemonset.yaml
<span class="k">VOLUME</span><span class="w"> </span><span class="s">/var/lib/kubelet</span>
<span class="k">VOLUME</span><span class="w"> </span><span class="s">/var/lib/rancher/k3s</span>
<span class="k">VOLUME</span><span class="w"> </span><span class="s">/var/lib/cni</span>
<span class="k">VOLUME</span><span class="w"> </span><span class="s">/var/log</span>
<span class="k">ENV</span><span class="w"> </span><span class="nv">PATH</span><span class="o">=</span><span class="s2">&quot;</span><span class="nv">$PATH</span><span class="s2">:/bin/aux&quot;</span>
<span class="k">ENTRYPOINT</span><span class="w"> </span><span class="p">[</span><span class="s2">&quot;/bin/k3s&quot;</span><span class="p">]</span>
<span class="k">CMD</span><span class="w"> </span><span class="p">[</span><span class="s2">&quot;agent&quot;</span><span class="p">]</span>
</code></pre></div>
<p>This Dockerfile is based on the <a href="https://github.com/rancher/k3s/blob/master/package/Dockerfile">K3s Dockerfile</a>.
The following changes are applied:</p>
<ol>
<li>Change the base image to <code>nvidia/cuda:11.2.0-base-ubuntu18.04</code> so the NVIDIA Container Runtime can be installed. The version of <code>cuda:xx.x.x</code> must match the one you&rsquo;re planning to use.</li>
<li>Add a custom containerd <code>config.toml</code> template to add the NVIDIA Container Runtime. This replaces the default <code>runc</code> runtime.</li>
<li>Add a manifest for the NVIDIA device plugin for Kubernetes.</li>
</ol>
<h3 id="configure-containerd">Configure containerd<a class="headerlink" href="#configure-containerd" title="Permanent link">&para;</a></h3>
<p>We need to configure containerd to use the NVIDIA Container Runtime. We need to customize the config.toml that is used at startup. K3s provides a way to do this using a <a href="config.toml.tmpl">config.toml.tmpl</a> file. More information can be found on the <a href="https://rancher.com/docs/k3s/latest/en/advanced/#configuring-containerd">K3s site</a>.</p>
<div class="highlight"><pre><span></span><code><span class="p">[</span><span class="nx">plugins</span><span class="p">.</span><span class="nx">opt</span><span class="p">]</span><span class="w"></span>
<span class="w"> </span><span class="nx">path</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="s">&quot;{{ .NodeConfig.Containerd.Opt }}&quot;</span><span class="w"></span>
<span class="p">[</span><span class="nx">plugins</span><span class="p">.</span><span class="nx">cri</span><span class="p">]</span><span class="w"></span>
<span class="w"> </span><span class="nx">stream_server_address</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="s">&quot;127.0.0.1&quot;</span><span class="w"></span>
<span class="w"> </span><span class="nx">stream_server_port</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="s">&quot;10010&quot;</span><span class="w"></span>
<span class="p">{{</span><span class="o">-</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">.</span><span class="nx">IsRunningInUserNS</span><span class="w"> </span><span class="p">}}</span><span class="w"></span>
<span class="w"> </span><span class="nx">disable_cgroup</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="kc">true</span><span class="w"></span>
<span class="w"> </span><span class="nx">disable_apparmor</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="kc">true</span><span class="w"></span>
<span class="w"> </span><span class="nx">restrict_oom_score_adj</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="kc">true</span><span class="w"></span>
<span class="p">{{</span><span class="nx">end</span><span class="p">}}</span><span class="w"></span>
<span class="p">{{</span><span class="o">-</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">.</span><span class="nx">NodeConfig</span><span class="p">.</span><span class="nx">AgentConfig</span><span class="p">.</span><span class="nx">PauseImage</span><span class="w"> </span><span class="p">}}</span><span class="w"></span>
<span class="w"> </span><span class="nx">sandbox_image</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="s">&quot;{{ .NodeConfig.AgentConfig.PauseImage }}&quot;</span><span class="w"></span>
<span class="p">{{</span><span class="nx">end</span><span class="p">}}</span><span class="w"></span>
<span class="p">{{</span><span class="o">-</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="nx">not</span><span class="w"> </span><span class="p">.</span><span class="nx">NodeConfig</span><span class="p">.</span><span class="nx">NoFlannel</span><span class="w"> </span><span class="p">}}</span><span class="w"></span>
<span class="p">[</span><span class="nx">plugins</span><span class="p">.</span><span class="nx">cri</span><span class="p">.</span><span class="nx">cni</span><span class="p">]</span><span class="w"></span>
<span class="w"> </span><span class="nx">bin_dir</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="s">&quot;{{ .NodeConfig.AgentConfig.CNIBinDir }}&quot;</span><span class="w"></span>
<span class="w"> </span><span class="nx">conf_dir</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="s">&quot;{{ .NodeConfig.AgentConfig.CNIConfDir }}&quot;</span><span class="w"></span>
<span class="p">{{</span><span class="nx">end</span><span class="p">}}</span><span class="w"></span>
<span class="p">[</span><span class="nx">plugins</span><span class="p">.</span><span class="nx">cri</span><span class="p">.</span><span class="nx">containerd</span><span class="p">.</span><span class="nx">runtimes</span><span class="p">.</span><span class="nx">runc</span><span class="p">]</span><span class="w"></span>
<span class="w"> </span><span class="err">#</span><span class="w"> </span><span class="o">----</span><span class="w"> </span><span class="nx">changed</span><span class="w"> </span><span class="nx">from</span><span class="w"> </span><span class="err">&#39;</span><span class="nx">io</span><span class="p">.</span><span class="nx">containerd</span><span class="p">.</span><span class="nx">runc</span><span class="p">.</span><span class="nx">v2</span><span class="err">&#39;</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="nx">GPU</span><span class="w"> </span><span class="nx">support</span><span class="w"></span>
<span class="w"> </span><span class="nx">runtime_type</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="s">&quot;io.containerd.runtime.v1.linux&quot;</span><span class="w"></span>
<span class="err">#</span><span class="w"> </span><span class="o">----</span><span class="w"> </span><span class="nx">added</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="nx">GPU</span><span class="w"> </span><span class="nx">support</span><span class="w"></span>
<span class="p">[</span><span class="nx">plugins</span><span class="p">.</span><span class="nx">linux</span><span class="p">]</span><span class="w"></span>
<span class="w"> </span><span class="nx">runtime</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="s">&quot;nvidia-container-runtime&quot;</span><span class="w"></span>
<span class="p">{{</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">.</span><span class="nx">PrivateRegistryConfig</span><span class="w"> </span><span class="p">}}</span><span class="w"></span>
<span class="p">{{</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">.</span><span class="nx">PrivateRegistryConfig</span><span class="p">.</span><span class="nx">Mirrors</span><span class="w"> </span><span class="p">}}</span><span class="w"></span>
<span class="p">[</span><span class="nx">plugins</span><span class="p">.</span><span class="nx">cri</span><span class="p">.</span><span class="nx">registry</span><span class="p">.</span><span class="nx">mirrors</span><span class="p">]{{</span><span class="nx">end</span><span class="p">}}</span><span class="w"></span>
<span class="p">{{</span><span class="k">range</span><span class="w"> </span><span class="err">$</span><span class="nx">k</span><span class="p">,</span><span class="w"> </span><span class="err">$</span><span class="nx">v</span><span class="w"> </span><span class="o">:=</span><span class="w"> </span><span class="p">.</span><span class="nx">PrivateRegistryConfig</span><span class="p">.</span><span class="nx">Mirrors</span><span class="w"> </span><span class="p">}}</span><span class="w"></span>
<span class="p">[</span><span class="nx">plugins</span><span class="p">.</span><span class="nx">cri</span><span class="p">.</span><span class="nx">registry</span><span class="p">.</span><span class="nx">mirrors</span><span class="p">.</span><span class="s">&quot;{{$k}}&quot;</span><span class="p">]</span><span class="w"></span>
<span class="w"> </span><span class="nx">endpoint</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="p">[{{</span><span class="k">range</span><span class="w"> </span><span class="err">$</span><span class="nx">i</span><span class="p">,</span><span class="w"> </span><span class="err">$</span><span class="nx">j</span><span class="w"> </span><span class="o">:=</span><span class="w"> </span><span class="err">$</span><span class="nx">v</span><span class="p">.</span><span class="nx">Endpoints</span><span class="p">}}{{</span><span class="k">if</span><span class="w"> </span><span class="err">$</span><span class="nx">i</span><span class="p">}},</span><span class="w"> </span><span class="p">{{</span><span class="nx">end</span><span class="p">}}{{</span><span class="nx">printf</span><span class="w"> </span><span class="s">&quot;%q&quot;</span><span class="w"> </span><span class="p">.}}{{</span><span class="nx">end</span><span class="p">}}]</span><span class="w"></span>
<span class="p">{{</span><span class="nx">end</span><span class="p">}}</span><span class="w"></span>
<span class="p">{{</span><span class="k">range</span><span class="w"> </span><span class="err">$</span><span class="nx">k</span><span class="p">,</span><span class="w"> </span><span class="err">$</span><span class="nx">v</span><span class="w"> </span><span class="o">:=</span><span class="w"> </span><span class="p">.</span><span class="nx">PrivateRegistryConfig</span><span class="p">.</span><span class="nx">Configs</span><span class="w"> </span><span class="p">}}</span><span class="w"></span>
<span class="p">{{</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="err">$</span><span class="nx">v</span><span class="p">.</span><span class="nx">Auth</span><span class="w"> </span><span class="p">}}</span><span class="w"></span>
<span class="p">[</span><span class="nx">plugins</span><span class="p">.</span><span class="nx">cri</span><span class="p">.</span><span class="nx">registry</span><span class="p">.</span><span class="nx">configs</span><span class="p">.</span><span class="s">&quot;{{$k}}&quot;</span><span class="p">.</span><span class="nx">auth</span><span class="p">]</span><span class="w"></span>
<span class="w"> </span><span class="p">{{</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="err">$</span><span class="nx">v</span><span class="p">.</span><span class="nx">Auth</span><span class="p">.</span><span class="nx">Username</span><span class="w"> </span><span class="p">}}</span><span class="nx">username</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="s">&quot;{{ $v.Auth.Username }}&quot;</span><span class="p">{{</span><span class="nx">end</span><span class="p">}}</span><span class="w"></span>
<span class="w"> </span><span class="p">{{</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="err">$</span><span class="nx">v</span><span class="p">.</span><span class="nx">Auth</span><span class="p">.</span><span class="nx">Password</span><span class="w"> </span><span class="p">}}</span><span class="nx">password</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="s">&quot;{{ $v.Auth.Password }}&quot;</span><span class="p">{{</span><span class="nx">end</span><span class="p">}}</span><span class="w"></span>
<span class="w"> </span><span class="p">{{</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="err">$</span><span class="nx">v</span><span class="p">.</span><span class="nx">Auth</span><span class="p">.</span><span class="nx">Auth</span><span class="w"> </span><span class="p">}}</span><span class="nx">auth</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="s">&quot;{{ $v.Auth.Auth }}&quot;</span><span class="p">{{</span><span class="nx">end</span><span class="p">}}</span><span class="w"></span>
<span class="w"> </span><span class="p">{{</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="err">$</span><span class="nx">v</span><span class="p">.</span><span class="nx">Auth</span><span class="p">.</span><span class="nx">IdentityToken</span><span class="w"> </span><span class="p">}}</span><span class="nx">identitytoken</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="s">&quot;{{ $v.Auth.IdentityToken }}&quot;</span><span class="p">{{</span><span class="nx">end</span><span class="p">}}</span><span class="w"></span>
<span class="p">{{</span><span class="nx">end</span><span class="p">}}</span><span class="w"></span>
<span class="p">{{</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="err">$</span><span class="nx">v</span><span class="p">.</span><span class="nx">TLS</span><span class="w"> </span><span class="p">}}</span><span class="w"></span>
<span class="p">[</span><span class="nx">plugins</span><span class="p">.</span><span class="nx">cri</span><span class="p">.</span><span class="nx">registry</span><span class="p">.</span><span class="nx">configs</span><span class="p">.</span><span class="s">&quot;{{$k}}&quot;</span><span class="p">.</span><span class="nx">tls</span><span class="p">]</span><span class="w"></span>
<span class="w"> </span><span class="p">{{</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="err">$</span><span class="nx">v</span><span class="p">.</span><span class="nx">TLS</span><span class="p">.</span><span class="nx">CAFile</span><span class="w"> </span><span class="p">}}</span><span class="nx">ca_file</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="s">&quot;{{ $v.TLS.CAFile }}&quot;</span><span class="p">{{</span><span class="nx">end</span><span class="p">}}</span><span class="w"></span>
<span class="w"> </span><span class="p">{{</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="err">$</span><span class="nx">v</span><span class="p">.</span><span class="nx">TLS</span><span class="p">.</span><span class="nx">CertFile</span><span class="w"> </span><span class="p">}}</span><span class="nx">cert_file</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="s">&quot;{{ $v.TLS.CertFile }}&quot;</span><span class="p">{{</span><span class="nx">end</span><span class="p">}}</span><span class="w"></span>
<span class="w"> </span><span class="p">{{</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="err">$</span><span class="nx">v</span><span class="p">.</span><span class="nx">TLS</span><span class="p">.</span><span class="nx">KeyFile</span><span class="w"> </span><span class="p">}}</span><span class="nx">key_file</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><span class="s">&quot;{{ $v.TLS.KeyFile }}&quot;</span><span class="p">{{</span><span class="nx">end</span><span class="p">}}</span><span class="w"></span>
<span class="p">{{</span><span class="nx">end</span><span class="p">}}</span><span class="w"></span>
<span class="p">{{</span><span class="nx">end</span><span class="p">}}</span><span class="w"></span>
<span class="p">{{</span><span class="nx">end</span><span class="p">}}</span><span class="w"></span>
</code></pre></div>
<h3 id="the-nvidia-device-plugin">The NVIDIA device plugin<a class="headerlink" href="#the-nvidia-device-plugin" title="Permanent link">&para;</a></h3>
<p>To enable NVIDIA GPU support on Kubernetes you also need to install the <a href="https://github.com/NVIDIA/k8s-device-plugin">NVIDIA device plugin</a>. The device plugin is a daemonset and allows you to automatically:</p>
<ul>
<li>Expose the number of GPUs on each node of your cluster</li>
<li>Keep track of the health of your GPUs</li>
<li>Run GPU enabled containers in your Kubernetes cluster.</li>
</ul>
<div class="highlight"><pre><span></span><code><span class="nt">apiVersion</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">apps/v1</span><span class="w"></span>
<span class="nt">kind</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">DaemonSet</span><span class="w"></span>
<span class="nt">metadata</span><span class="p">:</span><span class="w"></span>
<span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">nvidia-device-plugin-daemonset</span><span class="w"></span>
<span class="w"> </span><span class="nt">namespace</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">kube-system</span><span class="w"></span>
<span class="nt">spec</span><span class="p">:</span><span class="w"></span>
<span class="w"> </span><span class="nt">selector</span><span class="p">:</span><span class="w"></span>
<span class="w"> </span><span class="nt">matchLabels</span><span class="p">:</span><span class="w"></span>
<span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">nvidia-device-plugin-ds</span><span class="w"></span>
<span class="w"> </span><span class="nt">template</span><span class="p">:</span><span class="w"></span>
<span class="w"> </span><span class="nt">metadata</span><span class="p">:</span><span class="w"></span>
<span class="w"> </span><span class="c1"># Mark this pod as a critical add-on; when enabled, the critical add-on scheduler</span><span class="w"></span>
<span class="w"> </span><span class="c1"># reserves resources for critical add-on pods so that they can be rescheduled after</span><span class="w"></span>
<span class="w"> </span><span class="c1"># a failure. This annotation works in tandem with the toleration below.</span><span class="w"></span>
<span class="w"> </span><span class="nt">annotations</span><span class="p">:</span><span class="w"></span>
<span class="w"> </span><span class="nt">scheduler.alpha.kubernetes.io/critical-pod</span><span class="p">:</span><span class="w"> </span><span class="s">&quot;&quot;</span><span class="w"></span>
<span class="w"> </span><span class="nt">labels</span><span class="p">:</span><span class="w"></span>
<span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">nvidia-device-plugin-ds</span><span class="w"></span>
<span class="w"> </span><span class="nt">spec</span><span class="p">:</span><span class="w"></span>
<span class="w"> </span><span class="nt">tolerations</span><span class="p">:</span><span class="w"></span>
<span class="w"> </span><span class="c1"># Allow this pod to be rescheduled while the node is in &quot;critical add-ons only&quot; mode.</span><span class="w"></span>
<span class="w"> </span><span class="c1"># This, along with the annotation above marks this pod as a critical add-on.</span><span class="w"></span>
<span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">CriticalAddonsOnly</span><span class="w"></span>
<span class="w"> </span><span class="nt">operator</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">Exists</span><span class="w"></span>
<span class="w"> </span><span class="nt">containers</span><span class="p">:</span><span class="w"></span>
<span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">env</span><span class="p">:</span><span class="w"></span>
<span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">DP_DISABLE_HEALTHCHECKS</span><span class="w"></span>
<span class="w"> </span><span class="nt">value</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">xids</span><span class="w"></span>
<span class="w"> </span><span class="nt">image</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">nvidia/k8s-device-plugin:1.11</span><span class="w"></span>
<span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">nvidia-device-plugin-ctr</span><span class="w"></span>
<span class="w"> </span><span class="nt">securityContext</span><span class="p">:</span><span class="w"></span>
<span class="w"> </span><span class="nt">allowPrivilegeEscalation</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"></span>
<span class="w"> </span><span class="nt">capabilities</span><span class="p">:</span><span class="w"></span>
<span class="w"> </span><span class="nt">drop</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">&quot;ALL&quot;</span><span class="p p-Indicator">]</span><span class="w"></span>
<span class="w"> </span><span class="nt">volumeMounts</span><span class="p">:</span><span class="w"></span>
<span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device-plugin</span><span class="w"></span>
<span class="w"> </span><span class="nt">mountPath</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/var/lib/kubelet/device-plugins</span><span class="w"></span>
<span class="w"> </span><span class="nt">volumes</span><span class="p">:</span><span class="w"></span>
<span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="nt">name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">device-plugin</span><span class="w"></span>
<span class="w"> </span><span class="nt">hostPath</span><span class="p">:</span><span class="w"></span>
<span class="w"> </span><span class="nt">path</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">/var/lib/kubelet/device-plugins</span><span class="w"></span>
</code></pre></div>
<h3 id="build-the-k3s-image">Build the K3s image<a class="headerlink" href="#build-the-k3s-image" title="Permanent link">&para;</a></h3>
<p>To build the custom image we need to build K3s because we need the generated output.</p>
<p>Put the following files in a directory:</p>
<ul>
<li><a href="Dockerfile">Dockerfile</a></li>
<li><a href="config.toml.tmpl">config.toml.tmpl</a></li>
<li><a href="device-plugin-daemonset.yaml">device-plugin-daemonset.yaml</a></li>
<li><a href="build.sh">build.sh</a></li>
<li><a href="cuda-vector-add.yaml">cuda-vector-add.yaml</a></li>
</ul>
<p>The <code>build.sh</code> script is configured using exported environment variables and defaults to K3s <code>v1.21.2+k3s1</code>. Please set at least the <code>IMAGE_REGISTRY</code> variable! The script builds the custom K3s image, including the NVIDIA drivers, and pushes it to your registry.</p>
<p><a href="build.sh">build.sh</a>:</p>
<div class="highlight"><pre><span></span><code><span class="ch">#!/bin/bash</span>
<span class="nb">set</span> -euxo pipefail
<span class="nv">K3S_TAG</span><span class="o">=</span><span class="si">${</span><span class="nv">K3S_TAG</span><span class="p">:=</span><span class="s2">&quot;v1.21.2-k3s1&quot;</span><span class="si">}</span> <span class="c1"># replace + with -, if needed</span>
<span class="nv">IMAGE_REGISTRY</span><span class="o">=</span><span class="si">${</span><span class="nv">IMAGE_REGISTRY</span><span class="p">:=</span><span class="s2">&quot;MY_REGISTRY&quot;</span><span class="si">}</span>
<span class="nv">IMAGE_REPOSITORY</span><span class="o">=</span><span class="si">${</span><span class="nv">IMAGE_REPOSITORY</span><span class="p">:=</span><span class="s2">&quot;rancher/k3s&quot;</span><span class="si">}</span>
<span class="nv">IMAGE_TAG</span><span class="o">=</span><span class="s2">&quot;</span><span class="nv">$K3S_TAG</span><span class="s2">-cuda&quot;</span>
<span class="nv">IMAGE</span><span class="o">=</span><span class="si">${</span><span class="nv">IMAGE</span><span class="p">:=</span><span class="s2">&quot;</span><span class="nv">$IMAGE_REGISTRY</span><span class="s2">/</span><span class="nv">$IMAGE_REPOSITORY</span><span class="s2">:</span><span class="nv">$IMAGE_TAG</span><span class="s2">&quot;</span><span class="si">}</span>
<span class="nv">NVIDIA_CONTAINER_RUNTIME_VERSION</span><span class="o">=</span><span class="si">${</span><span class="nv">NVIDIA_CONTAINER_RUNTIME_VERSION</span><span class="p">:=</span><span class="s2">&quot;3.5.0-1&quot;</span><span class="si">}</span>
<span class="nb">echo</span> <span class="s2">&quot;IMAGE=</span><span class="nv">$IMAGE</span><span class="s2">&quot;</span>
<span class="c1"># due to some unknown reason, copying symlinks fails with buildkit enabled</span>
<span class="nv">DOCKER_BUILDKIT</span><span class="o">=</span><span class="m">0</span> docker build <span class="se">\</span>
--build-arg <span class="nv">K3S_TAG</span><span class="o">=</span><span class="nv">$K3S_TAG</span> <span class="se">\</span>
--build-arg <span class="nv">NVIDIA_CONTAINER_RUNTIME_VERSION</span><span class="o">=</span><span class="nv">$NVIDIA_CONTAINER_RUNTIME_VERSION</span> <span class="se">\</span>
-t <span class="nv">$IMAGE</span> .
docker push <span class="nv">$IMAGE</span>
<span class="nb">echo</span> <span class="s2">&quot;Done!&quot;</span>
</code></pre></div>
<h2 id="run-and-test-the-custom-image-with-k3d">Run and test the custom image with k3d<a class="headerlink" href="#run-and-test-the-custom-image-with-k3d" title="Permanent link">&para;</a></h2>
<p>You can use the image with k3d:</p>
<div class="highlight"><pre><span></span><code>k3d cluster create gputest --image<span class="o">=</span><span class="nv">$IMAGE</span> --gpus<span class="o">=</span><span class="m">1</span>
</code></pre></div>
<p>Deploy a <a href="cuda-vector-add.yaml">test pod</a>:</p>
<div class="highlight"><pre><span></span><code>kubectl apply -f cuda-vector-add.yaml
kubectl logs cuda-vector-add
</code></pre></div>
<p>This should output something like the following:</p>
<div class="highlight"><pre><span></span><code>$ kubectl logs cuda-vector-add
<span class="o">[</span>Vector addition of <span class="m">50000</span> elements<span class="o">]</span>
Copy input data from the host memory to the CUDA device
CUDA kernel launch with <span class="m">196</span> blocks of <span class="m">256</span> threads
Copy output data from the CUDA device to the host memory
Test PASSED
Done
</code></pre></div>
<p>If the <code>cuda-vector-add</code> pod is stuck in <code>Pending</code> state, the device-plugin daemonset probably didn&rsquo;t get deployed correctly from the auto-deploy manifests. In that case, you can apply it manually via <code class="highlight">kubectl apply -f device-plugin-daemonset.yaml</code>.</p>
<h2 id="known-issues">Known issues<a class="headerlink" href="#known-issues" title="Permanent link">&para;</a></h2>
<ul>
<li>This approach does not work on WSL2 yet. The NVIDIA driver plugin and container runtime rely on the NVIDIA Management Library (NVML) which is not yet supported. See the <a href="https://docs.nvidia.com/cuda/wsl-user-guide/index.html#known-limitations">CUDA on WSL User Guide</a>.</li>
</ul>
<h2 id="acknowledgements">Acknowledgements<a class="headerlink" href="#acknowledgements" title="Permanent link">&para;</a></h2>
<p>Most of the information in this article was obtained from various sources:</p>
<ul>
<li><a href="https://dev.to/mweibel/add-nvidia-gpu-support-to-k3s-with-containerd-4j17">Add NVIDIA GPU support to k3s with containerd</a></li>
<li><a href="https://github.com/ubuntu/microk8s">microk8s</a></li>
<li><a href="https://github.com/rancher/k3s">K3s</a></li>
<li><a href="https://gitlab.com/vainkop1/k3s-gpu">k3s-gpu</a></li>
</ul>
<h2 id="authors">Authors<a class="headerlink" href="#authors" title="Permanent link">&para;</a></h2>
<ul>
<li><a href="https://github.com/markrexwinkel">@markrexwinkel</a></li>
<li><a href="https://github.com/vainkop">@vainkop</a></li>
<li><a href="https://github.com/iwilltry42">@iwilltry42</a></li>
</ul>
<hr>
<div class="md-source-file">
<small>
Last update:
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">September 17, 2021</span>
</small>
</div>
</article>
</div>
</div>
<a href="#" class="md-top md-icon" data-md-component="top" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12Z"/></svg>
Back to top
</a>
</main>
<footer class="md-footer">
<nav class="md-footer__inner md-grid" aria-label="Footer" >
<a href="../calico/" class="md-footer__link md-footer__link--prev" aria-label="Previous: Use Calico instead of Flannel" rel="prev">
<div class="md-footer__button md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
</div>
<div class="md-footer__title">
<div class="md-ellipsis">
<span class="md-footer__direction">
Previous
</span>
Use Calico instead of Flannel
</div>
</div>
</a>
<a href="../podman/" class="md-footer__link md-footer__link--next" aria-label="Next: Using Podman instead of Docker" rel="next">
<div class="md-footer__title">
<div class="md-ellipsis">
<span class="md-footer__direction">
Next
</span>
Using Podman instead of Docker
</div>
</div>
<div class="md-footer__button md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M4 11v2h12l-5.5 5.5 1.42 1.42L19.84 12l-7.92-7.92L10.5 5.5 16 11H4Z"/></svg>
</div>
</a>
</nav>
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
<div class="md-copyright__highlight">
Copyright &copy; 2020-2022 k3d Authors
</div>
Made with
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
Material for MkDocs
</a>
</div>
</div>
</div>
</footer>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"base": "../../..", "features": ["navigation.top", "search.suggest", "search.highlight", "navigation.expand", "navigation.tabs"], "search": "../../../assets/javascripts/workers/search.b97dbffb.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.config.lang": "en", "search.config.pipeline": "trimmer, stopWordFilter", "search.config.separator": "[\\s\\-]+", "search.placeholder": "Search", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version.title": "Select version"}, "version": {"provider": "mike"}}</script>
<script src="../../../assets/javascripts/bundle.6c7ad80a.min.js"></script>
<script src="../../../static/js/asciinema-player.js"></script>
</body>
</html>