loadImages: accept mixed runtimeImage and tarball args

- loads images from tarballs and runtime
- only tries to do something if respective lists are not empty
- only creates tools node if it's not present yet
- uses new tools image to remove tarballs after process
pull/266/head
iwilltry42 4 years ago
parent 933ac38059
commit dbb24f4e1c
No known key found for this signature in database
GPG Key ID: 7BA57AD1CFF16110
  1. 11
      pkg/runtimes/containerd/util.go
  2. 1
      pkg/runtimes/docker/node.go
  3. 51
      pkg/runtimes/docker/util.go
  4. 1
      pkg/runtimes/runtime.go
  5. 58
      pkg/tools/tools.go
  6. 3
      pkg/types/types.go
  7. 51
      vendor/github.com/Microsoft/hcsshim/osversion/osversion.go
  8. 10
      vendor/github.com/Microsoft/hcsshim/osversion/windowsbuilds.go
  9. 101
      vendor/github.com/containerd/continuity/pathdriver/path_driver.go
  10. 1
      vendor/github.com/docker/docker/pkg/archive/README.md
  11. 1284
      vendor/github.com/docker/docker/pkg/archive/archive.go
  12. 261
      vendor/github.com/docker/docker/pkg/archive/archive_linux.go
  13. 7
      vendor/github.com/docker/docker/pkg/archive/archive_other.go
  14. 115
      vendor/github.com/docker/docker/pkg/archive/archive_unix.go
  15. 67
      vendor/github.com/docker/docker/pkg/archive/archive_windows.go
  16. 445
      vendor/github.com/docker/docker/pkg/archive/changes.go
  17. 286
      vendor/github.com/docker/docker/pkg/archive/changes_linux.go
  18. 97
      vendor/github.com/docker/docker/pkg/archive/changes_other.go
  19. 43
      vendor/github.com/docker/docker/pkg/archive/changes_unix.go
  20. 34
      vendor/github.com/docker/docker/pkg/archive/changes_windows.go
  21. 480
      vendor/github.com/docker/docker/pkg/archive/copy.go
  22. 11
      vendor/github.com/docker/docker/pkg/archive/copy_unix.go
  23. 9
      vendor/github.com/docker/docker/pkg/archive/copy_windows.go
  24. 260
      vendor/github.com/docker/docker/pkg/archive/diff.go
  25. 16
      vendor/github.com/docker/docker/pkg/archive/time_linux.go
  26. 16
      vendor/github.com/docker/docker/pkg/archive/time_unsupported.go
  27. 23
      vendor/github.com/docker/docker/pkg/archive/whiteouts.go
  28. 59
      vendor/github.com/docker/docker/pkg/archive/wrap.go
  29. 298
      vendor/github.com/docker/docker/pkg/fileutils/fileutils.go
  30. 27
      vendor/github.com/docker/docker/pkg/fileutils/fileutils_darwin.go
  31. 22
      vendor/github.com/docker/docker/pkg/fileutils/fileutils_unix.go
  32. 7
      vendor/github.com/docker/docker/pkg/fileutils/fileutils_windows.go
  33. 264
      vendor/github.com/docker/docker/pkg/idtools/idtools.go
  34. 231
      vendor/github.com/docker/docker/pkg/idtools/idtools_unix.go
  35. 25
      vendor/github.com/docker/docker/pkg/idtools/idtools_windows.go
  36. 164
      vendor/github.com/docker/docker/pkg/idtools/usergroupadd_linux.go
  37. 12
      vendor/github.com/docker/docker/pkg/idtools/usergroupadd_unsupported.go
  38. 32
      vendor/github.com/docker/docker/pkg/idtools/utils_unix.go
  39. 51
      vendor/github.com/docker/docker/pkg/ioutils/buffer.go
  40. 187
      vendor/github.com/docker/docker/pkg/ioutils/bytespipe.go
  41. 162
      vendor/github.com/docker/docker/pkg/ioutils/fswriters.go
  42. 157
      vendor/github.com/docker/docker/pkg/ioutils/readers.go
  43. 10
      vendor/github.com/docker/docker/pkg/ioutils/temp_unix.go
  44. 16
      vendor/github.com/docker/docker/pkg/ioutils/temp_windows.go
  45. 92
      vendor/github.com/docker/docker/pkg/ioutils/writeflusher.go
  46. 66
      vendor/github.com/docker/docker/pkg/ioutils/writers.go
  47. 26
      vendor/github.com/docker/docker/pkg/longpath/longpath.go
  48. 137
      vendor/github.com/docker/docker/pkg/mount/flags.go
  49. 49
      vendor/github.com/docker/docker/pkg/mount/flags_freebsd.go
  50. 87
      vendor/github.com/docker/docker/pkg/mount/flags_linux.go
  51. 31
      vendor/github.com/docker/docker/pkg/mount/flags_unsupported.go
  52. 159
      vendor/github.com/docker/docker/pkg/mount/mount.go
  53. 59
      vendor/github.com/docker/docker/pkg/mount/mounter_freebsd.go
  54. 73
      vendor/github.com/docker/docker/pkg/mount/mounter_linux.go
  55. 7
      vendor/github.com/docker/docker/pkg/mount/mounter_unsupported.go
  56. 40
      vendor/github.com/docker/docker/pkg/mount/mountinfo.go
  57. 54
      vendor/github.com/docker/docker/pkg/mount/mountinfo_freebsd.go
  58. 144
      vendor/github.com/docker/docker/pkg/mount/mountinfo_linux.go
  59. 12
      vendor/github.com/docker/docker/pkg/mount/mountinfo_unsupported.go
  60. 6
      vendor/github.com/docker/docker/pkg/mount/mountinfo_windows.go
  61. 71
      vendor/github.com/docker/docker/pkg/mount/sharedsubtree_linux.go
  62. 22
      vendor/github.com/docker/docker/pkg/mount/unmount_unix.go
  63. 7
      vendor/github.com/docker/docker/pkg/mount/unmount_unsupported.go
  64. 137
      vendor/github.com/docker/docker/pkg/pools/pools.go
  65. 16
      vendor/github.com/docker/docker/pkg/system/args_windows.go
  66. 31
      vendor/github.com/docker/docker/pkg/system/chtimes.go
  67. 14
      vendor/github.com/docker/docker/pkg/system/chtimes_unix.go
  68. 26
      vendor/github.com/docker/docker/pkg/system/chtimes_windows.go
  69. 13
      vendor/github.com/docker/docker/pkg/system/errors.go
  70. 19
      vendor/github.com/docker/docker/pkg/system/exitcode.go
  71. 67
      vendor/github.com/docker/docker/pkg/system/filesys_unix.go
  72. 295
      vendor/github.com/docker/docker/pkg/system/filesys_windows.go
  73. 22
      vendor/github.com/docker/docker/pkg/system/init.go
  74. 12
      vendor/github.com/docker/docker/pkg/system/init_unix.go
  75. 41
      vendor/github.com/docker/docker/pkg/system/init_windows.go
  76. 32
      vendor/github.com/docker/docker/pkg/system/lcow.go
  77. 8
      vendor/github.com/docker/docker/pkg/system/lcow_unix.go
  78. 6
      vendor/github.com/docker/docker/pkg/system/lcow_windows.go
  79. 20
      vendor/github.com/docker/docker/pkg/system/lstat_unix.go
  80. 14
      vendor/github.com/docker/docker/pkg/system/lstat_windows.go
  81. 17
      vendor/github.com/docker/docker/pkg/system/meminfo.go
  82. 71
      vendor/github.com/docker/docker/pkg/system/meminfo_linux.go
  83. 8
      vendor/github.com/docker/docker/pkg/system/meminfo_unsupported.go
  84. 45
      vendor/github.com/docker/docker/pkg/system/meminfo_windows.go
  85. 22
      vendor/github.com/docker/docker/pkg/system/mknod.go
  86. 11
      vendor/github.com/docker/docker/pkg/system/mknod_windows.go
  87. 60
      vendor/github.com/docker/docker/pkg/system/path.go
  88. 10
      vendor/github.com/docker/docker/pkg/system/path_unix.go
  89. 24
      vendor/github.com/docker/docker/pkg/system/path_windows.go
  90. 24
      vendor/github.com/docker/docker/pkg/system/process_unix.go
  91. 18
      vendor/github.com/docker/docker/pkg/system/process_windows.go
  92. 80
      vendor/github.com/docker/docker/pkg/system/rm.go
  93. 13
      vendor/github.com/docker/docker/pkg/system/stat_darwin.go
  94. 13
      vendor/github.com/docker/docker/pkg/system/stat_freebsd.go
  95. 20
      vendor/github.com/docker/docker/pkg/system/stat_linux.go
  96. 13
      vendor/github.com/docker/docker/pkg/system/stat_openbsd.go
  97. 66
      vendor/github.com/docker/docker/pkg/system/stat_unix.go
  98. 49
      vendor/github.com/docker/docker/pkg/system/stat_windows.go
  99. 17
      vendor/github.com/docker/docker/pkg/system/syscall_unix.go
  100. 176
      vendor/github.com/docker/docker/pkg/system/syscall_windows.go
  101. Some files were not shown because too many files have changed in this diff Show More

@ -20,3 +20,14 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
package containerd
import (
"context"
k3d "github.com/rancher/k3d/pkg/types"
)
// CopyToNode copies a file from the local FS to the selected node
func (d Containerd) CopyToNode(ctx context.Context, src string, dest string, node *k3d.Node) error {
return nil
}

@ -194,7 +194,6 @@ func getContainerDetails(ctx context.Context, containerID string) (types.Contain
func (d Docker) GetNode(ctx context.Context, node *k3d.Node) (*k3d.Node, error) {
container, err := getNodeContainer(ctx, node)
if err != nil {
log.Errorf("Failed to get container for node '%s'", node.Name)
return node, err
}

@ -22,10 +22,15 @@ THE SOFTWARE.
package docker
import (
"context"
"fmt"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/client"
"github.com/docker/docker/pkg/archive"
k3d "github.com/rancher/k3d/pkg/types"
log "github.com/sirupsen/logrus"
)
// GetDefaultObjectLabelsFilter returns docker type filters created from k3d labels
@ -37,3 +42,49 @@ func GetDefaultObjectLabelsFilter(clusterName string) filters.Args {
filters.Add("label", fmt.Sprintf("k3d.cluster=%s", clusterName))
return filters
}
// CopyToNode copies a file from the local FS to the selected node
func (d Docker) CopyToNode(ctx context.Context, src string, dest string, node *k3d.Node) error {
// create docker client
docker, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
if err != nil {
log.Errorln("Failed to create docker client")
return err
}
defer docker.Close()
container, err := getNodeContainer(ctx, node)
if err != nil {
log.Errorln("Failed to find container for target node '%s'", node.Name)
return err
}
// source: docker/cli/cli/command/container/cp
srcInfo, err := archive.CopyInfoSourcePath(src, false)
if err != nil {
log.Errorln("Failed to copy info source path")
return err
}
srcArchive, err := archive.TarResource(srcInfo)
if err != nil {
log.Errorln("Failed to create tar resource")
return err
}
defer srcArchive.Close()
destInfo := archive.CopyInfo{Path: dest}
destStat, _ := docker.ContainerStatPath(ctx, container.ID, dest) // don't blame me, docker is also not doing anything if err != nil ¯\_(ツ)_/¯
destInfo.Exists, destInfo.IsDir = true, destStat.Mode.IsDir()
destDir, preparedArchive, err := archive.PrepareArchiveCopy(srcArchive, srcInfo, destInfo)
if err != nil {
log.Errorln("Failed to prepare archive")
return err
}
defer preparedArchive.Close()
return docker.CopyToContainer(ctx, container.ID, destDir, preparedArchive, types.CopyToContainerOptions{AllowOverwriteDirWithFile: false})
}

@ -59,6 +59,7 @@ type Runtime interface {
ExecInNode(context.Context, *k3d.Node, []string) error
GetNodeLogs(context.Context, *k3d.Node, time.Time) (io.ReadCloser, error)
GetImages(context.Context) ([]string, error)
CopyToNode(context.Context, string, string, *k3d.Node) error
}
// GetRuntime checks, if a given name is represented by an implemented k3d runtime and returns it

@ -25,6 +25,7 @@ import (
"context"
"fmt"
"os"
"path"
"strings"
"sync"
"time"
@ -98,18 +99,22 @@ func LoadImagesIntoCluster(ctx context.Context, runtime runtimes.Runtime, images
log.Debugf("Attaching to cluster's image volume '%s'", imageVolume)
// create tools node to export images
log.Infoln("Starting k3d-tools node...")
toolsNode, err := startToolsNode( // TODO: re-use existing container
ctx,
runtime,
cluster,
cluster.Network.Name,
[]string{
fmt.Sprintf("%s:%s", imageVolume, k3d.DefaultImageVolumeMountPath),
fmt.Sprintf("%s:%s", runtime.GetRuntimePath(), runtime.GetRuntimePath()),
})
if err != nil {
log.Errorf("Failed to start tools container for cluster '%s'", cluster.Name)
var toolsNode *k3d.Node
toolsNode, err = runtime.GetNode(ctx, &k3d.Node{Name: fmt.Sprintf("%s-%s-tools", k3d.DefaultObjectNamePrefix, cluster.Name)})
if err != nil || toolsNode == nil {
log.Infoln("Starting k3d-tools node...")
toolsNode, err = startToolsNode( // TODO: re-use existing container
ctx,
runtime,
cluster,
cluster.Network.Name,
[]string{
fmt.Sprintf("%s:%s", imageVolume, k3d.DefaultImageVolumeMountPath),
fmt.Sprintf("%s:%s", runtime.GetRuntimePath(), runtime.GetRuntimePath()),
})
if err != nil {
log.Errorf("Failed to start tools container for cluster '%s'", cluster.Name)
}
}
/* TODO:
@ -133,6 +138,19 @@ func LoadImagesIntoCluster(ctx context.Context, runtime runtimes.Runtime, images
importTarNames = append(importTarNames, tarName)
}
if len(imagesFromTar) > 0 {
// copy tarfiles to shared volume
log.Infof("Saving %d tarball(s) to shared image volume...", len(imagesFromTar))
for _, file := range imagesFromTar {
tarName := fmt.Sprintf("%s/k3d-%s-images-%s-file-%s", k3d.DefaultImageVolumeMountPath, cluster.Name, time.Now().Format("20060102150405"), path.Base(file))
if err := runtime.CopyToNode(ctx, file, tarName, toolsNode); err != nil {
log.Errorf("Failed to copy image tar '%s' to tools node! Error below:\n%+v", file, err)
continue
}
importTarNames = append(importTarNames, tarName)
}
}
// import image in each node
log.Infoln("Importing images into nodes...")
var importWaitgroup sync.WaitGroup
@ -141,23 +159,23 @@ func LoadImagesIntoCluster(ctx context.Context, runtime runtimes.Runtime, images
// only import image in master and worker nodes (i.e. ignoring auxiliary nodes like the master loadbalancer)
if node.Role == k3d.MasterRole || node.Role == k3d.WorkerRole {
importWaitgroup.Add(1)
go func(node *k3d.Node, wg *sync.WaitGroup) {
log.Infof("Importing images into node '%s'...", node.Name)
if err := runtime.ExecInNode(ctx, node, []string{"ctr", "image", "import", tarName}); err != nil {
go func(node *k3d.Node, wg *sync.WaitGroup, tarPath string) {
log.Infof("Importing images from tarball '%s' into node '%s'...", tarPath, node.Name)
if err := runtime.ExecInNode(ctx, node, []string{"ctr", "image", "import", tarPath}); err != nil {
log.Errorf("Failed to import images in node '%s'", node.Name)
log.Errorln(err)
}
wg.Done()
}(node, &importWaitgroup)
}(node, &importWaitgroup, tarName)
}
}
}
importWaitgroup.Wait()
// remove tarball
if !loadImageOpts.KeepTar {
log.Infoln("Removing the tarball...")
if err := runtime.ExecInNode(ctx, cluster.Nodes[0], []string{"rm", "-f", strings.Join(importTarNames, " ")}); err != nil { // TODO: do this in tools node (requires rm)
if !loadImageOpts.KeepTar && len(importTarNames) > 0 {
log.Infoln("Removing the tarball(s) from image volume...")
if err := runtime.ExecInNode(ctx, toolsNode, []string{"rm", "-f", strings.Join(importTarNames, " ")}); err != nil {
log.Errorf("Failed to delete one or more tarballs from '%+v'", importTarNames)
log.Errorln(err)
}
@ -185,7 +203,9 @@ func startToolsNode(ctx context.Context, runtime runtimes.Runtime, cluster *k3d.
Network: network,
Cmd: []string{},
Args: []string{"noop"},
Labels: k3d.DefaultObjectLabels,
}
node.Labels["k3d.cluster"] = cluster.Name
if err := runtime.CreateNode(ctx, node); err != nil {
log.Errorf("Failed to create tools container for cluster '%s'", cluster.Name)
return node, err

@ -100,7 +100,7 @@ var DefaultNodeEnv = []string{
}
// DefaultToolsContainerImage defines the default image used for the tools container
const DefaultToolsContainerImage = "docker.io/iwilltry42/k3d-tools:v0.0.2" // TODO: get version dynamically or at build time
const DefaultToolsContainerImage = "docker.io/iwilltry42/k3d-tools:v0.0.3" // TODO: get version dynamically or at build time
// DefaultImageVolumeMountPath defines the mount path inside k3d nodes where we will mount the shared image volume by default
const DefaultImageVolumeMountPath = "/k3d/images"
@ -185,6 +185,7 @@ func (c *Cluster) MasterCount() int {
}
return masterCount
}
// WorkerCount return number of worker node into cluster
func (c *Cluster) WorkerCount() int {
workerCount := 0

@ -0,0 +1,51 @@
package osversion
import (
"fmt"
"golang.org/x/sys/windows"
)
// OSVersion is a wrapper for Windows version information
// https://msdn.microsoft.com/en-us/library/windows/desktop/ms724439(v=vs.85).aspx
type OSVersion struct {
Version uint32
MajorVersion uint8
MinorVersion uint8
Build uint16
}
// https://msdn.microsoft.com/en-us/library/windows/desktop/ms724833(v=vs.85).aspx
type osVersionInfoEx struct {
OSVersionInfoSize uint32
MajorVersion uint32
MinorVersion uint32
BuildNumber uint32
PlatformID uint32
CSDVersion [128]uint16
ServicePackMajor uint16
ServicePackMinor uint16
SuiteMask uint16
ProductType byte
Reserve byte
}
// Get gets the operating system version on Windows.
// The calling application must be manifested to get the correct version information.
func Get() OSVersion {
var err error
osv := OSVersion{}
osv.Version, err = windows.GetVersion()
if err != nil {
// GetVersion never fails.
panic(err)
}
osv.MajorVersion = uint8(osv.Version & 0xFF)
osv.MinorVersion = uint8(osv.Version >> 8 & 0xFF)
osv.Build = uint16(osv.Version >> 16)
return osv
}
func (osv OSVersion) ToString() string {
return fmt.Sprintf("%d.%d.%d", osv.MajorVersion, osv.MinorVersion, osv.Build)
}

@ -0,0 +1,10 @@
package osversion
const (
// RS2 was a client-only release in case you're asking why it's not in the list.
RS1 = 14393
RS3 = 16299
RS4 = 17134
RS5 = 17763
)

@ -0,0 +1,101 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package pathdriver
import (
"path/filepath"
)
// PathDriver provides all of the path manipulation functions in a common
// interface. The context should call these and never use the `filepath`
// package or any other package to manipulate paths.
type PathDriver interface {
Join(paths ...string) string
IsAbs(path string) bool
Rel(base, target string) (string, error)
Base(path string) string
Dir(path string) string
Clean(path string) string
Split(path string) (dir, file string)
Separator() byte
Abs(path string) (string, error)
Walk(string, filepath.WalkFunc) error
FromSlash(path string) string
ToSlash(path string) string
Match(pattern, name string) (matched bool, err error)
}
// pathDriver is a simple default implementation calls the filepath package.
type pathDriver struct{}
// LocalPathDriver is the exported pathDriver struct for convenience.
var LocalPathDriver PathDriver = &pathDriver{}
func (*pathDriver) Join(paths ...string) string {
return filepath.Join(paths...)
}
func (*pathDriver) IsAbs(path string) bool {
return filepath.IsAbs(path)
}
func (*pathDriver) Rel(base, target string) (string, error) {
return filepath.Rel(base, target)
}
func (*pathDriver) Base(path string) string {
return filepath.Base(path)
}
func (*pathDriver) Dir(path string) string {
return filepath.Dir(path)
}
func (*pathDriver) Clean(path string) string {
return filepath.Clean(path)
}
func (*pathDriver) Split(path string) (dir, file string) {
return filepath.Split(path)
}
func (*pathDriver) Separator() byte {
return filepath.Separator
}
func (*pathDriver) Abs(path string) (string, error) {
return filepath.Abs(path)
}
// Note that filepath.Walk calls os.Stat, so if the context wants to
// to call Driver.Stat() for Walk, they need to create a new struct that
// overrides this method.
func (*pathDriver) Walk(root string, walkFn filepath.WalkFunc) error {
return filepath.Walk(root, walkFn)
}
func (*pathDriver) FromSlash(path string) string {
return filepath.FromSlash(path)
}
func (*pathDriver) ToSlash(path string) string {
return filepath.ToSlash(path)
}
func (*pathDriver) Match(pattern, name string) (bool, error) {
return filepath.Match(pattern, name)
}

@ -0,0 +1 @@
This code provides helper functions for dealing with archive files.

File diff suppressed because it is too large Load Diff

@ -0,0 +1,261 @@
package archive // import "github.com/docker/docker/pkg/archive"
import (
"archive/tar"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"syscall"
"github.com/containerd/continuity/fs"
"github.com/docker/docker/pkg/system"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
func getWhiteoutConverter(format WhiteoutFormat, inUserNS bool) tarWhiteoutConverter {
if format == OverlayWhiteoutFormat {
return overlayWhiteoutConverter{inUserNS: inUserNS}
}
return nil
}
type overlayWhiteoutConverter struct {
inUserNS bool
}
func (overlayWhiteoutConverter) ConvertWrite(hdr *tar.Header, path string, fi os.FileInfo) (wo *tar.Header, err error) {
// convert whiteouts to AUFS format
if fi.Mode()&os.ModeCharDevice != 0 && hdr.Devmajor == 0 && hdr.Devminor == 0 {
// we just rename the file and make it normal
dir, filename := filepath.Split(hdr.Name)
hdr.Name = filepath.Join(dir, WhiteoutPrefix+filename)
hdr.Mode = 0600
hdr.Typeflag = tar.TypeReg
hdr.Size = 0
}
if fi.Mode()&os.ModeDir != 0 {
// convert opaque dirs to AUFS format by writing an empty file with the prefix
opaque, err := system.Lgetxattr(path, "trusted.overlay.opaque")
if err != nil {
return nil, err
}
if len(opaque) == 1 && opaque[0] == 'y' {
if hdr.Xattrs != nil {
delete(hdr.Xattrs, "trusted.overlay.opaque")
}
// create a header for the whiteout file
// it should inherit some properties from the parent, but be a regular file
wo = &tar.Header{
Typeflag: tar.TypeReg,
Mode: hdr.Mode & int64(os.ModePerm),
Name: filepath.Join(hdr.Name, WhiteoutOpaqueDir),
Size: 0,
Uid: hdr.Uid,
Uname: hdr.Uname,
Gid: hdr.Gid,
Gname: hdr.Gname,
AccessTime: hdr.AccessTime,
ChangeTime: hdr.ChangeTime,
}
}
}
return
}
func (c overlayWhiteoutConverter) ConvertRead(hdr *tar.Header, path string) (bool, error) {
base := filepath.Base(path)
dir := filepath.Dir(path)
// if a directory is marked as opaque by the AUFS special file, we need to translate that to overlay
if base == WhiteoutOpaqueDir {
err := unix.Setxattr(dir, "trusted.overlay.opaque", []byte{'y'}, 0)
if err != nil {
if c.inUserNS {
if err = replaceDirWithOverlayOpaque(dir); err != nil {
return false, errors.Wrapf(err, "replaceDirWithOverlayOpaque(%q) failed", dir)
}
} else {
return false, errors.Wrapf(err, "setxattr(%q, trusted.overlay.opaque=y)", dir)
}
}
// don't write the file itself
return false, err
}
// if a file was deleted and we are using overlay, we need to create a character device
if strings.HasPrefix(base, WhiteoutPrefix) {
originalBase := base[len(WhiteoutPrefix):]
originalPath := filepath.Join(dir, originalBase)
if err := unix.Mknod(originalPath, unix.S_IFCHR, 0); err != nil {
if c.inUserNS {
// Ubuntu and a few distros support overlayfs in userns.
//
// Although we can't call mknod directly in userns (at least on bionic kernel 4.15),
// we can still create 0,0 char device using mknodChar0Overlay().
//
// NOTE: we don't need this hack for the containerd snapshotter+unpack model.
if err := mknodChar0Overlay(originalPath); err != nil {
return false, errors.Wrapf(err, "failed to mknodChar0UserNS(%q)", originalPath)
}
} else {
return false, errors.Wrapf(err, "failed to mknod(%q, S_IFCHR, 0)", originalPath)
}
}
if err := os.Chown(originalPath, hdr.Uid, hdr.Gid); err != nil {
return false, err
}
// don't write the file itself
return false, nil
}
return true, nil
}
// mknodChar0Overlay creates 0,0 char device by mounting overlayfs and unlinking.
// This function can be used for creating 0,0 char device in userns on Ubuntu.
//
// Steps:
// * Mkdir lower,upper,merged,work
// * Create lower/dummy
// * Mount overlayfs
// * Unlink merged/dummy
// * Unmount overlayfs
// * Make sure a 0,0 char device is created as upper/dummy
// * Rename upper/dummy to cleansedOriginalPath
func mknodChar0Overlay(cleansedOriginalPath string) error {
dir := filepath.Dir(cleansedOriginalPath)
tmp, err := ioutil.TempDir(dir, "mc0o")
if err != nil {
return errors.Wrapf(err, "failed to create a tmp directory under %s", dir)
}
defer os.RemoveAll(tmp)
lower := filepath.Join(tmp, "l")
upper := filepath.Join(tmp, "u")
work := filepath.Join(tmp, "w")
merged := filepath.Join(tmp, "m")
for _, s := range []string{lower, upper, work, merged} {
if err := os.MkdirAll(s, 0700); err != nil {
return errors.Wrapf(err, "failed to mkdir %s", s)
}
}
dummyBase := "d"
lowerDummy := filepath.Join(lower, dummyBase)
if err := ioutil.WriteFile(lowerDummy, []byte{}, 0600); err != nil {
return errors.Wrapf(err, "failed to create a dummy lower file %s", lowerDummy)
}
mOpts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lower, upper, work)
// docker/pkg/mount.Mount() requires procfs to be mounted. So we use syscall.Mount() directly instead.
if err := syscall.Mount("overlay", merged, "overlay", uintptr(0), mOpts); err != nil {
return errors.Wrapf(err, "failed to mount overlay (%s) on %s", mOpts, merged)
}
mergedDummy := filepath.Join(merged, dummyBase)
if err := os.Remove(mergedDummy); err != nil {
syscall.Unmount(merged, 0)
return errors.Wrapf(err, "failed to unlink %s", mergedDummy)
}
if err := syscall.Unmount(merged, 0); err != nil {
return errors.Wrapf(err, "failed to unmount %s", merged)
}
upperDummy := filepath.Join(upper, dummyBase)
if err := isChar0(upperDummy); err != nil {
return err
}
if err := os.Rename(upperDummy, cleansedOriginalPath); err != nil {
return errors.Wrapf(err, "failed to rename %s to %s", upperDummy, cleansedOriginalPath)
}
return nil
}
func isChar0(path string) error {
osStat, err := os.Stat(path)
if err != nil {
return errors.Wrapf(err, "failed to stat %s", path)
}
st, ok := osStat.Sys().(*syscall.Stat_t)
if !ok {
return errors.Errorf("got unsupported stat for %s", path)
}
if os.FileMode(st.Mode)&syscall.S_IFMT != syscall.S_IFCHR {
return errors.Errorf("%s is not a character device, got mode=%d", path, st.Mode)
}
if st.Rdev != 0 {
return errors.Errorf("%s is not a 0,0 character device, got Rdev=%d", path, st.Rdev)
}
return nil
}
// replaceDirWithOverlayOpaque replaces path with a new directory with trusted.overlay.opaque
// xattr. The contents of the directory are preserved.
func replaceDirWithOverlayOpaque(path string) error {
if path == "/" {
return errors.New("replaceDirWithOverlayOpaque: path must not be \"/\"")
}
dir := filepath.Dir(path)
tmp, err := ioutil.TempDir(dir, "rdwoo")
if err != nil {
return errors.Wrapf(err, "failed to create a tmp directory under %s", dir)
}
defer os.RemoveAll(tmp)
// newPath is a new empty directory crafted with trusted.overlay.opaque xattr.
// we copy the content of path into newPath, remove path, and rename newPath to path.
newPath, err := createDirWithOverlayOpaque(tmp)
if err != nil {
return errors.Wrapf(err, "createDirWithOverlayOpaque(%q) failed", tmp)
}
if err := fs.CopyDir(newPath, path); err != nil {
return errors.Wrapf(err, "CopyDir(%q, %q) failed", newPath, path)
}
if err := os.RemoveAll(path); err != nil {
return err
}
return os.Rename(newPath, path)
}
// createDirWithOverlayOpaque creates a directory with trusted.overlay.opaque xattr,
// without calling setxattr, so as to allow creating opaque dir in userns on Ubuntu.
func createDirWithOverlayOpaque(tmp string) (string, error) {
lower := filepath.Join(tmp, "l")
upper := filepath.Join(tmp, "u")
work := filepath.Join(tmp, "w")
merged := filepath.Join(tmp, "m")
for _, s := range []string{lower, upper, work, merged} {
if err := os.MkdirAll(s, 0700); err != nil {
return "", errors.Wrapf(err, "failed to mkdir %s", s)
}
}
dummyBase := "d"
lowerDummy := filepath.Join(lower, dummyBase)
if err := os.MkdirAll(lowerDummy, 0700); err != nil {
return "", errors.Wrapf(err, "failed to create a dummy lower directory %s", lowerDummy)
}
mOpts := fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lower, upper, work)
// docker/pkg/mount.Mount() requires procfs to be mounted. So we use syscall.Mount() directly instead.
if err := syscall.Mount("overlay", merged, "overlay", uintptr(0), mOpts); err != nil {
return "", errors.Wrapf(err, "failed to mount overlay (%s) on %s", mOpts, merged)
}
mergedDummy := filepath.Join(merged, dummyBase)
if err := os.Remove(mergedDummy); err != nil {
syscall.Unmount(merged, 0)
return "", errors.Wrapf(err, "failed to rmdir %s", mergedDummy)
}
// upperDummy becomes a 0,0-char device file here
if err := os.Mkdir(mergedDummy, 0700); err != nil {
syscall.Unmount(merged, 0)
return "", errors.Wrapf(err, "failed to mkdir %s", mergedDummy)
}
// upperDummy becomes a directory with trusted.overlay.opaque xattr
// (but can't be verified in userns)
if err := syscall.Unmount(merged, 0); err != nil {
return "", errors.Wrapf(err, "failed to unmount %s", merged)
}
upperDummy := filepath.Join(upper, dummyBase)
return upperDummy, nil
}

@ -0,0 +1,7 @@
// +build !linux
package archive // import "github.com/docker/docker/pkg/archive"
func getWhiteoutConverter(format WhiteoutFormat, inUserNS bool) tarWhiteoutConverter {
return nil
}

@ -0,0 +1,115 @@
// +build !windows
package archive // import "github.com/docker/docker/pkg/archive"
import (
"archive/tar"
"errors"
"os"
"path/filepath"
"strings"
"syscall"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/system"
rsystem "github.com/opencontainers/runc/libcontainer/system"
"golang.org/x/sys/unix"
)
// fixVolumePathPrefix does platform specific processing to ensure that if
// the path being passed in is not in a volume path format, convert it to one.
func fixVolumePathPrefix(srcPath string) string {
return srcPath
}
// getWalkRoot calculates the root path when performing a TarWithOptions.
// We use a separate function as this is platform specific. On Linux, we
// can't use filepath.Join(srcPath,include) because this will clean away
// a trailing "." or "/" which may be important.
func getWalkRoot(srcPath string, include string) string {
return strings.TrimSuffix(srcPath, string(filepath.Separator)) + string(filepath.Separator) + include
}
// CanonicalTarNameForPath returns platform-specific filepath
// to canonical posix-style path for tar archival. p is relative
// path.
func CanonicalTarNameForPath(p string) string {
return p // already unix-style
}
// chmodTarEntry is used to adjust the file permissions used in tar header based
// on the platform the archival is done.
func chmodTarEntry(perm os.FileMode) os.FileMode {
return perm // noop for unix as golang APIs provide perm bits correctly
}
func setHeaderForSpecialDevice(hdr *tar.Header, name string, stat interface{}) (err error) {
s, ok := stat.(*syscall.Stat_t)
if ok {
// Currently go does not fill in the major/minors
if s.Mode&unix.S_IFBLK != 0 ||
s.Mode&unix.S_IFCHR != 0 {
hdr.Devmajor = int64(unix.Major(uint64(s.Rdev))) // nolint: unconvert
hdr.Devminor = int64(unix.Minor(uint64(s.Rdev))) // nolint: unconvert
}
}
return
}
func getInodeFromStat(stat interface{}) (inode uint64, err error) {
s, ok := stat.(*syscall.Stat_t)
if ok {
inode = s.Ino
}
return
}
func getFileUIDGID(stat interface{}) (idtools.Identity, error) {
s, ok := stat.(*syscall.Stat_t)
if !ok {
return idtools.Identity{}, errors.New("cannot convert stat value to syscall.Stat_t")
}
return idtools.Identity{UID: int(s.Uid), GID: int(s.Gid)}, nil
}
// handleTarTypeBlockCharFifo is an OS-specific helper function used by
// createTarFile to handle the following types of header: Block; Char; Fifo
func handleTarTypeBlockCharFifo(hdr *tar.Header, path string) error {
if rsystem.RunningInUserNS() {
// cannot create a device if running in user namespace
return nil
}
mode := uint32(hdr.Mode & 07777)
switch hdr.Typeflag {
case tar.TypeBlock:
mode |= unix.S_IFBLK
case tar.TypeChar:
mode |= unix.S_IFCHR
case tar.TypeFifo:
mode |= unix.S_IFIFO
}
return system.Mknod(path, mode, int(system.Mkdev(hdr.Devmajor, hdr.Devminor)))
}
func handleLChmod(hdr *tar.Header, path string, hdrInfo os.FileInfo) error {
if hdr.Typeflag == tar.TypeLink {
if fi, err := os.Lstat(hdr.Linkname); err == nil && (fi.Mode()&os.ModeSymlink == 0) {
if err := os.Chmod(path, hdrInfo.Mode()); err != nil {
return err
}
}
} else if hdr.Typeflag != tar.TypeSymlink {
if err := os.Chmod(path, hdrInfo.Mode()); err != nil {
return err
}
}
return nil
}

@ -0,0 +1,67 @@
package archive // import "github.com/docker/docker/pkg/archive"
import (
"archive/tar"
"os"
"path/filepath"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/longpath"
)
// fixVolumePathPrefix does platform specific processing to ensure that if
// the path being passed in is not in a volume path format, convert it to one.
func fixVolumePathPrefix(srcPath string) string {
return longpath.AddPrefix(srcPath)
}
// getWalkRoot calculates the root path when performing a TarWithOptions.
// We use a separate function as this is platform specific.
func getWalkRoot(srcPath string, include string) string {
return filepath.Join(srcPath, include)
}
// CanonicalTarNameForPath returns platform-specific filepath
// to canonical posix-style path for tar archival. p is relative
// path.
func CanonicalTarNameForPath(p string) string {
return filepath.ToSlash(p)
}
// chmodTarEntry is used to adjust the file permissions used in tar header based
// on the platform the archival is done.
func chmodTarEntry(perm os.FileMode) os.FileMode {
//perm &= 0755 // this 0-ed out tar flags (like link, regular file, directory marker etc.)
permPart := perm & os.ModePerm
noPermPart := perm &^ os.ModePerm
// Add the x bit: make everything +x from windows
permPart |= 0111
permPart &= 0755
return noPermPart | permPart
}
func setHeaderForSpecialDevice(hdr *tar.Header, name string, stat interface{}) (err error) {
// do nothing. no notion of Rdev, Nlink in stat on Windows
return
}
func getInodeFromStat(stat interface{}) (inode uint64, err error) {
// do nothing. no notion of Inode in stat on Windows
return
}
// handleTarTypeBlockCharFifo is an OS-specific helper function used by
// createTarFile to handle the following types of header: Block; Char; Fifo
func handleTarTypeBlockCharFifo(hdr *tar.Header, path string) error {
return nil
}
func handleLChmod(hdr *tar.Header, path string, hdrInfo os.FileInfo) error {
return nil
}
func getFileUIDGID(stat interface{}) (idtools.Identity, error) {
// no notion of file ownership mapping yet on Windows
return idtools.Identity{UID: 0, GID: 0}, nil
}

@ -0,0 +1,445 @@
package archive // import "github.com/docker/docker/pkg/archive"
import (
"archive/tar"
"bytes"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"sort"
"strings"
"syscall"
"time"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/pools"
"github.com/docker/docker/pkg/system"
"github.com/sirupsen/logrus"
)
// ChangeType represents the change type.
type ChangeType int
const (
// ChangeModify represents the modify operation.
ChangeModify = iota
// ChangeAdd represents the add operation.
ChangeAdd
// ChangeDelete represents the delete operation.
ChangeDelete
)
func (c ChangeType) String() string {
switch c {
case ChangeModify:
return "C"
case ChangeAdd:
return "A"
case ChangeDelete:
return "D"
}
return ""
}
// Change represents a change, it wraps the change type and path.
// It describes changes of the files in the path respect to the
// parent layers. The change could be modify, add, delete.
// This is used for layer diff.
type Change struct {
Path string
Kind ChangeType
}
func (change *Change) String() string {
return fmt.Sprintf("%s %s", change.Kind, change.Path)
}
// for sort.Sort
type changesByPath []Change
func (c changesByPath) Less(i, j int) bool { return c[i].Path < c[j].Path }
func (c changesByPath) Len() int { return len(c) }
func (c changesByPath) Swap(i, j int) { c[j], c[i] = c[i], c[j] }
// Gnu tar doesn't have sub-second mtime precision. The go tar
// writer (1.10+) does when using PAX format, but we round times to seconds
// to ensure archives have the same hashes for backwards compatibility.
// See https://github.com/moby/moby/pull/35739/commits/fb170206ba12752214630b269a40ac7be6115ed4.
//
// Non-sub-second is problematic when we apply changes via tar
// files. We handle this by comparing for exact times, *or* same
// second count and either a or b having exactly 0 nanoseconds
func sameFsTime(a, b time.Time) bool {
return a.Equal(b) ||
(a.Unix() == b.Unix() &&
(a.Nanosecond() == 0 || b.Nanosecond() == 0))
}
func sameFsTimeSpec(a, b syscall.Timespec) bool {
return a.Sec == b.Sec &&
(a.Nsec == b.Nsec || a.Nsec == 0 || b.Nsec == 0)
}
// Changes walks the path rw and determines changes for the files in the path,
// with respect to the parent layers
func Changes(layers []string, rw string) ([]Change, error) {
return changes(layers, rw, aufsDeletedFile, aufsMetadataSkip)
}
func aufsMetadataSkip(path string) (skip bool, err error) {
skip, err = filepath.Match(string(os.PathSeparator)+WhiteoutMetaPrefix+"*", path)
if err != nil {
skip = true
}
return
}
func aufsDeletedFile(root, path string, fi os.FileInfo) (string, error) {
f := filepath.Base(path)
// If there is a whiteout, then the file was removed
if strings.HasPrefix(f, WhiteoutPrefix) {
originalFile := f[len(WhiteoutPrefix):]
return filepath.Join(filepath.Dir(path), originalFile), nil
}
return "", nil
}
type skipChange func(string) (bool, error)
type deleteChange func(string, string, os.FileInfo) (string, error)
func changes(layers []string, rw string, dc deleteChange, sc skipChange) ([]Change, error) {
var (
changes []Change
changedDirs = make(map[string]struct{})
)
err := filepath.Walk(rw, func(path string, f os.FileInfo, err error) error {
if err != nil {
return err
}
// Rebase path
path, err = filepath.Rel(rw, path)
if err != nil {
return err
}
// As this runs on the daemon side, file paths are OS specific.
path = filepath.Join(string(os.PathSeparator), path)
// Skip root
if path == string(os.PathSeparator) {
return nil
}
if sc != nil {
if skip, err := sc(path); skip {
return err
}
}
change := Change{
Path: path,
}
deletedFile, err := dc(rw, path, f)
if err != nil {
return err
}
// Find out what kind of modification happened
if deletedFile != "" {
change.Path = deletedFile
change.Kind = ChangeDelete
} else {
// Otherwise, the file was added
change.Kind = ChangeAdd
// ...Unless it already existed in a top layer, in which case, it's a modification
for _, layer := range layers {
stat, err := os.Stat(filepath.Join(layer, path))
if err != nil && !os.IsNotExist(err) {
return err
}
if err == nil {
// The file existed in the top layer, so that's a modification
// However, if it's a directory, maybe it wasn't actually modified.
// If you modify /foo/bar/baz, then /foo will be part of the changed files only because it's the parent of bar
if stat.IsDir() && f.IsDir() {
if f.Size() == stat.Size() && f.Mode() == stat.Mode() && sameFsTime(f.ModTime(), stat.ModTime()) {
// Both directories are the same, don't record the change
return nil
}
}
change.Kind = ChangeModify
break
}
}
}
// If /foo/bar/file.txt is modified, then /foo/bar must be part of the changed files.
// This block is here to ensure the change is recorded even if the
// modify time, mode and size of the parent directory in the rw and ro layers are all equal.
// Check https://github.com/docker/docker/pull/13590 for details.
if f.IsDir() {
changedDirs[path] = struct{}{}
}
if change.Kind == ChangeAdd || change.Kind == ChangeDelete {
parent := filepath.Dir(path)
if _, ok := changedDirs[parent]; !ok && parent != "/" {
changes = append(changes, Change{Path: parent, Kind: ChangeModify})
changedDirs[parent] = struct{}{}
}
}
// Record change
changes = append(changes, change)
return nil
})
if err != nil && !os.IsNotExist(err) {
return nil, err
}
return changes, nil
}
// FileInfo describes the information of a file.
type FileInfo struct {
parent *FileInfo
name string
stat *system.StatT
children map[string]*FileInfo
capability []byte
added bool
}
// LookUp looks up the file information of a file.
func (info *FileInfo) LookUp(path string) *FileInfo {
// As this runs on the daemon side, file paths are OS specific.
parent := info
if path == string(os.PathSeparator) {
return info
}
pathElements := strings.Split(path, string(os.PathSeparator))
for _, elem := range pathElements {
if elem != "" {
child := parent.children[elem]
if child == nil {
return nil
}
parent = child
}
}
return parent
}
func (info *FileInfo) path() string {
if info.parent == nil {
// As this runs on the daemon side, file paths are OS specific.
return string(os.PathSeparator)
}
return filepath.Join(info.parent.path(), info.name)
}
func (info *FileInfo) addChanges(oldInfo *FileInfo, changes *[]Change) {
sizeAtEntry := len(*changes)
if oldInfo == nil {
// add
change := Change{
Path: info.path(),
Kind: ChangeAdd,
}
*changes = append(*changes, change)
info.added = true
}
// We make a copy so we can modify it to detect additions
// also, we only recurse on the old dir if the new info is a directory
// otherwise any previous delete/change is considered recursive
oldChildren := make(map[string]*FileInfo)
if oldInfo != nil && info.isDir() {
for k, v := range oldInfo.children {
oldChildren[k] = v
}
}
for name, newChild := range info.children {
oldChild := oldChildren[name]
if oldChild != nil {
// change?
oldStat := oldChild.stat
newStat := newChild.stat
// Note: We can't compare inode or ctime or blocksize here, because these change
// when copying a file into a container. However, that is not generally a problem
// because any content change will change mtime, and any status change should
// be visible when actually comparing the stat fields. The only time this
// breaks down is if some code intentionally hides a change by setting
// back mtime
if statDifferent(oldStat, newStat) ||
!bytes.Equal(oldChild.capability, newChild.capability) {
change := Change{
Path: newChild.path(),
Kind: ChangeModify,
}
*changes = append(*changes, change)
newChild.added = true
}
// Remove from copy so we can detect deletions
delete(oldChildren, name)
}
newChild.addChanges(oldChild, changes)
}
for _, oldChild := range oldChildren {
// delete
change := Change{
Path: oldChild.path(),
Kind: ChangeDelete,
}
*changes = append(*changes, change)
}
// If there were changes inside this directory, we need to add it, even if the directory
// itself wasn't changed. This is needed to properly save and restore filesystem permissions.
// As this runs on the daemon side, file paths are OS specific.
if len(*changes) > sizeAtEntry && info.isDir() && !info.added && info.path() != string(os.PathSeparator) {
change := Change{
Path: info.path(),
Kind: ChangeModify,
}
// Let's insert the directory entry before the recently added entries located inside this dir
*changes = append(*changes, change) // just to resize the slice, will be overwritten
copy((*changes)[sizeAtEntry+1:], (*changes)[sizeAtEntry:])
(*changes)[sizeAtEntry] = change
}
}
// Changes add changes to file information.
func (info *FileInfo) Changes(oldInfo *FileInfo) []Change {
var changes []Change
info.addChanges(oldInfo, &changes)
return changes
}
func newRootFileInfo() *FileInfo {
// As this runs on the daemon side, file paths are OS specific.
root := &FileInfo{
name: string(os.PathSeparator),
children: make(map[string]*FileInfo),
}
return root
}
// ChangesDirs compares two directories and generates an array of Change objects describing the changes.
// If oldDir is "", then all files in newDir will be Add-Changes.
func ChangesDirs(newDir, oldDir string) ([]Change, error) {
var (
oldRoot, newRoot *FileInfo
)
if oldDir == "" {
emptyDir, err := ioutil.TempDir("", "empty")
if err != nil {
return nil, err
}
defer os.Remove(emptyDir)
oldDir = emptyDir
}
oldRoot, newRoot, err := collectFileInfoForChanges(oldDir, newDir)
if err != nil {
return nil, err
}
return newRoot.Changes(oldRoot), nil
}
// ChangesSize calculates the size in bytes of the provided changes, based on newDir.
func ChangesSize(newDir string, changes []Change) int64 {
var (
size int64
sf = make(map[uint64]struct{})
)
for _, change := range changes {
if change.Kind == ChangeModify || change.Kind == ChangeAdd {
file := filepath.Join(newDir, change.Path)
fileInfo, err := os.Lstat(file)
if err != nil {
logrus.Errorf("Can not stat %q: %s", file, err)
continue
}
if fileInfo != nil && !fileInfo.IsDir() {
if hasHardlinks(fileInfo) {
inode := getIno(fileInfo)
if _, ok := sf[inode]; !ok {
size += fileInfo.Size()
sf[inode] = struct{}{}
}
} else {
size += fileInfo.Size()
}
}
}
}
return size
}
// ExportChanges produces an Archive from the provided changes, relative to dir.
func ExportChanges(dir string, changes []Change, uidMaps, gidMaps []idtools.IDMap) (io.ReadCloser, error) {
reader, writer := io.Pipe()
go func() {
ta := newTarAppender(idtools.NewIDMappingsFromMaps(uidMaps, gidMaps), writer, nil)
// this buffer is needed for the duration of this piped stream
defer pools.BufioWriter32KPool.Put(ta.Buffer)
sort.Sort(changesByPath(changes))
// In general we log errors here but ignore them because
// during e.g. a diff operation the container can continue
// mutating the filesystem and we can see transient errors
// from this
for _, change := range changes {
if change.Kind == ChangeDelete {
whiteOutDir := filepath.Dir(change.Path)
whiteOutBase := filepath.Base(change.Path)
whiteOut := filepath.Join(whiteOutDir, WhiteoutPrefix+whiteOutBase)
timestamp := time.Now()
hdr := &tar.Header{
Name: whiteOut[1:],
Size: 0,
ModTime: timestamp,
AccessTime: timestamp,
ChangeTime: timestamp,
}
if err := ta.TarWriter.WriteHeader(hdr); err != nil {
logrus.Debugf("Can't write whiteout header: %s", err)
}
} else {
path := filepath.Join(dir, change.Path)
if err := ta.addTarFile(path, change.Path[1:]); err != nil {
logrus.Debugf("Can't add file %s to tar: %s", path, err)
}
}
}
// Make sure to check the error on Close.
if err := ta.TarWriter.Close(); err != nil {
logrus.Debugf("Can't close layer: %s", err)
}
if err := writer.Close(); err != nil {
logrus.Debugf("failed close Changes writer: %s", err)
}
}()
return reader, nil
}

@ -0,0 +1,286 @@
package archive // import "github.com/docker/docker/pkg/archive"
import (
"bytes"
"fmt"
"os"
"path/filepath"
"sort"
"syscall"
"unsafe"
"github.com/docker/docker/pkg/system"
"golang.org/x/sys/unix"
)
// walker is used to implement collectFileInfoForChanges on linux. Where this
// method in general returns the entire contents of two directory trees, we
// optimize some FS calls out on linux. In particular, we take advantage of the
// fact that getdents(2) returns the inode of each file in the directory being
// walked, which, when walking two trees in parallel to generate a list of
// changes, can be used to prune subtrees without ever having to lstat(2) them
// directly. Eliminating stat calls in this way can save up to seconds on large
// images.
type walker struct {
dir1 string
dir2 string
root1 *FileInfo
root2 *FileInfo
}
// collectFileInfoForChanges returns a complete representation of the trees
// rooted at dir1 and dir2, with one important exception: any subtree or
// leaf where the inode and device numbers are an exact match between dir1
// and dir2 will be pruned from the results. This method is *only* to be used
// to generating a list of changes between the two directories, as it does not
// reflect the full contents.
func collectFileInfoForChanges(dir1, dir2 string) (*FileInfo, *FileInfo, error) {
w := &walker{
dir1: dir1,
dir2: dir2,
root1: newRootFileInfo(),
root2: newRootFileInfo(),
}
i1, err := os.Lstat(w.dir1)
if err != nil {
return nil, nil, err
}
i2, err := os.Lstat(w.dir2)
if err != nil {
return nil, nil, err
}
if err := w.walk("/", i1, i2); err != nil {
return nil, nil, err
}
return w.root1, w.root2, nil
}
// Given a FileInfo, its path info, and a reference to the root of the tree
// being constructed, register this file with the tree.
func walkchunk(path string, fi os.FileInfo, dir string, root *FileInfo) error {
if fi == nil {
return nil
}
parent := root.LookUp(filepath.Dir(path))
if parent == nil {
return fmt.Errorf("walkchunk: Unexpectedly no parent for %s", path)
}
info := &FileInfo{
name: filepath.Base(path),
children: make(map[string]*FileInfo),
parent: parent,
}
cpath := filepath.Join(dir, path)
stat, err := system.FromStatT(fi.Sys().(*syscall.Stat_t))
if err != nil {
return err
}
info.stat = stat
info.capability, _ = system.Lgetxattr(cpath, "security.capability") // lgetxattr(2): fs access
parent.children[info.name] = info
return nil
}
// Walk a subtree rooted at the same path in both trees being iterated. For
// example, /docker/overlay/1234/a/b/c/d and /docker/overlay/8888/a/b/c/d
func (w *walker) walk(path string, i1, i2 os.FileInfo) (err error) {
// Register these nodes with the return trees, unless we're still at the
// (already-created) roots:
if path != "/" {
if err := walkchunk(path, i1, w.dir1, w.root1); err != nil {
return err
}
if err := walkchunk(path, i2, w.dir2, w.root2); err != nil {
return err
}
}
is1Dir := i1 != nil && i1.IsDir()
is2Dir := i2 != nil && i2.IsDir()
sameDevice := false
if i1 != nil && i2 != nil {
si1 := i1.Sys().(*syscall.Stat_t)
si2 := i2.Sys().(*syscall.Stat_t)
if si1.Dev == si2.Dev {
sameDevice = true
}
}
// If these files are both non-existent, or leaves (non-dirs), we are done.
if !is1Dir && !is2Dir {
return nil
}
// Fetch the names of all the files contained in both directories being walked:
var names1, names2 []nameIno
if is1Dir {
names1, err = readdirnames(filepath.Join(w.dir1, path)) // getdents(2): fs access
if err != nil {
return err
}
}
if is2Dir {
names2, err = readdirnames(filepath.Join(w.dir2, path)) // getdents(2): fs access
if err != nil {
return err
}
}
// We have lists of the files contained in both parallel directories, sorted
// in the same order. Walk them in parallel, generating a unique merged list
// of all items present in either or both directories.
var names []string
ix1 := 0
ix2 := 0
for {
if ix1 >= len(names1) {
break
}
if ix2 >= len(names2) {
break
}
ni1 := names1[ix1]
ni2 := names2[ix2]
switch bytes.Compare([]byte(ni1.name), []byte(ni2.name)) {
case -1: // ni1 < ni2 -- advance ni1
// we will not encounter ni1 in names2
names = append(names, ni1.name)
ix1++
case 0: // ni1 == ni2
if ni1.ino != ni2.ino || !sameDevice {
names = append(names, ni1.name)
}
ix1++
ix2++
case 1: // ni1 > ni2 -- advance ni2
// we will not encounter ni2 in names1
names = append(names, ni2.name)
ix2++
}
}
for ix1 < len(names1) {
names = append(names, names1[ix1].name)
ix1++
}
for ix2 < len(names2) {
names = append(names, names2[ix2].name)
ix2++
}
// For each of the names present in either or both of the directories being
// iterated, stat the name under each root, and recurse the pair of them:
for _, name := range names {
fname := filepath.Join(path, name)
var cInfo1, cInfo2 os.FileInfo
if is1Dir {
cInfo1, err = os.Lstat(filepath.Join(w.dir1, fname)) // lstat(2): fs access
if err != nil && !os.IsNotExist(err) {
return err
}
}
if is2Dir {
cInfo2, err = os.Lstat(filepath.Join(w.dir2, fname)) // lstat(2): fs access
if err != nil && !os.IsNotExist(err) {
return err
}
}
if err = w.walk(fname, cInfo1, cInfo2); err != nil {
return err
}
}
return nil
}
// {name,inode} pairs used to support the early-pruning logic of the walker type
type nameIno struct {
name string
ino uint64
}
type nameInoSlice []nameIno
func (s nameInoSlice) Len() int { return len(s) }
func (s nameInoSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s nameInoSlice) Less(i, j int) bool { return s[i].name < s[j].name }
// readdirnames is a hacked-apart version of the Go stdlib code, exposing inode
// numbers further up the stack when reading directory contents. Unlike
// os.Readdirnames, which returns a list of filenames, this function returns a
// list of {filename,inode} pairs.
func readdirnames(dirname string) (names []nameIno, err error) {
var (
size = 100
buf = make([]byte, 4096)
nbuf int
bufp int
nb int
)
f, err := os.Open(dirname)
if err != nil {
return nil, err
}
defer f.Close()
names = make([]nameIno, 0, size) // Empty with room to grow.
for {
// Refill the buffer if necessary
if bufp >= nbuf {
bufp = 0
nbuf, err = unix.ReadDirent(int(f.Fd()), buf) // getdents on linux
if nbuf < 0 {
nbuf = 0
}
if err != nil {
return nil, os.NewSyscallError("readdirent", err)
}
if nbuf <= 0 {
break // EOF
}
}
// Drain the buffer
nb, names = parseDirent(buf[bufp:nbuf], names)
bufp += nb
}
sl := nameInoSlice(names)
sort.Sort(sl)
return sl, nil
}
// parseDirent is a minor modification of unix.ParseDirent (linux version)
// which returns {name,inode} pairs instead of just names.
func parseDirent(buf []byte, names []nameIno) (consumed int, newnames []nameIno) {
origlen := len(buf)
for len(buf) > 0 {
dirent := (*unix.Dirent)(unsafe.Pointer(&buf[0]))
buf = buf[dirent.Reclen:]
if dirent.Ino == 0 { // File absent in directory.
continue
}
bytes := (*[10000]byte)(unsafe.Pointer(&dirent.Name[0]))
var name = string(bytes[0:clen(bytes[:])])
if name == "." || name == ".." { // Useless names
continue
}
names = append(names, nameIno{name, dirent.Ino})
}
return origlen - len(buf), names
}
func clen(n []byte) int {
for i := 0; i < len(n); i++ {
if n[i] == 0 {
return i
}
}
return len(n)
}

@ -0,0 +1,97 @@
// +build !linux
package archive // import "github.com/docker/docker/pkg/archive"
import (
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"github.com/docker/docker/pkg/system"
)
func collectFileInfoForChanges(oldDir, newDir string) (*FileInfo, *FileInfo, error) {
var (
oldRoot, newRoot *FileInfo
err1, err2 error
errs = make(chan error, 2)
)
go func() {
oldRoot, err1 = collectFileInfo(oldDir)
errs <- err1
}()
go func() {
newRoot, err2 = collectFileInfo(newDir)
errs <- err2
}()
// block until both routines have returned
for i := 0; i < 2; i++ {
if err := <-errs; err != nil {
return nil, nil, err
}
}
return oldRoot, newRoot, nil
}
func collectFileInfo(sourceDir string) (*FileInfo, error) {
root := newRootFileInfo()
err := filepath.Walk(sourceDir, func(path string, f os.FileInfo, err error) error {
if err != nil {
return err
}
// Rebase path
relPath, err := filepath.Rel(sourceDir, path)
if err != nil {
return err
}
// As this runs on the daemon side, file paths are OS specific.
relPath = filepath.Join(string(os.PathSeparator), relPath)
// See https://github.com/golang/go/issues/9168 - bug in filepath.Join.
// Temporary workaround. If the returned path starts with two backslashes,
// trim it down to a single backslash. Only relevant on Windows.
if runtime.GOOS == "windows" {
if strings.HasPrefix(relPath, `\\`) {
relPath = relPath[1:]
}
}
if relPath == string(os.PathSeparator) {
return nil
}
parent := root.LookUp(filepath.Dir(relPath))
if parent == nil {
return fmt.Errorf("collectFileInfo: Unexpectedly no parent for %s", relPath)
}
info := &FileInfo{
name: filepath.Base(relPath),
children: make(map[string]*FileInfo),
parent: parent,
}
s, err := system.Lstat(path)
if err != nil {
return err
}
info.stat = s
info.capability, _ = system.Lgetxattr(path, "security.capability")
parent.children[info.name] = info
return nil
})
if err != nil {
return nil, err
}
return root, nil
}

@ -0,0 +1,43 @@
// +build !windows
package archive // import "github.com/docker/docker/pkg/archive"
import (
"os"
"syscall"
"github.com/docker/docker/pkg/system"
"golang.org/x/sys/unix"
)
func statDifferent(oldStat *system.StatT, newStat *system.StatT) bool {
// Don't look at size for dirs, its not a good measure of change
if oldStat.Mode() != newStat.Mode() ||
oldStat.UID() != newStat.UID() ||
oldStat.GID() != newStat.GID() ||
oldStat.Rdev() != newStat.Rdev() ||
// Don't look at size or modification time for dirs, its not a good
// measure of change. See https://github.com/moby/moby/issues/9874
// for a description of the issue with modification time, and
// https://github.com/moby/moby/pull/11422 for the change.
// (Note that in the Windows implementation of this function,
// modification time IS taken as a change). See
// https://github.com/moby/moby/pull/37982 for more information.
(oldStat.Mode()&unix.S_IFDIR != unix.S_IFDIR &&
(!sameFsTimeSpec(oldStat.Mtim(), newStat.Mtim()) || (oldStat.Size() != newStat.Size()))) {
return true
}
return false
}
func (info *FileInfo) isDir() bool {
return info.parent == nil || info.stat.Mode()&unix.S_IFDIR != 0
}
func getIno(fi os.FileInfo) uint64 {
return fi.Sys().(*syscall.Stat_t).Ino
}
func hasHardlinks(fi os.FileInfo) bool {
return fi.Sys().(*syscall.Stat_t).Nlink > 1
}

@ -0,0 +1,34 @@
package archive // import "github.com/docker/docker/pkg/archive"
import (
"os"
"github.com/docker/docker/pkg/system"
)
func statDifferent(oldStat *system.StatT, newStat *system.StatT) bool {
// Note there is slight difference between the Linux and Windows
// implementations here. Due to https://github.com/moby/moby/issues/9874,
// and the fix at https://github.com/moby/moby/pull/11422, Linux does not
// consider a change to the directory time as a change. Windows on NTFS
// does. See https://github.com/moby/moby/pull/37982 for more information.
if !sameFsTime(oldStat.Mtim(), newStat.Mtim()) ||
oldStat.Mode() != newStat.Mode() ||
oldStat.Size() != newStat.Size() && !oldStat.Mode().IsDir() {
return true
}
return false
}
func (info *FileInfo) isDir() bool {
return info.parent == nil || info.stat.Mode().IsDir()
}
func getIno(fi os.FileInfo) (inode uint64) {
return
}
func hasHardlinks(fi os.FileInfo) bool {
return false
}

@ -0,0 +1,480 @@
package archive // import "github.com/docker/docker/pkg/archive"
import (
"archive/tar"
"errors"
"io"
"io/ioutil"
"os"
"path/filepath"
"strings"
"github.com/docker/docker/pkg/system"
"github.com/sirupsen/logrus"
)
// Errors used or returned by this file.
var (
ErrNotDirectory = errors.New("not a directory")
ErrDirNotExists = errors.New("no such directory")
ErrCannotCopyDir = errors.New("cannot copy directory")
ErrInvalidCopySource = errors.New("invalid copy source content")
)
// PreserveTrailingDotOrSeparator returns the given cleaned path (after
// processing using any utility functions from the path or filepath stdlib
// packages) and appends a trailing `/.` or `/` if its corresponding original
// path (from before being processed by utility functions from the path or
// filepath stdlib packages) ends with a trailing `/.` or `/`. If the cleaned
// path already ends in a `.` path segment, then another is not added. If the
// clean path already ends in the separator, then another is not added.
func PreserveTrailingDotOrSeparator(cleanedPath string, originalPath string, sep byte) string {
// Ensure paths are in platform semantics
cleanedPath = strings.Replace(cleanedPath, "/", string(sep), -1)
originalPath = strings.Replace(originalPath, "/", string(sep), -1)
if !specifiesCurrentDir(cleanedPath) && specifiesCurrentDir(originalPath) {
if !hasTrailingPathSeparator(cleanedPath, sep) {
// Add a separator if it doesn't already end with one (a cleaned
// path would only end in a separator if it is the root).
cleanedPath += string(sep)
}
cleanedPath += "."
}
if !hasTrailingPathSeparator(cleanedPath, sep) && hasTrailingPathSeparator(originalPath, sep) {
cleanedPath += string(sep)
}
return cleanedPath
}
// assertsDirectory returns whether the given path is
// asserted to be a directory, i.e., the path ends with
// a trailing '/' or `/.`, assuming a path separator of `/`.
func assertsDirectory(path string, sep byte) bool {
return hasTrailingPathSeparator(path, sep) || specifiesCurrentDir(path)
}
// hasTrailingPathSeparator returns whether the given
// path ends with the system's path separator character.
func hasTrailingPathSeparator(path string, sep byte) bool {
return len(path) > 0 && path[len(path)-1] == sep
}
// specifiesCurrentDir returns whether the given path specifies
// a "current directory", i.e., the last path segment is `.`.
func specifiesCurrentDir(path string) bool {
return filepath.Base(path) == "."
}
// SplitPathDirEntry splits the given path between its directory name and its
// basename by first cleaning the path but preserves a trailing "." if the
// original path specified the current directory.
func SplitPathDirEntry(path string) (dir, base string) {
cleanedPath := filepath.Clean(filepath.FromSlash(path))
if specifiesCurrentDir(path) {
cleanedPath += string(os.PathSeparator) + "."
}
return filepath.Dir(cleanedPath), filepath.Base(cleanedPath)
}
// TarResource archives the resource described by the given CopyInfo to a Tar
// archive. A non-nil error is returned if sourcePath does not exist or is
// asserted to be a directory but exists as another type of file.
//
// This function acts as a convenient wrapper around TarWithOptions, which
// requires a directory as the source path. TarResource accepts either a
// directory or a file path and correctly sets the Tar options.
func TarResource(sourceInfo CopyInfo) (content io.ReadCloser, err error) {
return TarResourceRebase(sourceInfo.Path, sourceInfo.RebaseName)
}
// TarResourceRebase is like TarResource but renames the first path element of
// items in the resulting tar archive to match the given rebaseName if not "".
func TarResourceRebase(sourcePath, rebaseName string) (content io.ReadCloser, err error) {
sourcePath = normalizePath(sourcePath)
if _, err = os.Lstat(sourcePath); err != nil {
// Catches the case where the source does not exist or is not a
// directory if asserted to be a directory, as this also causes an
// error.
return
}
// Separate the source path between its directory and
// the entry in that directory which we are archiving.
sourceDir, sourceBase := SplitPathDirEntry(sourcePath)
opts := TarResourceRebaseOpts(sourceBase, rebaseName)
logrus.Debugf("copying %q from %q", sourceBase, sourceDir)
return TarWithOptions(sourceDir, opts)
}
// TarResourceRebaseOpts does not preform the Tar, but instead just creates the rebase
// parameters to be sent to TarWithOptions (the TarOptions struct)
func TarResourceRebaseOpts(sourceBase string, rebaseName string) *TarOptions {
filter := []string{sourceBase}
return &TarOptions{
Compression: Uncompressed,
IncludeFiles: filter,
IncludeSourceDir: true,
RebaseNames: map[string]string{
sourceBase: rebaseName,
},
}
}
// CopyInfo holds basic info about the source
// or destination path of a copy operation.
type CopyInfo struct {
Path string
Exists bool
IsDir bool
RebaseName string
}
// CopyInfoSourcePath stats the given path to create a CopyInfo
// struct representing that resource for the source of an archive copy
// operation. The given path should be an absolute local path. A source path
// has all symlinks evaluated that appear before the last path separator ("/"
// on Unix). As it is to be a copy source, the path must exist.
func CopyInfoSourcePath(path string, followLink bool) (CopyInfo, error) {
// normalize the file path and then evaluate the symbol link
// we will use the target file instead of the symbol link if
// followLink is set
path = normalizePath(path)
resolvedPath, rebaseName, err := ResolveHostSourcePath(path, followLink)
if err != nil {
return CopyInfo{}, err
}
stat, err := os.Lstat(resolvedPath)
if err != nil {
return CopyInfo{}, err
}
return CopyInfo{
Path: resolvedPath,
Exists: true,
IsDir: stat.IsDir(),
RebaseName: rebaseName,
}, nil
}
// CopyInfoDestinationPath stats the given path to create a CopyInfo
// struct representing that resource for the destination of an archive copy
// operation. The given path should be an absolute local path.
func CopyInfoDestinationPath(path string) (info CopyInfo, err error) {
maxSymlinkIter := 10 // filepath.EvalSymlinks uses 255, but 10 already seems like a lot.
path = normalizePath(path)
originalPath := path
stat, err := os.Lstat(path)
if err == nil && stat.Mode()&os.ModeSymlink == 0 {
// The path exists and is not a symlink.
return CopyInfo{
Path: path,
Exists: true,
IsDir: stat.IsDir(),
}, nil
}
// While the path is a symlink.
for n := 0; err == nil && stat.Mode()&os.ModeSymlink != 0; n++ {
if n > maxSymlinkIter {
// Don't follow symlinks more than this arbitrary number of times.
return CopyInfo{}, errors.New("too many symlinks in " + originalPath)
}
// The path is a symbolic link. We need to evaluate it so that the
// destination of the copy operation is the link target and not the
// link itself. This is notably different than CopyInfoSourcePath which
// only evaluates symlinks before the last appearing path separator.
// Also note that it is okay if the last path element is a broken
// symlink as the copy operation should create the target.
var linkTarget string
linkTarget, err = os.Readlink(path)
if err != nil {
return CopyInfo{}, err
}
if !system.IsAbs(linkTarget) {
// Join with the parent directory.
dstParent, _ := SplitPathDirEntry(path)
linkTarget = filepath.Join(dstParent, linkTarget)
}
path = linkTarget
stat, err = os.Lstat(path)
}
if err != nil {
// It's okay if the destination path doesn't exist. We can still
// continue the copy operation if the parent directory exists.
if !os.IsNotExist(err) {
return CopyInfo{}, err
}
// Ensure destination parent dir exists.
dstParent, _ := SplitPathDirEntry(path)
parentDirStat, err := os.Stat(dstParent)
if err != nil {
return CopyInfo{}, err
}
if !parentDirStat.IsDir() {
return CopyInfo{}, ErrNotDirectory
}
return CopyInfo{Path: path}, nil
}
// The path exists after resolving symlinks.
return CopyInfo{
Path: path,
Exists: true,
IsDir: stat.IsDir(),
}, nil
}
// PrepareArchiveCopy prepares the given srcContent archive, which should
// contain the archived resource described by srcInfo, to the destination
// described by dstInfo. Returns the possibly modified content archive along
// with the path to the destination directory which it should be extracted to.
func PrepareArchiveCopy(srcContent io.Reader, srcInfo, dstInfo CopyInfo) (dstDir string, content io.ReadCloser, err error) {
// Ensure in platform semantics
srcInfo.Path = normalizePath(srcInfo.Path)
dstInfo.Path = normalizePath(dstInfo.Path)
// Separate the destination path between its directory and base
// components in case the source archive contents need to be rebased.
dstDir, dstBase := SplitPathDirEntry(dstInfo.Path)
_, srcBase := SplitPathDirEntry(srcInfo.Path)
switch {
case dstInfo.Exists && dstInfo.IsDir:
// The destination exists as a directory. No alteration
// to srcContent is needed as its contents can be
// simply extracted to the destination directory.
return dstInfo.Path, ioutil.NopCloser(srcContent), nil
case dstInfo.Exists && srcInfo.IsDir:
// The destination exists as some type of file and the source
// content is a directory. This is an error condition since
// you cannot copy a directory to an existing file location.
return "", nil, ErrCannotCopyDir
case dstInfo.Exists:
// The destination exists as some type of file and the source content
// is also a file. The source content entry will have to be renamed to
// have a basename which matches the destination path's basename.
if len(srcInfo.RebaseName) != 0 {
srcBase = srcInfo.RebaseName
}
return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
case srcInfo.IsDir:
// The destination does not exist and the source content is an archive
// of a directory. The archive should be extracted to the parent of
// the destination path instead, and when it is, the directory that is
// created as a result should take the name of the destination path.
// The source content entries will have to be renamed to have a
// basename which matches the destination path's basename.
if len(srcInfo.RebaseName) != 0 {
srcBase = srcInfo.RebaseName
}
return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
case assertsDirectory(dstInfo.Path, os.PathSeparator):
// The destination does not exist and is asserted to be created as a
// directory, but the source content is not a directory. This is an
// error condition since you cannot create a directory from a file
// source.
return "", nil, ErrDirNotExists
default:
// The last remaining case is when the destination does not exist, is
// not asserted to be a directory, and the source content is not an
// archive of a directory. It this case, the destination file will need
// to be created when the archive is extracted and the source content
// entry will have to be renamed to have a basename which matches the
// destination path's basename.
if len(srcInfo.RebaseName) != 0 {
srcBase = srcInfo.RebaseName
}
return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
}
}
// RebaseArchiveEntries rewrites the given srcContent archive replacing
// an occurrence of oldBase with newBase at the beginning of entry names.
func RebaseArchiveEntries(srcContent io.Reader, oldBase, newBase string) io.ReadCloser {
if oldBase == string(os.PathSeparator) {
// If oldBase specifies the root directory, use an empty string as
// oldBase instead so that newBase doesn't replace the path separator
// that all paths will start with.
oldBase = ""
}
rebased, w := io.Pipe()
go func() {
srcTar := tar.NewReader(srcContent)
rebasedTar := tar.NewWriter(w)
for {
hdr, err := srcTar.Next()
if err == io.EOF {
// Signals end of archive.
rebasedTar.Close()
w.Close()
return
}
if err != nil {
w.CloseWithError(err)
return
}
// srcContent tar stream, as served by TarWithOptions(), is
// definitely in PAX format, but tar.Next() mistakenly guesses it
// as USTAR, which creates a problem: if the newBase is >100
// characters long, WriteHeader() returns an error like
// "archive/tar: cannot encode header: Format specifies USTAR; and USTAR cannot encode Name=...".
//
// To fix, set the format to PAX here. See docker/for-linux issue #484.
hdr.Format = tar.FormatPAX
hdr.Name = strings.Replace(hdr.Name, oldBase, newBase, 1)
if hdr.Typeflag == tar.TypeLink {
hdr.Linkname = strings.Replace(hdr.Linkname, oldBase, newBase, 1)
}
if err = rebasedTar.WriteHeader(hdr); err != nil {
w.CloseWithError(err)
return
}
if _, err = io.Copy(rebasedTar, srcTar); err != nil {
w.CloseWithError(err)
return
}
}
}()
return rebased
}
// TODO @gupta-ak. These might have to be changed in the future to be
// continuity driver aware as well to support LCOW.
// CopyResource performs an archive copy from the given source path to the
// given destination path. The source path MUST exist and the destination
// path's parent directory must exist.
func CopyResource(srcPath, dstPath string, followLink bool) error {
var (
srcInfo CopyInfo
err error
)
// Ensure in platform semantics
srcPath = normalizePath(srcPath)
dstPath = normalizePath(dstPath)
// Clean the source and destination paths.
srcPath = PreserveTrailingDotOrSeparator(filepath.Clean(srcPath), srcPath, os.PathSeparator)
dstPath = PreserveTrailingDotOrSeparator(filepath.Clean(dstPath), dstPath, os.PathSeparator)
if srcInfo, err = CopyInfoSourcePath(srcPath, followLink); err != nil {
return err
}
content, err := TarResource(srcInfo)
if err != nil {
return err
}
defer content.Close()
return CopyTo(content, srcInfo, dstPath)
}
// CopyTo handles extracting the given content whose
// entries should be sourced from srcInfo to dstPath.
func CopyTo(content io.Reader, srcInfo CopyInfo, dstPath string) error {
// The destination path need not exist, but CopyInfoDestinationPath will
// ensure that at least the parent directory exists.
dstInfo, err := CopyInfoDestinationPath(normalizePath(dstPath))
if err != nil {
return err
}
dstDir, copyArchive, err := PrepareArchiveCopy(content, srcInfo, dstInfo)
if err != nil {
return err
}
defer copyArchive.Close()
options := &TarOptions{
NoLchown: true,
NoOverwriteDirNonDir: true,
}
return Untar(copyArchive, dstDir, options)
}
// ResolveHostSourcePath decides real path need to be copied with parameters such as
// whether to follow symbol link or not, if followLink is true, resolvedPath will return
// link target of any symbol link file, else it will only resolve symlink of directory
// but return symbol link file itself without resolving.
func ResolveHostSourcePath(path string, followLink bool) (resolvedPath, rebaseName string, err error) {
if followLink {
resolvedPath, err = filepath.EvalSymlinks(path)
if err != nil {
return
}
resolvedPath, rebaseName = GetRebaseName(path, resolvedPath)
} else {
dirPath, basePath := filepath.Split(path)
// if not follow symbol link, then resolve symbol link of parent dir
var resolvedDirPath string
resolvedDirPath, err = filepath.EvalSymlinks(dirPath)
if err != nil {
return
}
// resolvedDirPath will have been cleaned (no trailing path separators) so
// we can manually join it with the base path element.
resolvedPath = resolvedDirPath + string(filepath.Separator) + basePath
if hasTrailingPathSeparator(path, os.PathSeparator) &&
filepath.Base(path) != filepath.Base(resolvedPath) {
rebaseName = filepath.Base(path)
}
}
return resolvedPath, rebaseName, nil
}
// GetRebaseName normalizes and compares path and resolvedPath,
// return completed resolved path and rebased file name
func GetRebaseName(path, resolvedPath string) (string, string) {
// linkTarget will have been cleaned (no trailing path separators and dot) so
// we can manually join it with them
var rebaseName string
if specifiesCurrentDir(path) &&
!specifiesCurrentDir(resolvedPath) {
resolvedPath += string(filepath.Separator) + "."
}
if hasTrailingPathSeparator(path, os.PathSeparator) &&
!hasTrailingPathSeparator(resolvedPath, os.PathSeparator) {
resolvedPath += string(filepath.Separator)
}
if filepath.Base(path) != filepath.Base(resolvedPath) {
// In the case where the path had a trailing separator and a symlink
// evaluation has changed the last path component, we will need to
// rebase the name in the archive that is being copied to match the
// originally requested name.
rebaseName = filepath.Base(path)
}
return resolvedPath, rebaseName
}

@ -0,0 +1,11 @@
// +build !windows
package archive // import "github.com/docker/docker/pkg/archive"
import (
"path/filepath"
)
func normalizePath(path string) string {
return filepath.ToSlash(path)
}

@ -0,0 +1,9 @@
package archive // import "github.com/docker/docker/pkg/archive"
import (
"path/filepath"
)
func normalizePath(path string) string {
return filepath.FromSlash(path)
}

@ -0,0 +1,260 @@
package archive // import "github.com/docker/docker/pkg/archive"
import (
"archive/tar"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"runtime"
"strings"
"github.com/docker/docker/pkg/idtools"
"github.com/docker/docker/pkg/pools"
"github.com/docker/docker/pkg/system"
"github.com/sirupsen/logrus"
)
// UnpackLayer unpack `layer` to a `dest`. The stream `layer` can be
// compressed or uncompressed.
// Returns the size in bytes of the contents of the layer.
func UnpackLayer(dest string, layer io.Reader, options *TarOptions) (size int64, err error) {
tr := tar.NewReader(layer)
trBuf := pools.BufioReader32KPool.Get(tr)
defer pools.BufioReader32KPool.Put(trBuf)
var dirs []*tar.Header
unpackedPaths := make(map[string]struct{})
if options == nil {
options = &TarOptions{}
}
if options.ExcludePatterns == nil {
options.ExcludePatterns = []string{}
}
idMapping := idtools.NewIDMappingsFromMaps(options.UIDMaps, options.GIDMaps)
aufsTempdir := ""
aufsHardlinks := make(map[string]*tar.Header)
// Iterate through the files in the archive.
for {
hdr, err := tr.Next()
if err == io.EOF {
// end of tar archive
break
}
if err != nil {
return 0, err
}
size += hdr.Size
// Normalize name, for safety and for a simple is-root check
hdr.Name = filepath.Clean(hdr.Name)
// Windows does not support filenames with colons in them. Ignore
// these files. This is not a problem though (although it might
// appear that it is). Let's suppose a client is running docker pull.
// The daemon it points to is Windows. Would it make sense for the
// client to be doing a docker pull Ubuntu for example (which has files
// with colons in the name under /usr/share/man/man3)? No, absolutely
// not as it would really only make sense that they were pulling a
// Windows image. However, for development, it is necessary to be able
// to pull Linux images which are in the repository.
//
// TODO Windows. Once the registry is aware of what images are Windows-
// specific or Linux-specific, this warning should be changed to an error
// to cater for the situation where someone does manage to upload a Linux
// image but have it tagged as Windows inadvertently.
if runtime.GOOS == "windows" {
if strings.Contains(hdr.Name, ":") {
logrus.Warnf("Windows: Ignoring %s (is this a Linux image?)", hdr.Name)
continue
}
}
// Note as these operations are platform specific, so must the slash be.
if !strings.HasSuffix(hdr.Name, string(os.PathSeparator)) {
// Not the root directory, ensure that the parent directory exists.
// This happened in some tests where an image had a tarfile without any
// parent directories.
parent := filepath.Dir(hdr.Name)
parentPath := filepath.Join(dest, parent)
if _, err := os.Lstat(parentPath); err != nil && os.IsNotExist(err) {
err = system.MkdirAll(parentPath, 0600)
if err != nil {
return 0, err
}
}
}
// Skip AUFS metadata dirs
if strings.HasPrefix(hdr.Name, WhiteoutMetaPrefix) {
// Regular files inside /.wh..wh.plnk can be used as hardlink targets
// We don't want this directory, but we need the files in them so that
// such hardlinks can be resolved.
if strings.HasPrefix(hdr.Name, WhiteoutLinkDir) && hdr.Typeflag == tar.TypeReg {
basename := filepath.Base(hdr.Name)
aufsHardlinks[basename] = hdr
if aufsTempdir == "" {
if aufsTempdir, err = ioutil.TempDir("", "dockerplnk"); err != nil {
return 0, err
}
defer os.RemoveAll(aufsTempdir)
}
if err := createTarFile(filepath.Join(aufsTempdir, basename), dest, hdr, tr, true, nil, options.InUserNS); err != nil {
return 0, err
}
}
if hdr.Name != WhiteoutOpaqueDir {
continue
}
}
path := filepath.Join(dest, hdr.Name)
rel, err := filepath.Rel(dest, path)
if err != nil {
return 0, err
}
// Note as these operations are platform specific, so must the slash be.
if strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
return 0, breakoutError(fmt.Errorf("%q is outside of %q", hdr.Name, dest))
}
base := filepath.Base(path)
if strings.HasPrefix(base, WhiteoutPrefix) {
dir := filepath.Dir(path)
if base == WhiteoutOpaqueDir {
_, err := os.Lstat(dir)
if err != nil {
return 0, err
}
err = filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if err != nil {
if os.IsNotExist(err) {
err = nil // parent was deleted
}
return err
}
if path == dir {
return nil
}
if _, exists := unpackedPaths[path]; !exists {
err := os.RemoveAll(path)
return err
}
return nil
})
if err != nil {
return 0, err
}
} else {
originalBase := base[len(WhiteoutPrefix):]
originalPath := filepath.Join(dir, originalBase)
if err := os.RemoveAll(originalPath); err != nil {
return 0, err
}
}
} else {
// If path exits we almost always just want to remove and replace it.
// The only exception is when it is a directory *and* the file from
// the layer is also a directory. Then we want to merge them (i.e.
// just apply the metadata from the layer).
if fi, err := os.Lstat(path); err == nil {
if !(fi.IsDir() && hdr.Typeflag == tar.TypeDir) {
if err := os.RemoveAll(path); err != nil {
return 0, err
}
}
}
trBuf.Reset(tr)
srcData := io.Reader(trBuf)
srcHdr := hdr
// Hard links into /.wh..wh.plnk don't work, as we don't extract that directory, so
// we manually retarget these into the temporary files we extracted them into
if hdr.Typeflag == tar.TypeLink && strings.HasPrefix(filepath.Clean(hdr.Linkname), WhiteoutLinkDir) {
linkBasename := filepath.Base(hdr.Linkname)
srcHdr = aufsHardlinks[linkBasename]
if srcHdr == nil {
return 0, fmt.Errorf("Invalid aufs hardlink")
}
tmpFile, err := os.Open(filepath.Join(aufsTempdir, linkBasename))
if err != nil {
return 0, err
}
defer tmpFile.Close()
srcData = tmpFile
}
if err := remapIDs(idMapping, srcHdr); err != nil {
return 0, err
}
if err := createTarFile(path, dest, srcHdr, srcData, !options.NoLchown, nil, options.InUserNS); err != nil {
return 0, err
}
// Directory mtimes must be handled at the end to avoid further
// file creation in them to modify the directory mtime
if hdr.Typeflag == tar.TypeDir {
dirs = append(dirs, hdr)
}
unpackedPaths[path] = struct{}{}
}
}
for _, hdr := range dirs {
path := filepath.Join(dest, hdr.Name)
if err := system.Chtimes(path, hdr.AccessTime, hdr.ModTime); err != nil {
return 0, err
}
}
return size, nil
}
// ApplyLayer parses a diff in the standard layer format from `layer`,
// and applies it to the directory `dest`. The stream `layer` can be
// compressed or uncompressed.
// Returns the size in bytes of the contents of the layer.
func ApplyLayer(dest string, layer io.Reader) (int64, error) {
return applyLayerHandler(dest, layer, &TarOptions{}, true)
}
// ApplyUncompressedLayer parses a diff in the standard layer format from
// `layer`, and applies it to the directory `dest`. The stream `layer`
// can only be uncompressed.
// Returns the size in bytes of the contents of the layer.
func ApplyUncompressedLayer(dest string, layer io.Reader, options *TarOptions) (int64, error) {
return applyLayerHandler(dest, layer, options, false)
}
// do the bulk load of ApplyLayer, but allow for not calling DecompressStream
func applyLayerHandler(dest string, layer io.Reader, options *TarOptions, decompress bool) (int64, error) {
dest = filepath.Clean(dest)
// We need to be able to set any perms
if runtime.GOOS != "windows" {
oldmask, err := system.Umask(0)
if err != nil {
return 0, err
}
defer system.Umask(oldmask)
}
if decompress {
decompLayer, err := DecompressStream(layer)
if err != nil {
return 0, err
}
defer decompLayer.Close()
layer = decompLayer
}
return UnpackLayer(dest, layer, options)
}

@ -0,0 +1,16 @@
package archive // import "github.com/docker/docker/pkg/archive"
import (
"syscall"
"time"
)
func timeToTimespec(time time.Time) (ts syscall.Timespec) {
if time.IsZero() {
// Return UTIME_OMIT special value
ts.Sec = 0
ts.Nsec = (1 << 30) - 2
return
}
return syscall.NsecToTimespec(time.UnixNano())
}

@ -0,0 +1,16 @@
// +build !linux
package archive // import "github.com/docker/docker/pkg/archive"
import (
"syscall"
"time"
)
func timeToTimespec(time time.Time) (ts syscall.Timespec) {
nsec := int64(0)
if !time.IsZero() {
nsec = time.UnixNano()
}
return syscall.NsecToTimespec(nsec)
}

@ -0,0 +1,23 @@
package archive // import "github.com/docker/docker/pkg/archive"
// Whiteouts are files with a special meaning for the layered filesystem.
// Docker uses AUFS whiteout files inside exported archives. In other
// filesystems these files are generated/handled on tar creation/extraction.
// WhiteoutPrefix prefix means file is a whiteout. If this is followed by a
// filename this means that file has been removed from the base layer.
const WhiteoutPrefix = ".wh."
// WhiteoutMetaPrefix prefix means whiteout has a special meaning and is not
// for removing an actual file. Normally these files are excluded from exported
// archives.
const WhiteoutMetaPrefix = WhiteoutPrefix + WhiteoutPrefix
// WhiteoutLinkDir is a directory AUFS uses for storing hardlink links to other
// layers. Normally these should not go into exported archives and all changed
// hardlinks should be copied to the top layer.
const WhiteoutLinkDir = WhiteoutMetaPrefix + "plnk"
// WhiteoutOpaqueDir file means directory has been made opaque - meaning
// readdir calls to this directory do not follow to lower layers.
const WhiteoutOpaqueDir = WhiteoutMetaPrefix + ".opq"

@ -0,0 +1,59 @@
package archive // import "github.com/docker/docker/pkg/archive"
import (
"archive/tar"
"bytes"
"io"
)
// Generate generates a new archive from the content provided
// as input.
//
// `files` is a sequence of path/content pairs. A new file is
// added to the archive for each pair.
// If the last pair is incomplete, the file is created with an
// empty content. For example:
//
// Generate("foo.txt", "hello world", "emptyfile")
//
// The above call will return an archive with 2 files:
// * ./foo.txt with content "hello world"
// * ./empty with empty content
//
// FIXME: stream content instead of buffering
// FIXME: specify permissions and other archive metadata
func Generate(input ...string) (io.Reader, error) {
files := parseStringPairs(input...)
buf := new(bytes.Buffer)
tw := tar.NewWriter(buf)
for _, file := range files {
name, content := file[0], file[1]
hdr := &tar.Header{
Name: name,
Size: int64(len(content)),
}
if err := tw.WriteHeader(hdr); err != nil {
return nil, err
}
if _, err := tw.Write([]byte(content)); err != nil {
return nil, err
}
}
if err := tw.Close(); err != nil {
return nil, err
}
return buf, nil
}
func parseStringPairs(input ...string) (output [][2]string) {
output = make([][2]string, 0, len(input)/2+1)
for i := 0; i < len(input); i += 2 {
var pair [2]string
pair[0] = input[i]
if i+1 < len(input) {
pair[1] = input[i+1]
}
output = append(output, pair)
}
return
}

@ -0,0 +1,298 @@
package fileutils // import "github.com/docker/docker/pkg/fileutils"
import (
"errors"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"strings"
"text/scanner"
"github.com/sirupsen/logrus"
)
// PatternMatcher allows checking paths against a list of patterns
type PatternMatcher struct {
patterns []*Pattern
exclusions bool
}
// NewPatternMatcher creates a new matcher object for specific patterns that can
// be used later to match against patterns against paths
func NewPatternMatcher(patterns []string) (*PatternMatcher, error) {
pm := &PatternMatcher{
patterns: make([]*Pattern, 0, len(patterns)),
}
for _, p := range patterns {
// Eliminate leading and trailing whitespace.
p = strings.TrimSpace(p)
if p == "" {
continue
}
p = filepath.Clean(p)
newp := &Pattern{}
if p[0] == '!' {
if len(p) == 1 {
return nil, errors.New("illegal exclusion pattern: \"!\"")
}
newp.exclusion = true
p = p[1:]
pm.exclusions = true
}
// Do some syntax checking on the pattern.
// filepath's Match() has some really weird rules that are inconsistent
// so instead of trying to dup their logic, just call Match() for its
// error state and if there is an error in the pattern return it.
// If this becomes an issue we can remove this since its really only
// needed in the error (syntax) case - which isn't really critical.
if _, err := filepath.Match(p, "."); err != nil {
return nil, err
}
newp.cleanedPattern = p
newp.dirs = strings.Split(p, string(os.PathSeparator))
pm.patterns = append(pm.patterns, newp)
}
return pm, nil
}
// Matches matches path against all the patterns. Matches is not safe to be
// called concurrently
func (pm *PatternMatcher) Matches(file string) (bool, error) {
matched := false
file = filepath.FromSlash(file)
parentPath := filepath.Dir(file)
parentPathDirs := strings.Split(parentPath, string(os.PathSeparator))
for _, pattern := range pm.patterns {
negative := false
if pattern.exclusion {
negative = true
}
match, err := pattern.match(file)
if err != nil {
return false, err
}
if !match && parentPath != "." {
// Check to see if the pattern matches one of our parent dirs.
if len(pattern.dirs) <= len(parentPathDirs) {
match, _ = pattern.match(strings.Join(parentPathDirs[:len(pattern.dirs)], string(os.PathSeparator)))
}
}
if match {
matched = !negative
}
}
if matched {
logrus.Debugf("Skipping excluded path: %s", file)
}
return matched, nil
}
// Exclusions returns true if any of the patterns define exclusions
func (pm *PatternMatcher) Exclusions() bool {
return pm.exclusions
}
// Patterns returns array of active patterns
func (pm *PatternMatcher) Patterns() []*Pattern {
return pm.patterns
}
// Pattern defines a single regexp used to filter file paths.
type Pattern struct {
cleanedPattern string
dirs []string
regexp *regexp.Regexp
exclusion bool
}
func (p *Pattern) String() string {
return p.cleanedPattern
}
// Exclusion returns true if this pattern defines exclusion
func (p *Pattern) Exclusion() bool {
return p.exclusion
}
func (p *Pattern) match(path string) (bool, error) {
if p.regexp == nil {
if err := p.compile(); err != nil {
return false, filepath.ErrBadPattern
}
}
b := p.regexp.MatchString(path)
return b, nil
}
func (p *Pattern) compile() error {
regStr := "^"
pattern := p.cleanedPattern
// Go through the pattern and convert it to a regexp.
// We use a scanner so we can support utf-8 chars.
var scan scanner.Scanner
scan.Init(strings.NewReader(pattern))
sl := string(os.PathSeparator)
escSL := sl
if sl == `\` {
escSL += `\`
}
for scan.Peek() != scanner.EOF {
ch := scan.Next()
if ch == '*' {
if scan.Peek() == '*' {
// is some flavor of "**"
scan.Next()
// Treat **/ as ** so eat the "/"
if string(scan.Peek()) == sl {
scan.Next()
}
if scan.Peek() == scanner.EOF {
// is "**EOF" - to align with .gitignore just accept all
regStr += ".*"
} else {
// is "**"
// Note that this allows for any # of /'s (even 0) because
// the .* will eat everything, even /'s
regStr += "(.*" + escSL + ")?"
}
} else {
// is "*" so map it to anything but "/"
regStr += "[^" + escSL + "]*"
}
} else if ch == '?' {
// "?" is any char except "/"
regStr += "[^" + escSL + "]"
} else if ch == '.' || ch == '$' {
// Escape some regexp special chars that have no meaning
// in golang's filepath.Match
regStr += `\` + string(ch)
} else if ch == '\\' {
// escape next char. Note that a trailing \ in the pattern
// will be left alone (but need to escape it)
if sl == `\` {
// On windows map "\" to "\\", meaning an escaped backslash,
// and then just continue because filepath.Match on
// Windows doesn't allow escaping at all
regStr += escSL
continue
}
if scan.Peek() != scanner.EOF {
regStr += `\` + string(scan.Next())
} else {
regStr += `\`
}
} else {
regStr += string(ch)
}
}
regStr += "$"
re, err := regexp.Compile(regStr)
if err != nil {
return err
}
p.regexp = re
return nil
}
// Matches returns true if file matches any of the patterns
// and isn't excluded by any of the subsequent patterns.
func Matches(file string, patterns []string) (bool, error) {
pm, err := NewPatternMatcher(patterns)
if err != nil {
return false, err
}
file = filepath.Clean(file)
if file == "." {
// Don't let them exclude everything, kind of silly.
return false, nil
}
return pm.Matches(file)
}
// CopyFile copies from src to dst until either EOF is reached
// on src or an error occurs. It verifies src exists and removes
// the dst if it exists.
func CopyFile(src, dst string) (int64, error) {
cleanSrc := filepath.Clean(src)
cleanDst := filepath.Clean(dst)
if cleanSrc == cleanDst {
return 0, nil
}
sf, err := os.Open(cleanSrc)
if err != nil {
return 0, err
}
defer sf.Close()
if err := os.Remove(cleanDst); err != nil && !os.IsNotExist(err) {
return 0, err
}
df, err := os.Create(cleanDst)
if err != nil {
return 0, err
}
defer df.Close()
return io.Copy(df, sf)
}
// ReadSymlinkedDirectory returns the target directory of a symlink.
// The target of the symbolic link may not be a file.
func ReadSymlinkedDirectory(path string) (string, error) {
var realPath string
var err error
if realPath, err = filepath.Abs(path); err != nil {
return "", fmt.Errorf("unable to get absolute path for %s: %s", path, err)
}
if realPath, err = filepath.EvalSymlinks(realPath); err != nil {
return "", fmt.Errorf("failed to canonicalise path for %s: %s", path, err)
}
realPathInfo, err := os.Stat(realPath)
if err != nil {
return "", fmt.Errorf("failed to stat target '%s' of '%s': %s", realPath, path, err)
}
if !realPathInfo.Mode().IsDir() {
return "", fmt.Errorf("canonical path points to a file '%s'", realPath)
}
return realPath, nil
}
// CreateIfNotExists creates a file or a directory only if it does not already exist.
func CreateIfNotExists(path string, isDir bool) error {
if _, err := os.Stat(path); err != nil {
if os.IsNotExist(err) {
if isDir {
return os.MkdirAll(path, 0755)
}
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
return err
}
f, err := os.OpenFile(path, os.O_CREATE, 0755)
if err != nil {
return err
}
f.Close()
}
}
return nil
}

@ -0,0 +1,27 @@
package fileutils // import "github.com/docker/docker/pkg/fileutils"
import (
"os"
"os/exec"
"strconv"
"strings"
)
// GetTotalUsedFds returns the number of used File Descriptors by
// executing `lsof -p PID`
func GetTotalUsedFds() int {
pid := os.Getpid()
cmd := exec.Command("lsof", "-p", strconv.Itoa(pid))
output, err := cmd.CombinedOutput()
if err != nil {
return -1
}
outputStr := strings.TrimSpace(string(output))
fds := strings.Split(outputStr, "\n")
return len(fds) - 1
}

@ -0,0 +1,22 @@
// +build linux freebsd
package fileutils // import "github.com/docker/docker/pkg/fileutils"
import (
"fmt"
"io/ioutil"
"os"
"github.com/sirupsen/logrus"
)
// GetTotalUsedFds Returns the number of used File Descriptors by
// reading it via /proc filesystem.
func GetTotalUsedFds() int {
if fds, err := ioutil.ReadDir(fmt.Sprintf("/proc/%d/fd", os.Getpid())); err != nil {
logrus.Errorf("Error opening /proc/%d/fd: %s", os.Getpid(), err)
} else {
return len(fds)
}
return -1
}

@ -0,0 +1,7 @@
package fileutils // import "github.com/docker/docker/pkg/fileutils"
// GetTotalUsedFds Returns the number of used File Descriptors. Not supported
// on Windows.
func GetTotalUsedFds() int {
return -1
}

@ -0,0 +1,264 @@
package idtools // import "github.com/docker/docker/pkg/idtools"
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
)
// IDMap contains a single entry for user namespace range remapping. An array
// of IDMap entries represents the structure that will be provided to the Linux
// kernel for creating a user namespace.
type IDMap struct {
ContainerID int `json:"container_id"`
HostID int `json:"host_id"`
Size int `json:"size"`
}
type subIDRange struct {
Start int
Length int
}
type ranges []subIDRange
func (e ranges) Len() int { return len(e) }
func (e ranges) Swap(i, j int) { e[i], e[j] = e[j], e[i] }
func (e ranges) Less(i, j int) bool { return e[i].Start < e[j].Start }
const (
subuidFileName = "/etc/subuid"
subgidFileName = "/etc/subgid"
)
// MkdirAllAndChown creates a directory (include any along the path) and then modifies
// ownership to the requested uid/gid. If the directory already exists, this
// function will still change ownership to the requested uid/gid pair.
func MkdirAllAndChown(path string, mode os.FileMode, owner Identity) error {
return mkdirAs(path, mode, owner, true, true)
}
// MkdirAndChown creates a directory and then modifies ownership to the requested uid/gid.
// If the directory already exists, this function still changes ownership.
// Note that unlike os.Mkdir(), this function does not return IsExist error
// in case path already exists.
func MkdirAndChown(path string, mode os.FileMode, owner Identity) error {
return mkdirAs(path, mode, owner, false, true)
}
// MkdirAllAndChownNew creates a directory (include any along the path) and then modifies
// ownership ONLY of newly created directories to the requested uid/gid. If the
// directories along the path exist, no change of ownership will be performed
func MkdirAllAndChownNew(path string, mode os.FileMode, owner Identity) error {
return mkdirAs(path, mode, owner, true, false)
}
// GetRootUIDGID retrieves the remapped root uid/gid pair from the set of maps.
// If the maps are empty, then the root uid/gid will default to "real" 0/0
func GetRootUIDGID(uidMap, gidMap []IDMap) (int, int, error) {
uid, err := toHost(0, uidMap)
if err != nil {
return -1, -1, err
}
gid, err := toHost(0, gidMap)
if err != nil {
return -1, -1, err
}
return uid, gid, nil
}
// toContainer takes an id mapping, and uses it to translate a
// host ID to the remapped ID. If no map is provided, then the translation
// assumes a 1-to-1 mapping and returns the passed in id
func toContainer(hostID int, idMap []IDMap) (int, error) {
if idMap == nil {
return hostID, nil
}
for _, m := range idMap {
if (hostID >= m.HostID) && (hostID <= (m.HostID + m.Size - 1)) {
contID := m.ContainerID + (hostID - m.HostID)
return contID, nil
}
}
return -1, fmt.Errorf("Host ID %d cannot be mapped to a container ID", hostID)
}
// toHost takes an id mapping and a remapped ID, and translates the
// ID to the mapped host ID. If no map is provided, then the translation
// assumes a 1-to-1 mapping and returns the passed in id #
func toHost(contID int, idMap []IDMap) (int, error) {
if idMap == nil {
return contID, nil
}
for _, m := range idMap {
if (contID >= m.ContainerID) && (contID <= (m.ContainerID + m.Size - 1)) {
hostID := m.HostID + (contID - m.ContainerID)
return hostID, nil
}
}
return -1, fmt.Errorf("Container ID %d cannot be mapped to a host ID", contID)
}
// Identity is either a UID and GID pair or a SID (but not both)
type Identity struct {
UID int
GID int
SID string
}
// IdentityMapping contains a mappings of UIDs and GIDs
type IdentityMapping struct {
uids []IDMap
gids []IDMap
}
// NewIdentityMapping takes a requested user and group name and
// using the data from /etc/sub{uid,gid} ranges, creates the
// proper uid and gid remapping ranges for that user/group pair
func NewIdentityMapping(username, groupname string) (*IdentityMapping, error) {
subuidRanges, err := parseSubuid(username)
if err != nil {
return nil, err
}
subgidRanges, err := parseSubgid(groupname)
if err != nil {
return nil, err
}
if len(subuidRanges) == 0 {
return nil, fmt.Errorf("No subuid ranges found for user %q", username)
}
if len(subgidRanges) == 0 {
return nil, fmt.Errorf("No subgid ranges found for group %q", groupname)
}
return &IdentityMapping{
uids: createIDMap(subuidRanges),
gids: createIDMap(subgidRanges),
}, nil
}
// NewIDMappingsFromMaps creates a new mapping from two slices
// Deprecated: this is a temporary shim while transitioning to IDMapping
func NewIDMappingsFromMaps(uids []IDMap, gids []IDMap) *IdentityMapping {
return &IdentityMapping{uids: uids, gids: gids}
}
// RootPair returns a uid and gid pair for the root user. The error is ignored
// because a root user always exists, and the defaults are correct when the uid
// and gid maps are empty.
func (i *IdentityMapping) RootPair() Identity {
uid, gid, _ := GetRootUIDGID(i.uids, i.gids)
return Identity{UID: uid, GID: gid}
}
// ToHost returns the host UID and GID for the container uid, gid.
// Remapping is only performed if the ids aren't already the remapped root ids
func (i *IdentityMapping) ToHost(pair Identity) (Identity, error) {
var err error
target := i.RootPair()
if pair.UID != target.UID {
target.UID, err = toHost(pair.UID, i.uids)
if err != nil {
return target, err
}
}
if pair.GID != target.GID {
target.GID, err = toHost(pair.GID, i.gids)
}
return target, err
}
// ToContainer returns the container UID and GID for the host uid and gid
func (i *IdentityMapping) ToContainer(pair Identity) (int, int, error) {
uid, err := toContainer(pair.UID, i.uids)
if err != nil {
return -1, -1, err
}
gid, err := toContainer(pair.GID, i.gids)
return uid, gid, err
}
// Empty returns true if there are no id mappings
func (i *IdentityMapping) Empty() bool {
return len(i.uids) == 0 && len(i.gids) == 0
}
// UIDs return the UID mapping
// TODO: remove this once everything has been refactored to use pairs
func (i *IdentityMapping) UIDs() []IDMap {
return i.uids
}
// GIDs return the UID mapping
// TODO: remove this once everything has been refactored to use pairs
func (i *IdentityMapping) GIDs() []IDMap {
return i.gids
}
func createIDMap(subidRanges ranges) []IDMap {
idMap := []IDMap{}
containerID := 0
for _, idrange := range subidRanges {
idMap = append(idMap, IDMap{
ContainerID: containerID,
HostID: idrange.Start,
Size: idrange.Length,
})
containerID = containerID + idrange.Length
}
return idMap
}
func parseSubuid(username string) (ranges, error) {
return parseSubidFile(subuidFileName, username)
}
func parseSubgid(username string) (ranges, error) {
return parseSubidFile(subgidFileName, username)
}
// parseSubidFile will read the appropriate file (/etc/subuid or /etc/subgid)
// and return all found ranges for a specified username. If the special value
// "ALL" is supplied for username, then all ranges in the file will be returned
func parseSubidFile(path, username string) (ranges, error) {
var rangeList ranges
subidFile, err := os.Open(path)
if err != nil {
return rangeList, err
}
defer subidFile.Close()
s := bufio.NewScanner(subidFile)
for s.Scan() {
if err := s.Err(); err != nil {
return rangeList, err
}
text := strings.TrimSpace(s.Text())
if text == "" || strings.HasPrefix(text, "#") {
continue
}
parts := strings.Split(text, ":")
if len(parts) != 3 {
return rangeList, fmt.Errorf("Cannot parse subuid/gid information: Format not correct for %s file", path)
}
if parts[0] == username || username == "ALL" {
startid, err := strconv.Atoi(parts[1])
if err != nil {
return rangeList, fmt.Errorf("String to int conversion failed during subuid/gid parsing of %s: %v", path, err)
}
length, err := strconv.Atoi(parts[2])
if err != nil {
return rangeList, fmt.Errorf("String to int conversion failed during subuid/gid parsing of %s: %v", path, err)
}
rangeList = append(rangeList, subIDRange{startid, length})
}
}
return rangeList, nil
}

@ -0,0 +1,231 @@
// +build !windows
package idtools // import "github.com/docker/docker/pkg/idtools"
import (
"bytes"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"sync"
"syscall"
"github.com/docker/docker/pkg/system"
"github.com/opencontainers/runc/libcontainer/user"
)
var (
entOnce sync.Once
getentCmd string
)
func mkdirAs(path string, mode os.FileMode, owner Identity, mkAll, chownExisting bool) error {
// make an array containing the original path asked for, plus (for mkAll == true)
// all path components leading up to the complete path that don't exist before we MkdirAll
// so that we can chown all of them properly at the end. If chownExisting is false, we won't
// chown the full directory path if it exists
var paths []string
stat, err := system.Stat(path)
if err == nil {
if !stat.IsDir() {
return &os.PathError{Op: "mkdir", Path: path, Err: syscall.ENOTDIR}
}
if !chownExisting {
return nil
}
// short-circuit--we were called with an existing directory and chown was requested
return lazyChown(path, owner.UID, owner.GID, stat)
}
if os.IsNotExist(err) {
paths = []string{path}
}
if mkAll {
// walk back to "/" looking for directories which do not exist
// and add them to the paths array for chown after creation
dirPath := path
for {
dirPath = filepath.Dir(dirPath)
if dirPath == "/" {
break
}
if _, err := os.Stat(dirPath); err != nil && os.IsNotExist(err) {
paths = append(paths, dirPath)
}
}
if err := system.MkdirAll(path, mode); err != nil {
return err
}
} else {
if err := os.Mkdir(path, mode); err != nil && !os.IsExist(err) {
return err
}
}
// even if it existed, we will chown the requested path + any subpaths that
// didn't exist when we called MkdirAll
for _, pathComponent := range paths {
if err := lazyChown(pathComponent, owner.UID, owner.GID, nil); err != nil {
return err
}
}
return nil
}
// CanAccess takes a valid (existing) directory and a uid, gid pair and determines
// if that uid, gid pair has access (execute bit) to the directory
func CanAccess(path string, pair Identity) bool {
statInfo, err := system.Stat(path)
if err != nil {
return false
}
fileMode := os.FileMode(statInfo.Mode())
permBits := fileMode.Perm()
return accessible(statInfo.UID() == uint32(pair.UID),
statInfo.GID() == uint32(pair.GID), permBits)
}
func accessible(isOwner, isGroup bool, perms os.FileMode) bool {
if isOwner && (perms&0100 == 0100) {
return true
}
if isGroup && (perms&0010 == 0010) {
return true
}
if perms&0001 == 0001 {
return true
}
return false
}
// LookupUser uses traditional local system files lookup (from libcontainer/user) on a username,
// followed by a call to `getent` for supporting host configured non-files passwd and group dbs
func LookupUser(username string) (user.User, error) {
// first try a local system files lookup using existing capabilities
usr, err := user.LookupUser(username)
if err == nil {
return usr, nil
}
// local files lookup failed; attempt to call `getent` to query configured passwd dbs
usr, err = getentUser(fmt.Sprintf("%s %s", "passwd", username))
if err != nil {
return user.User{}, err
}
return usr, nil
}
// LookupUID uses traditional local system files lookup (from libcontainer/user) on a uid,
// followed by a call to `getent` for supporting host configured non-files passwd and group dbs
func LookupUID(uid int) (user.User, error) {
// first try a local system files lookup using existing capabilities
usr, err := user.LookupUid(uid)
if err == nil {
return usr, nil
}
// local files lookup failed; attempt to call `getent` to query configured passwd dbs
return getentUser(fmt.Sprintf("%s %d", "passwd", uid))
}
func getentUser(args string) (user.User, error) {
reader, err := callGetent(args)
if err != nil {
return user.User{}, err
}
users, err := user.ParsePasswd(reader)
if err != nil {
return user.User{}, err
}
if len(users) == 0 {
return user.User{}, fmt.Errorf("getent failed to find passwd entry for %q", strings.Split(args, " ")[1])
}
return users[0], nil
}
// LookupGroup uses traditional local system files lookup (from libcontainer/user) on a group name,
// followed by a call to `getent` for supporting host configured non-files passwd and group dbs
func LookupGroup(groupname string) (user.Group, error) {
// first try a local system files lookup using existing capabilities
group, err := user.LookupGroup(groupname)
if err == nil {
return group, nil
}
// local files lookup failed; attempt to call `getent` to query configured group dbs
return getentGroup(fmt.Sprintf("%s %s", "group", groupname))
}
// LookupGID uses traditional local system files lookup (from libcontainer/user) on a group ID,
// followed by a call to `getent` for supporting host configured non-files passwd and group dbs
func LookupGID(gid int) (user.Group, error) {
// first try a local system files lookup using existing capabilities
group, err := user.LookupGid(gid)
if err == nil {
return group, nil
}
// local files lookup failed; attempt to call `getent` to query configured group dbs
return getentGroup(fmt.Sprintf("%s %d", "group", gid))
}
func getentGroup(args string) (user.Group, error) {
reader, err := callGetent(args)
if err != nil {
return user.Group{}, err
}
groups, err := user.ParseGroup(reader)
if err != nil {
return user.Group{}, err
}
if len(groups) == 0 {
return user.Group{}, fmt.Errorf("getent failed to find groups entry for %q", strings.Split(args, " ")[1])
}
return groups[0], nil
}
func callGetent(args string) (io.Reader, error) {
entOnce.Do(func() { getentCmd, _ = resolveBinary("getent") })
// if no `getent` command on host, can't do anything else
if getentCmd == "" {
return nil, fmt.Errorf("")
}
out, err := execCmd(getentCmd, args)
if err != nil {
exitCode, errC := system.GetExitCode(err)
if errC != nil {
return nil, err
}
switch exitCode {
case 1:
return nil, fmt.Errorf("getent reported invalid parameters/database unknown")
case 2:
terms := strings.Split(args, " ")
return nil, fmt.Errorf("getent unable to find entry %q in %s database", terms[1], terms[0])
case 3:
return nil, fmt.Errorf("getent database doesn't support enumeration")
default:
return nil, err
}
}
return bytes.NewReader(out), nil
}
// lazyChown performs a chown only if the uid/gid don't match what's requested
// Normally a Chown is a no-op if uid/gid match, but in some cases this can still cause an error, e.g. if the
// dir is on an NFS share, so don't call chown unless we absolutely must.
func lazyChown(p string, uid, gid int, stat *system.StatT) error {
if stat == nil {
var err error
stat, err = system.Stat(p)
if err != nil {
return err
}
}
if stat.UID() == uint32(uid) && stat.GID() == uint32(gid) {
return nil
}
return os.Chown(p, uid, gid)
}

@ -0,0 +1,25 @@
package idtools // import "github.com/docker/docker/pkg/idtools"
import (
"os"
"github.com/docker/docker/pkg/system"
)
// This is currently a wrapper around MkdirAll, however, since currently
// permissions aren't set through this path, the identity isn't utilized.
// Ownership is handled elsewhere, but in the future could be support here
// too.
func mkdirAs(path string, mode os.FileMode, owner Identity, mkAll, chownExisting bool) error {
if err := system.MkdirAll(path, mode); err != nil {
return err
}
return nil
}
// CanAccess takes a valid (existing) directory and a uid, gid pair and determines
// if that uid, gid pair has access (execute bit) to the directory
// Windows does not require/support this function, so always return true
func CanAccess(path string, identity Identity) bool {
return true
}

@ -0,0 +1,164 @@
package idtools // import "github.com/docker/docker/pkg/idtools"
import (
"fmt"
"regexp"
"sort"
"strconv"
"strings"
"sync"
)
// add a user and/or group to Linux /etc/passwd, /etc/group using standard
// Linux distribution commands:
// adduser --system --shell /bin/false --disabled-login --disabled-password --no-create-home --group <username>
// useradd -r -s /bin/false <username>
var (
once sync.Once
userCommand string
cmdTemplates = map[string]string{
"adduser": "--system --shell /bin/false --no-create-home --disabled-login --disabled-password --group %s",
"useradd": "-r -s /bin/false %s",
"usermod": "-%s %d-%d %s",
}
idOutRegexp = regexp.MustCompile(`uid=([0-9]+).*gid=([0-9]+)`)
// default length for a UID/GID subordinate range
defaultRangeLen = 65536
defaultRangeStart = 100000
userMod = "usermod"
)
// AddNamespaceRangesUser takes a username and uses the standard system
// utility to create a system user/group pair used to hold the
// /etc/sub{uid,gid} ranges which will be used for user namespace
// mapping ranges in containers.
func AddNamespaceRangesUser(name string) (int, int, error) {
if err := addUser(name); err != nil {
return -1, -1, fmt.Errorf("Error adding user %q: %v", name, err)
}
// Query the system for the created uid and gid pair
out, err := execCmd("id", name)
if err != nil {
return -1, -1, fmt.Errorf("Error trying to find uid/gid for new user %q: %v", name, err)
}
matches := idOutRegexp.FindStringSubmatch(strings.TrimSpace(string(out)))
if len(matches) != 3 {
return -1, -1, fmt.Errorf("Can't find uid, gid from `id` output: %q", string(out))
}
uid, err := strconv.Atoi(matches[1])
if err != nil {
return -1, -1, fmt.Errorf("Can't convert found uid (%s) to int: %v", matches[1], err)
}
gid, err := strconv.Atoi(matches[2])
if err != nil {
return -1, -1, fmt.Errorf("Can't convert found gid (%s) to int: %v", matches[2], err)
}
// Now we need to create the subuid/subgid ranges for our new user/group (system users
// do not get auto-created ranges in subuid/subgid)
if err := createSubordinateRanges(name); err != nil {
return -1, -1, fmt.Errorf("Couldn't create subordinate ID ranges: %v", err)
}
return uid, gid, nil
}
func addUser(userName string) error {
once.Do(func() {
// set up which commands are used for adding users/groups dependent on distro
if _, err := resolveBinary("adduser"); err == nil {
userCommand = "adduser"
} else if _, err := resolveBinary("useradd"); err == nil {
userCommand = "useradd"
}
})
if userCommand == "" {
return fmt.Errorf("Cannot add user; no useradd/adduser binary found")
}
args := fmt.Sprintf(cmdTemplates[userCommand], userName)
out, err := execCmd(userCommand, args)
if err != nil {
return fmt.Errorf("Failed to add user with error: %v; output: %q", err, string(out))
}
return nil
}
func createSubordinateRanges(name string) error {
// first, we should verify that ranges weren't automatically created
// by the distro tooling
ranges, err := parseSubuid(name)
if err != nil {
return fmt.Errorf("Error while looking for subuid ranges for user %q: %v", name, err)
}
if len(ranges) == 0 {
// no UID ranges; let's create one
startID, err := findNextUIDRange()
if err != nil {
return fmt.Errorf("Can't find available subuid range: %v", err)
}
out, err := execCmd(userMod, fmt.Sprintf(cmdTemplates[userMod], "v", startID, startID+defaultRangeLen-1, name))
if err != nil {
return fmt.Errorf("Unable to add subuid range to user: %q; output: %s, err: %v", name, out, err)
}
}
ranges, err = parseSubgid(name)
if err != nil {
return fmt.Errorf("Error while looking for subgid ranges for user %q: %v", name, err)
}
if len(ranges) == 0 {
// no GID ranges; let's create one
startID, err := findNextGIDRange()
if err != nil {
return fmt.Errorf("Can't find available subgid range: %v", err)
}
out, err := execCmd(userMod, fmt.Sprintf(cmdTemplates[userMod], "w", startID, startID+defaultRangeLen-1, name))
if err != nil {
return fmt.Errorf("Unable to add subgid range to user: %q; output: %s, err: %v", name, out, err)
}
}
return nil
}
func findNextUIDRange() (int, error) {
ranges, err := parseSubuid("ALL")
if err != nil {
return -1, fmt.Errorf("Couldn't parse all ranges in /etc/subuid file: %v", err)
}
sort.Sort(ranges)
return findNextRangeStart(ranges)
}
func findNextGIDRange() (int, error) {
ranges, err := parseSubgid("ALL")
if err != nil {
return -1, fmt.Errorf("Couldn't parse all ranges in /etc/subgid file: %v", err)
}
sort.Sort(ranges)
return findNextRangeStart(ranges)
}
func findNextRangeStart(rangeList ranges) (int, error) {
startID := defaultRangeStart
for _, arange := range rangeList {
if wouldOverlap(arange, startID) {
startID = arange.Start + arange.Length
}
}
return startID, nil
}
func wouldOverlap(arange subIDRange, ID int) bool {
low := ID
high := ID + defaultRangeLen
if (low >= arange.Start && low <= arange.Start+arange.Length) ||
(high <= arange.Start+arange.Length && high >= arange.Start) {
return true
}
return false
}

@ -0,0 +1,12 @@
// +build !linux
package idtools // import "github.com/docker/docker/pkg/idtools"
import "fmt"
// AddNamespaceRangesUser takes a name and finds an unused uid, gid pair
// and calls the appropriate helper function to add the group and then
// the user to the group in /etc/group and /etc/passwd respectively.
func AddNamespaceRangesUser(name string) (int, int, error) {
return -1, -1, fmt.Errorf("No support for adding users or groups on this OS")
}

@ -0,0 +1,32 @@
// +build !windows
package idtools // import "github.com/docker/docker/pkg/idtools"
import (
"fmt"
"os/exec"
"path/filepath"
"strings"
)
func resolveBinary(binname string) (string, error) {
binaryPath, err := exec.LookPath(binname)
if err != nil {
return "", err
}
resolvedPath, err := filepath.EvalSymlinks(binaryPath)
if err != nil {
return "", err
}
//only return no error if the final resolved binary basename
//matches what was searched for
if filepath.Base(resolvedPath) == binname {
return resolvedPath, nil
}
return "", fmt.Errorf("Binary %q does not resolve to a binary of that name in $PATH (%q)", binname, resolvedPath)
}
func execCmd(cmd, args string) ([]byte, error) {
execCmd := exec.Command(cmd, strings.Split(args, " ")...)
return execCmd.CombinedOutput()
}

@ -0,0 +1,51 @@
package ioutils // import "github.com/docker/docker/pkg/ioutils"
import (
"errors"
"io"
)
var errBufferFull = errors.New("buffer is full")
type fixedBuffer struct {
buf []byte
pos int
lastRead int
}
func (b *fixedBuffer) Write(p []byte) (int, error) {
n := copy(b.buf[b.pos:cap(b.buf)], p)
b.pos += n
if n < len(p) {
if b.pos == cap(b.buf) {
return n, errBufferFull
}
return n, io.ErrShortWrite
}
return n, nil
}
func (b *fixedBuffer) Read(p []byte) (int, error) {
n := copy(p, b.buf[b.lastRead:b.pos])
b.lastRead += n
return n, nil
}
func (b *fixedBuffer) Len() int {
return b.pos - b.lastRead
}
func (b *fixedBuffer) Cap() int {
return cap(b.buf)
}
func (b *fixedBuffer) Reset() {
b.pos = 0
b.lastRead = 0
b.buf = b.buf[:0]
}
func (b *fixedBuffer) String() string {
return string(b.buf[b.lastRead:b.pos])
}

@ -0,0 +1,187 @@
package ioutils // import "github.com/docker/docker/pkg/ioutils"
import (
"errors"
"io"
"sync"
)
// maxCap is the highest capacity to use in byte slices that buffer data.
const maxCap = 1e6
// minCap is the lowest capacity to use in byte slices that buffer data
const minCap = 64
// blockThreshold is the minimum number of bytes in the buffer which will cause
// a write to BytesPipe to block when allocating a new slice.
const blockThreshold = 1e6
var (
// ErrClosed is returned when Write is called on a closed BytesPipe.
ErrClosed = errors.New("write to closed BytesPipe")
bufPools = make(map[int]*sync.Pool)
bufPoolsLock sync.Mutex
)
// BytesPipe is io.ReadWriteCloser which works similarly to pipe(queue).
// All written data may be read at most once. Also, BytesPipe allocates
// and releases new byte slices to adjust to current needs, so the buffer
// won't be overgrown after peak loads.
type BytesPipe struct {
mu sync.Mutex
wait *sync.Cond
buf []*fixedBuffer
bufLen int
closeErr error // error to return from next Read. set to nil if not closed.
}
// NewBytesPipe creates new BytesPipe, initialized by specified slice.
// If buf is nil, then it will be initialized with slice which cap is 64.
// buf will be adjusted in a way that len(buf) == 0, cap(buf) == cap(buf).
func NewBytesPipe() *BytesPipe {
bp := &BytesPipe{}
bp.buf = append(bp.buf, getBuffer(minCap))
bp.wait = sync.NewCond(&bp.mu)
return bp
}
// Write writes p to BytesPipe.
// It can allocate new []byte slices in a process of writing.
func (bp *BytesPipe) Write(p []byte) (int, error) {
bp.mu.Lock()
written := 0
loop0:
for {
if bp.closeErr != nil {
bp.mu.Unlock()
return written, ErrClosed
}
if len(bp.buf) == 0 {
bp.buf = append(bp.buf, getBuffer(64))
}
// get the last buffer
b := bp.buf[len(bp.buf)-1]
n, err := b.Write(p)
written += n
bp.bufLen += n
// errBufferFull is an error we expect to get if the buffer is full
if err != nil && err != errBufferFull {
bp.wait.Broadcast()
bp.mu.Unlock()
return written, err
}
// if there was enough room to write all then break
if len(p) == n {
break
}
// more data: write to the next slice
p = p[n:]
// make sure the buffer doesn't grow too big from this write
for bp.bufLen >= blockThreshold {
bp.wait.Wait()
if bp.closeErr != nil {
continue loop0
}
}
// add new byte slice to the buffers slice and continue writing
nextCap := b.Cap() * 2
if nextCap > maxCap {
nextCap = maxCap
}
bp.buf = append(bp.buf, getBuffer(nextCap))
}
bp.wait.Broadcast()
bp.mu.Unlock()
return written, nil
}
// CloseWithError causes further reads from a BytesPipe to return immediately.
func (bp *BytesPipe) CloseWithError(err error) error {
bp.mu.Lock()
if err != nil {
bp.closeErr = err
} else {
bp.closeErr = io.EOF
}
bp.wait.Broadcast()
bp.mu.Unlock()
return nil
}
// Close causes further reads from a BytesPipe to return immediately.
func (bp *BytesPipe) Close() error {
return bp.CloseWithError(nil)
}
// Read reads bytes from BytesPipe.
// Data could be read only once.
func (bp *BytesPipe) Read(p []byte) (n int, err error) {
bp.mu.Lock()
if bp.bufLen == 0 {
if bp.closeErr != nil {
err := bp.closeErr
bp.mu.Unlock()
return 0, err
}
bp.wait.Wait()
if bp.bufLen == 0 && bp.closeErr != nil {
err := bp.closeErr
bp.mu.Unlock()
return 0, err
}
}
for bp.bufLen > 0 {
b := bp.buf[0]
read, _ := b.Read(p) // ignore error since fixedBuffer doesn't really return an error
n += read
bp.bufLen -= read
if b.Len() == 0 {
// it's empty so return it to the pool and move to the next one
returnBuffer(b)
bp.buf[0] = nil
bp.buf = bp.buf[1:]
}
if len(p) == read {
break
}
p = p[read:]
}
bp.wait.Broadcast()
bp.mu.Unlock()
return
}
func returnBuffer(b *fixedBuffer) {
b.Reset()
bufPoolsLock.Lock()
pool := bufPools[b.Cap()]
bufPoolsLock.Unlock()
if pool != nil {
pool.Put(b)
}
}
func getBuffer(size int) *fixedBuffer {
bufPoolsLock.Lock()
pool, ok := bufPools[size]
if !ok {
pool = &sync.Pool{New: func() interface{} { return &fixedBuffer{buf: make([]byte, 0, size)} }}
bufPools[size] = pool
}
bufPoolsLock.Unlock()
return pool.Get().(*fixedBuffer)
}

@ -0,0 +1,162 @@
package ioutils // import "github.com/docker/docker/pkg/ioutils"
import (
"io"
"io/ioutil"
"os"
"path/filepath"
)
// NewAtomicFileWriter returns WriteCloser so that writing to it writes to a
// temporary file and closing it atomically changes the temporary file to
// destination path. Writing and closing concurrently is not allowed.
func NewAtomicFileWriter(filename string, perm os.FileMode) (io.WriteCloser, error) {
f, err := ioutil.TempFile(filepath.Dir(filename), ".tmp-"+filepath.Base(filename))
if err != nil {
return nil, err
}
abspath, err := filepath.Abs(filename)
if err != nil {
return nil, err
}
return &atomicFileWriter{
f: f,
fn: abspath,
perm: perm,
}, nil
}
// AtomicWriteFile atomically writes data to a file named by filename.
func AtomicWriteFile(filename string, data []byte, perm os.FileMode) error {
f, err := NewAtomicFileWriter(filename, perm)
if err != nil {
return err
}
n, err := f.Write(data)
if err == nil && n < len(data) {
err = io.ErrShortWrite
f.(*atomicFileWriter).writeErr = err
}
if err1 := f.Close(); err == nil {
err = err1
}
return err
}
type atomicFileWriter struct {
f *os.File
fn string
writeErr error
perm os.FileMode
}
func (w *atomicFileWriter) Write(dt []byte) (int, error) {
n, err := w.f.Write(dt)
if err != nil {
w.writeErr = err
}
return n, err
}
func (w *atomicFileWriter) Close() (retErr error) {
defer func() {
if retErr != nil || w.writeErr != nil {
os.Remove(w.f.Name())
}
}()
if err := w.f.Sync(); err != nil {
w.f.Close()
return err
}
if err := w.f.Close(); err != nil {
return err
}
if err := os.Chmod(w.f.Name(), w.perm); err != nil {
return err
}
if w.writeErr == nil {
return os.Rename(w.f.Name(), w.fn)
}
return nil
}
// AtomicWriteSet is used to atomically write a set
// of files and ensure they are visible at the same time.
// Must be committed to a new directory.
type AtomicWriteSet struct {
root string
}
// NewAtomicWriteSet creates a new atomic write set to
// atomically create a set of files. The given directory
// is used as the base directory for storing files before
// commit. If no temporary directory is given the system
// default is used.
func NewAtomicWriteSet(tmpDir string) (*AtomicWriteSet, error) {
td, err := ioutil.TempDir(tmpDir, "write-set-")
if err != nil {
return nil, err
}
return &AtomicWriteSet{
root: td,
}, nil
}
// WriteFile writes a file to the set, guaranteeing the file
// has been synced.
func (ws *AtomicWriteSet) WriteFile(filename string, data []byte, perm os.FileMode) error {
f, err := ws.FileWriter(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm)
if err != nil {
return err
}
n, err := f.Write(data)
if err == nil && n < len(data) {
err = io.ErrShortWrite
}
if err1 := f.Close(); err == nil {
err = err1
}
return err
}
type syncFileCloser struct {
*os.File
}
func (w syncFileCloser) Close() error {
err := w.File.Sync()
if err1 := w.File.Close(); err == nil {
err = err1
}
return err
}
// FileWriter opens a file writer inside the set. The file
// should be synced and closed before calling commit.
func (ws *AtomicWriteSet) FileWriter(name string, flag int, perm os.FileMode) (io.WriteCloser, error) {
f, err := os.OpenFile(filepath.Join(ws.root, name), flag, perm)
if err != nil {
return nil, err
}
return syncFileCloser{f}, nil
}
// Cancel cancels the set and removes all temporary data
// created in the set.
func (ws *AtomicWriteSet) Cancel() error {
return os.RemoveAll(ws.root)
}
// Commit moves all created files to the target directory. The
// target directory must not exist and the parent of the target
// directory must exist.
func (ws *AtomicWriteSet) Commit(target string) error {
return os.Rename(ws.root, target)
}
// String returns the location the set is writing to.
func (ws *AtomicWriteSet) String() string {
return ws.root
}

@ -0,0 +1,157 @@
package ioutils // import "github.com/docker/docker/pkg/ioutils"
import (
"context"
"crypto/sha256"
"encoding/hex"
"io"
)
// ReadCloserWrapper wraps an io.Reader, and implements an io.ReadCloser
// It calls the given callback function when closed. It should be constructed
// with NewReadCloserWrapper
type ReadCloserWrapper struct {
io.Reader
closer func() error
}
// Close calls back the passed closer function
func (r *ReadCloserWrapper) Close() error {
return r.closer()
}
// NewReadCloserWrapper returns a new io.ReadCloser.
func NewReadCloserWrapper(r io.Reader, closer func() error) io.ReadCloser {
return &ReadCloserWrapper{
Reader: r,
closer: closer,
}
}
type readerErrWrapper struct {
reader io.Reader
closer func()
}
func (r *readerErrWrapper) Read(p []byte) (int, error) {
n, err := r.reader.Read(p)
if err != nil {
r.closer()
}
return n, err
}
// NewReaderErrWrapper returns a new io.Reader.
func NewReaderErrWrapper(r io.Reader, closer func()) io.Reader {
return &readerErrWrapper{
reader: r,
closer: closer,
}
}
// HashData returns the sha256 sum of src.
func HashData(src io.Reader) (string, error) {
h := sha256.New()
if _, err := io.Copy(h, src); err != nil {
return "", err
}
return "sha256:" + hex.EncodeToString(h.Sum(nil)), nil
}
// OnEOFReader wraps an io.ReadCloser and a function
// the function will run at the end of file or close the file.
type OnEOFReader struct {
Rc io.ReadCloser
Fn func()
}
func (r *OnEOFReader) Read(p []byte) (n int, err error) {
n, err = r.Rc.Read(p)
if err == io.EOF {
r.runFunc()
}
return
}
// Close closes the file and run the function.
func (r *OnEOFReader) Close() error {
err := r.Rc.Close()
r.runFunc()
return err
}
func (r *OnEOFReader) runFunc() {
if fn := r.Fn; fn != nil {
fn()
r.Fn = nil
}
}
// cancelReadCloser wraps an io.ReadCloser with a context for cancelling read
// operations.
type cancelReadCloser struct {
cancel func()
pR *io.PipeReader // Stream to read from
pW *io.PipeWriter
}
// NewCancelReadCloser creates a wrapper that closes the ReadCloser when the
// context is cancelled. The returned io.ReadCloser must be closed when it is
// no longer needed.
func NewCancelReadCloser(ctx context.Context, in io.ReadCloser) io.ReadCloser {
pR, pW := io.Pipe()
// Create a context used to signal when the pipe is closed
doneCtx, cancel := context.WithCancel(context.Background())
p := &cancelReadCloser{
cancel: cancel,
pR: pR,
pW: pW,
}
go func() {
_, err := io.Copy(pW, in)
select {
case <-ctx.Done():
// If the context was closed, p.closeWithError
// was already called. Calling it again would
// change the error that Read returns.
default:
p.closeWithError(err)
}
in.Close()
}()
go func() {
for {
select {
case <-ctx.Done():
p.closeWithError(ctx.Err())
case <-doneCtx.Done():
return
}
}
}()
return p
}
// Read wraps the Read method of the pipe that provides data from the wrapped
// ReadCloser.
func (p *cancelReadCloser) Read(buf []byte) (n int, err error) {
return p.pR.Read(buf)
}
// closeWithError closes the wrapper and its underlying reader. It will
// cause future calls to Read to return err.
func (p *cancelReadCloser) closeWithError(err error) {
p.pW.CloseWithError(err)
p.cancel()
}
// Close closes the wrapper its underlying reader. It will cause
// future calls to Read to return io.EOF.
func (p *cancelReadCloser) Close() error {
p.closeWithError(io.EOF)
return nil
}

@ -0,0 +1,10 @@
// +build !windows
package ioutils // import "github.com/docker/docker/pkg/ioutils"
import "io/ioutil"
// TempDir on Unix systems is equivalent to ioutil.TempDir.
func TempDir(dir, prefix string) (string, error) {
return ioutil.TempDir(dir, prefix)
}

@ -0,0 +1,16 @@
package ioutils // import "github.com/docker/docker/pkg/ioutils"
import (
"io/ioutil"
"github.com/docker/docker/pkg/longpath"
)
// TempDir is the equivalent of ioutil.TempDir, except that the result is in Windows longpath format.
func TempDir(dir, prefix string) (string, error) {
tempDir, err := ioutil.TempDir(dir, prefix)
if err != nil {
return "", err
}
return longpath.AddPrefix(tempDir), nil
}

@ -0,0 +1,92 @@
package ioutils // import "github.com/docker/docker/pkg/ioutils"
import (
"io"
"sync"
)
// WriteFlusher wraps the Write and Flush operation ensuring that every write
// is a flush. In addition, the Close method can be called to intercept
// Read/Write calls if the targets lifecycle has already ended.
type WriteFlusher struct {
w io.Writer
flusher flusher
flushed chan struct{}
flushedOnce sync.Once
closed chan struct{}
closeLock sync.Mutex
}
type flusher interface {
Flush()
}
var errWriteFlusherClosed = io.EOF
func (wf *WriteFlusher) Write(b []byte) (n int, err error) {
select {
case <-wf.closed:
return 0, errWriteFlusherClosed
default:
}
n, err = wf.w.Write(b)
wf.Flush() // every write is a flush.
return n, err
}
// Flush the stream immediately.
func (wf *WriteFlusher) Flush() {
select {
case <-wf.closed:
return
default:
}
wf.flushedOnce.Do(func() {
close(wf.flushed)
})
wf.flusher.Flush()
}
// Flushed returns the state of flushed.
// If it's flushed, return true, or else it return false.
func (wf *WriteFlusher) Flushed() bool {
// BUG(stevvooe): Remove this method. Its use is inherently racy. Seems to
// be used to detect whether or a response code has been issued or not.
// Another hook should be used instead.
var flushed bool
select {
case <-wf.flushed:
flushed = true
default:
}
return flushed
}
// Close closes the write flusher, disallowing any further writes to the
// target. After the flusher is closed, all calls to write or flush will
// result in an error.
func (wf *WriteFlusher) Close() error {
wf.closeLock.Lock()
defer wf.closeLock.Unlock()
select {
case <-wf.closed:
return errWriteFlusherClosed
default:
close(wf.closed)
}
return nil
}
// NewWriteFlusher returns a new WriteFlusher.
func NewWriteFlusher(w io.Writer) *WriteFlusher {
var fl flusher
if f, ok := w.(flusher); ok {
fl = f
} else {
fl = &NopFlusher{}
}
return &WriteFlusher{w: w, flusher: fl, closed: make(chan struct{}), flushed: make(chan struct{})}
}

@ -0,0 +1,66 @@
package ioutils // import "github.com/docker/docker/pkg/ioutils"
import "io"
// NopWriter represents a type which write operation is nop.
type NopWriter struct{}
func (*NopWriter) Write(buf []byte) (int, error) {
return len(buf), nil
}
type nopWriteCloser struct {
io.Writer
}
func (w *nopWriteCloser) Close() error { return nil }
// NopWriteCloser returns a nopWriteCloser.
func NopWriteCloser(w io.Writer) io.WriteCloser {
return &nopWriteCloser{w}
}
// NopFlusher represents a type which flush operation is nop.
type NopFlusher struct{}
// Flush is a nop operation.
func (f *NopFlusher) Flush() {}
type writeCloserWrapper struct {
io.Writer
closer func() error
}
func (r *writeCloserWrapper) Close() error {
return r.closer()
}
// NewWriteCloserWrapper returns a new io.WriteCloser.
func NewWriteCloserWrapper(r io.Writer, closer func() error) io.WriteCloser {
return &writeCloserWrapper{
Writer: r,
closer: closer,
}
}
// WriteCounter wraps a concrete io.Writer and hold a count of the number
// of bytes written to the writer during a "session".
// This can be convenient when write return is masked
// (e.g., json.Encoder.Encode())
type WriteCounter struct {
Count int64
Writer io.Writer
}
// NewWriteCounter returns a new WriteCounter.
func NewWriteCounter(w io.Writer) *WriteCounter {
return &WriteCounter{
Writer: w,
}
}
func (wc *WriteCounter) Write(p []byte) (count int, err error) {
count, err = wc.Writer.Write(p)
wc.Count += int64(count)
return
}

@ -0,0 +1,26 @@
// longpath introduces some constants and helper functions for handling long paths
// in Windows, which are expected to be prepended with `\\?\` and followed by either
// a drive letter, a UNC server\share, or a volume identifier.
package longpath // import "github.com/docker/docker/pkg/longpath"
import (
"strings"
)
// Prefix is the longpath prefix for Windows file paths.
const Prefix = `\\?\`
// AddPrefix will add the Windows long path prefix to the path provided if
// it does not already have it.
func AddPrefix(path string) string {
if !strings.HasPrefix(path, Prefix) {
if strings.HasPrefix(path, `\\`) {
// This is a UNC path, so we need to add 'UNC' to the path as well.
path = Prefix + `UNC` + path[1:]
} else {
path = Prefix + path
}
}
return path
}

@ -0,0 +1,137 @@
package mount // import "github.com/docker/docker/pkg/mount"
import (
"fmt"
"strings"
)
var flags = map[string]struct {
clear bool
flag int
}{
"defaults": {false, 0},
"ro": {false, RDONLY},
"rw": {true, RDONLY},
"suid": {true, NOSUID},
"nosuid": {false, NOSUID},
"dev": {true, NODEV},
"nodev": {false, NODEV},
"exec": {true, NOEXEC},
"noexec": {false, NOEXEC},
"sync": {false, SYNCHRONOUS},
"async": {true, SYNCHRONOUS},
"dirsync": {false, DIRSYNC},
"remount": {false, REMOUNT},
"mand": {false, MANDLOCK},
"nomand": {true, MANDLOCK},
"atime": {true, NOATIME},
"noatime": {false, NOATIME},
"diratime": {true, NODIRATIME},
"nodiratime": {false, NODIRATIME},
"bind": {false, BIND},
"rbind": {false, RBIND},
"unbindable": {false, UNBINDABLE},
"runbindable": {false, RUNBINDABLE},
"private": {false, PRIVATE},
"rprivate": {false, RPRIVATE},
"shared": {false, SHARED},
"rshared": {false, RSHARED},
"slave": {false, SLAVE},
"rslave": {false, RSLAVE},
"relatime": {false, RELATIME},
"norelatime": {true, RELATIME},
"strictatime": {false, STRICTATIME},
"nostrictatime": {true, STRICTATIME},
}
var validFlags = map[string]bool{
"": true,
"size": true,
"mode": true,
"uid": true,
"gid": true,
"nr_inodes": true,
"nr_blocks": true,
"mpol": true,
}
var propagationFlags = map[string]bool{
"bind": true,
"rbind": true,
"unbindable": true,
"runbindable": true,
"private": true,
"rprivate": true,
"shared": true,
"rshared": true,
"slave": true,
"rslave": true,
}
// MergeTmpfsOptions merge mount options to make sure there is no duplicate.
func MergeTmpfsOptions(options []string) ([]string, error) {
// We use collisions maps to remove duplicates.
// For flag, the key is the flag value (the key for propagation flag is -1)
// For data=value, the key is the data
flagCollisions := map[int]bool{}
dataCollisions := map[string]bool{}
var newOptions []string
// We process in reverse order
for i := len(options) - 1; i >= 0; i-- {
option := options[i]
if option == "defaults" {
continue
}
if f, ok := flags[option]; ok && f.flag != 0 {
// There is only one propagation mode
key := f.flag
if propagationFlags[option] {
key = -1
}
// Check to see if there is collision for flag
if !flagCollisions[key] {
// We prepend the option and add to collision map
newOptions = append([]string{option}, newOptions...)
flagCollisions[key] = true
}
continue
}
opt := strings.SplitN(option, "=", 2)
if len(opt) != 2 || !validFlags[opt[0]] {
return nil, fmt.Errorf("Invalid tmpfs option %q", opt)
}
if !dataCollisions[opt[0]] {
// We prepend the option and add to collision map
newOptions = append([]string{option}, newOptions...)
dataCollisions[opt[0]] = true
}
}
return newOptions, nil
}
// Parse fstab type mount options into mount() flags
// and device specific data
func parseOptions(options string) (int, string) {
var (
flag int
data []string
)
for _, o := range strings.Split(options, ",") {
// If the option does not exist in the flags table or the flag
// is not supported on the platform,
// then it is a data value for a specific fs type
if f, exists := flags[o]; exists && f.flag != 0 {
if f.clear {
flag &= ^f.flag
} else {
flag |= f.flag
}
} else {
data = append(data, o)
}
}
return flag, strings.Join(data, ",")
}

@ -0,0 +1,49 @@
// +build freebsd,cgo
package mount // import "github.com/docker/docker/pkg/mount"
/*
#include <sys/mount.h>
*/
import "C"
const (
// RDONLY will mount the filesystem as read-only.
RDONLY = C.MNT_RDONLY
// NOSUID will not allow set-user-identifier or set-group-identifier bits to
// take effect.
NOSUID = C.MNT_NOSUID
// NOEXEC will not allow execution of any binaries on the mounted file system.
NOEXEC = C.MNT_NOEXEC
// SYNCHRONOUS will allow any I/O to the file system to be done synchronously.
SYNCHRONOUS = C.MNT_SYNCHRONOUS
// NOATIME will not update the file access time when reading from a file.
NOATIME = C.MNT_NOATIME
)
// These flags are unsupported.
const (
BIND = 0
DIRSYNC = 0
MANDLOCK = 0
NODEV = 0
NODIRATIME = 0
UNBINDABLE = 0
RUNBINDABLE = 0
PRIVATE = 0
RPRIVATE = 0
SHARED = 0
RSHARED = 0
SLAVE = 0
RSLAVE = 0
RBIND = 0
RELATIVE = 0
RELATIME = 0
REMOUNT = 0
STRICTATIME = 0
mntDetach = 0
)

@ -0,0 +1,87 @@
package mount // import "github.com/docker/docker/pkg/mount"
import (
"golang.org/x/sys/unix"
)
const (
// RDONLY will mount the file system read-only.
RDONLY = unix.MS_RDONLY
// NOSUID will not allow set-user-identifier or set-group-identifier bits to
// take effect.
NOSUID = unix.MS_NOSUID
// NODEV will not interpret character or block special devices on the file
// system.
NODEV = unix.MS_NODEV
// NOEXEC will not allow execution of any binaries on the mounted file system.
NOEXEC = unix.MS_NOEXEC
// SYNCHRONOUS will allow I/O to the file system to be done synchronously.
SYNCHRONOUS = unix.MS_SYNCHRONOUS
// DIRSYNC will force all directory updates within the file system to be done
// synchronously. This affects the following system calls: create, link,
// unlink, symlink, mkdir, rmdir, mknod and rename.
DIRSYNC = unix.MS_DIRSYNC
// REMOUNT will attempt to remount an already-mounted file system. This is
// commonly used to change the mount flags for a file system, especially to
// make a readonly file system writeable. It does not change device or mount
// point.
REMOUNT = unix.MS_REMOUNT
// MANDLOCK will force mandatory locks on a filesystem.
MANDLOCK = unix.MS_MANDLOCK
// NOATIME will not update the file access time when reading from a file.
NOATIME = unix.MS_NOATIME
// NODIRATIME will not update the directory access time.
NODIRATIME = unix.MS_NODIRATIME
// BIND remounts a subtree somewhere else.
BIND = unix.MS_BIND
// RBIND remounts a subtree and all possible submounts somewhere else.
RBIND = unix.MS_BIND | unix.MS_REC
// UNBINDABLE creates a mount which cannot be cloned through a bind operation.
UNBINDABLE = unix.MS_UNBINDABLE
// RUNBINDABLE marks the entire mount tree as UNBINDABLE.
RUNBINDABLE = unix.MS_UNBINDABLE | unix.MS_REC
// PRIVATE creates a mount which carries no propagation abilities.
PRIVATE = unix.MS_PRIVATE
// RPRIVATE marks the entire mount tree as PRIVATE.
RPRIVATE = unix.MS_PRIVATE | unix.MS_REC
// SLAVE creates a mount which receives propagation from its master, but not
// vice versa.
SLAVE = unix.MS_SLAVE
// RSLAVE marks the entire mount tree as SLAVE.
RSLAVE = unix.MS_SLAVE | unix.MS_REC
// SHARED creates a mount which provides the ability to create mirrors of
// that mount such that mounts and unmounts within any of the mirrors
// propagate to the other mirrors.
SHARED = unix.MS_SHARED
// RSHARED marks the entire mount tree as SHARED.
RSHARED = unix.MS_SHARED | unix.MS_REC
// RELATIME updates inode access times relative to modify or change time.
RELATIME = unix.MS_RELATIME
// STRICTATIME allows to explicitly request full atime updates. This makes
// it possible for the kernel to default to relatime or noatime but still
// allow userspace to override it.
STRICTATIME = unix.MS_STRICTATIME
mntDetach = unix.MNT_DETACH
)

@ -0,0 +1,31 @@
// +build !linux,!freebsd freebsd,!cgo
package mount // import "github.com/docker/docker/pkg/mount"
// These flags are unsupported.
const (
BIND = 0
DIRSYNC = 0
MANDLOCK = 0
NOATIME = 0
NODEV = 0
NODIRATIME = 0
NOEXEC = 0
NOSUID = 0
UNBINDABLE = 0
RUNBINDABLE = 0
PRIVATE = 0
RPRIVATE = 0
SHARED = 0
RSHARED = 0
SLAVE = 0
RSLAVE = 0
RBIND = 0
RELATIME = 0
RELATIVE = 0
REMOUNT = 0
STRICTATIME = 0
SYNCHRONOUS = 0
RDONLY = 0
mntDetach = 0
)

@ -0,0 +1,159 @@
package mount // import "github.com/docker/docker/pkg/mount"
import (
"sort"
"strconv"
"strings"
"github.com/sirupsen/logrus"
)
// mountError records an error from mount or unmount operation
type mountError struct {
op string
source, target string
flags uintptr
data string
err error
}
func (e *mountError) Error() string {
out := e.op + " "
if e.source != "" {
out += e.source + ":" + e.target
} else {
out += e.target
}
if e.flags != uintptr(0) {
out += ", flags: 0x" + strconv.FormatUint(uint64(e.flags), 16)
}
if e.data != "" {
out += ", data: " + e.data
}
out += ": " + e.err.Error()
return out
}
// Cause returns the underlying cause of the error
func (e *mountError) Cause() error {
return e.err
}
// FilterFunc is a type defining a callback function
// to filter out unwanted entries. It takes a pointer
// to an Info struct (not fully populated, currently
// only Mountpoint is filled in), and returns two booleans:
// - skip: true if the entry should be skipped
// - stop: true if parsing should be stopped after the entry
type FilterFunc func(*Info) (skip, stop bool)
// PrefixFilter discards all entries whose mount points
// do not start with a prefix specified
func PrefixFilter(prefix string) FilterFunc {
return func(m *Info) (bool, bool) {
skip := !strings.HasPrefix(m.Mountpoint, prefix)
return skip, false
}
}
// SingleEntryFilter looks for a specific entry
func SingleEntryFilter(mp string) FilterFunc {
return func(m *Info) (bool, bool) {
if m.Mountpoint == mp {
return false, true // don't skip, stop now
}
return true, false // skip, keep going
}
}
// ParentsFilter returns all entries whose mount points
// can be parents of a path specified, discarding others.
// For example, given `/var/lib/docker/something`, entries
// like `/var/lib/docker`, `/var` and `/` are returned.
func ParentsFilter(path string) FilterFunc {
return func(m *Info) (bool, bool) {
skip := !strings.HasPrefix(path, m.Mountpoint)
return skip, false
}
}
// GetMounts retrieves a list of mounts for the current running process,
// with an optional filter applied (use nil for no filter).
func GetMounts(f FilterFunc) ([]*Info, error) {
return parseMountTable(f)
}
// Mounted determines if a specified mountpoint has been mounted.
// On Linux it looks at /proc/self/mountinfo.
func Mounted(mountpoint string) (bool, error) {
entries, err := GetMounts(SingleEntryFilter(mountpoint))
if err != nil {
return false, err
}
return len(entries) > 0, nil
}
// Mount will mount filesystem according to the specified configuration, on the
// condition that the target path is *not* already mounted. Options must be
// specified like the mount or fstab unix commands: "opt1=val1,opt2=val2". See
// flags.go for supported option flags.
func Mount(device, target, mType, options string) error {
flag, data := parseOptions(options)
if flag&REMOUNT != REMOUNT {
if mounted, err := Mounted(target); err != nil || mounted {
return err
}
}
return mount(device, target, mType, uintptr(flag), data)
}
// ForceMount will mount a filesystem according to the specified configuration,
// *regardless* if the target path is not already mounted. Options must be
// specified like the mount or fstab unix commands: "opt1=val1,opt2=val2". See
// flags.go for supported option flags.
func ForceMount(device, target, mType, options string) error {
flag, data := parseOptions(options)
return mount(device, target, mType, uintptr(flag), data)
}
// Unmount lazily unmounts a filesystem on supported platforms, otherwise
// does a normal unmount.
func Unmount(target string) error {
return unmount(target, mntDetach)
}
// RecursiveUnmount unmounts the target and all mounts underneath, starting with
// the deepsest mount first.
func RecursiveUnmount(target string) error {
mounts, err := parseMountTable(PrefixFilter(target))
if err != nil {
return err
}
// Make the deepest mount be first
sort.Slice(mounts, func(i, j int) bool {
return len(mounts[i].Mountpoint) > len(mounts[j].Mountpoint)
})
for i, m := range mounts {
logrus.Debugf("Trying to unmount %s", m.Mountpoint)
err = unmount(m.Mountpoint, mntDetach)
if err != nil {
if i == len(mounts)-1 { // last mount
if mounted, e := Mounted(m.Mountpoint); e != nil || mounted {
return err
}
} else {
// This is some submount, we can ignore this error for now, the final unmount will fail if this is a real problem
logrus.WithError(err).Warnf("Failed to unmount submount %s", m.Mountpoint)
}
}
logrus.Debugf("Unmounted %s", m.Mountpoint)
}
return nil
}

@ -0,0 +1,59 @@
package mount // import "github.com/docker/docker/pkg/mount"
/*
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <sys/_iovec.h>
#include <sys/mount.h>
#include <sys/param.h>
*/
import "C"
import (
"strings"
"syscall"
"unsafe"
)
func allocateIOVecs(options []string) []C.struct_iovec {
out := make([]C.struct_iovec, len(options))
for i, option := range options {
out[i].iov_base = unsafe.Pointer(C.CString(option))
out[i].iov_len = C.size_t(len(option) + 1)
}
return out
}
func mount(device, target, mType string, flag uintptr, data string) error {
isNullFS := false
xs := strings.Split(data, ",")
for _, x := range xs {
if x == "bind" {
isNullFS = true
}
}
options := []string{"fspath", target}
if isNullFS {
options = append(options, "fstype", "nullfs", "target", device)
} else {
options = append(options, "fstype", mType, "from", device)
}
rawOptions := allocateIOVecs(options)
for _, rawOption := range rawOptions {
defer C.free(rawOption.iov_base)
}
if errno := C.nmount(&rawOptions[0], C.uint(len(options)), C.int(flag)); errno != 0 {
return &mountError{
op: "mount",
source: device,
target: target,
flags: flag,
err: syscall.Errno(errno),
}
}
return nil
}

@ -0,0 +1,73 @@
package mount // import "github.com/docker/docker/pkg/mount"
import (
"golang.org/x/sys/unix"
)
const (
// ptypes is the set propagation types.
ptypes = unix.MS_SHARED | unix.MS_PRIVATE | unix.MS_SLAVE | unix.MS_UNBINDABLE
// pflags is the full set valid flags for a change propagation call.
pflags = ptypes | unix.MS_REC | unix.MS_SILENT
// broflags is the combination of bind and read only
broflags = unix.MS_BIND | unix.MS_RDONLY
)
// isremount returns true if either device name or flags identify a remount request, false otherwise.
func isremount(device string, flags uintptr) bool {
switch {
// We treat device "" and "none" as a remount request to provide compatibility with
// requests that don't explicitly set MS_REMOUNT such as those manipulating bind mounts.
case flags&unix.MS_REMOUNT != 0, device == "", device == "none":
return true
default:
return false
}
}
func mount(device, target, mType string, flags uintptr, data string) error {
oflags := flags &^ ptypes
if !isremount(device, flags) || data != "" {
// Initial call applying all non-propagation flags for mount
// or remount with changed data
if err := unix.Mount(device, target, mType, oflags, data); err != nil {
return &mountError{
op: "mount",
source: device,
target: target,
flags: oflags,
data: data,
err: err,
}
}
}
if flags&ptypes != 0 {
// Change the propagation type.
if err := unix.Mount("", target, "", flags&pflags, ""); err != nil {
return &mountError{
op: "remount",
target: target,
flags: flags & pflags,
err: err,
}
}
}
if oflags&broflags == broflags {
// Remount the bind to apply read only.
if err := unix.Mount("", target, "", oflags|unix.MS_REMOUNT, ""); err != nil {
return &mountError{
op: "remount-ro",
target: target,
flags: oflags | unix.MS_REMOUNT,
err: err,
}
}
}
return nil
}

@ -0,0 +1,7 @@
// +build !linux,!freebsd freebsd,!cgo
package mount // import "github.com/docker/docker/pkg/mount"
func mount(device, target, mType string, flag uintptr, data string) error {
panic("Not implemented")
}

@ -0,0 +1,40 @@
package mount // import "github.com/docker/docker/pkg/mount"
// Info reveals information about a particular mounted filesystem. This
// struct is populated from the content in the /proc/<pid>/mountinfo file.
type Info struct {
// ID is a unique identifier of the mount (may be reused after umount).
ID int
// Parent indicates the ID of the mount parent (or of self for the top of the
// mount tree).
Parent int
// Major indicates one half of the device ID which identifies the device class.
Major int
// Minor indicates one half of the device ID which identifies a specific
// instance of device.
Minor int
// Root of the mount within the filesystem.
Root string
// Mountpoint indicates the mount point relative to the process's root.
Mountpoint string
// Opts represents mount-specific options.
Opts string
// Optional represents optional fields.
Optional string
// Fstype indicates the type of filesystem, such as EXT3.
Fstype string
// Source indicates filesystem specific information or "none".
Source string
// VfsOpts represents per super block options.
VfsOpts string
}

@ -0,0 +1,54 @@
package mount // import "github.com/docker/docker/pkg/mount"
/*
#include <sys/param.h>
#include <sys/ucred.h>
#include <sys/mount.h>
*/
import "C"
import (
"fmt"
"reflect"
"unsafe"
)
//parseMountTable returns information about mounted filesystems
func parseMountTable(filter FilterFunc) ([]*Info, error) {
var rawEntries *C.struct_statfs
count := int(C.getmntinfo(&rawEntries, C.MNT_WAIT))
if count == 0 {
return nil, fmt.Errorf("Failed to call getmntinfo")
}
var entries []C.struct_statfs
header := (*reflect.SliceHeader)(unsafe.Pointer(&entries))
header.Cap = count
header.Len = count
header.Data = uintptr(unsafe.Pointer(rawEntries))
var out []*Info
for _, entry := range entries {
var mountinfo Info
var skip, stop bool
mountinfo.Mountpoint = C.GoString(&entry.f_mntonname[0])
if filter != nil {
// filter out entries we're not interested in
skip, stop = filter(&mountinfo)
if skip {
continue
}
}
mountinfo.Source = C.GoString(&entry.f_mntfromname[0])
mountinfo.Fstype = C.GoString(&entry.f_fstypename[0])
out = append(out, &mountinfo)
if stop {
break
}
}
return out, nil
}

@ -0,0 +1,144 @@
package mount // import "github.com/docker/docker/pkg/mount"
import (
"bufio"
"fmt"
"io"
"os"
"strconv"
"strings"
"github.com/pkg/errors"
)
func parseInfoFile(r io.Reader, filter FilterFunc) ([]*Info, error) {
s := bufio.NewScanner(r)
out := []*Info{}
var err error
for s.Scan() {
if err = s.Err(); err != nil {
return nil, err
}
/*
See http://man7.org/linux/man-pages/man5/proc.5.html
36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
(1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
(1) mount ID: unique identifier of the mount (may be reused after umount)
(2) parent ID: ID of parent (or of self for the top of the mount tree)
(3) major:minor: value of st_dev for files on filesystem
(4) root: root of the mount within the filesystem
(5) mount point: mount point relative to the process's root
(6) mount options: per mount options
(7) optional fields: zero or more fields of the form "tag[:value]"
(8) separator: marks the end of the optional fields
(9) filesystem type: name of filesystem of the form "type[.subtype]"
(10) mount source: filesystem specific information or "none"
(11) super options: per super block options
*/
text := s.Text()
fields := strings.Split(text, " ")
numFields := len(fields)
if numFields < 10 {
// should be at least 10 fields
return nil, fmt.Errorf("Parsing '%s' failed: not enough fields (%d)", text, numFields)
}
p := &Info{}
// ignore any numbers parsing errors, as there should not be any
p.ID, _ = strconv.Atoi(fields[0])
p.Parent, _ = strconv.Atoi(fields[1])
mm := strings.Split(fields[2], ":")
if len(mm) != 2 {
return nil, fmt.Errorf("Parsing '%s' failed: unexpected minor:major pair %s", text, mm)
}
p.Major, _ = strconv.Atoi(mm[0])
p.Minor, _ = strconv.Atoi(mm[1])
p.Root, err = strconv.Unquote(`"` + fields[3] + `"`)
if err != nil {
return nil, errors.Wrapf(err, "Parsing '%s' failed: unable to unquote root field", fields[3])
}
p.Mountpoint, err = strconv.Unquote(`"` + fields[4] + `"`)
if err != nil {
return nil, errors.Wrapf(err, "Parsing '%s' failed: unable to unquote mount point field", fields[4])
}
p.Opts = fields[5]
var skip, stop bool
if filter != nil {
// filter out entries we're not interested in
skip, stop = filter(p)
if skip {
continue
}
}
// one or more optional fields, when a separator (-)
i := 6
for ; i < numFields && fields[i] != "-"; i++ {
switch i {
case 6:
p.Optional = fields[6]
default:
/* NOTE there might be more optional fields before the such as
fields[7]...fields[N] (where N < sepIndex), although
as of Linux kernel 4.15 the only known ones are
mount propagation flags in fields[6]. The correct
behavior is to ignore any unknown optional fields.
*/
break
}
}
if i == numFields {
return nil, fmt.Errorf("Parsing '%s' failed: missing separator ('-')", text)
}
// There should be 3 fields after the separator...
if i+4 > numFields {
return nil, fmt.Errorf("Parsing '%s' failed: not enough fields after a separator", text)
}
// ... but in Linux <= 3.9 mounting a cifs with spaces in a share name
// (like "//serv/My Documents") _may_ end up having a space in the last field
// of mountinfo (like "unc=//serv/My Documents"). Since kernel 3.10-rc1, cifs
// option unc= is ignored, so a space should not appear. In here we ignore
// those "extra" fields caused by extra spaces.
p.Fstype = fields[i+1]
p.Source = fields[i+2]
p.VfsOpts = fields[i+3]
out = append(out, p)
if stop {
break
}
}
return out, nil
}
// Parse /proc/self/mountinfo because comparing Dev and ino does not work from
// bind mounts
func parseMountTable(filter FilterFunc) ([]*Info, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return nil, err
}
defer f.Close()
return parseInfoFile(f, filter)
}
// PidMountInfo collects the mounts for a specific process ID. If the process
// ID is unknown, it is better to use `GetMounts` which will inspect
// "/proc/self/mountinfo" instead.
func PidMountInfo(pid int) ([]*Info, error) {
f, err := os.Open(fmt.Sprintf("/proc/%d/mountinfo", pid))
if err != nil {
return nil, err
}
defer f.Close()
return parseInfoFile(f, nil)
}

@ -0,0 +1,12 @@
// +build !windows,!linux,!freebsd freebsd,!cgo
package mount // import "github.com/docker/docker/pkg/mount"
import (
"fmt"
"runtime"
)
func parseMountTable(f FilterFunc) ([]*Info, error) {
return nil, fmt.Errorf("mount.parseMountTable is not implemented on %s/%s", runtime.GOOS, runtime.GOARCH)
}

@ -0,0 +1,6 @@
package mount // import "github.com/docker/docker/pkg/mount"
func parseMountTable(f FilterFunc) ([]*Info, error) {
// Do NOT return an error!
return nil, nil
}

@ -0,0 +1,71 @@
package mount // import "github.com/docker/docker/pkg/mount"
// MakeShared ensures a mounted filesystem has the SHARED mount option enabled.
// See the supported options in flags.go for further reference.
func MakeShared(mountPoint string) error {
return ensureMountedAs(mountPoint, SHARED)
}
// MakeRShared ensures a mounted filesystem has the RSHARED mount option enabled.
// See the supported options in flags.go for further reference.
func MakeRShared(mountPoint string) error {
return ensureMountedAs(mountPoint, RSHARED)
}
// MakePrivate ensures a mounted filesystem has the PRIVATE mount option enabled.
// See the supported options in flags.go for further reference.
func MakePrivate(mountPoint string) error {
return ensureMountedAs(mountPoint, PRIVATE)
}
// MakeRPrivate ensures a mounted filesystem has the RPRIVATE mount option
// enabled. See the supported options in flags.go for further reference.
func MakeRPrivate(mountPoint string) error {
return ensureMountedAs(mountPoint, RPRIVATE)
}
// MakeSlave ensures a mounted filesystem has the SLAVE mount option enabled.
// See the supported options in flags.go for further reference.
func MakeSlave(mountPoint string) error {
return ensureMountedAs(mountPoint, SLAVE)
}
// MakeRSlave ensures a mounted filesystem has the RSLAVE mount option enabled.
// See the supported options in flags.go for further reference.
func MakeRSlave(mountPoint string) error {
return ensureMountedAs(mountPoint, RSLAVE)
}
// MakeUnbindable ensures a mounted filesystem has the UNBINDABLE mount option
// enabled. See the supported options in flags.go for further reference.
func MakeUnbindable(mountPoint string) error {
return ensureMountedAs(mountPoint, UNBINDABLE)
}
// MakeRUnbindable ensures a mounted filesystem has the RUNBINDABLE mount
// option enabled. See the supported options in flags.go for further reference.
func MakeRUnbindable(mountPoint string) error {
return ensureMountedAs(mountPoint, RUNBINDABLE)
}
// MakeMount ensures that the file or directory given is a mount point,
// bind mounting it to itself it case it is not.
func MakeMount(mnt string) error {
mounted, err := Mounted(mnt)
if err != nil {
return err
}
if mounted {
return nil
}
return mount(mnt, mnt, "none", uintptr(BIND), "")
}
func ensureMountedAs(mnt string, flags int) error {
if err := MakeMount(mnt); err != nil {
return err
}
return mount("", mnt, "none", uintptr(flags), "")
}

@ -0,0 +1,22 @@
// +build !windows
package mount // import "github.com/docker/docker/pkg/mount"
import "golang.org/x/sys/unix"
func unmount(target string, flags int) error {
err := unix.Unmount(target, flags)
if err == nil || err == unix.EINVAL {
// Ignore "not mounted" error here. Note the same error
// can be returned if flags are invalid, so this code
// assumes that the flags value is always correct.
return nil
}
return &mountError{
op: "umount",
target: target,
flags: uintptr(flags),
err: err,
}
}

@ -0,0 +1,7 @@
// +build windows
package mount // import "github.com/docker/docker/pkg/mount"
func unmount(target string, flag int) error {
panic("Not implemented")
}

@ -0,0 +1,137 @@
// Package pools provides a collection of pools which provide various
// data types with buffers. These can be used to lower the number of
// memory allocations and reuse buffers.
//
// New pools should be added to this package to allow them to be
// shared across packages.
//
// Utility functions which operate on pools should be added to this
// package to allow them to be reused.
package pools // import "github.com/docker/docker/pkg/pools"
import (
"bufio"
"io"
"sync"
"github.com/docker/docker/pkg/ioutils"
)
const buffer32K = 32 * 1024
var (
// BufioReader32KPool is a pool which returns bufio.Reader with a 32K buffer.
BufioReader32KPool = newBufioReaderPoolWithSize(buffer32K)
// BufioWriter32KPool is a pool which returns bufio.Writer with a 32K buffer.
BufioWriter32KPool = newBufioWriterPoolWithSize(buffer32K)
buffer32KPool = newBufferPoolWithSize(buffer32K)
)
// BufioReaderPool is a bufio reader that uses sync.Pool.
type BufioReaderPool struct {
pool sync.Pool
}
// newBufioReaderPoolWithSize is unexported because new pools should be
// added here to be shared where required.
func newBufioReaderPoolWithSize(size int) *BufioReaderPool {
return &BufioReaderPool{
pool: sync.Pool{
New: func() interface{} { return bufio.NewReaderSize(nil, size) },
},
}
}
// Get returns a bufio.Reader which reads from r. The buffer size is that of the pool.
func (bufPool *BufioReaderPool) Get(r io.Reader) *bufio.Reader {
buf := bufPool.pool.Get().(*bufio.Reader)
buf.Reset(r)
return buf
}
// Put puts the bufio.Reader back into the pool.
func (bufPool *BufioReaderPool) Put(b *bufio.Reader) {
b.Reset(nil)
bufPool.pool.Put(b)
}
type bufferPool struct {
pool sync.Pool
}
func newBufferPoolWithSize(size int) *bufferPool {
return &bufferPool{
pool: sync.Pool{
New: func() interface{} { return make([]byte, size) },
},
}
}
func (bp *bufferPool) Get() []byte {
return bp.pool.Get().([]byte)
}
func (bp *bufferPool) Put(b []byte) {
bp.pool.Put(b)
}
// Copy is a convenience wrapper which uses a buffer to avoid allocation in io.Copy.
func Copy(dst io.Writer, src io.Reader) (written int64, err error) {
buf := buffer32KPool.Get()
written, err = io.CopyBuffer(dst, src, buf)
buffer32KPool.Put(buf)
return
}
// NewReadCloserWrapper returns a wrapper which puts the bufio.Reader back
// into the pool and closes the reader if it's an io.ReadCloser.
func (bufPool *BufioReaderPool) NewReadCloserWrapper(buf *bufio.Reader, r io.Reader) io.ReadCloser {
return ioutils.NewReadCloserWrapper(r, func() error {
if readCloser, ok := r.(io.ReadCloser); ok {
readCloser.Close()
}
bufPool.Put(buf)
return nil
})
}
// BufioWriterPool is a bufio writer that uses sync.Pool.
type BufioWriterPool struct {
pool sync.Pool
}
// newBufioWriterPoolWithSize is unexported because new pools should be
// added here to be shared where required.
func newBufioWriterPoolWithSize(size int) *BufioWriterPool {
return &BufioWriterPool{
pool: sync.Pool{
New: func() interface{} { return bufio.NewWriterSize(nil, size) },
},
}
}
// Get returns a bufio.Writer which writes to w. The buffer size is that of the pool.
func (bufPool *BufioWriterPool) Get(w io.Writer) *bufio.Writer {
buf := bufPool.pool.Get().(*bufio.Writer)
buf.Reset(w)
return buf
}
// Put puts the bufio.Writer back into the pool.
func (bufPool *BufioWriterPool) Put(b *bufio.Writer) {
b.Reset(nil)
bufPool.pool.Put(b)
}
// NewWriteCloserWrapper returns a wrapper which puts the bufio.Writer back
// into the pool and closes the writer if it's an io.Writecloser.
func (bufPool *BufioWriterPool) NewWriteCloserWrapper(buf *bufio.Writer, w io.Writer) io.WriteCloser {
return ioutils.NewWriteCloserWrapper(w, func() error {
buf.Flush()
if writeCloser, ok := w.(io.WriteCloser); ok {
writeCloser.Close()
}
bufPool.Put(buf)
return nil
})
}

@ -0,0 +1,16 @@
package system // import "github.com/docker/docker/pkg/system"
import (
"strings"
"golang.org/x/sys/windows"
)
// EscapeArgs makes a Windows-style escaped command line from a set of arguments
func EscapeArgs(args []string) string {
escapedArgs := make([]string, len(args))
for i, a := range args {
escapedArgs[i] = windows.EscapeArg(a)
}
return strings.Join(escapedArgs, " ")
}

@ -0,0 +1,31 @@
package system // import "github.com/docker/docker/pkg/system"
import (
"os"
"time"
)
// Chtimes changes the access time and modified time of a file at the given path
func Chtimes(name string, atime time.Time, mtime time.Time) error {
unixMinTime := time.Unix(0, 0)
unixMaxTime := maxTime
// If the modified time is prior to the Unix Epoch, or after the
// end of Unix Time, os.Chtimes has undefined behavior
// default to Unix Epoch in this case, just in case
if atime.Before(unixMinTime) || atime.After(unixMaxTime) {
atime = unixMinTime
}
if mtime.Before(unixMinTime) || mtime.After(unixMaxTime) {
mtime = unixMinTime
}
if err := os.Chtimes(name, atime, mtime); err != nil {
return err
}
// Take platform specific action for setting create time.
return setCTime(name, mtime)
}

@ -0,0 +1,14 @@
// +build !windows
package system // import "github.com/docker/docker/pkg/system"
import (
"time"
)
//setCTime will set the create time on a file. On Unix, the create
//time is updated as a side effect of setting the modified time, so
//no action is required.
func setCTime(path string, ctime time.Time) error {
return nil
}

@ -0,0 +1,26 @@
package system // import "github.com/docker/docker/pkg/system"
import (
"time"
"golang.org/x/sys/windows"
)
//setCTime will set the create time on a file. On Windows, this requires
//calling SetFileTime and explicitly including the create time.
func setCTime(path string, ctime time.Time) error {
ctimespec := windows.NsecToTimespec(ctime.UnixNano())
pathp, e := windows.UTF16PtrFromString(path)
if e != nil {
return e
}
h, e := windows.CreateFile(pathp,
windows.FILE_WRITE_ATTRIBUTES, windows.FILE_SHARE_WRITE, nil,
windows.OPEN_EXISTING, windows.FILE_FLAG_BACKUP_SEMANTICS, 0)
if e != nil {
return e
}
defer windows.Close(h)
c := windows.NsecToFiletime(windows.TimespecToNsec(ctimespec))
return windows.SetFileTime(h, &c, nil, nil)
}

@ -0,0 +1,13 @@
package system // import "github.com/docker/docker/pkg/system"
import (
"errors"
)
var (
// ErrNotSupportedPlatform means the platform is not supported.
ErrNotSupportedPlatform = errors.New("platform and architecture is not supported")
// ErrNotSupportedOperatingSystem means the operating system is not supported.
ErrNotSupportedOperatingSystem = errors.New("operating system is not supported")
)

@ -0,0 +1,19 @@
package system // import "github.com/docker/docker/pkg/system"
import (
"fmt"
"os/exec"
"syscall"
)
// GetExitCode returns the ExitStatus of the specified error if its type is
// exec.ExitError, returns 0 and an error otherwise.
func GetExitCode(err error) (int, error) {
exitCode := 0
if exiterr, ok := err.(*exec.ExitError); ok {
if procExit, ok := exiterr.Sys().(syscall.WaitStatus); ok {
return procExit.ExitStatus(), nil
}
}
return exitCode, fmt.Errorf("failed to get exit code")
}

@ -0,0 +1,67 @@
// +build !windows
package system // import "github.com/docker/docker/pkg/system"
import (
"io/ioutil"
"os"
"path/filepath"
)
// MkdirAllWithACL is a wrapper for os.MkdirAll on unix systems.
func MkdirAllWithACL(path string, perm os.FileMode, sddl string) error {
return os.MkdirAll(path, perm)
}
// MkdirAll creates a directory named path along with any necessary parents,
// with permission specified by attribute perm for all dir created.
func MkdirAll(path string, perm os.FileMode) error {
return os.MkdirAll(path, perm)
}
// IsAbs is a platform-specific wrapper for filepath.IsAbs.
func IsAbs(path string) bool {
return filepath.IsAbs(path)
}
// The functions below here are wrappers for the equivalents in the os and ioutils packages.
// They are passthrough on Unix platforms, and only relevant on Windows.
// CreateSequential creates the named file with mode 0666 (before umask), truncating
// it if it already exists. If successful, methods on the returned
// File can be used for I/O; the associated file descriptor has mode
// O_RDWR.
// If there is an error, it will be of type *PathError.
func CreateSequential(name string) (*os.File, error) {
return os.Create(name)
}
// OpenSequential opens the named file for reading. If successful, methods on
// the returned file can be used for reading; the associated file
// descriptor has mode O_RDONLY.
// If there is an error, it will be of type *PathError.
func OpenSequential(name string) (*os.File, error) {
return os.Open(name)
}
// OpenFileSequential is the generalized open call; most users will use Open
// or Create instead. It opens the named file with specified flag
// (O_RDONLY etc.) and perm, (0666 etc.) if applicable. If successful,
// methods on the returned File can be used for I/O.
// If there is an error, it will be of type *PathError.
func OpenFileSequential(name string, flag int, perm os.FileMode) (*os.File, error) {
return os.OpenFile(name, flag, perm)
}
// TempFileSequential creates a new temporary file in the directory dir
// with a name beginning with prefix, opens the file for reading
// and writing, and returns the resulting *os.File.
// If dir is the empty string, TempFile uses the default directory
// for temporary files (see os.TempDir).
// Multiple programs calling TempFile simultaneously
// will not choose the same file. The caller can use f.Name()
// to find the pathname of the file. It is the caller's responsibility
// to remove the file when no longer needed.
func TempFileSequential(dir, prefix string) (f *os.File, err error) {
return ioutil.TempFile(dir, prefix)
}

@ -0,0 +1,295 @@
package system // import "github.com/docker/docker/pkg/system"
import (
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"sync"
"syscall"
"time"
"unsafe"
winio "github.com/Microsoft/go-winio"
"golang.org/x/sys/windows"
)
const (
// SddlAdministratorsLocalSystem is local administrators plus NT AUTHORITY\System
SddlAdministratorsLocalSystem = "D:P(A;OICI;GA;;;BA)(A;OICI;GA;;;SY)"
)
// MkdirAllWithACL is a wrapper for MkdirAll that creates a directory
// with an appropriate SDDL defined ACL.
func MkdirAllWithACL(path string, perm os.FileMode, sddl string) error {
return mkdirall(path, true, sddl)
}
// MkdirAll implementation that is volume path aware for Windows. It can be used
// as a drop-in replacement for os.MkdirAll()
func MkdirAll(path string, _ os.FileMode) error {
return mkdirall(path, false, "")
}
// mkdirall is a custom version of os.MkdirAll modified for use on Windows
// so that it is both volume path aware, and can create a directory with
// a DACL.
func mkdirall(path string, applyACL bool, sddl string) error {
if re := regexp.MustCompile(`^\\\\\?\\Volume{[a-z0-9-]+}$`); re.MatchString(path) {
return nil
}
// The rest of this method is largely copied from os.MkdirAll and should be kept
// as-is to ensure compatibility.
// Fast path: if we can tell whether path is a directory or file, stop with success or error.
dir, err := os.Stat(path)
if err == nil {
if dir.IsDir() {
return nil
}
return &os.PathError{
Op: "mkdir",
Path: path,
Err: syscall.ENOTDIR,
}
}
// Slow path: make sure parent exists and then call Mkdir for path.
i := len(path)
for i > 0 && os.IsPathSeparator(path[i-1]) { // Skip trailing path separator.
i--
}
j := i
for j > 0 && !os.IsPathSeparator(path[j-1]) { // Scan backward over element.
j--
}
if j > 1 {
// Create parent
err = mkdirall(path[0:j-1], false, sddl)
if err != nil {
return err
}
}
// Parent now exists; invoke os.Mkdir or mkdirWithACL and use its result.
if applyACL {
err = mkdirWithACL(path, sddl)
} else {
err = os.Mkdir(path, 0)
}
if err != nil {
// Handle arguments like "foo/." by
// double-checking that directory doesn't exist.
dir, err1 := os.Lstat(path)
if err1 == nil && dir.IsDir() {
return nil
}
return err
}
return nil
}
// mkdirWithACL creates a new directory. If there is an error, it will be of
// type *PathError. .
//
// This is a modified and combined version of os.Mkdir and windows.Mkdir
// in golang to cater for creating a directory am ACL permitting full
// access, with inheritance, to any subfolder/file for Built-in Administrators
// and Local System.
func mkdirWithACL(name string, sddl string) error {
sa := windows.SecurityAttributes{Length: 0}
sd, err := winio.SddlToSecurityDescriptor(sddl)
if err != nil {
return &os.PathError{Op: "mkdir", Path: name, Err: err}
}
sa.Length = uint32(unsafe.Sizeof(sa))
sa.InheritHandle = 1
sa.SecurityDescriptor = uintptr(unsafe.Pointer(&sd[0]))
namep, err := windows.UTF16PtrFromString(name)
if err != nil {
return &os.PathError{Op: "mkdir", Path: name, Err: err}
}
e := windows.CreateDirectory(namep, &sa)
if e != nil {
return &os.PathError{Op: "mkdir", Path: name, Err: e}
}
return nil
}
// IsAbs is a platform-specific wrapper for filepath.IsAbs. On Windows,
// golang filepath.IsAbs does not consider a path \windows\system32 as absolute
// as it doesn't start with a drive-letter/colon combination. However, in
// docker we need to verify things such as WORKDIR /windows/system32 in
// a Dockerfile (which gets translated to \windows\system32 when being processed
// by the daemon. This SHOULD be treated as absolute from a docker processing
// perspective.
func IsAbs(path string) bool {
if !filepath.IsAbs(path) {
if !strings.HasPrefix(path, string(os.PathSeparator)) {
return false
}
}
return true
}
// The origin of the functions below here are the golang OS and windows packages,
// slightly modified to only cope with files, not directories due to the
// specific use case.
//
// The alteration is to allow a file on Windows to be opened with
// FILE_FLAG_SEQUENTIAL_SCAN (particular for docker load), to avoid eating
// the standby list, particularly when accessing large files such as layer.tar.
// CreateSequential creates the named file with mode 0666 (before umask), truncating
// it if it already exists. If successful, methods on the returned
// File can be used for I/O; the associated file descriptor has mode
// O_RDWR.
// If there is an error, it will be of type *PathError.
func CreateSequential(name string) (*os.File, error) {
return OpenFileSequential(name, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0)
}
// OpenSequential opens the named file for reading. If successful, methods on
// the returned file can be used for reading; the associated file
// descriptor has mode O_RDONLY.
// If there is an error, it will be of type *PathError.
func OpenSequential(name string) (*os.File, error) {
return OpenFileSequential(name, os.O_RDONLY, 0)
}
// OpenFileSequential is the generalized open call; most users will use Open
// or Create instead.
// If there is an error, it will be of type *PathError.
func OpenFileSequential(name string, flag int, _ os.FileMode) (*os.File, error) {
if name == "" {
return nil, &os.PathError{Op: "open", Path: name, Err: syscall.ENOENT}
}
r, errf := windowsOpenFileSequential(name, flag, 0)
if errf == nil {
return r, nil
}
return nil, &os.PathError{Op: "open", Path: name, Err: errf}
}
func windowsOpenFileSequential(name string, flag int, _ os.FileMode) (file *os.File, err error) {
r, e := windowsOpenSequential(name, flag|windows.O_CLOEXEC, 0)
if e != nil {
return nil, e
}
return os.NewFile(uintptr(r), name), nil
}
func makeInheritSa() *windows.SecurityAttributes {
var sa windows.SecurityAttributes
sa.Length = uint32(unsafe.Sizeof(sa))
sa.InheritHandle = 1
return &sa
}
func windowsOpenSequential(path string, mode int, _ uint32) (fd windows.Handle, err error) {
if len(path) == 0 {
return windows.InvalidHandle, windows.ERROR_FILE_NOT_FOUND
}
pathp, err := windows.UTF16PtrFromString(path)
if err != nil {
return windows.InvalidHandle, err
}
var access uint32
switch mode & (windows.O_RDONLY | windows.O_WRONLY | windows.O_RDWR) {
case windows.O_RDONLY:
access = windows.GENERIC_READ
case windows.O_WRONLY:
access = windows.GENERIC_WRITE
case windows.O_RDWR:
access = windows.GENERIC_READ | windows.GENERIC_WRITE
}
if mode&windows.O_CREAT != 0 {
access |= windows.GENERIC_WRITE
}
if mode&windows.O_APPEND != 0 {
access &^= windows.GENERIC_WRITE
access |= windows.FILE_APPEND_DATA
}
sharemode := uint32(windows.FILE_SHARE_READ | windows.FILE_SHARE_WRITE)
var sa *windows.SecurityAttributes
if mode&windows.O_CLOEXEC == 0 {
sa = makeInheritSa()
}
var createmode uint32
switch {
case mode&(windows.O_CREAT|windows.O_EXCL) == (windows.O_CREAT | windows.O_EXCL):
createmode = windows.CREATE_NEW
case mode&(windows.O_CREAT|windows.O_TRUNC) == (windows.O_CREAT | windows.O_TRUNC):
createmode = windows.CREATE_ALWAYS
case mode&windows.O_CREAT == windows.O_CREAT:
createmode = windows.OPEN_ALWAYS
case mode&windows.O_TRUNC == windows.O_TRUNC:
createmode = windows.TRUNCATE_EXISTING
default:
createmode = windows.OPEN_EXISTING
}
// Use FILE_FLAG_SEQUENTIAL_SCAN rather than FILE_ATTRIBUTE_NORMAL as implemented in golang.
//https://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx
const fileFlagSequentialScan = 0x08000000 // FILE_FLAG_SEQUENTIAL_SCAN
h, e := windows.CreateFile(pathp, access, sharemode, sa, createmode, fileFlagSequentialScan, 0)
return h, e
}
// Helpers for TempFileSequential
var rand uint32
var randmu sync.Mutex
func reseed() uint32 {
return uint32(time.Now().UnixNano() + int64(os.Getpid()))
}
func nextSuffix() string {
randmu.Lock()
r := rand
if r == 0 {
r = reseed()
}
r = r*1664525 + 1013904223 // constants from Numerical Recipes
rand = r
randmu.Unlock()
return strconv.Itoa(int(1e9 + r%1e9))[1:]
}
// TempFileSequential is a copy of ioutil.TempFile, modified to use sequential
// file access. Below is the original comment from golang:
// TempFile creates a new temporary file in the directory dir
// with a name beginning with prefix, opens the file for reading
// and writing, and returns the resulting *os.File.
// If dir is the empty string, TempFile uses the default directory
// for temporary files (see os.TempDir).
// Multiple programs calling TempFile simultaneously
// will not choose the same file. The caller can use f.Name()
// to find the pathname of the file. It is the caller's responsibility
// to remove the file when no longer needed.
func TempFileSequential(dir, prefix string) (f *os.File, err error) {
if dir == "" {
dir = os.TempDir()
}
nconflict := 0
for i := 0; i < 10000; i++ {
name := filepath.Join(dir, prefix+nextSuffix())
f, err = OpenFileSequential(name, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0600)
if os.IsExist(err) {
if nconflict++; nconflict > 10 {
randmu.Lock()
rand = reseed()
randmu.Unlock()
}
continue
}
break
}
return
}

@ -0,0 +1,22 @@
package system // import "github.com/docker/docker/pkg/system"
import (
"syscall"
"time"
"unsafe"
)
// Used by chtimes
var maxTime time.Time
func init() {
// chtimes initialization
if unsafe.Sizeof(syscall.Timespec{}.Nsec) == 8 {
// This is a 64 bit timespec
// os.Chtimes limits time to the following
maxTime = time.Unix(0, 1<<63-1)
} else {
// This is a 32 bit timespec
maxTime = time.Unix(1<<31-1, 0)
}
}

@ -0,0 +1,12 @@
// +build !windows
package system // import "github.com/docker/docker/pkg/system"
// InitLCOW does nothing since LCOW is a windows only feature
func InitLCOW(experimental bool) {
}
// ContainerdRuntimeSupported returns true if the use of ContainerD runtime is supported.
func ContainerdRuntimeSupported(_ bool, _ string) bool {
return true
}

@ -0,0 +1,41 @@
package system // import "github.com/docker/docker/pkg/system"
import (
"os"
"github.com/Microsoft/hcsshim/osversion"
"github.com/sirupsen/logrus"
)
var (
// lcowSupported determines if Linux Containers on Windows are supported.
lcowSupported = false
// containerdRuntimeSupported determines if ContainerD should be the runtime.
// As of March 2019, this is an experimental feature.
containerdRuntimeSupported = false
)
// InitLCOW sets whether LCOW is supported or not. Requires RS5+
func InitLCOW(experimental bool) {
v := GetOSVersion()
if experimental && v.Build >= osversion.RS5 {
lcowSupported = true
}
}
// InitContainerdRuntime sets whether to use ContainerD for runtime
// on Windows. This is an experimental feature still in development, and
// also requires an environment variable to be set (so as not to turn the
// feature on from simply experimental which would also mean LCOW.
func InitContainerdRuntime(experimental bool, cdPath string) {
if experimental && len(cdPath) > 0 && len(os.Getenv("DOCKER_WINDOWS_CONTAINERD_RUNTIME")) > 0 {
logrus.Warnf("Using ContainerD runtime. This feature is experimental")
containerdRuntimeSupported = true
}
}
// ContainerdRuntimeSupported returns true if the use of ContainerD runtime is supported.
func ContainerdRuntimeSupported() bool {
return containerdRuntimeSupported
}

@ -0,0 +1,32 @@
package system // import "github.com/docker/docker/pkg/system"
import (
"runtime"
"strings"
specs "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
)
// IsOSSupported determines if an operating system is supported by the host
func IsOSSupported(os string) bool {
if strings.EqualFold(runtime.GOOS, os) {
return true
}
if LCOWSupported() && strings.EqualFold(os, "linux") {
return true
}
return false
}
// ValidatePlatform determines if a platform structure is valid.
// TODO This is a temporary windows-only function, should be replaced by
// comparison of worker capabilities
func ValidatePlatform(platform specs.Platform) error {
if runtime.GOOS == "windows" {
if !(platform.OS == runtime.GOOS || (LCOWSupported() && platform.OS == "linux")) {
return errors.Errorf("unsupported os %s", platform.OS)
}
}
return nil
}

@ -0,0 +1,8 @@
// +build !windows
package system // import "github.com/docker/docker/pkg/system"
// LCOWSupported returns true if Linux containers on Windows are supported.
func LCOWSupported() bool {
return false
}

@ -0,0 +1,6 @@
package system // import "github.com/docker/docker/pkg/system"
// LCOWSupported returns true if Linux containers on Windows are supported.
func LCOWSupported() bool {
return lcowSupported
}

@ -0,0 +1,20 @@
// +build !windows
package system // import "github.com/docker/docker/pkg/system"
import (
"os"
"syscall"
)
// Lstat takes a path to a file and returns
// a system.StatT type pertaining to that file.
//
// Throws an error if the file does not exist
func Lstat(path string) (*StatT, error) {
s := &syscall.Stat_t{}
if err := syscall.Lstat(path, s); err != nil {
return nil, &os.PathError{Op: "Lstat", Path: path, Err: err}
}
return fromStatT(s)
}

@ -0,0 +1,14 @@
package system // import "github.com/docker/docker/pkg/system"
import "os"
// Lstat calls os.Lstat to get a fileinfo interface back.
// This is then copied into our own locally defined structure.
func Lstat(path string) (*StatT, error) {
fi, err := os.Lstat(path)
if err != nil {
return nil, err
}
return fromStatT(&fi)
}

@ -0,0 +1,17 @@
package system // import "github.com/docker/docker/pkg/system"
// MemInfo contains memory statistics of the host system.
type MemInfo struct {
// Total usable RAM (i.e. physical RAM minus a few reserved bits and the
// kernel binary code).
MemTotal int64
// Amount of free memory.
MemFree int64
// Total amount of swap space available.
SwapTotal int64
// Amount of swap space that is currently unused.
SwapFree int64
}

@ -0,0 +1,71 @@
package system // import "github.com/docker/docker/pkg/system"
import (
"bufio"
"io"
"os"
"strconv"
"strings"
"github.com/docker/go-units"
)
// ReadMemInfo retrieves memory statistics of the host system and returns a
// MemInfo type.
func ReadMemInfo() (*MemInfo, error) {
file, err := os.Open("/proc/meminfo")
if err != nil {
return nil, err
}
defer file.Close()
return parseMemInfo(file)
}
// parseMemInfo parses the /proc/meminfo file into
// a MemInfo object given an io.Reader to the file.
// Throws error if there are problems reading from the file
func parseMemInfo(reader io.Reader) (*MemInfo, error) {
meminfo := &MemInfo{}
scanner := bufio.NewScanner(reader)
memAvailable := int64(-1)
for scanner.Scan() {
// Expected format: ["MemTotal:", "1234", "kB"]
parts := strings.Fields(scanner.Text())
// Sanity checks: Skip malformed entries.
if len(parts) < 3 || parts[2] != "kB" {
continue
}
// Convert to bytes.
size, err := strconv.Atoi(parts[1])
if err != nil {
continue
}
bytes := int64(size) * units.KiB
switch parts[0] {
case "MemTotal:":
meminfo.MemTotal = bytes
case "MemFree:":
meminfo.MemFree = bytes
case "MemAvailable:":
memAvailable = bytes
case "SwapTotal:":
meminfo.SwapTotal = bytes
case "SwapFree:":
meminfo.SwapFree = bytes
}
}
if memAvailable != -1 {
meminfo.MemFree = memAvailable
}
// Handle errors that may have occurred during the reading of the file.
if err := scanner.Err(); err != nil {
return nil, err
}
return meminfo, nil
}

@ -0,0 +1,8 @@
// +build !linux,!windows
package system // import "github.com/docker/docker/pkg/system"
// ReadMemInfo is not supported on platforms other than linux and windows.
func ReadMemInfo() (*MemInfo, error) {
return nil, ErrNotSupportedPlatform
}

@ -0,0 +1,45 @@
package system // import "github.com/docker/docker/pkg/system"
import (
"unsafe"
"golang.org/x/sys/windows"
)
var (
modkernel32 = windows.NewLazySystemDLL("kernel32.dll")
procGlobalMemoryStatusEx = modkernel32.NewProc("GlobalMemoryStatusEx")
)
// https://msdn.microsoft.com/en-us/library/windows/desktop/aa366589(v=vs.85).aspx
// https://msdn.microsoft.com/en-us/library/windows/desktop/aa366770(v=vs.85).aspx
type memorystatusex struct {
dwLength uint32
dwMemoryLoad uint32
ullTotalPhys uint64
ullAvailPhys uint64
ullTotalPageFile uint64
ullAvailPageFile uint64
ullTotalVirtual uint64
ullAvailVirtual uint64
ullAvailExtendedVirtual uint64
}
// ReadMemInfo retrieves memory statistics of the host system and returns a
// MemInfo type.
func ReadMemInfo() (*MemInfo, error) {
msi := &memorystatusex{
dwLength: 64,
}
r1, _, _ := procGlobalMemoryStatusEx.Call(uintptr(unsafe.Pointer(msi)))
if r1 == 0 {
return &MemInfo{}, nil
}
return &MemInfo{
MemTotal: int64(msi.ullTotalPhys),
MemFree: int64(msi.ullAvailPhys),
SwapTotal: int64(msi.ullTotalPageFile),
SwapFree: int64(msi.ullAvailPageFile),
}, nil
}

@ -0,0 +1,22 @@
// +build !windows
package system // import "github.com/docker/docker/pkg/system"
import (
"golang.org/x/sys/unix"
)
// Mknod creates a filesystem node (file, device special file or named pipe) named path
// with attributes specified by mode and dev.
func Mknod(path string, mode uint32, dev int) error {
return unix.Mknod(path, mode, dev)
}
// Mkdev is used to build the value of linux devices (in /dev/) which specifies major
// and minor number of the newly created device special file.
// Linux device nodes are a bit weird due to backwards compat with 16 bit device nodes.
// They are, from low to high: the lower 8 bits of the minor, then 12 bits of the major,
// then the top 12 bits of the minor.
func Mkdev(major int64, minor int64) uint32 {
return uint32(unix.Mkdev(uint32(major), uint32(minor)))
}

@ -0,0 +1,11 @@
package system // import "github.com/docker/docker/pkg/system"
// Mknod is not implemented on Windows.
func Mknod(path string, mode uint32, dev int) error {
return ErrNotSupportedPlatform
}
// Mkdev is not implemented on Windows.
func Mkdev(major int64, minor int64) uint32 {
panic("Mkdev not implemented on Windows.")
}

@ -0,0 +1,60 @@
package system // import "github.com/docker/docker/pkg/system"
import (
"fmt"
"path/filepath"
"runtime"
"strings"
"github.com/containerd/continuity/pathdriver"
)
const defaultUnixPathEnv = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
// DefaultPathEnv is unix style list of directories to search for
// executables. Each directory is separated from the next by a colon
// ':' character .
func DefaultPathEnv(os string) string {
if runtime.GOOS == "windows" {
if os != runtime.GOOS {
return defaultUnixPathEnv
}
// Deliberately empty on Windows containers on Windows as the default path will be set by
// the container. Docker has no context of what the default path should be.
return ""
}
return defaultUnixPathEnv
}
// CheckSystemDriveAndRemoveDriveLetter verifies that a path, if it includes a drive letter,
// is the system drive.
// On Linux: this is a no-op.
// On Windows: this does the following>
// CheckSystemDriveAndRemoveDriveLetter verifies and manipulates a Windows path.
// This is used, for example, when validating a user provided path in docker cp.
// If a drive letter is supplied, it must be the system drive. The drive letter
// is always removed. Also, it translates it to OS semantics (IOW / to \). We
// need the path in this syntax so that it can ultimately be concatenated with
// a Windows long-path which doesn't support drive-letters. Examples:
// C: --> Fail
// C:\ --> \
// a --> a
// /a --> \a
// d:\ --> Fail
func CheckSystemDriveAndRemoveDriveLetter(path string, driver pathdriver.PathDriver) (string, error) {
if runtime.GOOS != "windows" || LCOWSupported() {
return path, nil
}
if len(path) == 2 && string(path[1]) == ":" {
return "", fmt.Errorf("No relative path specified in %q", path)
}
if !driver.IsAbs(path) || len(path) < 2 {
return filepath.FromSlash(path), nil
}
if string(path[1]) == ":" && !strings.EqualFold(string(path[0]), "c") {
return "", fmt.Errorf("The specified path is not on the system drive (C:)")
}
return filepath.FromSlash(path[2:]), nil
}

@ -0,0 +1,10 @@
// +build !windows
package system // import "github.com/docker/docker/pkg/system"
// GetLongPathName converts Windows short pathnames to full pathnames.
// For example C:\Users\ADMIN~1 --> C:\Users\Administrator.
// It is a no-op on non-Windows platforms
func GetLongPathName(path string) (string, error) {
return path, nil
}

@ -0,0 +1,24 @@
package system // import "github.com/docker/docker/pkg/system"
import "syscall"
// GetLongPathName converts Windows short pathnames to full pathnames.
// For example C:\Users\ADMIN~1 --> C:\Users\Administrator.
// It is a no-op on non-Windows platforms
func GetLongPathName(path string) (string, error) {
// See https://groups.google.com/forum/#!topic/golang-dev/1tufzkruoTg
p := syscall.StringToUTF16(path)
b := p // GetLongPathName says we can reuse buffer
n, err := syscall.GetLongPathName(&p[0], &b[0], uint32(len(b)))
if err != nil {
return "", err
}
if n > uint32(len(b)) {
b = make([]uint16, n)
_, err = syscall.GetLongPathName(&p[0], &b[0], uint32(len(b)))
if err != nil {
return "", err
}
}
return syscall.UTF16ToString(b), nil
}

@ -0,0 +1,24 @@
// +build linux freebsd darwin
package system // import "github.com/docker/docker/pkg/system"
import (
"syscall"
"golang.org/x/sys/unix"
)
// IsProcessAlive returns true if process with a given pid is running.
func IsProcessAlive(pid int) bool {
err := unix.Kill(pid, syscall.Signal(0))
if err == nil || err == unix.EPERM {
return true
}
return false
}
// KillProcess force-stops a process.
func KillProcess(pid int) {
unix.Kill(pid, unix.SIGKILL)
}

@ -0,0 +1,18 @@
package system // import "github.com/docker/docker/pkg/system"
import "os"
// IsProcessAlive returns true if process with a given pid is running.
func IsProcessAlive(pid int) bool {
_, err := os.FindProcess(pid)
return err == nil
}
// KillProcess force-stops a process.
func KillProcess(pid int) {
p, err := os.FindProcess(pid)
if err == nil {
p.Kill()
}
}

@ -0,0 +1,80 @@
package system // import "github.com/docker/docker/pkg/system"
import (
"os"
"syscall"
"time"
"github.com/docker/docker/pkg/mount"
"github.com/pkg/errors"
)
// EnsureRemoveAll wraps `os.RemoveAll` to check for specific errors that can
// often be remedied.
// Only use `EnsureRemoveAll` if you really want to make every effort to remove
// a directory.
//
// Because of the way `os.Remove` (and by extension `os.RemoveAll`) works, there
// can be a race between reading directory entries and then actually attempting
// to remove everything in the directory.
// These types of errors do not need to be returned since it's ok for the dir to
// be gone we can just retry the remove operation.
//
// This should not return a `os.ErrNotExist` kind of error under any circumstances
func EnsureRemoveAll(dir string) error {
notExistErr := make(map[string]bool)
// track retries
exitOnErr := make(map[string]int)
maxRetry := 50
// Attempt to unmount anything beneath this dir first
mount.RecursiveUnmount(dir)
for {
err := os.RemoveAll(dir)
if err == nil {
return nil
}
pe, ok := err.(*os.PathError)
if !ok {
return err
}
if os.IsNotExist(err) {
if notExistErr[pe.Path] {
return err
}
notExistErr[pe.Path] = true
// There is a race where some subdir can be removed but after the parent
// dir entries have been read.
// So the path could be from `os.Remove(subdir)`
// If the reported non-existent path is not the passed in `dir` we
// should just retry, but otherwise return with no error.
if pe.Path == dir {
return nil
}
continue
}
if pe.Err != syscall.EBUSY {
return err
}
if mounted, _ := mount.Mounted(pe.Path); mounted {
if e := mount.Unmount(pe.Path); e != nil {
if mounted, _ := mount.Mounted(pe.Path); mounted {
return errors.Wrapf(e, "error while removing %s", dir)
}
}
}
if exitOnErr[pe.Path] == maxRetry {
return err
}
exitOnErr[pe.Path]++
time.Sleep(100 * time.Millisecond)
}
}

@ -0,0 +1,13 @@
package system // import "github.com/docker/docker/pkg/system"
import "syscall"
// fromStatT converts a syscall.Stat_t type to a system.Stat_t type
func fromStatT(s *syscall.Stat_t) (*StatT, error) {
return &StatT{size: s.Size,
mode: uint32(s.Mode),
uid: s.Uid,
gid: s.Gid,
rdev: uint64(s.Rdev),
mtim: s.Mtimespec}, nil
}

@ -0,0 +1,13 @@
package system // import "github.com/docker/docker/pkg/system"
import "syscall"
// fromStatT converts a syscall.Stat_t type to a system.Stat_t type
func fromStatT(s *syscall.Stat_t) (*StatT, error) {
return &StatT{size: s.Size,
mode: uint32(s.Mode),
uid: s.Uid,
gid: s.Gid,
rdev: uint64(s.Rdev),
mtim: s.Mtimespec}, nil
}

@ -0,0 +1,20 @@
package system // import "github.com/docker/docker/pkg/system"
import "syscall"
// fromStatT converts a syscall.Stat_t type to a system.Stat_t type
func fromStatT(s *syscall.Stat_t) (*StatT, error) {
return &StatT{size: s.Size,
mode: s.Mode,
uid: s.Uid,
gid: s.Gid,
// the type is 32bit on mips
rdev: uint64(s.Rdev), // nolint: unconvert
mtim: s.Mtim}, nil
}
// FromStatT converts a syscall.Stat_t type to a system.Stat_t type
// This is exposed on Linux as pkg/archive/changes uses it.
func FromStatT(s *syscall.Stat_t) (*StatT, error) {
return fromStatT(s)
}

@ -0,0 +1,13 @@
package system // import "github.com/docker/docker/pkg/system"
import "syscall"
// fromStatT converts a syscall.Stat_t type to a system.Stat_t type
func fromStatT(s *syscall.Stat_t) (*StatT, error) {
return &StatT{size: s.Size,
mode: uint32(s.Mode),
uid: s.Uid,
gid: s.Gid,
rdev: uint64(s.Rdev),
mtim: s.Mtim}, nil
}

@ -0,0 +1,66 @@
// +build !windows
package system // import "github.com/docker/docker/pkg/system"
import (
"os"
"syscall"
)
// StatT type contains status of a file. It contains metadata
// like permission, owner, group, size, etc about a file.
type StatT struct {
mode uint32
uid uint32
gid uint32
rdev uint64
size int64
mtim syscall.Timespec
}
// Mode returns file's permission mode.
func (s StatT) Mode() uint32 {
return s.mode
}
// UID returns file's user id of owner.
func (s StatT) UID() uint32 {
return s.uid
}
// GID returns file's group id of owner.
func (s StatT) GID() uint32 {
return s.gid
}
// Rdev returns file's device ID (if it's special file).
func (s StatT) Rdev() uint64 {
return s.rdev
}
// Size returns file's size.
func (s StatT) Size() int64 {
return s.size
}
// Mtim returns file's last modification time.
func (s StatT) Mtim() syscall.Timespec {
return s.mtim
}
// IsDir reports whether s describes a directory.
func (s StatT) IsDir() bool {
return s.mode&syscall.S_IFDIR != 0
}
// Stat takes a path to a file and returns
// a system.StatT type pertaining to that file.
//
// Throws an error if the file does not exist
func Stat(path string) (*StatT, error) {
s := &syscall.Stat_t{}
if err := syscall.Stat(path, s); err != nil {
return nil, &os.PathError{Op: "Stat", Path: path, Err: err}
}
return fromStatT(s)
}

@ -0,0 +1,49 @@
package system // import "github.com/docker/docker/pkg/system"
import (
"os"
"time"
)
// StatT type contains status of a file. It contains metadata
// like permission, size, etc about a file.
type StatT struct {
mode os.FileMode
size int64
mtim time.Time
}
// Size returns file's size.
func (s StatT) Size() int64 {
return s.size
}
// Mode returns file's permission mode.
func (s StatT) Mode() os.FileMode {
return os.FileMode(s.mode)
}
// Mtim returns file's last modification time.
func (s StatT) Mtim() time.Time {
return time.Time(s.mtim)
}
// Stat takes a path to a file and returns
// a system.StatT type pertaining to that file.
//
// Throws an error if the file does not exist
func Stat(path string) (*StatT, error) {
fi, err := os.Stat(path)
if err != nil {
return nil, err
}
return fromStatT(&fi)
}
// fromStatT converts a os.FileInfo type to a system.StatT type
func fromStatT(fi *os.FileInfo) (*StatT, error) {
return &StatT{
size: (*fi).Size(),
mode: (*fi).Mode(),
mtim: (*fi).ModTime()}, nil
}

@ -0,0 +1,17 @@
// +build linux freebsd
package system // import "github.com/docker/docker/pkg/system"
import "golang.org/x/sys/unix"
// Unmount is a platform-specific helper function to call
// the unmount syscall.
func Unmount(dest string) error {
return unix.Unmount(dest, 0)
}
// CommandLineToArgv should not be used on Unix.
// It simply returns commandLine in the only element in the returned array.
func CommandLineToArgv(commandLine string) ([]string, error) {
return []string{commandLine}, nil
}

@ -0,0 +1,176 @@
package system // import "github.com/docker/docker/pkg/system"
import (
"fmt"
"syscall"
"unsafe"
"github.com/sirupsen/logrus"
"golang.org/x/sys/windows"
)
const (
OWNER_SECURITY_INFORMATION = 0x00000001
GROUP_SECURITY_INFORMATION = 0x00000002
DACL_SECURITY_INFORMATION = 0x00000004
SACL_SECURITY_INFORMATION = 0x00000008
LABEL_SECURITY_INFORMATION = 0x00000010
ATTRIBUTE_SECURITY_INFORMATION = 0x00000020
SCOPE_SECURITY_INFORMATION = 0x00000040
PROCESS_TRUST_LABEL_SECURITY_INFORMATION = 0x00000080
ACCESS_FILTER_SECURITY_INFORMATION = 0x00000100
BACKUP_SECURITY_INFORMATION = 0x00010000
PROTECTED_DACL_SECURITY_INFORMATION = 0x80000000
PROTECTED_SACL_SECURITY_INFORMATION = 0x40000000
UNPROTECTED_DACL_SECURITY_INFORMATION = 0x20000000
UNPROTECTED_SACL_SECURITY_INFORMATION = 0x10000000
)
const (
SE_UNKNOWN_OBJECT_TYPE = iota
SE_FILE_OBJECT
SE_SERVICE
SE_PRINTER
SE_REGISTRY_KEY
SE_LMSHARE
SE_KERNEL_OBJECT
SE_WINDOW_OBJECT
SE_DS_OBJECT
SE_DS_OBJECT_ALL
SE_PROVIDER_DEFINED_OBJECT
SE_WMIGUID_OBJECT
SE_REGISTRY_WOW64_32KEY
)
const (
SeTakeOwnershipPrivilege = "SeTakeOwnershipPrivilege"
)
const (
ContainerAdministratorSidString = "S-1-5-93-2-1"
ContainerUserSidString = "S-1-5-93-2-2"
)
var (
ntuserApiset = windows.NewLazyDLL("ext-ms-win-ntuser-window-l1-1-0")
modadvapi32 = windows.NewLazySystemDLL("advapi32.dll")
procGetVersionExW = modkernel32.NewProc("GetVersionExW")
procSetNamedSecurityInfo = modadvapi32.NewProc("SetNamedSecurityInfoW")
procGetSecurityDescriptorDacl = modadvapi32.NewProc("GetSecurityDescriptorDacl")
)
// OSVersion is a wrapper for Windows version information
// https://msdn.microsoft.com/en-us/library/windows/desktop/ms724439(v=vs.85).aspx
type OSVersion struct {
Version uint32
MajorVersion uint8
MinorVersion uint8
Build uint16
}
// https://msdn.microsoft.com/en-us/library/windows/desktop/ms724833(v=vs.85).aspx
type osVersionInfoEx struct {
OSVersionInfoSize uint32
MajorVersion uint32
MinorVersion uint32
BuildNumber uint32
PlatformID uint32
CSDVersion [128]uint16
ServicePackMajor uint16
ServicePackMinor uint16
SuiteMask uint16
ProductType byte
Reserve byte
}
// GetOSVersion gets the operating system version on Windows. Note that
// docker.exe must be manifested to get the correct version information.
func GetOSVersion() OSVersion {
var err error
osv := OSVersion{}
osv.Version, err = windows.GetVersion()
if err != nil {
// GetVersion never fails.
panic(err)
}
osv.MajorVersion = uint8(osv.Version & 0xFF)
osv.MinorVersion = uint8(osv.Version >> 8 & 0xFF)
osv.Build = uint16(osv.Version >> 16)
return osv
}
func (osv OSVersion) ToString() string {
return fmt.Sprintf("%d.%d.%d", osv.MajorVersion, osv.MinorVersion, osv.Build)
}
// IsWindowsClient returns true if the SKU is client
// @engine maintainers - this function should not be removed or modified as it
// is used to enforce licensing restrictions on Windows.
func IsWindowsClient() bool {
osviex := &osVersionInfoEx{OSVersionInfoSize: 284}
r1, _, err := procGetVersionExW.Call(uintptr(unsafe.Pointer(osviex)))
if r1 == 0 {
logrus.Warnf("GetVersionExW failed - assuming server SKU: %v", err)
return false
}
const verNTWorkstation = 0x00000001
return osviex.ProductType == verNTWorkstation
}
// Unmount is a platform-specific helper function to call
// the unmount syscall. Not supported on Windows
func Unmount(dest string) error {
return nil
}
// CommandLineToArgv wraps the Windows syscall to turn a commandline into an argument array.
func CommandLineToArgv(commandLine string) ([]string, error) {
var argc int32
argsPtr, err := windows.UTF16PtrFromString(commandLine)
if err != nil {
return nil, err
}
argv, err := windows.CommandLineToArgv(argsPtr, &argc)
if err != nil {
return nil, err
}
defer windows.LocalFree(windows.Handle(uintptr(unsafe.Pointer(argv))))
newArgs := make([]string, argc)
for i, v := range (*argv)[:argc] {
newArgs[i] = string(windows.UTF16ToString((*v)[:]))
}
return newArgs, nil
}
// HasWin32KSupport determines whether containers that depend on win32k can
// run on this machine. Win32k is the driver used to implement windowing.
func HasWin32KSupport() bool {
// For now, check for ntuser API support on the host. In the future, a host
// may support win32k in containers even if the host does not support ntuser
// APIs.
return ntuserApiset.Load() == nil
}
func SetNamedSecurityInfo(objectName *uint16, objectType uint32, securityInformation uint32, sidOwner *windows.SID, sidGroup *windows.SID, dacl *byte, sacl *byte) (result error) {
r0, _, _ := syscall.Syscall9(procSetNamedSecurityInfo.Addr(), 7, uintptr(unsafe.Pointer(objectName)), uintptr(objectType), uintptr(securityInformation), uintptr(unsafe.Pointer(sidOwner)), uintptr(unsafe.Pointer(sidGroup)), uintptr(unsafe.Pointer(dacl)), uintptr(unsafe.Pointer(sacl)), 0, 0)
if r0 != 0 {
result = syscall.Errno(r0)
}
return
}
func GetSecurityDescriptorDacl(securityDescriptor *byte, daclPresent *uint32, dacl **byte, daclDefaulted *uint32) (result error) {
r1, _, e1 := syscall.Syscall6(procGetSecurityDescriptorDacl.Addr(), 4, uintptr(unsafe.Pointer(securityDescriptor)), uintptr(unsafe.Pointer(daclPresent)), uintptr(unsafe.Pointer(dacl)), uintptr(unsafe.Pointer(daclDefaulted)), 0, 0)
if r1 == 0 {
if e1 != 0 {
result = syscall.Errno(e1)
} else {
result = syscall.EINVAL
}
}
return
}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save