kubernetes: k8s.io/kubernetes/pkg/kubelet/cm/devicemanager Index | Files | Directories

package devicemanager

import "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"

Index

Types

type ActivePodsFunc

type ActivePodsFunc func() []*v1.Pod

ActivePodsFunc is a function that returns a list of pods to reconcile.

type DeviceInstances

type DeviceInstances map[string]pluginapi.Device

DeviceInstances is a mapping of device name -> plugin device data.

type DeviceRunContainerOptions

// DeviceRunContainerOptions contains the combined container runtime settings
// a container needs to consume its allocated devices.
type DeviceRunContainerOptions struct {
	// Envs is the list of environment variables.
	Envs []kubecontainer.EnvVar
	// Mounts is the list of mounts for the container.
	Mounts []kubecontainer.Mount
	// Devices is the list of host devices mapped into the container.
	Devices []kubecontainer.DeviceInfo
	// Annotations is the list of annotations for the container.
	Annotations []kubecontainer.Annotation
}

DeviceRunContainerOptions contains the combined container runtime settings to consume its allocated devices.

type Manager

// Manager manages all the Device Plugins running on a node.
type Manager interface {
	// Start starts device plugin registration service.
	Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady) error

	// Allocate configures and assigns devices to a container in a pod. From
	// the requested device resources, Allocate will communicate with the
	// owning device plugin to allow setup procedures to take place, and for
	// the device plugin to provide runtime settings to use the device
	// (environment variables, mount points and device files).
	Allocate(pod *v1.Pod, container *v1.Container) error

	// UpdatePluginResources updates node resources based on devices already
	// allocated to pods. The node object is provided for the device manager to
	// update the node capacity to reflect the currently available devices.
	UpdatePluginResources(node *schedulerframework.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error

	// Stop stops the manager.
	Stop() error

	// GetDeviceRunContainerOptions checks whether we have cached containerDevices
	// for the passed-in <pod, container> and returns its DeviceRunContainerOptions
	// for the found one. An empty struct is returned in case no cached state is found.
	GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Container) (*DeviceRunContainerOptions, error)

	// GetCapacity returns, in order: the available device plugin resource
	// capacity, the device plugin resource allocatable, and the names of
	// inactive device plugin resources previously registered on the node.
	GetCapacity() (v1.ResourceList, v1.ResourceList, []string)
	// GetWatcherHandler returns the plugin handler (a cache.PluginHandler).
	GetWatcherHandler() cache.PluginHandler

	// GetDevices returns information about the devices assigned to pods and containers.
	GetDevices(podUID, containerName string) ResourceDeviceInstances

	// GetAllocatableDevices returns information about all the devices known to the manager.
	GetAllocatableDevices() ResourceDeviceInstances

	// ShouldResetExtendedResourceCapacity returns whether the extended resources should be reset or not,
	// depending on the checkpoint file availability. Absence of the checkpoint file strongly indicates
	// the node has been recreated.
	ShouldResetExtendedResourceCapacity() bool

	// GetTopologyHints is the per-container part of the Topology Manager
	// HintProvider interface: it indicates that the Device Manager is consulted
	// when making topology-aware resource alignments for a container.
	GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint

	// GetPodTopologyHints is the per-pod part of the Topology Manager
	// HintProvider interface: it indicates that the Device Manager is consulted
	// when making topology-aware resource alignments for a whole pod.
	GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint

	// UpdateAllocatedDevices frees any Devices that are bound to terminated pods.
	UpdateAllocatedDevices()
}

Manager manages all the Device Plugins running on a node.

type ManagerImpl

type ManagerImpl struct {
	// contains filtered or unexported fields
}

ManagerImpl is the structure in charge of managing Device Plugins.

func NewManagerImpl

func NewManagerImpl(topology []cadvisorapi.Node, topologyAffinityStore topologymanager.Store) (*ManagerImpl, error)

NewManagerImpl creates a new manager.

func (*ManagerImpl) Allocate

func (m *ManagerImpl) Allocate(pod *v1.Pod, container *v1.Container) error

Allocate is the call that you can use to allocate a set of devices from the registered device plugins.

func (*ManagerImpl) DeRegisterPlugin

func (m *ManagerImpl) DeRegisterPlugin(pluginName string)

DeRegisterPlugin deregisters the plugin. TODO: work on the behavior for deregistering plugins, e.g. should we delete the resource?

func (*ManagerImpl) GetAllocatableDevices

func (m *ManagerImpl) GetAllocatableDevices() ResourceDeviceInstances

GetAllocatableDevices returns information about all the devices known to the manager

func (*ManagerImpl) GetCapacity

func (m *ManagerImpl) GetCapacity() (v1.ResourceList, v1.ResourceList, []string)

GetCapacity is expected to be called when Kubelet updates its node status. The first returned variable contains the registered device plugin resource capacity. The second returned variable contains the registered device plugin resource allocatable. The third returned variable contains previously registered resources that are no longer active. Kubelet uses this information to update resource capacity/allocatable in its node status. After the call, device plugin can remove the inactive resources from its internal list as the change is already reflected in Kubelet node status. Note in the special case after Kubelet restarts, device plugin resource capacities can temporarily drop to zero till corresponding device plugins re-register. This is OK because cm.UpdatePluginResource() run during predicate Admit guarantees we adjust nodeinfo capacity for already allocated pods so that they can continue to run. However, new pods requiring device plugin resources will not be scheduled till device plugin re-registers.

func (*ManagerImpl) GetDeviceRunContainerOptions

func (m *ManagerImpl) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Container) (*DeviceRunContainerOptions, error)

GetDeviceRunContainerOptions checks whether we have cached containerDevices for the passed-in <pod, container> and returns its DeviceRunContainerOptions for the found one. An empty struct is returned in case no cached state is found.

func (*ManagerImpl) GetDevices

func (m *ManagerImpl) GetDevices(podUID, containerName string) ResourceDeviceInstances

GetDevices returns the devices used by the specified container

func (*ManagerImpl) GetPodTopologyHints

func (m *ManagerImpl) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint

GetPodTopologyHints implements the topologymanager.HintProvider Interface which ensures the Device Manager is consulted when Topology Aware Hints for Pod are created.

func (*ManagerImpl) GetTopologyHints

func (m *ManagerImpl) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint

GetTopologyHints implements the TopologyManager HintProvider Interface which ensures the Device Manager is consulted when Topology Aware Hints for each container are created.

func (*ManagerImpl) GetWatcherHandler

func (m *ManagerImpl) GetWatcherHandler() cache.PluginHandler

GetWatcherHandler returns the plugin handler

func (*ManagerImpl) Register

Register registers a device plugin.

func (*ManagerImpl) RegisterPlugin

func (m *ManagerImpl) RegisterPlugin(pluginName string, endpoint string, versions []string) error

RegisterPlugin starts the endpoint and registers it. TODO: Start the endpoint and wait for the first ListAndWatch call before registering the plugin.

func (*ManagerImpl) ShouldResetExtendedResourceCapacity

func (m *ManagerImpl) ShouldResetExtendedResourceCapacity() bool

ShouldResetExtendedResourceCapacity returns whether the extended resources should be zeroed or not, depending on whether the node has been recreated. Absence of the checkpoint file strongly indicates the node has been recreated.

func (*ManagerImpl) Start

func (m *ManagerImpl) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady) error

Start starts the Device Plugin Manager, starts initialization of podDevices and allocatedDevices information from checkpointed state, and starts the device plugin registration service.

func (*ManagerImpl) Stop

func (m *ManagerImpl) Stop() error

Stop is the function that can stop the gRPC server. Can be called concurrently, more than once, and is safe to call without a prior Start.

func (*ManagerImpl) UpdateAllocatedDevices

func (m *ManagerImpl) UpdateAllocatedDevices()

UpdateAllocatedDevices frees any Devices that are bound to terminated pods.

func (*ManagerImpl) UpdatePluginResources

func (m *ManagerImpl) UpdatePluginResources(node *schedulerframework.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error

UpdatePluginResources updates node resources based on devices already allocated to pods.

func (*ManagerImpl) ValidatePlugin

func (m *ManagerImpl) ValidatePlugin(pluginName string, endpoint string, versions []string) error

ValidatePlugin validates a plugin: it checks that the version is correct and that the name has the format of an extended resource.

type ManagerStub

type ManagerStub struct{}

ManagerStub provides a simple stub implementation for the Device Manager.

func NewManagerStub

func NewManagerStub() (*ManagerStub, error)

NewManagerStub creates a ManagerStub.

func (*ManagerStub) Allocate

func (h *ManagerStub) Allocate(pod *v1.Pod, container *v1.Container) error

Allocate simply returns nil.

func (*ManagerStub) GetAllocatableDevices

func (h *ManagerStub) GetAllocatableDevices() ResourceDeviceInstances

GetAllocatableDevices returns nothing

func (*ManagerStub) GetCapacity

func (h *ManagerStub) GetCapacity() (v1.ResourceList, v1.ResourceList, []string)

GetCapacity simply returns nil capacity and empty removed resource list.

func (*ManagerStub) GetDeviceRunContainerOptions

func (h *ManagerStub) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Container) (*DeviceRunContainerOptions, error)

GetDeviceRunContainerOptions simply returns nil.

func (*ManagerStub) GetDevices

func (h *ManagerStub) GetDevices(_, _ string) ResourceDeviceInstances

GetDevices returns nil

func (*ManagerStub) GetPodTopologyHints

func (h *ManagerStub) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint

GetPodTopologyHints returns an empty TopologyHint map

func (*ManagerStub) GetTopologyHints

func (h *ManagerStub) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint

GetTopologyHints returns an empty TopologyHint map

func (*ManagerStub) GetWatcherHandler

func (h *ManagerStub) GetWatcherHandler() cache.PluginHandler

GetWatcherHandler returns plugin watcher interface

func (*ManagerStub) ShouldResetExtendedResourceCapacity

func (h *ManagerStub) ShouldResetExtendedResourceCapacity() bool

ShouldResetExtendedResourceCapacity returns false

func (*ManagerStub) Start

func (h *ManagerStub) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady) error

Start simply returns nil.

func (*ManagerStub) Stop

func (h *ManagerStub) Stop() error

Stop simply returns nil.

func (*ManagerStub) UpdateAllocatedDevices

func (h *ManagerStub) UpdateAllocatedDevices()

UpdateAllocatedDevices returns nothing

func (*ManagerStub) UpdatePluginResources

func (h *ManagerStub) UpdatePluginResources(node *schedulerframework.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error

UpdatePluginResources simply returns nil.

type PodReusableDevices

type PodReusableDevices map[string]map[string]sets.String

PodReusableDevices is a map by pod name of devices to reuse.

type ResourceDeviceInstances

type ResourceDeviceInstances map[string]DeviceInstances

ResourceDeviceInstances is a mapping of resource name -> DeviceInstances.

func NewResourceDeviceInstances

func NewResourceDeviceInstances() ResourceDeviceInstances

func (ResourceDeviceInstances) Clone

type Stub

type Stub struct {
	// contains filtered or unexported fields
}

Stub implementation for DevicePlugin.

func NewDevicePluginStub

func NewDevicePluginStub(devs []*pluginapi.Device, socket string, name string, preStartContainerFlag bool, getPreferredAllocationFlag bool) *Stub

NewDevicePluginStub returns an initialized DevicePlugin Stub.

func (*Stub) Allocate

Allocate does a mock allocation

func (*Stub) GetDevicePluginOptions

func (m *Stub) GetDevicePluginOptions(ctx context.Context, e *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error)

GetDevicePluginOptions returns DevicePluginOptions settings for the device plugin.

func (*Stub) GetInfo

GetInfo is the RPC which returns pluginInfo

func (*Stub) GetPreferredAllocation

GetPreferredAllocation gets the preferred allocation from a set of available devices

func (*Stub) ListAndWatch

ListAndWatch lists devices and updates that list according to the Update call

func (*Stub) NotifyRegistrationStatus

func (m *Stub) NotifyRegistrationStatus(ctx context.Context, status *watcherapi.RegistrationStatus) (*watcherapi.RegistrationStatusResponse, error)

NotifyRegistrationStatus receives the registration notification from watcher

func (*Stub) PreStartContainer

PreStartContainer resets the devices received

func (*Stub) Register

func (m *Stub) Register(kubeletEndpoint, resourceName string, pluginSockDir string) error

Register registers the device plugin for the given resourceName with Kubelet.

func (*Stub) SetAllocFunc

func (m *Stub) SetAllocFunc(f stubAllocFunc)

SetAllocFunc sets allocFunc of the device plugin

func (*Stub) SetGetPreferredAllocFunc

func (m *Stub) SetGetPreferredAllocFunc(f stubGetPreferredAllocFunc)

SetGetPreferredAllocFunc sets the preferred-allocation function (a stubGetPreferredAllocFunc) of the device plugin

func (*Stub) Start

func (m *Stub) Start() error

Start starts the gRPC server of the device plugin. Can only be called once.

func (*Stub) Stop

func (m *Stub) Stop() error

Stop stops the gRPC server. Can be called without a prior Start and more than once. Not safe to be called concurrently by different goroutines!

func (*Stub) Update

func (m *Stub) Update(devs []*pluginapi.Device)

Update allows the device plugin to send new devices through ListAndWatch

Source Files

device_plugin_stub.go endpoint.go manager.go manager_stub.go pod_devices.go topology_hints.go types.go

Directories

Path | Synopsis
pkg/kubelet/cm/devicemanager/checkpoint
Version
v1.22.9
Published
Apr 13, 2022
Platform
js/wasm
Imports
35 packages
Last checked
15 seconds ago

Tools for package owners.