package metrics
import "k8s.io/kubernetes/pkg/kubelet/metrics"
Index ¶
- Constants
- Variables
- func GetGather() metrics.Gatherer
- func GetImageSizeBucket(sizeInBytes uint64) string
- func Register(collectors ...metrics.StableCollector)
- func SetNodeName(name types.NodeName)
- func SinceInSeconds(start time.Time) float64
Constants ¶
// Metric names, subsystem names, label keys and label values used by the
// kubelet metrics declared in this package.
const (
	FirstNetworkPodStartSLIDurationKey = "first_network_pod_start_sli_duration_seconds"

	// Subsystem names and node identity keys.
	KubeletSubsystem = "kubelet"
	DRASubsystem     = "dra"
	NodeNameKey      = "node_name"
	NodeLabelKey     = "node"

	// Metrics keys of node startup phases.
	NodeStartupPreKubeletKey       = "node_startup_pre_kubelet_duration_seconds"
	NodeStartupPreRegistrationKey  = "node_startup_pre_registration_duration_seconds"
	NodeStartupRegistrationKey     = "node_startup_registration_duration_seconds"
	NodeStartupPostRegistrationKey = "node_startup_post_registration_duration_seconds"
	NodeStartupKey                 = "node_startup_duration_seconds"

	// Metrics keys of pod worker, pod start and cgroup manager durations.
	PodWorkerDurationKey       = "pod_worker_duration_seconds"
	PodStartDurationKey        = "pod_start_duration_seconds"
	PodStartSLIDurationKey     = "pod_start_sli_duration_seconds"
	PodStartTotalDurationKey   = "pod_start_total_duration_seconds"
	CgroupManagerOperationsKey = "cgroup_manager_duration_seconds"
	PodWorkerStartDurationKey  = "pod_worker_start_duration_seconds"
	PodStatusSyncDurationKey   = "pod_status_sync_duration_seconds"

	// Metrics keys of PLEG and evented PLEG.
	PLEGRelistDurationKey     = "pleg_relist_duration_seconds"
	PLEGDiscardEventsKey      = "pleg_discard_events"
	PLEGRelistIntervalKey     = "pleg_relist_interval_seconds"
	PLEGLastSeenKey           = "pleg_last_seen_seconds"
	EventedPLEGConnErrKey     = "evented_pleg_connection_error_count"
	EventedPLEGConnKey        = "evented_pleg_connection_success_count"
	EventedPLEGConnLatencyKey = "evented_pleg_connection_latency_seconds"

	// Metrics keys of evictions and preemptions.
	EvictionsKey        = "evictions"
	EvictionStatsAgeKey = "eviction_stats_age_seconds"
	PreemptionsKey      = "preemptions"

	// Metrics keys of volume stats.
	VolumeStatsCapacityBytesKey        = "volume_stats_capacity_bytes"
	VolumeStatsAvailableBytesKey       = "volume_stats_available_bytes"
	VolumeStatsUsedBytesKey            = "volume_stats_used_bytes"
	VolumeStatsInodesKey               = "volume_stats_inodes"
	VolumeStatsInodesFreeKey           = "volume_stats_inodes_free"
	VolumeStatsInodesUsedKey           = "volume_stats_inodes_used"
	VolumeStatsHealthStatusAbnormalKey = "volume_stats_health_status_abnormal"

	// Metrics keys of pod and container counts, image pulls and cgroup version.
	RunningPodsKey             = "running_pods"
	RunningContainersKey       = "running_containers"
	DesiredPodCountKey         = "desired_pods"
	ActivePodCountKey          = "active_pods"
	MirrorPodCountKey          = "mirror_pods"
	WorkingPodCountKey         = "working_pods"
	OrphanedRuntimePodTotalKey = "orphaned_runtime_pods_total"
	RestartedPodTotalKey       = "restarted_pods_total"
	ImagePullDurationKey       = "image_pull_duration_seconds"
	CgroupVersionKey           = "cgroup_version"

	// Metrics keys of remote runtime operations
	RuntimeOperationsKey         = "runtime_operations_total"
	RuntimeOperationsDurationKey = "runtime_operations_duration_seconds"
	RuntimeOperationsErrorsKey   = "runtime_operations_errors_total"

	// Metrics keys of device plugin operations
	DevicePluginRegistrationCountKey  = "device_plugin_registration_total"
	DevicePluginAllocationDurationKey = "device_plugin_alloc_duration_seconds"

	// Metrics keys of pod resources operations
	PodResourcesEndpointRequestsTotalKey          = "pod_resources_endpoint_requests_total"
	PodResourcesEndpointRequestsListKey           = "pod_resources_endpoint_requests_list"
	PodResourcesEndpointRequestsGetAllocatableKey = "pod_resources_endpoint_requests_get_allocatable"
	PodResourcesEndpointErrorsListKey             = "pod_resources_endpoint_errors_list"
	PodResourcesEndpointErrorsGetAllocatableKey   = "pod_resources_endpoint_errors_get_allocatable"
	PodResourcesEndpointRequestsGetKey            = "pod_resources_endpoint_requests_get"
	PodResourcesEndpointErrorsGetKey              = "pod_resources_endpoint_errors_get"

	// Metrics keys for RuntimeClass
	RunPodSandboxDurationKey = "run_podsandbox_duration_seconds"
	RunPodSandboxErrorsKey   = "run_podsandbox_errors_total"

	// Metrics to keep track of total number of Pods and Containers started
	StartedPodsTotalKey             = "started_pods_total"
	StartedPodsErrorsTotalKey       = "started_pods_errors_total"
	StartedContainersTotalKey       = "started_containers_total"
	StartedContainersErrorsTotalKey = "started_containers_errors_total"

	// Metrics to track HostProcess container usage by this kubelet
	StartedHostProcessContainersTotalKey       = "started_host_process_containers_total"
	StartedHostProcessContainersErrorsTotalKey = "started_host_process_containers_errors_total"

	// Metrics to track ephemeral container usage by this kubelet
	ManagedEphemeralContainersKey = "managed_ephemeral_containers"

	// Metrics to track the CPU manager behavior
	CPUManagerPinningRequestsTotalKey         = "cpu_manager_pinning_requests_total"
	CPUManagerPinningErrorsTotalKey           = "cpu_manager_pinning_errors_total"
	CPUManagerSharedPoolSizeMilliCoresKey     = "cpu_manager_shared_pool_size_millicores"
	CPUManagerExclusiveCPUsAllocationCountKey = "cpu_manager_exclusive_cpu_allocation_count"
	CPUManagerAllocationPerNUMAKey            = "cpu_manager_allocation_per_numa"

	// Metrics to track the Memory manager behavior
	MemoryManagerPinningRequestsTotalKey = "memory_manager_pinning_requests_total"
	MemoryManagerPinningErrorsTotalKey   = "memory_manager_pinning_errors_total"

	// Metrics to track the Topology manager behavior
	TopologyManagerAdmissionRequestsTotalKey = "topology_manager_admission_requests_total"
	TopologyManagerAdmissionErrorsTotalKey   = "topology_manager_admission_errors_total"
	TopologyManagerAdmissionDurationKey      = "topology_manager_admission_duration_ms"

	// Metric for tracking garbage collected images
	ImageGarbageCollectedTotalKey = "image_garbage_collected_total"

	// Metric for tracking alignment of compute resources
	ContainerAlignedComputeResourcesNameKey          = "container_aligned_compute_resources_count"
	ContainerAlignedComputeResourcesFailureNameKey   = "container_aligned_compute_resources_failure_count"
	ContainerAlignedComputeResourcesScopeLabelKey    = "scope"
	ContainerAlignedComputeResourcesBoundaryLabelKey = "boundary"

	// Metric keys for DRA operations
	DRAOperationsDurationKey     = "operations_duration_seconds"
	DRAGRPCOperationsDurationKey = "grpc_operations_duration_seconds"

	// Values used in metric labels
	Container           = "container"
	InitContainer       = "init_container"
	EphemeralContainer  = "ephemeral_container"
	AlignScopePod       = "pod"
	AlignScopeContainer = "container"
	AlignedPhysicalCPU  = "physical_cpu"
	AlignedNUMANode     = "numa_node"
	AlignedUncoreCache  = "uncore_cache"

	// Metrics to track kubelet admission rejections.
	AdmissionRejectionsTotalKey = "admission_rejections_total"

	// Image Volume metrics
	ImageVolumeRequestedTotalKey      = "image_volume_requested_total"
	ImageVolumeMountedSucceedTotalKey = "image_volume_mounted_succeed_total"
	ImageVolumeMountedErrorsTotalKey  = "image_volume_mounted_errors_total"
)
This const block defines the metric names for the kubelet metrics.
Variables ¶
var ( NodeName = metrics.NewGaugeVec( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: NodeNameKey, Help: "The node's name. The count is always 1.", StabilityLevel: metrics.ALPHA, }, []string{NodeLabelKey}, ) ContainersPerPodCount = metrics.NewHistogram( &metrics.HistogramOpts{ Subsystem: KubeletSubsystem, Name: "containers_per_pod_count", Help: "The number of containers per pod.", Buckets: metrics.ExponentialBuckets(1, 2, 5), StabilityLevel: metrics.ALPHA, }, ) PodWorkerDuration = metrics.NewHistogramVec( &metrics.HistogramOpts{ Subsystem: KubeletSubsystem, Name: PodWorkerDurationKey, Help: "Duration in seconds to sync a single pod. Broken down by operation type: create, update, or sync", Buckets: metrics.DefBuckets, StabilityLevel: metrics.ALPHA, }, []string{"operation_type"}, ) PodStartDuration = metrics.NewHistogram( &metrics.HistogramOpts{ Subsystem: KubeletSubsystem, Name: PodStartDurationKey, Help: "Duration in seconds from kubelet seeing a pod for the first time to the pod starting to run", Buckets: podStartupDurationBuckets, StabilityLevel: metrics.ALPHA, }, ) PodStartSLIDuration = metrics.NewHistogramVec( &metrics.HistogramOpts{ Subsystem: KubeletSubsystem, Name: PodStartSLIDurationKey, Help: "" /* 203 byte string literal not displayed */, Buckets: podStartupDurationBuckets, StabilityLevel: metrics.ALPHA, }, []string{}, ) PodStartTotalDuration = metrics.NewHistogramVec( &metrics.HistogramOpts{ Subsystem: KubeletSubsystem, Name: PodStartTotalDurationKey, Help: "" /* 218 byte string literal not displayed */, Buckets: podStartupDurationBuckets, StabilityLevel: metrics.ALPHA, }, []string{}, ) FirstNetworkPodStartSLIDuration = metrics.NewGauge( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: FirstNetworkPodStartSLIDurationKey, Help: "" /* 219 byte string literal not displayed */, StabilityLevel: metrics.INTERNAL, }, ) CgroupManagerDuration = metrics.NewHistogramVec( &metrics.HistogramOpts{ Subsystem: KubeletSubsystem, Name: 
CgroupManagerOperationsKey, Help: "Duration in seconds for cgroup manager operations. Broken down by method.", Buckets: metrics.DefBuckets, StabilityLevel: metrics.ALPHA, }, []string{"operation_type"}, ) PodWorkerStartDuration = metrics.NewHistogram( &metrics.HistogramOpts{ Subsystem: KubeletSubsystem, Name: PodWorkerStartDurationKey, Help: "Duration in seconds from kubelet seeing a pod to starting a worker.", Buckets: metrics.DefBuckets, StabilityLevel: metrics.ALPHA, }, ) PodStatusSyncDuration = metrics.NewHistogram( &metrics.HistogramOpts{ Subsystem: KubeletSubsystem, Name: PodStatusSyncDurationKey, Help: "" /* 214 byte string literal not displayed */, Buckets: []float64{0.010, 0.050, 0.100, 0.500, 1, 5, 10, 20, 30, 45, 60}, StabilityLevel: metrics.ALPHA, }, ) PLEGRelistDuration = metrics.NewHistogram( &metrics.HistogramOpts{ Subsystem: KubeletSubsystem, Name: PLEGRelistDurationKey, Help: "Duration in seconds for relisting pods in PLEG.", Buckets: metrics.DefBuckets, StabilityLevel: metrics.ALPHA, }, ) PLEGDiscardEvents = metrics.NewCounter( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: PLEGDiscardEventsKey, Help: "The number of discard events in PLEG.", StabilityLevel: metrics.ALPHA, }, ) PLEGRelistInterval = metrics.NewHistogram( &metrics.HistogramOpts{ Subsystem: KubeletSubsystem, Name: PLEGRelistIntervalKey, Help: "Interval in seconds between relisting in PLEG.", Buckets: metrics.DefBuckets, StabilityLevel: metrics.ALPHA, }, ) PLEGLastSeen = metrics.NewGauge( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: PLEGLastSeenKey, Help: "Timestamp in seconds when PLEG was last seen active.", StabilityLevel: metrics.ALPHA, }, ) EventedPLEGConnErr = metrics.NewCounter( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: EventedPLEGConnErrKey, Help: "The number of errors encountered during the establishment of streaming connection with the CRI runtime.", StabilityLevel: metrics.ALPHA, }, ) EventedPLEGConn = metrics.NewCounter( 
&metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: EventedPLEGConnKey, Help: "The number of times a streaming client was obtained to receive CRI Events.", StabilityLevel: metrics.ALPHA, }, ) EventedPLEGConnLatency = metrics.NewHistogram( &metrics.HistogramOpts{ Subsystem: KubeletSubsystem, Name: EventedPLEGConnLatencyKey, Help: "The latency of streaming connection with the CRI runtime, measured in seconds.", Buckets: metrics.DefBuckets, StabilityLevel: metrics.ALPHA, }, ) RuntimeOperations = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: RuntimeOperationsKey, Help: "Cumulative number of runtime operations by operation type.", StabilityLevel: metrics.ALPHA, }, []string{"operation_type"}, ) RuntimeOperationsDuration = metrics.NewHistogramVec( &metrics.HistogramOpts{ Subsystem: KubeletSubsystem, Name: RuntimeOperationsDurationKey, Help: "Duration in seconds of runtime operations. Broken down by operation type.", Buckets: metrics.ExponentialBuckets(.005, 2.5, 14), StabilityLevel: metrics.ALPHA, }, []string{"operation_type"}, ) RuntimeOperationsErrors = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: RuntimeOperationsErrorsKey, Help: "Cumulative number of runtime operation errors by operation type.", StabilityLevel: metrics.ALPHA, }, []string{"operation_type"}, ) Evictions = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: EvictionsKey, Help: "Cumulative number of pod evictions by eviction signal", StabilityLevel: metrics.ALPHA, }, []string{"eviction_signal"}, ) EvictionStatsAge = metrics.NewHistogramVec( &metrics.HistogramOpts{ Subsystem: KubeletSubsystem, Name: EvictionStatsAgeKey, Help: "Time between when stats are collected, and when pod is evicted based on those stats by eviction signal", Buckets: metrics.DefBuckets, StabilityLevel: metrics.ALPHA, }, []string{"eviction_signal"}, ) Preemptions = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: 
KubeletSubsystem, Name: PreemptionsKey, Help: "Cumulative number of pod preemptions by preemption resource", StabilityLevel: metrics.ALPHA, }, []string{"preemption_signal"}, ) DevicePluginRegistrationCount = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: DevicePluginRegistrationCountKey, Help: "Cumulative number of device plugin registrations. Broken down by resource name.", StabilityLevel: metrics.ALPHA, }, []string{"resource_name"}, ) DevicePluginAllocationDuration = metrics.NewHistogramVec( &metrics.HistogramOpts{ Subsystem: KubeletSubsystem, Name: DevicePluginAllocationDurationKey, Help: "Duration in seconds to serve a device plugin Allocation request. Broken down by resource name.", Buckets: metrics.DefBuckets, StabilityLevel: metrics.ALPHA, }, []string{"resource_name"}, ) PodResourcesEndpointRequestsTotalCount = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: PodResourcesEndpointRequestsTotalKey, Help: "Cumulative number of requests to the PodResource endpoint. Broken down by server api version.", StabilityLevel: metrics.ALPHA, }, []string{"server_api_version"}, ) PodResourcesEndpointRequestsListCount = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: PodResourcesEndpointRequestsListKey, Help: "Number of requests to the PodResource List endpoint. Broken down by server api version.", StabilityLevel: metrics.ALPHA, }, []string{"server_api_version"}, ) PodResourcesEndpointRequestsGetAllocatableCount = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: PodResourcesEndpointRequestsGetAllocatableKey, Help: "Number of requests to the PodResource GetAllocatableResources endpoint. 
Broken down by server api version.", StabilityLevel: metrics.ALPHA, }, []string{"server_api_version"}, ) PodResourcesEndpointErrorsListCount = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: PodResourcesEndpointErrorsListKey, Help: "Number of requests to the PodResource List endpoint which returned error. Broken down by server api version.", StabilityLevel: metrics.ALPHA, }, []string{"server_api_version"}, ) PodResourcesEndpointErrorsGetAllocatableCount = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: PodResourcesEndpointErrorsGetAllocatableKey, Help: "" /* 129 byte string literal not displayed */, StabilityLevel: metrics.ALPHA, }, []string{"server_api_version"}, ) PodResourcesEndpointRequestsGetCount = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: PodResourcesEndpointRequestsGetKey, Help: "Number of requests to the PodResource Get endpoint. Broken down by server api version.", StabilityLevel: metrics.ALPHA, }, []string{"server_api_version"}, ) PodResourcesEndpointErrorsGetCount = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: PodResourcesEndpointErrorsGetKey, Help: "Number of requests to the PodResource Get endpoint which returned error. Broken down by server api version.", StabilityLevel: metrics.ALPHA, }, []string{"server_api_version"}, ) RunPodSandboxDuration = metrics.NewHistogramVec( &metrics.HistogramOpts{ Subsystem: KubeletSubsystem, Name: RunPodSandboxDurationKey, Help: "Duration in seconds of the run_podsandbox operations. 
Broken down by RuntimeClass.Handler.", Buckets: metrics.DefBuckets, StabilityLevel: metrics.ALPHA, }, []string{"runtime_handler"}, ) RunPodSandboxErrors = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: RunPodSandboxErrorsKey, Help: "Cumulative number of the run_podsandbox operation errors by RuntimeClass.Handler.", StabilityLevel: metrics.ALPHA, }, []string{"runtime_handler"}, ) RunningPodCount = metrics.NewGauge( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: RunningPodsKey, Help: "Number of pods that have a running pod sandbox", StabilityLevel: metrics.ALPHA, }, ) RunningContainerCount = metrics.NewGaugeVec( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: RunningContainersKey, Help: "Number of containers currently running", StabilityLevel: metrics.ALPHA, }, []string{"container_state"}, ) DesiredPodCount = metrics.NewGaugeVec( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: DesiredPodCountKey, Help: "The number of pods the kubelet is being instructed to run. 
static is true if the pod is not from the apiserver.", StabilityLevel: metrics.ALPHA, }, []string{"static"}, ) ActivePodCount = metrics.NewGaugeVec( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: ActivePodCountKey, Help: "" /* 158 byte string literal not displayed */, StabilityLevel: metrics.ALPHA, }, []string{"static"}, ) MirrorPodCount = metrics.NewGauge( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: MirrorPodCountKey, Help: "The number of mirror pods the kubelet will try to create (one per admitted static pod)", StabilityLevel: metrics.ALPHA, }, ) WorkingPodCount = metrics.NewGaugeVec( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: WorkingPodCountKey, Help: "" /* 324 byte string literal not displayed */, StabilityLevel: metrics.ALPHA, }, []string{"lifecycle", "config", "static"}, ) OrphanedRuntimePodTotal = metrics.NewCounter( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: OrphanedRuntimePodTotalKey, Help: "" /* 253 byte string literal not displayed */, StabilityLevel: metrics.ALPHA, }, ) RestartedPodTotal = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: RestartedPodTotalKey, Help: "" /* 193 byte string literal not displayed */, StabilityLevel: metrics.ALPHA, }, []string{"static"}, ) StartedPodsTotal = metrics.NewCounter( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: StartedPodsTotalKey, Help: "Cumulative number of pods started", StabilityLevel: metrics.ALPHA, }, ) StartedPodsErrorsTotal = metrics.NewCounter( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: StartedPodsErrorsTotalKey, Help: "Cumulative number of errors when starting pods", StabilityLevel: metrics.ALPHA, }, ) StartedContainersTotal = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: StartedContainersTotalKey, Help: "Cumulative number of containers started", StabilityLevel: metrics.ALPHA, }, []string{"container_type"}, ) StartedContainersErrorsTotal = metrics.NewCounterVec( 
&metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: StartedContainersErrorsTotalKey, Help: "Cumulative number of errors when starting containers", StabilityLevel: metrics.ALPHA, }, []string{"container_type", "code"}, ) StartedHostProcessContainersTotal = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: StartedHostProcessContainersTotalKey, Help: "Cumulative number of hostprocess containers started. This metric will only be collected on Windows.", StabilityLevel: metrics.ALPHA, }, []string{"container_type"}, ) StartedHostProcessContainersErrorsTotal = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: StartedHostProcessContainersErrorsTotalKey, Help: "Cumulative number of errors when starting hostprocess containers. This metric will only be collected on Windows.", StabilityLevel: metrics.ALPHA, }, []string{"container_type", "code"}, ) ManagedEphemeralContainers = metrics.NewGauge( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: ManagedEphemeralContainersKey, Help: "Current number of ephemeral containers in pods managed by this kubelet.", StabilityLevel: metrics.ALPHA, }, ) GracefulShutdownStartTime = metrics.NewGauge( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: "graceful_shutdown_start_time_seconds", Help: "Last graceful shutdown start time since unix epoch in seconds", StabilityLevel: metrics.ALPHA, }, ) GracefulShutdownEndTime = metrics.NewGauge( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: "graceful_shutdown_end_time_seconds", Help: "Last graceful shutdown end time since unix epoch in seconds", StabilityLevel: metrics.ALPHA, }, ) LifecycleHandlerHTTPFallbacks = metrics.NewCounter( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: "lifecycle_handler_http_fallbacks_total", Help: "The number of times lifecycle handlers successfully fell back to http from https.", StabilityLevel: metrics.ALPHA, }, ) CPUManagerPinningRequestsTotal = metrics.NewCounter( 
&metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: CPUManagerPinningRequestsTotalKey, Help: "The number of cpu core allocations which required pinning.", StabilityLevel: metrics.ALPHA, }, ) CPUManagerPinningErrorsTotal = metrics.NewCounter( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: CPUManagerPinningErrorsTotalKey, Help: "The number of cpu core allocations which required pinning failed.", StabilityLevel: metrics.ALPHA, }, ) = metrics.NewGauge( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: CPUManagerSharedPoolSizeMilliCoresKey, Help: "The size of the shared CPU pool for non-guaranteed QoS pods, in millicores.", StabilityLevel: metrics.ALPHA, }, ) CPUManagerExclusiveCPUsAllocationCount = metrics.NewGauge( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: CPUManagerExclusiveCPUsAllocationCountKey, Help: "The total number of CPUs exclusively allocated to containers running on this node", StabilityLevel: metrics.ALPHA, }, ) CPUManagerAllocationPerNUMA = metrics.NewGaugeVec( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: CPUManagerAllocationPerNUMAKey, Help: "Number of CPUs allocated per NUMA node", StabilityLevel: metrics.ALPHA, }, []string{AlignedNUMANode}, ) ContainerAlignedComputeResources = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: ContainerAlignedComputeResourcesNameKey, Help: "Cumulative number of aligned compute resources allocated to containers by alignment type.", StabilityLevel: metrics.ALPHA, }, []string{ContainerAlignedComputeResourcesScopeLabelKey, ContainerAlignedComputeResourcesBoundaryLabelKey}, ) ContainerAlignedComputeResourcesFailure = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: ContainerAlignedComputeResourcesFailureNameKey, Help: "Cumulative number of failures to allocate aligned compute resources to containers by alignment type.", StabilityLevel: metrics.ALPHA, }, []string{ContainerAlignedComputeResourcesScopeLabelKey, 
ContainerAlignedComputeResourcesBoundaryLabelKey}, ) MemoryManagerPinningRequestTotal = metrics.NewCounter( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: MemoryManagerPinningRequestsTotalKey, Help: "The number of memory pages allocations which required pinning.", StabilityLevel: metrics.ALPHA, }) MemoryManagerPinningErrorsTotal = metrics.NewCounter( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: MemoryManagerPinningErrorsTotalKey, Help: "The number of memory pages allocations which required pinning that failed.", StabilityLevel: metrics.ALPHA, }, ) TopologyManagerAdmissionRequestsTotal = metrics.NewCounter( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: TopologyManagerAdmissionRequestsTotalKey, Help: "The number of admission requests where resources have to be aligned.", StabilityLevel: metrics.ALPHA, }, ) TopologyManagerAdmissionErrorsTotal = metrics.NewCounter( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: TopologyManagerAdmissionErrorsTotalKey, Help: "The number of admission request failures where resources could not be aligned.", StabilityLevel: metrics.ALPHA, }, ) TopologyManagerAdmissionDuration = metrics.NewHistogram( &metrics.HistogramOpts{ Subsystem: KubeletSubsystem, Name: TopologyManagerAdmissionDurationKey, Help: "Duration in milliseconds to serve a pod admission request.", Buckets: metrics.ExponentialBuckets(.05, 2, 15), StabilityLevel: metrics.ALPHA, }, ) OrphanPodCleanedVolumes = metrics.NewGauge( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: orphanPodCleanedVolumesKey, Help: "The total number of orphaned Pods whose volumes were cleaned in the last periodic sweep.", StabilityLevel: metrics.ALPHA, }, ) OrphanPodCleanedVolumesErrors = metrics.NewGauge( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: orphanPodCleanedVolumesErrorsKey, Help: "The number of orphaned Pods whose volumes failed to be cleaned in the last periodic sweep.", StabilityLevel: metrics.ALPHA, }, ) NodeStartupPreKubeletDuration 
= metrics.NewGauge( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: NodeStartupPreKubeletKey, Help: "Duration in seconds of node startup before kubelet starts.", StabilityLevel: metrics.ALPHA, }, ) NodeStartupPreRegistrationDuration = metrics.NewGauge( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: NodeStartupPreRegistrationKey, Help: "Duration in seconds of node startup before registration.", StabilityLevel: metrics.ALPHA, }, ) NodeStartupRegistrationDuration = metrics.NewGauge( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: NodeStartupRegistrationKey, Help: "Duration in seconds of node startup during registration.", StabilityLevel: metrics.ALPHA, }, ) NodeStartupPostRegistrationDuration = metrics.NewGauge( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: NodeStartupPostRegistrationKey, Help: "Duration in seconds of node startup after registration.", StabilityLevel: metrics.ALPHA, }, ) NodeStartupDuration = metrics.NewGauge( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: NodeStartupKey, Help: "Duration in seconds of node startup in total.", StabilityLevel: metrics.ALPHA, }, ) ImageGarbageCollectedTotal = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: ImageGarbageCollectedTotalKey, Help: "Total number of images garbage collected by the kubelet, whether through disk usage or image age.", StabilityLevel: metrics.ALPHA, }, []string{"reason"}, ) ImagePullDuration = metrics.NewHistogramVec( &metrics.HistogramOpts{ Subsystem: KubeletSubsystem, Name: ImagePullDurationKey, Help: "Duration in seconds to pull an image.", Buckets: imagePullDurationBuckets, StabilityLevel: metrics.ALPHA, }, []string{"image_size_in_bytes"}, ) LifecycleHandlerSleepTerminated = metrics.NewCounter( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: "sleep_action_terminated_early_total", Help: "The number of times lifecycle sleep handler got terminated before it finishes", StabilityLevel: metrics.ALPHA, }, ) CgroupVersion = 
metrics.NewGauge( &metrics.GaugeOpts{ Subsystem: KubeletSubsystem, Name: CgroupVersionKey, Help: "cgroup version on the hosts.", StabilityLevel: metrics.ALPHA, }, ) DRAOperationsDuration = metrics.NewHistogramVec( &metrics.HistogramOpts{ Subsystem: DRASubsystem, Name: DRAOperationsDurationKey, Help: "" /* 339 byte string literal not displayed */, Buckets: DRADurationBuckets, StabilityLevel: metrics.ALPHA, }, []string{"operation_name", "is_error"}, ) DRAGRPCOperationsDuration = metrics.NewHistogramVec( &metrics.HistogramOpts{ Subsystem: DRASubsystem, Name: DRAGRPCOperationsDurationKey, Help: "Duration in seconds of the DRA gRPC operations", Buckets: DRADurationBuckets, StabilityLevel: metrics.ALPHA, }, []string{"driver_name", "method_name", "grpc_status_code"}, ) AdmissionRejectionsTotal = metrics.NewCounterVec( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: AdmissionRejectionsTotalKey, Help: "Cumulative number pod admission rejections by the Kubelet.", StabilityLevel: metrics.ALPHA, }, []string{"reason"}, ) ImageVolumeRequestedTotal = metrics.NewCounter( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: ImageVolumeRequestedTotalKey, Help: "Number of requested image volumes.", StabilityLevel: metrics.ALPHA, }, ) ImageVolumeMountedSucceedTotal = metrics.NewCounter( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: ImageVolumeMountedSucceedTotalKey, Help: "Number of successful image volume mounts.", StabilityLevel: metrics.ALPHA, }, ) ImageVolumeMountedErrorsTotal = metrics.NewCounter( &metrics.CounterOpts{ Subsystem: KubeletSubsystem, Name: ImageVolumeMountedErrorsTotalKey, Help: "Number of failed image volume mounts.", StabilityLevel: metrics.ALPHA, }, ) )
var ( // DRADurationBuckets is the bucket boundaries for DRA operation duration metrics // DRAOperationsDuration and DRAGRPCOperationsDuration defined below in this file. // The buckets max value 40 is based on the 45sec max gRPC timeout value defined // for the DRA gRPC calls in the pkg/kubelet/cm/dra/plugin/registration.go DRADurationBuckets = metrics.ExponentialBucketsRange(.1, 40, 15) )
Functions ¶
func GetGather ¶
GetGather returns the gatherer. It is used by test cases outside the current package.
func GetImageSizeBucket ¶
func Register ¶
func Register(collectors ...metrics.StableCollector)
Register registers all metrics.
func SetNodeName ¶
SetNodeName sets the NodeName Gauge to 1.
func SinceInSeconds ¶
SinceInSeconds gets the time since the specified start in seconds.
Source Files ¶
metrics.go
Directories ¶
Path | Synopsis |
---|---|
pkg/kubelet/metrics/collectors |
- Version
- v1.33.0 (latest)
- Published
- Apr 23, 2025
- Platform
- linux/amd64
- Imports
- 7 packages
- Last checked
- 3 hours ago –
Tools for package owners.