TaskStats: More updates.

Signed-off-by: Shishir Mahajan <smahajan@roblox.com>
This commit is contained in:
Shishir Mahajan 2020-06-17 11:58:17 -07:00
parent 05f58c412c
commit 67a84542d6
No known key found for this signature in database
GPG Key ID: D41782E7688DEC4A
2 changed files with 62 additions and 35 deletions

View File

@ -12,6 +12,7 @@ import (
"github.com/hashicorp/consul-template/signals"
"github.com/hashicorp/go-hclog"
log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/client/stats"
"github.com/hashicorp/nomad/drivers/shared/eventer"
"github.com/hashicorp/nomad/plugins/base"
"github.com/hashicorp/nomad/plugins/drivers"
@ -314,13 +315,16 @@ func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drive
d.logger.Info(fmt.Sprintf("Successfully created task with ID: %s", task.ID()))
h := &taskHandle{
taskConfig: cfg,
procState: drivers.TaskStateRunning,
startedAt: time.Now().Round(time.Millisecond),
logger: d.logger,
container: container,
containerName: containerName,
task: task,
taskConfig: cfg,
procState: drivers.TaskStateRunning,
startedAt: time.Now().Round(time.Millisecond),
logger: d.logger,
totalCpuStats: stats.NewCpuStats(),
userCpuStats: stats.NewCpuStats(),
systemCpuStats: stats.NewCpuStats(),
container: container,
containerName: containerName,
task: task,
}
driverState := TaskState{
@ -373,14 +377,17 @@ func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
}
h := &taskHandle{
taskConfig: handle.Config,
procState: drivers.TaskStateRunning,
startedAt: taskState.StartedAt,
exitResult: &drivers.ExitResult{},
logger: d.logger,
container: container,
containerName: taskState.ContainerName,
task: task,
taskConfig: handle.Config,
procState: drivers.TaskStateRunning,
startedAt: taskState.StartedAt,
exitResult: &drivers.ExitResult{},
logger: d.logger,
totalCpuStats: stats.NewCpuStats(),
userCpuStats: stats.NewCpuStats(),
systemCpuStats: stats.NewCpuStats(),
container: container,
containerName: taskState.ContainerName,
task: task,
}
d.tasks.Set(handle.Config.ID, h)

View File

@ -15,6 +15,8 @@ import (
"github.com/containerd/typeurl"
"github.com/hashicorp/go-hclog"
uuid "github.com/hashicorp/go-uuid"
"github.com/hashicorp/nomad/client/stats"
hstats "github.com/hashicorp/nomad/helper/stats"
"github.com/hashicorp/nomad/plugins/drivers"
)
@ -25,15 +27,18 @@ type taskHandle struct {
// stateLock syncs access to all fields below
stateLock sync.RWMutex
logger hclog.Logger
taskConfig *drivers.TaskConfig
procState drivers.TaskState
startedAt time.Time
completedAt time.Time
exitResult *drivers.ExitResult
containerName string
container containerd.Container
task containerd.Task
logger hclog.Logger
taskConfig *drivers.TaskConfig
procState drivers.TaskState
startedAt time.Time
completedAt time.Time
exitResult *drivers.ExitResult
totalCpuStats *stats.CpuStats
userCpuStats *stats.CpuStats
systemCpuStats *stats.CpuStats
containerName string
container containerd.Container
task containerd.Task
}
func (h *taskHandle) TaskStatus(ctxContainerd context.Context) *drivers.TaskStatus {
@ -78,6 +83,11 @@ func (h *taskHandle) run(ctxContainerd context.Context) {
h.stateLock.Lock()
defer h.stateLock.Unlock()
// Every executor runs this init at creation for stats
if err := hstats.Init(); err != nil {
h.logger.Error("unable to initialize stats", "error", err)
}
// Sleep for 5 seconds to allow h.task.Wait() to kick in.
// TODO: Use goroutine and a channel to synchronize this, instead of sleep.
time.Sleep(5 * time.Second)
@ -233,16 +243,22 @@ func (h *taskHandle) handleStats(ch chan *drivers.TaskResourceUsage, ctx, ctxCon
// Convert containerd V1 task metrics to TaskResourceUsage.
func (h *taskHandle) getV1TaskResourceUsage(metrics *v1.Metrics) *drivers.TaskResourceUsage {
totalPercent := h.totalCpuStats.Percent(float64(metrics.CPU.Usage.Total))
cs := &drivers.CpuStats{
SystemMode: float64(metrics.CPU.Usage.Kernel),
UserMode: float64(metrics.CPU.Usage.User),
TotalTicks: float64(metrics.CPU.Usage.Total),
SystemMode: h.systemCpuStats.Percent(float64(metrics.CPU.Usage.Kernel)),
UserMode: h.userCpuStats.Percent(float64(metrics.CPU.Usage.User)),
Percent: totalPercent,
TotalTicks: h.totalCpuStats.TicksConsumed(totalPercent),
Measured: []string{"Percent", "System Mode", "User Mode"},
}
ms := &drivers.MemoryStats{
Usage: metrics.Memory.Usage.Usage,
RSS: metrics.Memory.RSS,
Cache: metrics.Memory.Cache,
RSS: metrics.Memory.RSS,
Cache: metrics.Memory.Cache,
Swap: metrics.Memory.Swap.Usage,
Usage: metrics.Memory.Usage.Usage,
MaxUsage: metrics.Memory.Usage.Max,
Measured: []string{"RSS", "Cache", "Swap", "Usage"},
}
ts := time.Now().UTC().UnixNano()
@ -257,15 +273,19 @@ func (h *taskHandle) getV1TaskResourceUsage(metrics *v1.Metrics) *drivers.TaskRe
// Convert containerd V2 task metrics to TaskResourceUsage.
func (h *taskHandle) getV2TaskResourceUsage(metrics *v2.Metrics) *drivers.TaskResourceUsage {
totalPercent := h.totalCpuStats.Percent(float64(metrics.CPU.SystemUsec + metrics.CPU.UserUsec))
cs := &drivers.CpuStats{
SystemMode: float64(metrics.CPU.SystemUsec),
UserMode: float64(metrics.CPU.UserUsec),
TotalTicks: float64(metrics.CPU.UsageUsec),
SystemMode: h.systemCpuStats.Percent(float64(metrics.CPU.SystemUsec)),
UserMode: h.userCpuStats.Percent(float64(metrics.CPU.UserUsec)),
Percent: totalPercent,
TotalTicks: h.totalCpuStats.TicksConsumed(totalPercent),
Measured: []string{"Percent", "System Mode", "User Mode"},
}
ms := &drivers.MemoryStats{
Usage: metrics.Memory.Usage,
Swap: metrics.Memory.SwapUsage,
Swap: metrics.Memory.SwapUsage,
Usage: metrics.Memory.Usage,
Measured: []string{"Swap", "Usage"},
}
ts := time.Now().UTC().UnixNano()