mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-15 09:39:42 +00:00
85 lines
2.3 KiB
C++
85 lines
2.3 KiB
C++
#ifndef Metrics_H
|
|
#define Metrics_H
|
|
|
|
#include <prometheus/counter.h>
#include <prometheus/exposer.h>
#include <prometheus/gauge.h>
#include <prometheus/histogram.h>
#include <prometheus/registry.h>

#include <atomic>
#include <chrono>
#include <functional>
#include <memory>
#include <string>
#include <thread>
#include <vector>

#include "timer.hpp"
|
|
// Metric name prefix.
// Kept as a preprocessor macro (rather than a constexpr variable) so that it
// can still be joined with an adjacent string literal at compile time,
// e.g. METRIC_PREFIX "_uptime_ms".
#define METRIC_PREFIX "scheduler"

// Forward declaration of the Metrics class.
class Metrics;
|
|
|
|
// Configuration struct passed to the Metrics constructor.
//
// Remains an aggregate, so brace initialization such as
// MetricsConfig{"host:port", "gpt-4", 8} keeps working.
struct MetricsConfig {
  // NOTE(review): presumably the address the Prometheus exposer listens on;
  // confirm against the Metrics constructor implementation.
  std::string endpoint;

  // Model name, e.g. "gpt-4".
  std::string model_name;

  // Number of GPUs. Defaults to 0 so that a default-constructed config never
  // carries an indeterminate value.
  size_t gpu_count = 0;
};
|
|
|
|
// Metrics 类,根据配置初始化 Prometheus 指标
|
|
class Metrics {
|
|
public:
|
|
// 构造函数传入 MetricsConfig
|
|
Metrics(const MetricsConfig& config);
|
|
~Metrics();
|
|
|
|
// 禁止拷贝和赋值
|
|
Metrics(const Metrics&) = delete;
|
|
Metrics& operator=(const Metrics&) = delete;
|
|
|
|
std::function<void(Metrics*)> fn_every_sec;
|
|
|
|
// 指标指针
|
|
prometheus::Gauge* uptime_ms;
|
|
prometheus::Histogram* TTFT_ms;
|
|
prometheus::Histogram* TBT_ms;
|
|
prometheus::Histogram* schedule_time;
|
|
prometheus::Gauge* throughput_query;
|
|
prometheus::Gauge* throughput_generated_tokens;
|
|
prometheus::Counter* generated_tokens;
|
|
std::vector<prometheus::Gauge*> gpu_utilization_gauges;
|
|
|
|
// 计数器家族
|
|
prometheus::Counter* event_count(const std::string& type);
|
|
prometheus::Counter* query_count(const std::string& status);
|
|
prometheus::Counter* batch_count(const std::string& type);
|
|
|
|
private:
|
|
std::shared_ptr<prometheus::Registry> registry_;
|
|
prometheus::Exposer exposer_;
|
|
|
|
// 计数器家族
|
|
prometheus::Family<prometheus::Counter>* event_count_family_;
|
|
prometheus::Family<prometheus::Counter>* batch_count_family_;
|
|
prometheus::Family<prometheus::Counter>* query_count_family_;
|
|
|
|
// 线程和控制变量用于更新 uptime_ms
|
|
std::thread uptime_thread_;
|
|
std::atomic<bool> stop_uptime_thread_;
|
|
|
|
// 启动 uptime 更新线程
|
|
void StartUptimeUpdater();
|
|
// 停止 uptime 更新线程
|
|
void StopUptimeUpdater();
|
|
|
|
// 记录程序启动时间
|
|
std::chrono::steady_clock::time_point start_time_;
|
|
};
|
|
|
|
struct HistogramTimerWrapper {
|
|
prometheus::Histogram* histogram;
|
|
Timer timer;
|
|
inline HistogramTimerWrapper(prometheus::Histogram* histogram) : histogram(histogram), timer() { timer.start(); }
|
|
inline ~HistogramTimerWrapper() { histogram->Observe(timer.elapsedMs()); }
|
|
};
|
|
|
|
#endif // Metrics_H
|