#ifndef Metrics_H #define Metrics_H #include #include #include #include #include #include #include #include #include #include #include #include "timer.hpp" // 指标前缀宏定义 #define METRIC_PREFIX "scheduler" class Metrics; // 配置结构体 struct MetricsConfig { std::string endpoint; std::string model_name; // 模型名称,如 "gpt-4" size_t gpu_count; // GPU数量 }; // Metrics 类,根据配置初始化 Prometheus 指标 class Metrics { public: // 构造函数传入 MetricsConfig Metrics(const MetricsConfig &config); ~Metrics(); // 禁止拷贝和赋值 Metrics(const Metrics &) = delete; Metrics &operator=(const Metrics &) = delete; std::function fn_every_sec; // 指标指针 prometheus::Gauge *uptime_ms; prometheus::Histogram *TTFT_ms; prometheus::Histogram *TBT_ms; prometheus::Histogram *schedule_time; prometheus::Gauge *throughput_query; prometheus::Gauge *throughput_generated_tokens; prometheus::Counter *generated_tokens; std::vector gpu_utilization_gauges; // 计数器家族 prometheus::Counter *event_count(const std::string &type); prometheus::Counter *query_count(const std::string &status); prometheus::Counter *batch_count(const std::string &type); private: std::shared_ptr registry_; prometheus::Exposer exposer_; // 计数器家族 prometheus::Family *event_count_family_; prometheus::Family *batch_count_family_; prometheus::Family *query_count_family_; // 线程和控制变量用于更新 uptime_ms std::thread uptime_thread_; std::atomic stop_uptime_thread_; // 启动 uptime 更新线程 void StartUptimeUpdater(); // 停止 uptime 更新线程 void StopUptimeUpdater(); // 记录程序启动时间 std::chrono::steady_clock::time_point start_time_; }; struct HistogramTimerWrapper { prometheus::Histogram *histogram; Timer timer; inline HistogramTimerWrapper(prometheus::Histogram *histogram) : histogram(histogram), timer() { timer.start(); } inline ~HistogramTimerWrapper() { histogram->Observe(timer.elapsedMs()); } }; #endif // Metrics_H