Seregon/StratoSDK

StratoSDK is a framework with a declarative approach similar to Flutter/React, written and designed entirely for Rust.

Rust/27.3 KB/No license
crates/strato-renderer/src/profiler.rs
StratoSDK / crates / strato-renderer / src / profiler.rs
1//! Advanced performance profiling and monitoring system
2//!
3//! This module provides comprehensive performance monitoring including:
4//! - Real-time GPU and CPU performance metrics
5//! - Frame timing analysis and bottleneck detection
6//! - Memory usage tracking and leak detection
7//! - Resource utilization monitoring
8//! - Performance regression detection
9//! - Automated performance optimization suggestions
10//! - Historical performance data analysis
11//! - Multi-threaded profiling support
12 
13use anyhow::Result;
14use parking_lot::RwLock;
15use serde::{Deserialize, Serialize};
16use std::collections::{HashMap, VecDeque};
17use std::sync::{
18 atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering},
19 Arc, Mutex,
20};
21use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
22use strato_core::inspector;
23use thread_local::ThreadLocal;
24use tracing::{debug, info, instrument, warn};
25use wgpu::{
26 Buffer, BufferDescriptor, BufferUsages, CommandEncoder, Features, Maintain, MapMode, QuerySet,
27 QuerySetDescriptor, QueryType,
28};
29 
30use crate::device::ManagedDevice;
31use crate::resources::ResourceHandle;
32 
33/// Performance metric types
34#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
35pub enum MetricType {
36 /// Frame timing metrics
37 FrameTime,
38 /// GPU utilization
39 GpuUtilization,
40 /// Memory usage
41 MemoryUsage,
42 /// Draw call count
43 DrawCalls,
44 /// Vertex count
45 VertexCount,
46 /// Texture memory
47 TextureMemory,
48 /// Buffer memory
49 BufferMemory,
50 /// Pipeline switches
51 PipelineSwitches,
52 /// Render pass count
53 RenderPasses,
54 /// Command buffer submissions
55 CommandSubmissions,
56}
57 
58/// Performance sample
59#[derive(Debug, Clone, Serialize, Deserialize)]
60pub struct PerformanceSample {
61 pub timestamp: u64,
62 pub metric_type: MetricType,
63 pub value: f64,
64 pub thread_id: u32,
65 pub frame_id: u64,
66}
67 
/// Timing and workload counters collected for one frame.
#[derive(Debug, Clone)]
pub struct FrameTiming {
    /// Identifier of the frame these numbers belong to.
    pub frame_id: u64,
    /// Instant at which the frame started.
    pub start_time: Instant,
    /// Instant at which the frame ended.
    pub end_time: Instant,
    /// Time attributed to CPU work.
    pub cpu_time: Duration,
    /// Time attributed to GPU work.
    pub gpu_time: Duration,
    /// Time attributed to presentation.
    pub present_time: Duration,
    /// Number of draw calls issued.
    pub draw_calls: u32,
    /// Number of vertices processed.
    pub vertices: u64,
    /// Number of triangles processed.
    pub triangles: u64,
    /// Number of render passes.
    pub render_passes: u32,
    /// Number of pipeline switches.
    pub pipeline_switches: u32,
}

impl Default for FrameTiming {
    /// An empty record: frame 0, start and end both set to the moment of the call, and every
    /// duration and counter at zero.
    fn default() -> Self {
        // A single reading is shared so that `start_time == end_time` holds exactly.
        let created_at = Instant::now();
        let no_time = Duration::ZERO;

        FrameTiming {
            frame_id: 0,
            start_time: created_at,
            end_time: created_at,
            cpu_time: no_time,
            gpu_time: no_time,
            present_time: no_time,
            draw_calls: 0,
            vertices: 0,
            triangles: 0,
            render_passes: 0,
            pipeline_switches: 0,
        }
    }
}
102 
103/// GPU timing query
104pub struct GpuTimer {
105 device: Arc<ManagedDevice>,
106 query_set: QuerySet,
107 query_buffer: Buffer,
108 capacity: u32,
109 current_query: AtomicU32,
110 pending_queries: RwLock<HashMap<u32, String>>,
111}
112 
113/// CPU profiler for detailed timing
114pub struct CpuProfiler {
115 enabled: AtomicBool,
116 samples: RwLock<VecDeque<PerformanceSample>>,
117 active_timers: RwLock<HashMap<String, Instant>>,
118 max_samples: usize,
119 thread_local_data: ThreadLocal<Mutex<ThreadProfileData>>,
120}
121 
122/// Thread-local profiling data
123#[derive(Debug, Default)]
124struct ThreadProfileData {
125 samples: Vec<PerformanceSample>,
126 active_timers: HashMap<String, Instant>,
127 thread_id: u32,
128}
129 
130/// Memory profiler
131pub struct MemoryProfiler {
132 enabled: AtomicBool,
133 total_allocated: AtomicU64,
134 peak_allocated: AtomicU64,
135 allocation_count: AtomicU64,
136 deallocation_count: AtomicU64,
137 
138 // Memory tracking by type
139 buffer_memory: AtomicU64,
140 texture_memory: AtomicU64,
141 pipeline_memory: AtomicU64,
142 
143 // Historical data
144 memory_history: RwLock<VecDeque<MemorySample>>,
145 leak_detection: RwLock<HashMap<ResourceHandle, AllocationInfo>>,
146}
147 
148/// Memory allocation sample
149#[derive(Debug, Clone, Serialize, Deserialize)]
150pub struct MemorySample {
151 pub timestamp: u64,
152 pub total_allocated: u64,
153 pub buffer_memory: u64,
154 pub texture_memory: u64,
155 pub pipeline_memory: u64,
156}
157 
/// Bookkeeping kept for every allocation that has not been released yet.
#[derive(Debug, Clone)]
struct AllocationInfo {
    /// Size of the allocation in bytes.
    size: u64,
    /// Instant at which the allocation was recorded; used to compute its age.
    timestamp: Instant,
    /// Optional captured stack trace.
    stack_trace: Option<String>,
    /// Resource type string supplied when the allocation was recorded.
    resource_type: String,
}
166 
167/// Performance analyzer for detecting bottlenecks
168pub struct PerformanceAnalyzer {
169 frame_history: RwLock<VecDeque<FrameTiming>>,
170 bottleneck_detector: BottleneckDetector,
171 regression_detector: RegressionDetector,
172 optimization_suggestions: RwLock<Vec<OptimizationSuggestion>>,
173 analysis_enabled: AtomicBool,
174}
175 
176/// Bottleneck detection system
177pub struct BottleneckDetector {
178 cpu_threshold: f64,
179 gpu_threshold: f64,
180 memory_threshold: f64,
181 detected_bottlenecks: RwLock<Vec<Bottleneck>>,
182}
183 
184/// Performance regression detector
185pub struct RegressionDetector {
186 baseline_metrics: RwLock<HashMap<MetricType, f64>>,
187 regression_threshold: f64,
188 detected_regressions: RwLock<Vec<PerformanceRegression>>,
189}
190 
191/// Detected bottleneck
192#[derive(Debug, Clone, Serialize, Deserialize)]
193pub struct Bottleneck {
194 pub bottleneck_type: BottleneckType,
195 pub severity: f32,
196 pub description: String,
197 pub suggested_fix: String,
198 pub detected_at: u64,
199}
200 
201/// Types of performance bottlenecks
202#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
203pub enum BottleneckType {
204 CpuBound,
205 GpuBound,
206 MemoryBound,
207 BandwidthBound,
208 DrawCallBound,
209 VertexBound,
210 PixelBound,
211}
212 
213/// Performance regression
214#[derive(Debug, Clone, Serialize, Deserialize)]
215pub struct PerformanceRegression {
216 pub metric_type: MetricType,
217 pub baseline_value: f64,
218 pub current_value: f64,
219 pub regression_percentage: f64,
220 pub detected_at: u64,
221}
222 
223/// Optimization suggestion
224#[derive(Debug, Clone, Serialize, Deserialize)]
225pub struct OptimizationSuggestion {
226 pub title: String,
227 pub description: String,
228 pub impact: OptimizationImpact,
229 pub difficulty: OptimizationDifficulty,
230 pub category: OptimizationCategory,
231}
232 
233/// Frame statistics
234#[derive(Debug, Clone, Serialize, Deserialize)]
235pub struct FrameStats {
236 pub total_frames: u64,
237 pub average_frame_time: f64,
238 pub min_frame_time: f64,
239 pub max_frame_time: f64,
240}
241 
242/// Performance report
243#[derive(Debug, Clone, Serialize, Deserialize)]
244pub struct PerformanceReport {
245 pub frame_stats: FrameStats,
246 pub cpu_samples: Vec<PerformanceSample>,
247 pub memory_stats: HashMap<String, u64>,
248 pub bottlenecks: Vec<Bottleneck>,
249 pub optimization_suggestions: Vec<OptimizationSuggestion>,
250}
251 
252/// Impact level of optimization
253#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
254pub enum OptimizationImpact {
255 Low,
256 Medium,
257 High,
258 Critical,
259}
260 
261/// Difficulty of implementing optimization
262#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
263pub enum OptimizationDifficulty {
264 Easy,
265 Medium,
266 Hard,
267 Expert,
268}
269 
270/// Category of optimization
271#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
272pub enum OptimizationCategory {
273 Memory,
274 Rendering,
275 Compute,
276 IO,
277 Threading,
278}
279 
280/// Main profiler system
281pub struct Profiler {
282 device: Arc<ManagedDevice>,
283 
284 // Sub-profilers
285 pub gpu_timer: Option<Arc<GpuTimer>>,
286 pub cpu_profiler: Arc<CpuProfiler>,
287 pub memory_profiler: Arc<MemoryProfiler>,
288 performance_analyzer: Arc<PerformanceAnalyzer>,
289 
290 // Configuration
291 enabled: AtomicBool,
292 detailed_profiling: AtomicBool,
293 auto_analysis: AtomicBool,
294 
295 // Current frame tracking
296 current_frame: AtomicU64,
297 frame_start_time: RwLock<Option<Instant>>,
298 
299 // Statistics
300 total_frames: AtomicU64,
301 average_frame_time: RwLock<f64>,
302 min_frame_time: RwLock<f64>,
303 max_frame_time: RwLock<f64>,
304}
305 
306impl GpuTimer {
307 /// Create a new GPU timer
308 pub fn new(device: Arc<ManagedDevice>, capacity: u32) -> Result<Self> {
309 let query_set = device.device.create_query_set(&QuerySetDescriptor {
310 label: Some("GpuTimer"),
311 ty: QueryType::Timestamp,
312 count: capacity * 2, // Start and end queries
313 });
314 
315 let query_buffer = device.device.create_buffer(&BufferDescriptor {
316 label: Some("GpuTimerBuffer"),
317 size: (capacity * 2 * 8) as u64, // 8 bytes per timestamp
318 usage: BufferUsages::QUERY_RESOLVE | BufferUsages::COPY_SRC,
319 mapped_at_creation: false,
320 });
321 
322 Ok(Self {
323 device,
324 query_set,
325 query_buffer,
326 capacity,
327 current_query: AtomicU32::new(0),
328 pending_queries: RwLock::new(HashMap::new()),
329 })
330 }
331 
332 /// Begin GPU timing
333 pub fn begin_timing(&self, encoder: &mut CommandEncoder, label: &str) -> Option<u32> {
334 let query_id = self.current_query.fetch_add(2, Ordering::Relaxed);
335 
336 if query_id + 1 >= self.capacity * 2 {
337 return None; // Out of queries
338 }
339 
340 encoder.write_timestamp(&self.query_set, query_id);
341 self.pending_queries
342 .write()
343 .insert(query_id, label.to_string());
344 
345 Some(query_id)
346 }
347 
348 /// End GPU timing
349 pub fn end_timing(&self, encoder: &mut CommandEncoder, query_id: u32) {
350 if query_id + 1 < self.capacity * 2 {
351 encoder.write_timestamp(&self.query_set, query_id + 1);
352 }
353 }
354 
355 /// Resolve timing queries
356 pub fn resolve_queries(&self, encoder: &mut CommandEncoder) {
357 let current = self.current_query.load(Ordering::Relaxed);
358 if current > 0 {
359 encoder.resolve_query_set(&self.query_set, 0..current, &self.query_buffer, 0);
360 }
361 }
362 
363 /// Get timing results (async)
364 pub async fn get_results(&self) -> Result<HashMap<String, Duration>> {
365 let mut results = HashMap::new();
366 let current = self.current_query.load(Ordering::Relaxed);
367 
368 if current == 0 {
369 return Ok(results);
370 }
371 
372 let buffer_slice = self.query_buffer.slice(0..(current * 8) as u64);
373 let (sender, receiver) = futures::channel::oneshot::channel();
374 
375 buffer_slice.map_async(MapMode::Read, move |result| {
376 sender.send(result).ok();
377 });
378 
379 self.device.device.poll(Maintain::Wait);
380 receiver.await??;
381 
382 let data = buffer_slice.get_mapped_range();
383 let timestamps: &[u64] = bytemuck::cast_slice(&data);
384 
385 let pending = self.pending_queries.read();
386 for (&query_id, label) in pending.iter() {
387 if query_id + 1 < current {
388 let start = timestamps[query_id as usize];
389 let end = timestamps[(query_id + 1) as usize];
390 let duration = Duration::from_nanos(end - start);
391 results.insert(label.clone(), duration);
392 }
393 }
394 
395 drop(data);
396 self.query_buffer.unmap();
397 
398 // Reset for next frame
399 self.current_query.store(0, Ordering::Relaxed);
400 self.pending_queries.write().clear();
401 
402 Ok(results)
403 }
404}
405 
406impl CpuProfiler {
407 /// Create a new CPU profiler
408 pub fn new(max_samples: usize) -> Self {
409 Self {
410 enabled: AtomicBool::new(true),
411 samples: RwLock::new(VecDeque::with_capacity(max_samples)),
412 active_timers: RwLock::new(HashMap::new()),
413 max_samples,
414 thread_local_data: ThreadLocal::new(),
415 }
416 }
417 
418 /// Begin timing a section
419 pub fn begin_section(&self, name: &str) {
420 if !self.enabled.load(Ordering::Relaxed) {
421 return;
422 }
423 
424 let thread_data = self.thread_local_data.get_or(|| {
425 Mutex::new(ThreadProfileData {
426 thread_id: 0, // Simplified - thread ID tracking removed
427 ..Default::default()
428 })
429 });
430 
431 let mut data = thread_data.lock().unwrap();
432 data.active_timers.insert(name.to_string(), Instant::now());
433 }
434 
435 /// End timing a section
436 pub fn end_section(&self, name: &str) {
437 if !self.enabled.load(Ordering::Relaxed) {
438 return;
439 }
440 
441 let thread_data = self.thread_local_data.get_or(|| {
442 Mutex::new(ThreadProfileData {
443 thread_id: 0, // Simplified - thread ID tracking removed
444 ..Default::default()
445 })
446 });
447 
448 let mut data = thread_data.lock().unwrap();
449 if let Some(start_time) = data.active_timers.remove(name) {
450 let duration = start_time.elapsed();
451 let sample = PerformanceSample {
452 timestamp: SystemTime::now()
453 .duration_since(UNIX_EPOCH)
454 .unwrap()
455 .as_nanos() as u64,
456 metric_type: MetricType::FrameTime,
457 value: duration.as_secs_f64() * 1000.0, // Convert to milliseconds
458 thread_id: data.thread_id,
459 frame_id: 0, // Will be set by profiler
460 };
461 
462 data.samples.push(sample);
463 }
464 }
465 
466 /// Collect samples from all threads
467 pub fn collect_samples(&self) -> Vec<PerformanceSample> {
468 let mut all_samples: Vec<PerformanceSample> = Vec::new();
469 
470 for thread_data in self.thread_local_data.iter() {
471 let mut data = thread_data.lock().unwrap();
472 all_samples.extend(data.samples.drain(..));
473 }
474 
475 // Add to global samples
476 let mut samples = self.samples.write();
477 for sample in &all_samples {
478 samples.push_back(sample.clone());
479 if samples.len() > self.max_samples {
480 samples.pop_front();
481 }
482 }
483 
484 all_samples
485 }
486 
487 /// Get average timing for a section
488 pub fn get_average_time(&self, _name: &str) -> Option<f64> {
489 let samples = self.samples.read();
490 let matching_samples: Vec<f64> = samples
491 .iter()
492 .filter(|s| s.metric_type == MetricType::FrameTime)
493 .map(|s| s.value)
494 .collect();
495 
496 if matching_samples.is_empty() {
497 None
498 } else {
499 Some(matching_samples.iter().sum::<f64>() / matching_samples.len() as f64)
500 }
501 }
502}
503 
504impl MemoryProfiler {
505 /// Create a new memory profiler
506 pub fn new() -> Self {
507 Self {
508 enabled: AtomicBool::new(true),
509 total_allocated: AtomicU64::new(0),
510 peak_allocated: AtomicU64::new(0),
511 allocation_count: AtomicU64::new(0),
512 deallocation_count: AtomicU64::new(0),
513 buffer_memory: AtomicU64::new(0),
514 texture_memory: AtomicU64::new(0),
515 pipeline_memory: AtomicU64::new(0),
516 memory_history: RwLock::new(VecDeque::with_capacity(1000)),
517 leak_detection: RwLock::new(HashMap::new()),
518 }
519 }
520 
521 /// Record allocation
522 pub fn record_allocation(&self, handle: ResourceHandle, size: u64, resource_type: &str) {
523 if !self.enabled.load(Ordering::Relaxed) {
524 return;
525 }
526 
527 self.total_allocated.fetch_add(size, Ordering::Relaxed);
528 self.allocation_count.fetch_add(1, Ordering::Relaxed);
529 
530 // Update peak
531 let current = self.total_allocated.load(Ordering::Relaxed);
532 let peak = self.peak_allocated.load(Ordering::Relaxed);
533 if current > peak {
534 self.peak_allocated.store(current, Ordering::Relaxed);
535 }
536 
537 // Update type-specific counters
538 match resource_type {
539 "buffer" => {
540 self.buffer_memory.fetch_add(size, Ordering::Relaxed);
541 }
542 "texture" => {
543 self.texture_memory.fetch_add(size, Ordering::Relaxed);
544 }
545 "pipeline" => {
546 self.pipeline_memory.fetch_add(size, Ordering::Relaxed);
547 }
548 _ => {}
549 }
550 
551 // Record for leak detection
552 let allocation_info = AllocationInfo {
553 size,
554 timestamp: Instant::now(),
555 stack_trace: None, // Could be implemented with backtrace crate
556 resource_type: resource_type.to_string(),
557 };
558 
559 self.leak_detection.write().insert(handle, allocation_info);
560 
561 // Record sample
562 self.record_memory_sample();
563 }
564 
565 /// Record deallocation
566 pub fn record_deallocation(&self, handle: ResourceHandle) {
567 if !self.enabled.load(Ordering::Relaxed) {
568 return;
569 }
570 
571 if let Some(info) = self.leak_detection.write().remove(&handle) {
572 self.total_allocated.fetch_sub(info.size, Ordering::Relaxed);
573 self.deallocation_count.fetch_add(1, Ordering::Relaxed);
574 
575 // Update type-specific counters
576 match info.resource_type.as_str() {
577 "buffer" => {
578 self.buffer_memory.fetch_sub(info.size, Ordering::Relaxed);
579 }
580 "texture" => {
581 self.texture_memory.fetch_sub(info.size, Ordering::Relaxed);
582 }
583 "pipeline" => {
584 self.pipeline_memory.fetch_sub(info.size, Ordering::Relaxed);
585 }
586 _ => {}
587 }
588 }
589 
590 self.record_memory_sample();
591 }
592 
593 /// Record memory sample
594 fn record_memory_sample(&self) {
595 let sample = MemorySample {
596 timestamp: SystemTime::now()
597 .duration_since(UNIX_EPOCH)
598 .unwrap()
599 .as_nanos() as u64,
600 total_allocated: self.total_allocated.load(Ordering::Relaxed),
601 buffer_memory: self.buffer_memory.load(Ordering::Relaxed),
602 texture_memory: self.texture_memory.load(Ordering::Relaxed),
603 pipeline_memory: self.pipeline_memory.load(Ordering::Relaxed),
604 };
605 
606 let mut history = self.memory_history.write();
607 history.push_back(sample);
608 if history.len() > 1000 {
609 history.pop_front();
610 }
611 }
612 
613 /// Detect memory leaks
614 pub fn detect_leaks(&self, max_age: Duration) -> Vec<ResourceHandle> {
615 let now = Instant::now();
616 let leak_detection = self.leak_detection.read();
617 
618 leak_detection
619 .iter()
620 .filter(|(_, info)| now.duration_since(info.timestamp) > max_age)
621 .map(|(&handle, _)| handle)
622 .collect()
623 }
624 
625 /// Get memory statistics
626 pub fn get_stats(&self) -> HashMap<String, u64> {
627 let mut stats = HashMap::new();
628 stats.insert(
629 "total_allocated".to_string(),
630 self.total_allocated.load(Ordering::Relaxed),
631 );
632 stats.insert(
633 "peak_allocated".to_string(),
634 self.peak_allocated.load(Ordering::Relaxed),
635 );
636 stats.insert(
637 "allocation_count".to_string(),
638 self.allocation_count.load(Ordering::Relaxed),
639 );
640 stats.insert(
641 "deallocation_count".to_string(),
642 self.deallocation_count.load(Ordering::Relaxed),
643 );
644 stats.insert(
645 "buffer_memory".to_string(),
646 self.buffer_memory.load(Ordering::Relaxed),
647 );
648 stats.insert(
649 "texture_memory".to_string(),
650 self.texture_memory.load(Ordering::Relaxed),
651 );
652 stats.insert(
653 "pipeline_memory".to_string(),
654 self.pipeline_memory.load(Ordering::Relaxed),
655 );
656 stats
657 }
658}
659 
660impl PerformanceAnalyzer {
661 /// Create a new performance analyzer
662 pub fn new() -> Self {
663 Self {
664 frame_history: RwLock::new(VecDeque::with_capacity(1000)),
665 bottleneck_detector: BottleneckDetector::new(),
666 regression_detector: RegressionDetector::new(),
667 optimization_suggestions: RwLock::new(Vec::new()),
668 analysis_enabled: AtomicBool::new(true),
669 }
670 }
671 
672 /// Analyze frame performance
673 pub fn analyze_frame(&self, _frame_timing: &FrameTiming) {
674 // Placeholder for frame analysis
675 }
676 
677 /// Analyze frame timing
678 pub fn analyze_frame_timing(&self, _frame_time: Duration) {
679 // Placeholder for frame timing analysis
680 }
681 
682 /// Get detected bottlenecks
683 pub fn get_bottlenecks(&self) -> Vec<Bottleneck> {
684 self.bottleneck_detector.detected_bottlenecks.read().clone()
685 }
686 
687 /// Get optimization suggestions
688 pub fn get_optimization_suggestions(&self) -> Vec<OptimizationSuggestion> {
689 self.optimization_suggestions.read().clone()
690 }
691 
692 /// Generate optimization suggestions (moved from duplicate impl)
693 fn generate_optimization_suggestions(&self, frame_time: Duration) {
694 let frame_time_ms = frame_time.as_secs_f64() * 1000.0;
695 
696 let mut suggestions = self.optimization_suggestions.write();
697 suggestions.clear();
698 
699 if frame_time_ms > 16.67 {
700 // 60 FPS threshold
701 suggestions.push(OptimizationSuggestion {
702 title: "Frame time exceeds 60 FPS target".to_string(),
703 description: "Consider reducing draw calls or optimizing shaders".to_string(),
704 impact: OptimizationImpact::High,
705 difficulty: OptimizationDifficulty::Medium,
706 category: OptimizationCategory::Rendering,
707 });
708 }
709 
710 if frame_time_ms > 33.33 {
711 // 30 FPS threshold
712 suggestions.push(OptimizationSuggestion {
713 title: "Critical performance issue detected".to_string(),
714 description: "Frame time is critically high, immediate optimization required"
715 .to_string(),
716 impact: OptimizationImpact::Critical,
717 difficulty: OptimizationDifficulty::Hard,
718 category: OptimizationCategory::Rendering,
719 });
720 }
721 }
722}
723 
724impl BottleneckDetector {
725 pub fn new() -> Self {
726 Self {
727 cpu_threshold: 16.0, // 16ms
728 gpu_threshold: 16.0, // 16ms
729 memory_threshold: 0.8, // 80%
730 detected_bottlenecks: RwLock::new(Vec::new()),
731 }
732 }
733 
734 /// Analyze frame time for bottlenecks
735 pub fn analyze_frame_time(&self, frame_time: Duration) {
736 let frame_time_ms = frame_time.as_secs_f64() * 1000.0;
737 
738 let mut bottlenecks = self.detected_bottlenecks.write();
739 bottlenecks.clear();
740 
741 if frame_time_ms > self.cpu_threshold {
742 bottlenecks.push(Bottleneck {
743 bottleneck_type: BottleneckType::CpuBound,
744 severity: (frame_time_ms / self.cpu_threshold) as f32,
745 description: "CPU processing is taking too long".to_string(),
746 suggested_fix: "Optimize CPU-bound operations or use multithreading".to_string(),
747 detected_at: SystemTime::now()
748 .duration_since(UNIX_EPOCH)
749 .unwrap()
750 .as_nanos() as u64,
751 });
752 }
753 }
754}
755 
756impl RegressionDetector {
757 pub fn new() -> Self {
758 Self {
759 baseline_metrics: RwLock::new(HashMap::new()),
760 regression_threshold: 0.1, // 10% regression
761 detected_regressions: RwLock::new(Vec::new()),
762 }
763 }
764 
765 /// Check for performance regression
766 pub fn check_regression(&self, metric_type: MetricType, current_value: f64) {
767 let mut baselines = self.baseline_metrics.write();
768 
769 if let Some(&baseline) = baselines.get(&metric_type) {
770 let regression = (current_value - baseline) / baseline;
771 
772 if regression > self.regression_threshold {
773 let mut regressions = self.detected_regressions.write();
774 regressions.push(PerformanceRegression {
775 metric_type,
776 baseline_value: baseline,
777 current_value,
778 regression_percentage: regression * 100.0,
779 detected_at: SystemTime::now()
780 .duration_since(UNIX_EPOCH)
781 .unwrap()
782 .as_nanos() as u64,
783 });
784 }
785 } else {
786 // Set as baseline if not exists
787 baselines.insert(metric_type, current_value);
788 }
789 }
790}
791 
792impl Profiler {
793 /// Create a new profiler
794 pub fn new(device: Arc<ManagedDevice>) -> Result<Self> {
795 let gpu_timer = if device.device.features().contains(Features::TIMESTAMP_QUERY) {
796 Some(Arc::new(GpuTimer::new(device.clone(), 1000)?))
797 } else {
798 warn!("Timestamp queries not enabled on device. GPU profiling disabled.");
799 None
800 };
801 
802 let cpu_profiler = Arc::new(CpuProfiler::new(10000));
803 let memory_profiler = Arc::new(MemoryProfiler::new());
804 let performance_analyzer = Arc::new(PerformanceAnalyzer::new());
805 
806 Ok(Self {
807 device,
808 gpu_timer,
809 cpu_profiler,
810 memory_profiler,
811 performance_analyzer,
812 enabled: AtomicBool::new(true),
813 detailed_profiling: AtomicBool::new(false),
814 auto_analysis: AtomicBool::new(true),
815 current_frame: AtomicU64::new(0),
816 frame_start_time: RwLock::new(None),
817 total_frames: AtomicU64::new(0),
818 average_frame_time: RwLock::new(0.0),
819 min_frame_time: RwLock::new(f64::MAX),
820 max_frame_time: RwLock::new(0.0),
821 })
822 }
823 
824 /// Begin frame profiling
825 pub fn begin_frame(&self) {
826 if !self.enabled.load(Ordering::Relaxed) {
827 return;
828 }
829 
830 let frame_id = self.current_frame.fetch_add(1, Ordering::Relaxed);
831 *self.frame_start_time.write() = Some(Instant::now());
832 
833 self.cpu_profiler.begin_section("frame");
834 
835 debug!("Begin frame {}", frame_id);
836 }
837 
838 /// End frame profiling
839 pub fn end_frame(&self) {
840 if !self.enabled.load(Ordering::Relaxed) {
841 return;
842 }
843 
844 self.cpu_profiler.end_section("frame");
845 
846 if let Some(start_time) = *self.frame_start_time.read() {
847 let frame_time = start_time.elapsed();
848 let frame_time_ms = frame_time.as_secs_f64() * 1000.0;
849 
850 // Update statistics
851 self.total_frames.fetch_add(1, Ordering::Relaxed);
852 
853 let mut avg = self.average_frame_time.write();
854 let total = self.total_frames.load(Ordering::Relaxed) as f64;
855 *avg = (*avg * (total - 1.0) + frame_time_ms) / total;
856 
857 let mut min = self.min_frame_time.write();
858 if frame_time_ms < *min {
859 *min = frame_time_ms;
860 }
861 
862 let mut max = self.max_frame_time.write();
863 if frame_time_ms > *max {
864 *max = frame_time_ms;
865 }
866 
867 // Analyze performance if enabled
868 if self.auto_analysis.load(Ordering::Relaxed) {
869 self.performance_analyzer.analyze_frame_timing(frame_time);
870 }
871 
872 inspector::inspector().record_frame_timeline(
873 self.current_frame.load(Ordering::Relaxed),
874 frame_time,
875 Duration::ZERO,
876 Some("CPU frame end".to_string()),
877 );
878 }
879 }
880 
881 /// Begin GPU timing
882 pub fn begin_gpu_timing(&self, encoder: &mut CommandEncoder, label: &str) -> Option<u32> {
883 if self.enabled.load(Ordering::Relaxed) {
884 if let Some(timer) = &self.gpu_timer {
885 timer.begin_timing(encoder, label)
886 } else {
887 None
888 }
889 } else {
890 None
891 }
892 }
893 
894 /// End GPU timing
895 pub fn end_gpu_timing(&self, encoder: &mut CommandEncoder, query_id: u32) {
896 if self.enabled.load(Ordering::Relaxed) {
897 if let Some(timer) = &self.gpu_timer {
898 timer.end_timing(encoder, query_id);
899 }
900 }
901 }
902 
903 /// Get comprehensive performance report
904 pub fn get_performance_report(&self) -> PerformanceReport {
905 let cpu_samples = self.cpu_profiler.collect_samples();
906 let memory_stats = self.memory_profiler.get_stats();
907 let bottlenecks = self.performance_analyzer.get_bottlenecks();
908 let suggestions = self.performance_analyzer.get_optimization_suggestions();
909 
910 PerformanceReport {
911 frame_stats: FrameStats {
912 total_frames: self.total_frames.load(Ordering::Relaxed),
913 average_frame_time: *self.average_frame_time.read(),
914 min_frame_time: *self.min_frame_time.read(),
915 max_frame_time: *self.max_frame_time.read(),
916 },
917 cpu_samples,
918 memory_stats,
919 bottlenecks,
920 optimization_suggestions: suggestions,
921 }
922 }
923 
924 /// Enable/disable profiling
925 pub fn set_enabled(&self, enabled: bool) {
926 self.enabled.store(enabled, Ordering::Relaxed);
927 }
928 
929 /// Enable/disable detailed profiling
930 pub fn set_detailed_profiling(&self, enabled: bool) {
931 self.detailed_profiling.store(enabled, Ordering::Relaxed);
932 }
933}
934 
935// All implementations have been consolidated above
936