git.seregonwar.com

crates/strato-renderer/src/profiler.rs

StratoSDK / crates / strato-renderer / src / profiler.rs

1	//! Advanced performance profiling and monitoring system
2	//!
3	//! This module provides comprehensive performance monitoring including:
4	//! - Real-time GPU and CPU performance metrics
5	//! - Frame timing analysis and bottleneck detection
6	//! - Memory usage tracking and leak detection
7	//! - Resource utilization monitoring
8	//! - Performance regression detection
9	//! - Automated performance optimization suggestions
10	//! - Historical performance data analysis
11	//! - Multi-threaded profiling support
12
13	use anyhow::Result;
14	use parking_lot::RwLock;
15	use serde::{Deserialize, Serialize};
16	use std::collections::{HashMap, VecDeque};
17	use std::sync::{
18	atomic::{AtomicBool, AtomicU32, AtomicU64, Ordering},
19	Arc, Mutex,
20	};
21	use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
22	use strato_core::inspector;
23	use thread_local::ThreadLocal;
24	use tracing::{debug, info, instrument, warn};
25	use wgpu::{
26	Buffer, BufferDescriptor, BufferUsages, CommandEncoder, Features, Maintain, MapMode, QuerySet,
27	QuerySetDescriptor, QueryType,
28	};
29
30	use crate::device::ManagedDevice;
31	use crate::resources::ResourceHandle;
32
/// Performance metric types.
///
/// Identifies what a [`PerformanceSample`] measures and is also the key type
/// for the baselines kept by [`RegressionDetector`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum MetricType {
    /// Frame timing metrics
    FrameTime,
    /// GPU utilization
    GpuUtilization,
    /// Memory usage
    MemoryUsage,
    /// Draw call count
    DrawCalls,
    /// Vertex count
    VertexCount,
    /// Texture memory
    TextureMemory,
    /// Buffer memory
    BufferMemory,
    /// Pipeline switches
    PipelineSwitches,
    /// Render pass count
    RenderPasses,
    /// Command buffer submissions
    CommandSubmissions,
}
57
/// A single recorded measurement of one [`MetricType`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceSample {
    /// Capture time. `CpuProfiler::end_section` stores nanoseconds since the
    /// Unix epoch here.
    pub timestamp: u64,
    /// What this sample measures.
    pub metric_type: MetricType,
    /// Measured value. `CpuProfiler::end_section` stores the elapsed section
    /// time in milliseconds.
    pub value: f64,
    /// Id of the recording thread. The CPU profiler in this file always
    /// writes 0 (thread ID tracking was removed).
    pub thread_id: u32,
    /// Frame the sample belongs to. `CpuProfiler::end_section` writes 0.
    pub frame_id: u64,
}
67
/// Timing and workload counters for a single frame.
///
/// `PerformanceAnalyzer` keeps a history of these records. The `Default`
/// implementation yields an all-zero record whose start and end instants are
/// equal.
#[derive(Debug, Clone)]
pub struct FrameTiming {
    /// Identifier of the frame this record describes.
    pub frame_id: u64,
    /// Instant at which the frame started.
    pub start_time: Instant,
    /// Instant at which the frame ended.
    pub end_time: Instant,
    /// Time attributed to CPU work.
    pub cpu_time: Duration,
    /// Time attributed to GPU work.
    pub gpu_time: Duration,
    /// Time attributed to presentation.
    pub present_time: Duration,
    /// Number of draw calls issued.
    pub draw_calls: u32,
    /// Number of vertices submitted.
    pub vertices: u64,
    /// Number of triangles submitted.
    pub triangles: u64,
    /// Number of render passes.
    pub render_passes: u32,
    /// Number of pipeline switches.
    pub pipeline_switches: u32,
}
83
84	impl Default for FrameTiming {
85	fn default() -> Self {
86	let now = Instant::now();
87	Self {
88	frame_id: 0,
89	start_time: now,
90	end_time: now,
91	cpu_time: Duration::ZERO,
92	gpu_time: Duration::ZERO,
93	present_time: Duration::ZERO,
94	draw_calls: 0,
95	vertices: 0,
96	triangles: 0,
97	render_passes: 0,
98	pipeline_switches: 0,
99	}
100	}
101	}
102
/// GPU timing query helper built on a wgpu timestamp query set.
///
/// Each timed section uses two consecutive query slots (start and end).
pub struct GpuTimer {
    /// Device used to create the query resources and polled when reading results.
    device: Arc<ManagedDevice>,
    /// Timestamp query set holding `capacity * 2` queries.
    query_set: QuerySet,
    /// Buffer the queries are resolved into and read back from.
    query_buffer: Buffer,
    /// Maximum number of timed sections.
    capacity: u32,
    /// Index of the next unused query slot; reset to 0 after results are read.
    current_query: AtomicU32,
    /// Maps the start-query index of each open section to its label.
    pending_queries: RwLock<HashMap<u32, String>>,
}
112
/// CPU profiler for detailed timing of named code sections.
pub struct CpuProfiler {
    /// When false, `begin_section` / `end_section` return immediately.
    enabled: AtomicBool,
    /// Samples gathered by `collect_samples`, oldest first, bounded by `max_samples`.
    samples: RwLock<VecDeque<PerformanceSample>>,
    /// Global timer map. The methods in this file use the per-thread maps in
    /// `thread_local_data` instead.
    active_timers: RwLock<HashMap<String, Instant>>,
    /// Upper bound on the number of entries kept in `samples`.
    max_samples: usize,
    /// Per-thread timers and not-yet-collected samples.
    thread_local_data: ThreadLocal<Mutex<ThreadProfileData>>,
}
121
/// Thread-local profiling data owned by [`CpuProfiler`].
#[derive(Debug, Default)]
struct ThreadProfileData {
    /// Finished samples waiting to be drained by `CpuProfiler::collect_samples`.
    samples: Vec<PerformanceSample>,
    /// Start instants of sections that have begun but not yet ended, by name.
    active_timers: HashMap<String, Instant>,
    /// Id copied into every sample produced on this thread (always 0 in this file).
    thread_id: u32,
}
129
/// Memory profiler tracking allocation totals, per-type usage and live handles.
pub struct MemoryProfiler {
    /// When false, allocation and deallocation records are ignored.
    enabled: AtomicBool,
    /// Sum of the sizes of all currently tracked allocations.
    total_allocated: AtomicU64,
    /// Highest value `total_allocated` has reached.
    peak_allocated: AtomicU64,
    /// Number of allocations recorded.
    allocation_count: AtomicU64,
    /// Number of deallocations recorded for known handles.
    deallocation_count: AtomicU64,

    // Memory tracking by type
    /// Bytes attributed to resources recorded with type `"buffer"`.
    buffer_memory: AtomicU64,
    /// Bytes attributed to resources recorded with type `"texture"`.
    texture_memory: AtomicU64,
    /// Bytes attributed to resources recorded with type `"pipeline"`.
    pipeline_memory: AtomicU64,

    // Historical data
    /// Rolling history of counter snapshots (at most 1000 entries).
    memory_history: RwLock<VecDeque<MemorySample>>,
    /// Live allocations by handle; entries older than a chosen age are
    /// reported by `detect_leaks`.
    leak_detection: RwLock<HashMap<ResourceHandle, AllocationInfo>>,
}
147
/// Snapshot of the memory counters taken after an allocation or deallocation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemorySample {
    /// Capture time in nanoseconds since the Unix epoch.
    pub timestamp: u64,
    /// Total tracked allocation size at capture time.
    pub total_allocated: u64,
    /// Buffer memory counter at capture time.
    pub buffer_memory: u64,
    /// Texture memory counter at capture time.
    pub texture_memory: u64,
    /// Pipeline memory counter at capture time.
    pub pipeline_memory: u64,
}
157
/// Allocation tracking information stored per live resource handle.
#[derive(Debug, Clone)]
struct AllocationInfo {
    /// Size passed to `record_allocation`.
    size: u64,
    /// When the allocation was recorded; compared against `max_age` in `detect_leaks`.
    timestamp: Instant,
    /// Optional captured stack trace; always `None` in this file.
    stack_trace: Option<String>,
    /// Type string passed to `record_allocation` (e.g. `"buffer"`, `"texture"`, `"pipeline"`).
    resource_type: String,
}
166
/// Performance analyzer for detecting bottlenecks and producing suggestions.
pub struct PerformanceAnalyzer {
    /// History of frame timings (pre-allocated for 1000 entries).
    frame_history: RwLock<VecDeque<FrameTiming>>,
    /// Detector whose results are returned by `get_bottlenecks`.
    bottleneck_detector: BottleneckDetector,
    /// Detector for regressions against stored baselines.
    regression_detector: RegressionDetector,
    /// Suggestions returned by `get_optimization_suggestions`.
    optimization_suggestions: RwLock<Vec<OptimizationSuggestion>>,
    /// Flag intended to gate analysis; initialised to `true`.
    analysis_enabled: AtomicBool,
}
175
/// Bottleneck detection system driven by fixed thresholds.
pub struct BottleneckDetector {
    /// CPU frame-time threshold in milliseconds (16.0 by default).
    cpu_threshold: f64,
    /// GPU frame-time threshold in milliseconds (16.0 by default).
    gpu_threshold: f64,
    /// Memory usage threshold as a fraction (0.8 = 80% by default).
    memory_threshold: f64,
    /// Bottlenecks found by the most recent analysis.
    detected_bottlenecks: RwLock<Vec<Bottleneck>>,
}
183
/// Performance regression detector comparing values against per-metric baselines.
pub struct RegressionDetector {
    /// Baseline value per metric; the first value seen for a metric becomes its baseline.
    baseline_metrics: RwLock<HashMap<MetricType, f64>>,
    /// Relative increase over the baseline that counts as a regression (0.1 = 10%).
    regression_threshold: f64,
    /// Every regression detected so far.
    detected_regressions: RwLock<Vec<PerformanceRegression>>,
}
190
/// Detected bottleneck.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Bottleneck {
    /// Which resource is limiting performance.
    pub bottleneck_type: BottleneckType,
    /// Severity score. `BottleneckDetector::analyze_frame_time` stores the
    /// ratio of measured frame time to its threshold.
    pub severity: f32,
    /// Human-readable description of the problem.
    pub description: String,
    /// Human-readable remediation hint.
    pub suggested_fix: String,
    /// Detection time in nanoseconds since the Unix epoch.
    pub detected_at: u64,
}
200
/// Types of performance bottlenecks.
///
/// Only `CpuBound` is produced by the detector code in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum BottleneckType {
    CpuBound,
    GpuBound,
    MemoryBound,
    BandwidthBound,
    DrawCallBound,
    VertexBound,
    PixelBound,
}
212
/// A detected performance regression for one metric.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceRegression {
    /// Metric that regressed.
    pub metric_type: MetricType,
    /// Baseline the current value was compared against.
    pub baseline_value: f64,
    /// Value that triggered the detection.
    pub current_value: f64,
    /// Relative increase over the baseline, in percent.
    pub regression_percentage: f64,
    /// Detection time in nanoseconds since the Unix epoch.
    pub detected_at: u64,
}
222
/// Optimization suggestion produced by the performance analyzer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationSuggestion {
    /// Short headline.
    pub title: String,
    /// Longer explanation of what to change.
    pub description: String,
    /// Expected benefit of applying the suggestion.
    pub impact: OptimizationImpact,
    /// Expected effort required to apply the suggestion.
    pub difficulty: OptimizationDifficulty,
    /// Area of the system the suggestion targets.
    pub category: OptimizationCategory,
}
232
/// Aggregate frame statistics reported by `Profiler::get_performance_report`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FrameStats {
    /// Number of frames completed so far.
    pub total_frames: u64,
    /// Running mean frame time in milliseconds.
    pub average_frame_time: f64,
    /// Shortest frame time in milliseconds (`f64::MAX` until a frame completes).
    pub min_frame_time: f64,
    /// Longest frame time in milliseconds.
    pub max_frame_time: f64,
}
241
/// Performance report assembled by `Profiler::get_performance_report`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceReport {
    /// Aggregate frame timing statistics.
    pub frame_stats: FrameStats,
    /// CPU samples drained from the per-thread buffers when the report was built.
    pub cpu_samples: Vec<PerformanceSample>,
    /// Memory counters keyed by name, as returned by `MemoryProfiler::get_stats`.
    pub memory_stats: HashMap<String, u64>,
    /// Bottlenecks currently held by the analyzer.
    pub bottlenecks: Vec<Bottleneck>,
    /// Suggestions currently held by the analyzer.
    pub optimization_suggestions: Vec<OptimizationSuggestion>,
}
251
/// Impact level of an optimization, listed from smallest to largest.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum OptimizationImpact {
    Low,
    Medium,
    High,
    Critical,
}
260
/// Difficulty of implementing an optimization, listed from easiest to hardest.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum OptimizationDifficulty {
    Easy,
    Medium,
    Hard,
    Expert,
}
269
/// Category of optimization, i.e. the subsystem a suggestion applies to.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum OptimizationCategory {
    Memory,
    Rendering,
    Compute,
    IO,
    Threading,
}
279
/// Main profiler system tying together GPU, CPU and memory profiling with
/// frame statistics.
pub struct Profiler {
    /// Device the profiler was created for.
    device: Arc<ManagedDevice>,

    // Sub-profilers
    /// GPU timer; `None` when the device does not have `Features::TIMESTAMP_QUERY`.
    pub gpu_timer: Option<Arc<GpuTimer>>,
    /// CPU section profiler.
    pub cpu_profiler: Arc<CpuProfiler>,
    /// Memory usage profiler.
    pub memory_profiler: Arc<MemoryProfiler>,
    /// Analyzer invoked at the end of each frame when `auto_analysis` is set.
    performance_analyzer: Arc<PerformanceAnalyzer>,

    // Configuration
    /// Master switch checked by the frame and GPU timing entry points.
    enabled: AtomicBool,
    /// Detailed-profiling flag; `false` by default, set via `set_detailed_profiling`.
    detailed_profiling: AtomicBool,
    /// Whether `end_frame` runs the performance analyzer; `true` by default.
    auto_analysis: AtomicBool,

    // Current frame tracking
    /// Frame counter incremented by `begin_frame`.
    current_frame: AtomicU64,
    /// Start instant of the frame in progress, if any.
    frame_start_time: RwLock<Option<Instant>>,

    // Statistics
    /// Number of frames completed by `end_frame`.
    total_frames: AtomicU64,
    /// Running mean frame time in milliseconds.
    average_frame_time: RwLock<f64>,
    /// Shortest frame time in milliseconds; starts at `f64::MAX`.
    min_frame_time: RwLock<f64>,
    /// Longest frame time in milliseconds; starts at 0.0.
    max_frame_time: RwLock<f64>,
}
305
306	impl GpuTimer {
307	/// Create a new GPU timer
308	pub fn new(device: Arc<ManagedDevice>, capacity: u32) -> Result<Self> {
309	let query_set = device.device.create_query_set(&QuerySetDescriptor {
310	label: Some("GpuTimer"),
311	ty: QueryType::Timestamp,
312	count: capacity * 2, // Start and end queries
313	});
314
315	let query_buffer = device.device.create_buffer(&BufferDescriptor {
316	label: Some("GpuTimerBuffer"),
317	size: (capacity * 2 * 8) as u64, // 8 bytes per timestamp
318	usage: BufferUsages::QUERY_RESOLVE \| BufferUsages::COPY_SRC,
319	mapped_at_creation: false,
320	});
321
322	Ok(Self {
323	device,
324	query_set,
325	query_buffer,
326	capacity,
327	current_query: AtomicU32::new(0),
328	pending_queries: RwLock::new(HashMap::new()),
329	})
330	}
331
332	/// Begin GPU timing
333	pub fn begin_timing(&self, encoder: &mut CommandEncoder, label: &str) -> Option<u32> {
334	let query_id = self.current_query.fetch_add(2, Ordering::Relaxed);
335
336	if query_id + 1 >= self.capacity * 2 {
337	return None; // Out of queries
338	}
339
340	encoder.write_timestamp(&self.query_set, query_id);
341	self.pending_queries
342	.write()
343	.insert(query_id, label.to_string());
344
345	Some(query_id)
346	}
347
348	/// End GPU timing
349	pub fn end_timing(&self, encoder: &mut CommandEncoder, query_id: u32) {
350	if query_id + 1 < self.capacity * 2 {
351	encoder.write_timestamp(&self.query_set, query_id + 1);
352	}
353	}
354
355	/// Resolve timing queries
356	pub fn resolve_queries(&self, encoder: &mut CommandEncoder) {
357	let current = self.current_query.load(Ordering::Relaxed);
358	if current > 0 {
359	encoder.resolve_query_set(&self.query_set, 0..current, &self.query_buffer, 0);
360	}
361	}
362
363	/// Get timing results (async)
364	pub async fn get_results(&self) -> Result<HashMap<String, Duration>> {
365	let mut results = HashMap::new();
366	let current = self.current_query.load(Ordering::Relaxed);
367
368	if current == 0 {
369	return Ok(results);
370	}
371
372	let buffer_slice = self.query_buffer.slice(0..(current * 8) as u64);
373	let (sender, receiver) = futures::channel::oneshot::channel();
374
375	buffer_slice.map_async(MapMode::Read, move \|result\| {
376	sender.send(result).ok();
377	});
378
379	self.device.device.poll(Maintain::Wait);
380	receiver.await??;
381
382	let data = buffer_slice.get_mapped_range();
383	let timestamps: &[u64] = bytemuck::cast_slice(&data);
384
385	let pending = self.pending_queries.read();
386	for (&query_id, label) in pending.iter() {
387	if query_id + 1 < current {
388	let start = timestamps[query_id as usize];
389	let end = timestamps[(query_id + 1) as usize];
390	let duration = Duration::from_nanos(end - start);
391	results.insert(label.clone(), duration);
392	}
393	}
394
395	drop(data);
396	self.query_buffer.unmap();
397
398	// Reset for next frame
399	self.current_query.store(0, Ordering::Relaxed);
400	self.pending_queries.write().clear();
401
402	Ok(results)
403	}
404	}
405
406	impl CpuProfiler {
407	/// Create a new CPU profiler
408	pub fn new(max_samples: usize) -> Self {
409	Self {
410	enabled: AtomicBool::new(true),
411	samples: RwLock::new(VecDeque::with_capacity(max_samples)),
412	active_timers: RwLock::new(HashMap::new()),
413	max_samples,
414	thread_local_data: ThreadLocal::new(),
415	}
416	}
417
418	/// Begin timing a section
419	pub fn begin_section(&self, name: &str) {
420	if !self.enabled.load(Ordering::Relaxed) {
421	return;
422	}
423
424	let thread_data = self.thread_local_data.get_or(\|\| {
425	Mutex::new(ThreadProfileData {
426	thread_id: 0, // Simplified - thread ID tracking removed
427	..Default::default()
428	})
429	});
430
431	let mut data = thread_data.lock().unwrap();
432	data.active_timers.insert(name.to_string(), Instant::now());
433	}
434
435	/// End timing a section
436	pub fn end_section(&self, name: &str) {
437	if !self.enabled.load(Ordering::Relaxed) {
438	return;
439	}
440
441	let thread_data = self.thread_local_data.get_or(\|\| {
442	Mutex::new(ThreadProfileData {
443	thread_id: 0, // Simplified - thread ID tracking removed
444	..Default::default()
445	})
446	});
447
448	let mut data = thread_data.lock().unwrap();
449	if let Some(start_time) = data.active_timers.remove(name) {
450	let duration = start_time.elapsed();
451	let sample = PerformanceSample {
452	timestamp: SystemTime::now()
453	.duration_since(UNIX_EPOCH)
454	.unwrap()
455	.as_nanos() as u64,
456	metric_type: MetricType::FrameTime,
457	value: duration.as_secs_f64() * 1000.0, // Convert to milliseconds
458	thread_id: data.thread_id,
459	frame_id: 0, // Will be set by profiler
460	};
461
462	data.samples.push(sample);
463	}
464	}
465
466	/// Collect samples from all threads
467	pub fn collect_samples(&self) -> Vec<PerformanceSample> {
468	let mut all_samples: Vec<PerformanceSample> = Vec::new();
469
470	for thread_data in self.thread_local_data.iter() {
471	let mut data = thread_data.lock().unwrap();
472	all_samples.extend(data.samples.drain(..));
473	}
474
475	// Add to global samples
476	let mut samples = self.samples.write();
477	for sample in &all_samples {
478	samples.push_back(sample.clone());
479	if samples.len() > self.max_samples {
480	samples.pop_front();
481	}
482	}
483
484	all_samples
485	}
486
487	/// Get average timing for a section
488	pub fn get_average_time(&self, _name: &str) -> Option<f64> {
489	let samples = self.samples.read();
490	let matching_samples: Vec<f64> = samples
491	.iter()
492	.filter(\|s\| s.metric_type == MetricType::FrameTime)
493	.map(\|s\| s.value)
494	.collect();
495
496	if matching_samples.is_empty() {
497	None
498	} else {
499	Some(matching_samples.iter().sum::<f64>() / matching_samples.len() as f64)
500	}
501	}
502	}
503
504	impl MemoryProfiler {
505	/// Create a new memory profiler
506	pub fn new() -> Self {
507	Self {
508	enabled: AtomicBool::new(true),
509	total_allocated: AtomicU64::new(0),
510	peak_allocated: AtomicU64::new(0),
511	allocation_count: AtomicU64::new(0),
512	deallocation_count: AtomicU64::new(0),
513	buffer_memory: AtomicU64::new(0),
514	texture_memory: AtomicU64::new(0),
515	pipeline_memory: AtomicU64::new(0),
516	memory_history: RwLock::new(VecDeque::with_capacity(1000)),
517	leak_detection: RwLock::new(HashMap::new()),
518	}
519	}
520
521	/// Record allocation
522	pub fn record_allocation(&self, handle: ResourceHandle, size: u64, resource_type: &str) {
523	if !self.enabled.load(Ordering::Relaxed) {
524	return;
525	}
526
527	self.total_allocated.fetch_add(size, Ordering::Relaxed);
528	self.allocation_count.fetch_add(1, Ordering::Relaxed);
529
530	// Update peak
531	let current = self.total_allocated.load(Ordering::Relaxed);
532	let peak = self.peak_allocated.load(Ordering::Relaxed);
533	if current > peak {
534	self.peak_allocated.store(current, Ordering::Relaxed);
535	}
536
537	// Update type-specific counters
538	match resource_type {
539	"buffer" => {
540	self.buffer_memory.fetch_add(size, Ordering::Relaxed);
541	}
542	"texture" => {
543	self.texture_memory.fetch_add(size, Ordering::Relaxed);
544	}
545	"pipeline" => {
546	self.pipeline_memory.fetch_add(size, Ordering::Relaxed);
547	}
548	_ => {}
549	}
550
551	// Record for leak detection
552	let allocation_info = AllocationInfo {
553	size,
554	timestamp: Instant::now(),
555	stack_trace: None, // Could be implemented with backtrace crate
556	resource_type: resource_type.to_string(),
557	};
558
559	self.leak_detection.write().insert(handle, allocation_info);
560
561	// Record sample
562	self.record_memory_sample();
563	}
564
565	/// Record deallocation
566	pub fn record_deallocation(&self, handle: ResourceHandle) {
567	if !self.enabled.load(Ordering::Relaxed) {
568	return;
569	}
570
571	if let Some(info) = self.leak_detection.write().remove(&handle) {
572	self.total_allocated.fetch_sub(info.size, Ordering::Relaxed);
573	self.deallocation_count.fetch_add(1, Ordering::Relaxed);
574
575	// Update type-specific counters
576	match info.resource_type.as_str() {
577	"buffer" => {
578	self.buffer_memory.fetch_sub(info.size, Ordering::Relaxed);
579	}
580	"texture" => {
581	self.texture_memory.fetch_sub(info.size, Ordering::Relaxed);
582	}
583	"pipeline" => {
584	self.pipeline_memory.fetch_sub(info.size, Ordering::Relaxed);
585	}
586	_ => {}
587	}
588	}
589
590	self.record_memory_sample();
591	}
592
593	/// Record memory sample
594	fn record_memory_sample(&self) {
595	let sample = MemorySample {
596	timestamp: SystemTime::now()
597	.duration_since(UNIX_EPOCH)
598	.unwrap()
599	.as_nanos() as u64,
600	total_allocated: self.total_allocated.load(Ordering::Relaxed),
601	buffer_memory: self.buffer_memory.load(Ordering::Relaxed),
602	texture_memory: self.texture_memory.load(Ordering::Relaxed),
603	pipeline_memory: self.pipeline_memory.load(Ordering::Relaxed),
604	};
605
606	let mut history = self.memory_history.write();
607	history.push_back(sample);
608	if history.len() > 1000 {
609	history.pop_front();
610	}
611	}
612
613	/// Detect memory leaks
614	pub fn detect_leaks(&self, max_age: Duration) -> Vec<ResourceHandle> {
615	let now = Instant::now();
616	let leak_detection = self.leak_detection.read();
617
618	leak_detection
619	.iter()
620	.filter(\|(_, info)\| now.duration_since(info.timestamp) > max_age)
621	.map(\|(&handle, _)\| handle)
622	.collect()
623	}
624
625	/// Get memory statistics
626	pub fn get_stats(&self) -> HashMap<String, u64> {
627	let mut stats = HashMap::new();
628	stats.insert(
629	"total_allocated".to_string(),
630	self.total_allocated.load(Ordering::Relaxed),
631	);
632	stats.insert(
633	"peak_allocated".to_string(),
634	self.peak_allocated.load(Ordering::Relaxed),
635	);
636	stats.insert(
637	"allocation_count".to_string(),
638	self.allocation_count.load(Ordering::Relaxed),
639	);
640	stats.insert(
641	"deallocation_count".to_string(),
642	self.deallocation_count.load(Ordering::Relaxed),
643	);
644	stats.insert(
645	"buffer_memory".to_string(),
646	self.buffer_memory.load(Ordering::Relaxed),
647	);
648	stats.insert(
649	"texture_memory".to_string(),
650	self.texture_memory.load(Ordering::Relaxed),
651	);
652	stats.insert(
653	"pipeline_memory".to_string(),
654	self.pipeline_memory.load(Ordering::Relaxed),
655	);
656	stats
657	}
658	}
659
660	impl PerformanceAnalyzer {
661	/// Create a new performance analyzer
662	pub fn new() -> Self {
663	Self {
664	frame_history: RwLock::new(VecDeque::with_capacity(1000)),
665	bottleneck_detector: BottleneckDetector::new(),
666	regression_detector: RegressionDetector::new(),
667	optimization_suggestions: RwLock::new(Vec::new()),
668	analysis_enabled: AtomicBool::new(true),
669	}
670	}
671
672	/// Analyze frame performance
673	pub fn analyze_frame(&self, _frame_timing: &FrameTiming) {
674	// Placeholder for frame analysis
675	}
676
677	/// Analyze frame timing
678	pub fn analyze_frame_timing(&self, _frame_time: Duration) {
679	// Placeholder for frame timing analysis
680	}
681
682	/// Get detected bottlenecks
683	pub fn get_bottlenecks(&self) -> Vec<Bottleneck> {
684	self.bottleneck_detector.detected_bottlenecks.read().clone()
685	}
686
687	/// Get optimization suggestions
688	pub fn get_optimization_suggestions(&self) -> Vec<OptimizationSuggestion> {
689	self.optimization_suggestions.read().clone()
690	}
691
692	/// Generate optimization suggestions (moved from duplicate impl)
693	fn generate_optimization_suggestions(&self, frame_time: Duration) {
694	let frame_time_ms = frame_time.as_secs_f64() * 1000.0;
695
696	let mut suggestions = self.optimization_suggestions.write();
697	suggestions.clear();
698
699	if frame_time_ms > 16.67 {
700	// 60 FPS threshold
701	suggestions.push(OptimizationSuggestion {
702	title: "Frame time exceeds 60 FPS target".to_string(),
703	description: "Consider reducing draw calls or optimizing shaders".to_string(),
704	impact: OptimizationImpact::High,
705	difficulty: OptimizationDifficulty::Medium,
706	category: OptimizationCategory::Rendering,
707	});
708	}
709
710	if frame_time_ms > 33.33 {
711	// 30 FPS threshold
712	suggestions.push(OptimizationSuggestion {
713	title: "Critical performance issue detected".to_string(),
714	description: "Frame time is critically high, immediate optimization required"
715	.to_string(),
716	impact: OptimizationImpact::Critical,
717	difficulty: OptimizationDifficulty::Hard,
718	category: OptimizationCategory::Rendering,
719	});
720	}
721	}
722	}
723
724	impl BottleneckDetector {
725	pub fn new() -> Self {
726	Self {
727	cpu_threshold: 16.0, // 16ms
728	gpu_threshold: 16.0, // 16ms
729	memory_threshold: 0.8, // 80%
730	detected_bottlenecks: RwLock::new(Vec::new()),
731	}
732	}
733
734	/// Analyze frame time for bottlenecks
735	pub fn analyze_frame_time(&self, frame_time: Duration) {
736	let frame_time_ms = frame_time.as_secs_f64() * 1000.0;
737
738	let mut bottlenecks = self.detected_bottlenecks.write();
739	bottlenecks.clear();
740
741	if frame_time_ms > self.cpu_threshold {
742	bottlenecks.push(Bottleneck {
743	bottleneck_type: BottleneckType::CpuBound,
744	severity: (frame_time_ms / self.cpu_threshold) as f32,
745	description: "CPU processing is taking too long".to_string(),
746	suggested_fix: "Optimize CPU-bound operations or use multithreading".to_string(),
747	detected_at: SystemTime::now()
748	.duration_since(UNIX_EPOCH)
749	.unwrap()
750	.as_nanos() as u64,
751	});
752	}
753	}
754	}
755
756	impl RegressionDetector {
757	pub fn new() -> Self {
758	Self {
759	baseline_metrics: RwLock::new(HashMap::new()),
760	regression_threshold: 0.1, // 10% regression
761	detected_regressions: RwLock::new(Vec::new()),
762	}
763	}
764
765	/// Check for performance regression
766	pub fn check_regression(&self, metric_type: MetricType, current_value: f64) {
767	let mut baselines = self.baseline_metrics.write();
768
769	if let Some(&baseline) = baselines.get(&metric_type) {
770	let regression = (current_value - baseline) / baseline;
771
772	if regression > self.regression_threshold {
773	let mut regressions = self.detected_regressions.write();
774	regressions.push(PerformanceRegression {
775	metric_type,
776	baseline_value: baseline,
777	current_value,
778	regression_percentage: regression * 100.0,
779	detected_at: SystemTime::now()
780	.duration_since(UNIX_EPOCH)
781	.unwrap()
782	.as_nanos() as u64,
783	});
784	}
785	} else {
786	// Set as baseline if not exists
787	baselines.insert(metric_type, current_value);
788	}
789	}
790	}
791
792	impl Profiler {
793	/// Create a new profiler
794	pub fn new(device: Arc<ManagedDevice>) -> Result<Self> {
795	let gpu_timer = if device.device.features().contains(Features::TIMESTAMP_QUERY) {
796	Some(Arc::new(GpuTimer::new(device.clone(), 1000)?))
797	} else {
798	warn!("Timestamp queries not enabled on device. GPU profiling disabled.");
799	None
800	};
801
802	let cpu_profiler = Arc::new(CpuProfiler::new(10000));
803	let memory_profiler = Arc::new(MemoryProfiler::new());
804	let performance_analyzer = Arc::new(PerformanceAnalyzer::new());
805
806	Ok(Self {
807	device,
808	gpu_timer,
809	cpu_profiler,
810	memory_profiler,
811	performance_analyzer,
812	enabled: AtomicBool::new(true),
813	detailed_profiling: AtomicBool::new(false),
814	auto_analysis: AtomicBool::new(true),
815	current_frame: AtomicU64::new(0),
816	frame_start_time: RwLock::new(None),
817	total_frames: AtomicU64::new(0),
818	average_frame_time: RwLock::new(0.0),
819	min_frame_time: RwLock::new(f64::MAX),
820	max_frame_time: RwLock::new(0.0),
821	})
822	}
823
824	/// Begin frame profiling
825	pub fn begin_frame(&self) {
826	if !self.enabled.load(Ordering::Relaxed) {
827	return;
828	}
829
830	let frame_id = self.current_frame.fetch_add(1, Ordering::Relaxed);
831	*self.frame_start_time.write() = Some(Instant::now());
832
833	self.cpu_profiler.begin_section("frame");
834
835	debug!("Begin frame {}", frame_id);
836	}
837
838	/// End frame profiling
839	pub fn end_frame(&self) {
840	if !self.enabled.load(Ordering::Relaxed) {
841	return;
842	}
843
844	self.cpu_profiler.end_section("frame");
845
846	if let Some(start_time) = *self.frame_start_time.read() {
847	let frame_time = start_time.elapsed();
848	let frame_time_ms = frame_time.as_secs_f64() * 1000.0;
849
850	// Update statistics
851	self.total_frames.fetch_add(1, Ordering::Relaxed);
852
853	let mut avg = self.average_frame_time.write();
854	let total = self.total_frames.load(Ordering::Relaxed) as f64;
855	avg = (avg * (total - 1.0) + frame_time_ms) / total;
856
857	let mut min = self.min_frame_time.write();
858	if frame_time_ms < *min {
859	*min = frame_time_ms;
860	}
861
862	let mut max = self.max_frame_time.write();
863	if frame_time_ms > *max {
864	*max = frame_time_ms;
865	}
866
867	// Analyze performance if enabled
868	if self.auto_analysis.load(Ordering::Relaxed) {
869	self.performance_analyzer.analyze_frame_timing(frame_time);
870	}
871
872	inspector::inspector().record_frame_timeline(
873	self.current_frame.load(Ordering::Relaxed),
874	frame_time,
875	Duration::ZERO,
876	Some("CPU frame end".to_string()),
877	);
878	}
879	}
880
881	/// Begin GPU timing
882	pub fn begin_gpu_timing(&self, encoder: &mut CommandEncoder, label: &str) -> Option<u32> {
883	if self.enabled.load(Ordering::Relaxed) {
884	if let Some(timer) = &self.gpu_timer {
885	timer.begin_timing(encoder, label)
886	} else {
887	None
888	}
889	} else {
890	None
891	}
892	}
893
894	/// End GPU timing
895	pub fn end_gpu_timing(&self, encoder: &mut CommandEncoder, query_id: u32) {
896	if self.enabled.load(Ordering::Relaxed) {
897	if let Some(timer) = &self.gpu_timer {
898	timer.end_timing(encoder, query_id);
899	}
900	}
901	}
902
903	/// Get comprehensive performance report
904	pub fn get_performance_report(&self) -> PerformanceReport {
905	let cpu_samples = self.cpu_profiler.collect_samples();
906	let memory_stats = self.memory_profiler.get_stats();
907	let bottlenecks = self.performance_analyzer.get_bottlenecks();
908	let suggestions = self.performance_analyzer.get_optimization_suggestions();
909
910	PerformanceReport {
911	frame_stats: FrameStats {
912	total_frames: self.total_frames.load(Ordering::Relaxed),
913	average_frame_time: *self.average_frame_time.read(),
914	min_frame_time: *self.min_frame_time.read(),
915	max_frame_time: *self.max_frame_time.read(),
916	},
917	cpu_samples,
918	memory_stats,
919	bottlenecks,
920	optimization_suggestions: suggestions,
921	}
922	}
923
924	/// Enable/disable profiling
925	pub fn set_enabled(&self, enabled: bool) {
926	self.enabled.store(enabled, Ordering::Relaxed);
927	}
928
929	/// Enable/disable detailed profiling
930	pub fn set_detailed_profiling(&self, enabled: bool) {
931	self.detailed_profiling.store(enabled, Ordering::Relaxed);
932	}
933	}
934
935	// All implementations have been consolidated above
936

Seregon/StratoSDK