StratoSDK is a framework with a declarative approach, similar to Flutter and React, designed for and written entirely in Rust.
| 1 | //! Advanced GPU device management system |
| 2 | //! |
| 3 | //! This module provides GPU device management including: |
| 4 | //! - Multi-adapter support with intelligent selection |
| 5 | //! - Hardware capability detection and optimization |
| 6 | //! - Automatic fallback mechanisms for compatibility |
| 7 | //! - Device loss recovery and hot-swapping |
| 8 | //! - Power management and thermal monitoring |
| 9 | //! - Vendor-specific optimizations (NVIDIA, AMD, Intel, Apple) |
| 10 | |
| 11 | use anyhow::{bail, Result}; |
| 12 | use parking_lot::RwLock; |
| 13 | use serde::{Deserialize, Serialize}; |
| 14 | use std::collections::HashMap; |
| 15 | use std::sync::{ |
| 16 | atomic::{AtomicBool, AtomicU64, Ordering}, |
| 17 | Arc, |
| 18 | }; |
| 19 | use std::time::{Duration, Instant}; |
| 20 | use strato_core::{logging::LogCategory, strato_debug, strato_error_rate_limited, strato_warn}; |
| 21 | use tracing::{debug, info, instrument, warn}; |
| 22 | use wgpu::{ |
| 23 | Adapter, Backends, Device, DeviceDescriptor, DeviceType, Dx12Compiler, Features, |
| 24 | Gles3MinorVersion, Instance, InstanceDescriptor, InstanceFlags, Limits, PowerPreference, Queue, |
| 25 | RequestAdapterOptions, RequestDeviceError, Surface, SurfaceConfiguration, |
| 26 | }; |
| 27 | |
| 28 | /// GPU vendor identification |
| 29 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] |
| 30 | pub enum GpuVendor { |
| 31 | Nvidia, |
| 32 | Amd, |
| 33 | Intel, |
| 34 | Apple, |
| 35 | Qualcomm, |
| 36 | ARM, |
| 37 | Unknown, |
| 38 | } |
| 39 | |
| 40 | impl From<u32> for GpuVendor { |
| 41 | fn from(vendor_id: u32) -> Self { |
| 42 | match vendor_id { |
| 43 | 0x10DE => GpuVendor::Nvidia, |
| 44 | 0x1002 | 0x1022 => GpuVendor::Amd, |
| 45 | 0x8086 => GpuVendor::Intel, |
| 46 | 0x106B => GpuVendor::Apple, |
| 47 | 0x5143 => GpuVendor::Qualcomm, |
| 48 | 0x13B5 => GpuVendor::ARM, |
| 49 | _ => GpuVendor::Unknown, |
| 50 | } |
| 51 | } |
| 52 | } |
| 53 | |
| 54 | /// GPU performance tier classification |
| 55 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] |
| 56 | pub enum PerformanceTier { |
| 57 | Integrated = 0, |
| 58 | Entry = 1, |
| 59 | Mainstream = 2, |
| 60 | HighEnd = 3, |
| 61 | Enthusiast = 4, |
| 62 | Professional = 5, |
| 63 | } |
| 64 | |
| 65 | /// AdapterInfo re-export for public API |
| 66 | pub use wgpu::AdapterInfo; |
| 67 | |
| 68 | /// Comprehensive GPU capabilities and characteristics |
| 69 | #[derive(Debug, Clone)] |
| 70 | pub struct GpuCapabilities { |
| 71 | pub vendor: GpuVendor, |
| 72 | pub device_name: String, |
| 73 | pub device_id: u32, |
| 74 | pub vendor_id: u32, |
| 75 | pub performance_tier: PerformanceTier, |
| 76 | pub memory_size: u64, |
| 77 | pub memory_bandwidth: Option<u64>, |
| 78 | pub compute_units: Option<u32>, |
| 79 | pub base_clock: Option<u32>, |
| 80 | pub boost_clock: Option<u32>, |
| 81 | pub supports_ray_tracing: bool, |
| 82 | pub supports_mesh_shaders: bool, |
| 83 | pub supports_variable_rate_shading: bool, |
| 84 | pub max_texture_size: u32, |
| 85 | pub max_texture_array_layers: u32, |
| 86 | pub max_bind_groups: u32, |
| 87 | pub max_dynamic_uniform_buffers: u32, |
| 88 | pub max_storage_buffers: u32, |
| 89 | pub max_sampled_textures: u32, |
| 90 | pub max_samplers: u32, |
| 91 | pub max_storage_textures: u32, |
| 92 | pub max_vertex_buffers: u32, |
| 93 | pub max_vertex_attributes: u32, |
| 94 | pub max_push_constant_size: u32, |
| 95 | pub timestamp_period: f32, |
| 96 | pub supported_features: Features, |
| 97 | pub limits: Limits, |
| 98 | } |
| 99 | |
| 100 | impl GpuCapabilities { |
| 101 | /// Create capabilities from adapter info and limits |
| 102 | pub fn from_adapter(adapter: &Adapter) -> Self { |
| 103 | let info = adapter.get_info(); |
| 104 | let limits = adapter.limits(); |
| 105 | let features = adapter.features(); |
| 106 | |
| 107 | let vendor = GpuVendor::from(info.vendor); |
| 108 | let performance_tier = Self::classify_performance_tier(&info, &limits); |
| 109 | |
| 110 | Self { |
| 111 | vendor, |
| 112 | device_name: info.name.clone(), |
| 113 | device_id: info.device, |
| 114 | vendor_id: info.vendor, |
| 115 | performance_tier, |
| 116 | memory_size: Self::estimate_memory_size(&info, &limits), |
| 117 | memory_bandwidth: Self::estimate_memory_bandwidth(&info, &limits), |
| 118 | compute_units: Self::estimate_compute_units(&info), |
| 119 | base_clock: None, // Would need vendor-specific APIs |
| 120 | boost_clock: None, |
| 121 | supports_ray_tracing: features.contains(Features::RAY_TRACING_ACCELERATION_STRUCTURE), |
| 122 | supports_mesh_shaders: false, // Features::EXPERIMENTAL_FEATURES removed in newer wgpu |
| 123 | supports_variable_rate_shading: false, // Not exposed in wgpu yet |
| 124 | max_texture_size: limits.max_texture_dimension_2d, |
| 125 | max_texture_array_layers: limits.max_texture_array_layers, |
| 126 | max_bind_groups: limits.max_bind_groups, |
| 127 | max_dynamic_uniform_buffers: limits.max_dynamic_uniform_buffers_per_pipeline_layout, |
| 128 | max_storage_buffers: limits.max_storage_buffers_per_shader_stage, |
| 129 | max_sampled_textures: limits.max_sampled_textures_per_shader_stage, |
| 130 | max_samplers: limits.max_samplers_per_shader_stage, |
| 131 | max_storage_textures: limits.max_storage_textures_per_shader_stage, |
| 132 | max_vertex_buffers: limits.max_vertex_buffers, |
| 133 | max_vertex_attributes: limits.max_vertex_attributes, |
| 134 | max_push_constant_size: limits.max_push_constant_size, |
| 135 | timestamp_period: 1.0, // timestamp_period field removed, using default |
| 136 | supported_features: features, |
| 137 | limits, |
| 138 | } |
| 139 | } |
| 140 | |
| 141 | fn classify_performance_tier(info: &AdapterInfo, limits: &Limits) -> PerformanceTier { |
| 142 | let memory_score = (limits.max_buffer_size / (1024 * 1024 * 1024)) as u32; // GB |
| 143 | let compute_score = |
| 144 | limits.max_compute_workgroup_size_x * limits.max_compute_workgroup_size_y; |
| 145 | |
| 146 | match info.device_type { |
| 147 | DeviceType::DiscreteGpu => { |
| 148 | if memory_score >= 16 && compute_score >= 1024 * 1024 { |
| 149 | PerformanceTier::Professional |
| 150 | } else if memory_score >= 8 && compute_score >= 512 * 512 { |
| 151 | PerformanceTier::Enthusiast |
| 152 | } else if memory_score >= 4 { |
| 153 | PerformanceTier::HighEnd |
| 154 | } else { |
| 155 | PerformanceTier::Mainstream |
| 156 | } |
| 157 | } |
| 158 | DeviceType::IntegratedGpu => { |
| 159 | if memory_score >= 4 { |
| 160 | PerformanceTier::Mainstream |
| 161 | } else { |
| 162 | PerformanceTier::Integrated |
| 163 | } |
| 164 | } |
| 165 | DeviceType::VirtualGpu => PerformanceTier::Entry, |
| 166 | DeviceType::Cpu => PerformanceTier::Entry, |
| 167 | DeviceType::Other => PerformanceTier::Entry, |
| 168 | } |
| 169 | } |
| 170 | |
| 171 | fn estimate_memory_size(info: &AdapterInfo, limits: &Limits) -> u64 { |
| 172 | // Rough estimation based on buffer limits and device type |
| 173 | match info.device_type { |
| 174 | DeviceType::DiscreteGpu => { |
| 175 | let buffer_limit = limits.max_buffer_size; |
| 176 | // Discrete GPUs typically have dedicated VRAM |
| 177 | std::cmp::min(buffer_limit, 32 * 1024 * 1024 * 1024) // Cap at 32GB |
| 178 | } |
| 179 | DeviceType::IntegratedGpu => { |
| 180 | // Integrated GPUs share system memory |
| 181 | std::cmp::min(limits.max_buffer_size, 8 * 1024 * 1024 * 1024) // Cap at 8GB |
| 182 | } |
| 183 | _ => limits.max_buffer_size, |
| 184 | } |
| 185 | } |
| 186 | |
| 187 | fn estimate_memory_bandwidth(_info: &AdapterInfo, _limits: &Limits) -> Option<u64> { |
| 188 | // Would need vendor-specific APIs or lookup tables |
| 189 | None |
| 190 | } |
| 191 | |
| 192 | fn estimate_compute_units(info: &AdapterInfo) -> Option<u32> { |
| 193 | // Would need vendor-specific detection |
| 194 | match GpuVendor::from(info.vendor) { |
| 195 | GpuVendor::Nvidia => { |
| 196 | // Could parse device name for SM count |
| 197 | None |
| 198 | } |
| 199 | GpuVendor::Amd => { |
| 200 | // Could parse device name for CU count |
| 201 | None |
| 202 | } |
| 203 | _ => None, |
| 204 | } |
| 205 | } |
| 206 | |
| 207 | /// Get vendor-specific optimization hints |
| 208 | pub fn get_optimization_hints(&self) -> OptimizationHints { |
| 209 | match self.vendor { |
| 210 | GpuVendor::Nvidia => OptimizationHints { |
| 211 | preferred_workgroup_size: (32, 1, 1), // Warp size |
| 212 | prefers_texture_arrays: true, |
| 213 | supports_async_compute: true, |
| 214 | memory_coalescing_alignment: 128, |
| 215 | preferred_buffer_alignment: 256, |
| 216 | supports_fast_math: true, |
| 217 | }, |
| 218 | GpuVendor::Amd => OptimizationHints { |
| 219 | preferred_workgroup_size: (64, 1, 1), // Wavefront size |
| 220 | prefers_texture_arrays: true, |
| 221 | supports_async_compute: true, |
| 222 | memory_coalescing_alignment: 256, |
| 223 | preferred_buffer_alignment: 256, |
| 224 | supports_fast_math: true, |
| 225 | }, |
| 226 | GpuVendor::Intel => OptimizationHints { |
| 227 | preferred_workgroup_size: (16, 1, 1), // EU thread group |
| 228 | prefers_texture_arrays: false, |
| 229 | supports_async_compute: false, |
| 230 | memory_coalescing_alignment: 64, |
| 231 | preferred_buffer_alignment: 64, |
| 232 | supports_fast_math: false, |
| 233 | }, |
| 234 | GpuVendor::Apple => OptimizationHints { |
| 235 | preferred_workgroup_size: (32, 1, 1), // SIMD group size |
| 236 | prefers_texture_arrays: true, |
| 237 | supports_async_compute: true, |
| 238 | memory_coalescing_alignment: 16, |
| 239 | preferred_buffer_alignment: 16, |
| 240 | supports_fast_math: true, |
| 241 | }, |
| 242 | _ => OptimizationHints::default(), |
| 243 | } |
| 244 | } |
| 245 | } |
| 246 | |
/// Tuning hints that depend on the GPU vendor.
#[derive(Debug, Clone)]
pub struct OptimizationHints {
    /// Preferred compute workgroup size as `(x, y, z)`.
    pub preferred_workgroup_size: (u32, u32, u32),
    /// Whether texture arrays are the preferred layout.
    pub prefers_texture_arrays: bool,
    /// Whether async compute is considered available.
    pub supports_async_compute: bool,
    /// Alignment used for coalesced memory access.
    pub memory_coalescing_alignment: u32,
    /// Preferred alignment for buffers.
    pub preferred_buffer_alignment: u32,
    /// Whether fast-math optimisations are considered available.
    pub supports_fast_math: bool,
}

impl Default for OptimizationHints {
    /// Hints used for vendors without dedicated values: a 64-wide workgroup
    /// with async compute and fast math switched off.
    fn default() -> Self {
        let workgroup_width = 64;
        let coalescing_alignment = 128;
        let buffer_alignment = 256;

        OptimizationHints {
            preferred_workgroup_size: (workgroup_width, 1, 1),
            prefers_texture_arrays: true,
            supports_async_compute: false,
            memory_coalescing_alignment: coalescing_alignment,
            preferred_buffer_alignment: buffer_alignment,
            supports_fast_math: false,
        }
    }
}
| 270 | |
| 271 | /// Device selection criteria for automatic adapter selection |
| 272 | #[derive(Debug, Clone)] |
| 273 | pub struct DeviceSelectionCriteria { |
| 274 | pub prefer_discrete_gpu: bool, |
| 275 | pub min_memory_size: u64, |
| 276 | pub required_features: Features, |
| 277 | pub preferred_vendor: Option<GpuVendor>, |
| 278 | pub min_performance_tier: PerformanceTier, |
| 279 | pub require_timestamp_queries: bool, |
| 280 | pub require_pipeline_statistics: bool, |
| 281 | } |
| 282 | |
| 283 | impl Default for DeviceSelectionCriteria { |
| 284 | fn default() -> Self { |
| 285 | Self { |
| 286 | prefer_discrete_gpu: true, |
| 287 | min_memory_size: 1024 * 1024 * 1024, // 1GB |
| 288 | required_features: Features::empty(), |
| 289 | preferred_vendor: None, |
| 290 | min_performance_tier: PerformanceTier::Integrated, |
| 291 | require_timestamp_queries: false, |
| 292 | require_pipeline_statistics: false, |
| 293 | } |
| 294 | } |
| 295 | } |
| 296 | |
| 297 | /// Device health monitoring |
| 298 | #[derive(Debug)] |
| 299 | pub struct DeviceHealth { |
| 300 | pub is_lost: AtomicBool, |
| 301 | pub last_error: RwLock<Option<String>>, |
| 302 | pub error_count: AtomicU64, |
| 303 | pub last_successful_operation: RwLock<Instant>, |
| 304 | pub temperature: RwLock<Option<f32>>, |
| 305 | pub power_usage: RwLock<Option<f32>>, |
| 306 | pub memory_usage: RwLock<Option<f32>>, |
| 307 | } |
| 308 | |
| 309 | impl Default for DeviceHealth { |
| 310 | fn default() -> Self { |
| 311 | Self { |
| 312 | is_lost: AtomicBool::new(false), |
| 313 | last_error: RwLock::new(None), |
| 314 | error_count: AtomicU64::new(0), |
| 315 | last_successful_operation: RwLock::new(Instant::now()), |
| 316 | temperature: RwLock::new(None), |
| 317 | power_usage: RwLock::new(None), |
| 318 | memory_usage: RwLock::new(None), |
| 319 | } |
| 320 | } |
| 321 | } |
| 322 | |
| 323 | /// Advanced GPU device manager |
| 324 | #[derive(Debug)] |
| 325 | pub struct DeviceManager { |
| 326 | instance: Instance, |
| 327 | adapters: Vec<(Adapter, GpuCapabilities)>, |
| 328 | active_device: RwLock<Option<Arc<ManagedDevice>>>, |
| 329 | active_adapter_index: RwLock<Option<usize>>, |
| 330 | device_health: Arc<DeviceHealth>, |
| 331 | selection_criteria: RwLock<DeviceSelectionCriteria>, |
| 332 | fallback_chain: RwLock<Vec<usize>>, // Indices into adapters |
| 333 | monitoring_enabled: AtomicBool, |
| 334 | } |
| 335 | |
| 336 | /// Managed device wrapper with additional metadata |
| 337 | #[derive(Debug)] |
| 338 | pub struct ManagedDevice { |
| 339 | pub device: Device, |
| 340 | pub queue: Queue, |
| 341 | pub capabilities: GpuCapabilities, |
| 342 | pub optimization_hints: OptimizationHints, |
| 343 | pub creation_time: Instant, |
| 344 | pub health: Arc<DeviceHealth>, |
| 345 | } |
| 346 | |
| 347 | impl DeviceManager { |
| 348 | /// Get all available adapters |
| 349 | pub fn adapters(&self) -> &[(Adapter, GpuCapabilities)] { |
| 350 | &self.adapters |
| 351 | } |
| 352 | |
| 353 | /// Create a new device manager |
| 354 | #[instrument(skip(instance, surface))] |
| 355 | pub async fn new(instance: Option<Instance>, surface: Option<&Surface<'_>>) -> Result<Self> { |
| 356 | let instance = instance.unwrap_or_else(|| { |
| 357 | Instance::new(InstanceDescriptor { |
| 358 | backends: Backends::all(), |
| 359 | flags: InstanceFlags::default(), |
| 360 | dx12_shader_compiler: Dx12Compiler::Fxc, |
| 361 | gles_minor_version: Gles3MinorVersion::Automatic, |
| 362 | }) |
| 363 | }); |
| 364 | |
| 365 | info!("Enumerating GPU adapters..."); |
| 366 | let adapters = Self::enumerate_adapters(&instance, surface).await?; |
| 367 | |
| 368 | if adapters.is_empty() { |
| 369 | bail!("No compatible GPU adapters found"); |
| 370 | } |
| 371 | |
| 372 | info!("Found {} compatible GPU adapter(s)", adapters.len()); |
| 373 | for (i, (_, caps)) in adapters.iter().enumerate() { |
| 374 | info!( |
| 375 | " [{}] {} ({:?}, {:?})", |
| 376 | i, caps.device_name, caps.vendor, caps.performance_tier |
| 377 | ); |
| 378 | } |
| 379 | |
| 380 | let fallback_chain = Self::create_fallback_chain(&adapters); |
| 381 | |
| 382 | Ok(Self { |
| 383 | instance, |
| 384 | adapters, |
| 385 | active_device: RwLock::new(None), |
| 386 | active_adapter_index: RwLock::new(None), |
| 387 | device_health: Arc::new(DeviceHealth::default()), |
| 388 | selection_criteria: RwLock::new(DeviceSelectionCriteria::default()), |
| 389 | fallback_chain: RwLock::new(fallback_chain), |
| 390 | monitoring_enabled: AtomicBool::new(true), |
| 391 | }) |
| 392 | } |
| 393 | |
| 394 | /// Enumerate and analyze all available adapters |
| 395 | async fn enumerate_adapters( |
| 396 | instance: &Instance, |
| 397 | surface: Option<&Surface<'_>>, |
| 398 | ) -> Result<Vec<(Adapter, GpuCapabilities)>> { |
| 399 | let mut adapters = Vec::new(); |
| 400 | |
| 401 | // Try all power preferences to find all adapters |
| 402 | for power_pref in [PowerPreference::HighPerformance, PowerPreference::LowPower] { |
| 403 | if let Some(adapter) = instance |
| 404 | .request_adapter(&RequestAdapterOptions { |
| 405 | power_preference: power_pref, |
| 406 | compatible_surface: surface, |
| 407 | force_fallback_adapter: false, |
| 408 | }) |
| 409 | .await |
| 410 | { |
| 411 | let capabilities = GpuCapabilities::from_adapter(&adapter); |
| 412 | |
| 413 | // Check if we already have this adapter |
| 414 | if !adapters |
| 415 | .iter() |
| 416 | .any(|(_, caps): &(Adapter, GpuCapabilities)| { |
| 417 | caps.device_id == capabilities.device_id |
| 418 | && caps.vendor_id == capabilities.vendor_id |
| 419 | }) |
| 420 | { |
| 421 | adapters.push((adapter, capabilities)); |
| 422 | } |
| 423 | } |
| 424 | } |
| 425 | |
| 426 | // Also try fallback adapter |
| 427 | if let Some(adapter) = instance |
| 428 | .request_adapter(&RequestAdapterOptions { |
| 429 | power_preference: PowerPreference::default(), |
| 430 | compatible_surface: surface, |
| 431 | force_fallback_adapter: true, |
| 432 | }) |
| 433 | .await |
| 434 | { |
| 435 | let capabilities = GpuCapabilities::from_adapter(&adapter); |
| 436 | |
| 437 | if !adapters.iter().any(|(_, caps)| { |
| 438 | caps.device_id == capabilities.device_id && caps.vendor_id == capabilities.vendor_id |
| 439 | }) { |
| 440 | adapters.push((adapter, capabilities)); |
| 441 | } |
| 442 | } |
| 443 | |
| 444 | Ok(adapters) |
| 445 | } |
| 446 | |
| 447 | /// Create fallback chain ordered by preference |
| 448 | fn create_fallback_chain(adapters: &[(Adapter, GpuCapabilities)]) -> Vec<usize> { |
| 449 | let mut indices: Vec<usize> = (0..adapters.len()).collect(); |
| 450 | |
| 451 | // Sort by performance tier (descending), then by memory size (descending) |
| 452 | indices.sort_by(|&a, &b| { |
| 453 | let caps_a = &adapters[a].1; |
| 454 | let caps_b = &adapters[b].1; |
| 455 | |
| 456 | caps_b |
| 457 | .performance_tier |
| 458 | .cmp(&caps_a.performance_tier) |
| 459 | .then(caps_b.memory_size.cmp(&caps_a.memory_size)) |
| 460 | }); |
| 461 | |
| 462 | indices |
| 463 | } |
| 464 | |
| 465 | /// Initialize device with automatic selection |
| 466 | #[instrument] |
| 467 | pub async fn initialize_device(&self) -> Result<Arc<ManagedDevice>> { |
| 468 | let criteria = self.selection_criteria.read().clone(); |
| 469 | self.initialize_device_with_criteria(criteria).await |
| 470 | } |
| 471 | |
| 472 | /// Initialize device with specific criteria |
| 473 | #[instrument] |
| 474 | pub async fn initialize_device_with_criteria( |
| 475 | &self, |
| 476 | criteria: DeviceSelectionCriteria, |
| 477 | ) -> Result<Arc<ManagedDevice>> { |
| 478 | let fallback_chain = self.fallback_chain.read().clone(); |
| 479 | |
| 480 | for &adapter_idx in &fallback_chain { |
| 481 | let (adapter, capabilities) = &self.adapters[adapter_idx]; |
| 482 | |
| 483 | if !Self::meets_criteria(capabilities, &criteria) { |
| 484 | debug!( |
| 485 | "Adapter {} doesn't meet criteria, skipping", |
| 486 | capabilities.device_name |
| 487 | ); |
| 488 | continue; |
| 489 | } |
| 490 | |
| 491 | match self.create_device(adapter, capabilities, &criteria).await { |
| 492 | Ok(device) => { |
| 493 | info!( |
| 494 | "Successfully initialized device: {}", |
| 495 | capabilities.device_name |
| 496 | ); |
| 497 | let managed_device = Arc::new(device); |
| 498 | *self.active_device.write() = Some(managed_device.clone()); |
| 499 | *self.active_adapter_index.write() = Some(adapter_idx); |
| 500 | return Ok(managed_device); |
| 501 | } |
| 502 | Err(e) => { |
| 503 | warn!( |
| 504 | "Failed to create device {}: {}", |
| 505 | capabilities.device_name, e |
| 506 | ); |
| 507 | continue; |
| 508 | } |
| 509 | } |
| 510 | } |
| 511 | |
| 512 | bail!("Failed to initialize any compatible device"); |
| 513 | } |
| 514 | |
| 515 | /// Check if capabilities meet selection criteria |
| 516 | fn meets_criteria(capabilities: &GpuCapabilities, criteria: &DeviceSelectionCriteria) -> bool { |
| 517 | if capabilities.memory_size < criteria.min_memory_size { |
| 518 | return false; |
| 519 | } |
| 520 | |
| 521 | if capabilities.performance_tier < criteria.min_performance_tier { |
| 522 | return false; |
| 523 | } |
| 524 | |
| 525 | if !capabilities |
| 526 | .supported_features |
| 527 | .contains(criteria.required_features) |
| 528 | { |
| 529 | return false; |
| 530 | } |
| 531 | |
| 532 | if let Some(preferred_vendor) = criteria.preferred_vendor { |
| 533 | if capabilities.vendor != preferred_vendor { |
| 534 | return false; |
| 535 | } |
| 536 | } |
| 537 | |
| 538 | if criteria.require_timestamp_queries |
| 539 | && !capabilities |
| 540 | .supported_features |
| 541 | .contains(Features::TIMESTAMP_QUERY) |
| 542 | { |
| 543 | return false; |
| 544 | } |
| 545 | |
| 546 | if criteria.require_pipeline_statistics |
| 547 | && !capabilities |
| 548 | .supported_features |
| 549 | .contains(Features::PIPELINE_STATISTICS_QUERY) |
| 550 | { |
| 551 | return false; |
| 552 | } |
| 553 | |
| 554 | true |
| 555 | } |
| 556 | |
| 557 | /// Create managed device from adapter |
| 558 | async fn create_device( |
| 559 | &self, |
| 560 | adapter: &Adapter, |
| 561 | capabilities: &GpuCapabilities, |
| 562 | criteria: &DeviceSelectionCriteria, |
| 563 | ) -> Result<ManagedDevice> { |
| 564 | info!("Creating device for adapter: {}", capabilities.device_name); |
| 565 | |
| 566 | let mut required_features = criteria.required_features; |
| 567 | |
| 568 | // Enable timestamp queries if requested |
| 569 | if criteria.require_timestamp_queries { |
| 570 | required_features |= Features::TIMESTAMP_QUERY; |
| 571 | } |
| 572 | |
| 573 | // Enable pipeline statistics if requested |
| 574 | if criteria.require_pipeline_statistics { |
| 575 | required_features |= Features::PIPELINE_STATISTICS_QUERY; |
| 576 | } |
| 577 | |
| 578 | let required_limits = Limits::default(); |
| 579 | |
| 580 | // Set up error callback for Vulkan validation errors |
| 581 | let device_descriptor = DeviceDescriptor { |
| 582 | label: Some(&format!("StratoUI Device - {}", capabilities.device_name)), |
| 583 | required_features, |
| 584 | required_limits: required_limits.clone(), |
| 585 | }; |
| 586 | |
| 587 | match adapter.request_device(&device_descriptor, None).await { |
| 588 | Ok((device, queue)) => { |
| 589 | // Set up error callback to handle Vulkan validation errors with rate limiting |
| 590 | device.on_uncaptured_error(Box::new(|error| { |
| 591 | match error { |
| 592 | wgpu::Error::Validation { description, .. } => { |
| 593 | // Rate limit Vulkan validation errors, especially VUID-vkQueueSubmit |
| 594 | if description.contains("VUID-vkQueueSubmit") |
| 595 | || description.contains("pSignalSemaphores") |
| 596 | { |
| 597 | strato_error_rate_limited!( |
| 598 | LogCategory::Vulkan, |
| 599 | "Vulkan validation warning (known WGPU issue): {}", |
| 600 | description |
| 601 | ); |
| 602 | } else { |
| 603 | strato_error_rate_limited!( |
| 604 | LogCategory::Vulkan, |
| 605 | "Vulkan validation error: {}", |
| 606 | description |
| 607 | ); |
| 608 | } |
| 609 | } |
| 610 | wgpu::Error::OutOfMemory { .. } => { |
| 611 | strato_error_rate_limited!( |
| 612 | LogCategory::Vulkan, |
| 613 | "GPU out of memory: {}", |
| 614 | error |
| 615 | ); |
| 616 | } |
| 617 | _ => { |
| 618 | strato_warn!(LogCategory::Vulkan, "GPU error: {}", error); |
| 619 | } |
| 620 | } |
| 621 | })); |
| 622 | |
| 623 | let health = Arc::new(DeviceHealth::default()); |
| 624 | health |
| 625 | .last_successful_operation |
| 626 | .write() |
| 627 | .clone_from(&Instant::now()); |
| 628 | |
| 629 | let optimization_hints = capabilities.get_optimization_hints(); |
| 630 | |
| 631 | strato_debug!( |
| 632 | LogCategory::Renderer, |
| 633 | "Successfully created device '{}' with {} MB memory", |
| 634 | capabilities.device_name, |
| 635 | capabilities.memory_size / (1024 * 1024) |
| 636 | ); |
| 637 | |
| 638 | Ok(ManagedDevice { |
| 639 | device, |
| 640 | queue, |
| 641 | capabilities: capabilities.clone(), |
| 642 | optimization_hints, |
| 643 | creation_time: Instant::now(), |
| 644 | health, |
| 645 | }) |
| 646 | } |
| 647 | Err(e) => { |
| 648 | strato_error_rate_limited!( |
| 649 | LogCategory::Vulkan, |
| 650 | "Failed to create device for adapter '{}': {}", |
| 651 | capabilities.device_name, |
| 652 | e |
| 653 | ); |
| 654 | |
| 655 | // All device creation errors are treated the same way |
| 656 | bail!("Failed to create device: {}", e); |
| 657 | } |
| 658 | } |
| 659 | } |
| 660 | |
| 661 | /// Get current active device |
| 662 | pub fn get_device(&self) -> Option<Arc<ManagedDevice>> { |
| 663 | self.active_device.read().clone() |
| 664 | } |
| 665 | |
| 666 | /// Get current active adapter |
| 667 | pub fn get_active_adapter(&self) -> Option<&Adapter> { |
| 668 | self.active_adapter_index |
| 669 | .read() |
| 670 | .map(|idx| &self.adapters[idx].0) |
| 671 | } |
| 672 | |
| 673 | /// Check device health and attempt recovery if needed |
| 674 | #[instrument] |
| 675 | pub async fn check_device_health(&self) -> Result<()> { |
| 676 | if let Some(device) = self.get_device() { |
| 677 | if device.health.is_lost.load(Ordering::Relaxed) { |
| 678 | warn!("Device lost detected, attempting recovery..."); |
| 679 | self.recover_device().await?; |
| 680 | } |
| 681 | } |
| 682 | Ok(()) |
| 683 | } |
| 684 | |
| 685 | /// Attempt to recover from device loss |
| 686 | async fn recover_device(&self) -> Result<()> { |
| 687 | info!("Attempting device recovery..."); |
| 688 | |
| 689 | // Clear current device |
| 690 | *self.active_device.write() = None; |
| 691 | *self.active_adapter_index.write() = None; |
| 692 | |
| 693 | // Try to reinitialize with same criteria |
| 694 | let criteria = self.selection_criteria.read().clone(); |
| 695 | self.initialize_device_with_criteria(criteria).await?; |
| 696 | |
| 697 | info!("Device recovery successful"); |
| 698 | Ok(()) |
| 699 | } |
| 700 | |
| 701 | /// Update selection criteria |
| 702 | pub fn update_selection_criteria(&self, criteria: DeviceSelectionCriteria) { |
| 703 | *self.selection_criteria.write() = criteria; |
| 704 | } |
| 705 | |
| 706 | /// Get adapter capabilities |
| 707 | pub fn get_adapter_capabilities(&self) -> Vec<GpuCapabilities> { |
| 708 | self.adapters.iter().map(|(_, caps)| caps.clone()).collect() |
| 709 | } |
| 710 | |
| 711 | /// Get the best available device |
| 712 | pub fn get_best_device(&self) -> Option<Arc<ManagedDevice>> { |
| 713 | self.get_device() |
| 714 | } |
| 715 | |
| 716 | /// Get device statistics |
| 717 | pub fn get_device_stats(&self) -> Option<DeviceStats> { |
| 718 | self.get_device().map(|device| DeviceStats { |
| 719 | device_name: device.capabilities.device_name.clone(), |
| 720 | vendor: device.capabilities.vendor, |
| 721 | performance_tier: device.capabilities.performance_tier, |
| 722 | uptime: device.creation_time.elapsed(), |
| 723 | error_count: device.health.error_count.load(Ordering::Relaxed), |
| 724 | is_healthy: !device.health.is_lost.load(Ordering::Relaxed), |
| 725 | memory_usage: device.health.memory_usage.read().clone(), |
| 726 | temperature: device.health.temperature.read().clone(), |
| 727 | power_usage: device.health.power_usage.read().clone(), |
| 728 | }) |
| 729 | } |
| 730 | } |
| 731 | |
| 732 | /// Device statistics for monitoring |
| 733 | #[derive(Debug, Clone)] |
| 734 | pub struct DeviceStats { |
| 735 | pub device_name: String, |
| 736 | pub vendor: GpuVendor, |
| 737 | pub performance_tier: PerformanceTier, |
| 738 | pub uptime: Duration, |
| 739 | pub error_count: u64, |
| 740 | pub is_healthy: bool, |
| 741 | pub memory_usage: Option<f32>, |
| 742 | pub temperature: Option<f32>, |
| 743 | pub power_usage: Option<f32>, |
| 744 | } |
| 745 | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a capability record for `vendor` with fixed, arbitrary limits.
    fn caps_for(vendor: GpuVendor, vendor_id: u32) -> GpuCapabilities {
        GpuCapabilities {
            vendor,
            device_name: "Test GPU".to_string(),
            device_id: 0,
            vendor_id,
            performance_tier: PerformanceTier::HighEnd,
            memory_size: 8 * 1024 * 1024 * 1024,
            memory_bandwidth: None,
            compute_units: None,
            base_clock: None,
            boost_clock: None,
            supports_ray_tracing: false,
            supports_mesh_shaders: false,
            supports_variable_rate_shading: false,
            max_texture_size: 16384,
            max_texture_array_layers: 2048,
            max_bind_groups: 8,
            max_dynamic_uniform_buffers: 8,
            max_storage_buffers: 8,
            max_sampled_textures: 16,
            max_samplers: 16,
            max_storage_textures: 8,
            max_vertex_buffers: 8,
            max_vertex_attributes: 16,
            max_push_constant_size: 128,
            timestamp_period: 1.0,
            supported_features: Features::empty(),
            limits: Limits::default(),
        }
    }

    /// Creating a manager succeeds whenever at least one adapter exists.
    ///
    /// Machines without any GPU adapter (for example headless CI runners)
    /// cannot exercise this path, so the test reports a skip instead of
    /// failing there.
    #[tokio::test]
    async fn test_device_manager_creation() {
        match DeviceManager::new(None, None).await {
            Ok(manager) => assert!(!manager.adapters().is_empty()),
            Err(err) => eprintln!("skipping test_device_manager_creation: {}", err),
        }
    }

    #[test]
    fn test_gpu_vendor_detection() {
        assert_eq!(GpuVendor::from(0x10DE), GpuVendor::Nvidia);
        assert_eq!(GpuVendor::from(0x1002), GpuVendor::Amd);
        assert_eq!(GpuVendor::from(0x1022), GpuVendor::Amd);
        assert_eq!(GpuVendor::from(0x8086), GpuVendor::Intel);
        assert_eq!(GpuVendor::from(0x106B), GpuVendor::Apple);
        assert_eq!(GpuVendor::from(0x5143), GpuVendor::Qualcomm);
        assert_eq!(GpuVendor::from(0x13B5), GpuVendor::ARM);
        assert_eq!(GpuVendor::from(0), GpuVendor::Unknown);
        assert_eq!(GpuVendor::from(0xFFFF_FFFF), GpuVendor::Unknown);
    }

    #[test]
    fn test_performance_tier_ordering() {
        assert!(PerformanceTier::Integrated < PerformanceTier::Entry);
        assert!(PerformanceTier::Entry < PerformanceTier::Mainstream);
        assert!(PerformanceTier::Enthusiast < PerformanceTier::Professional);
    }

    #[test]
    fn test_optimization_hints() {
        let caps = caps_for(GpuVendor::Nvidia, 0x10DE);

        let hints = caps.get_optimization_hints();
        assert_eq!(hints.preferred_workgroup_size, (32, 1, 1));
        assert!(hints.prefers_texture_arrays);
    }

    #[test]
    fn test_optimization_hints_fall_back_to_default() {
        let caps = caps_for(GpuVendor::Unknown, 0);

        let hints = caps.get_optimization_hints();
        let defaults = OptimizationHints::default();
        assert_eq!(hints.preferred_workgroup_size, defaults.preferred_workgroup_size);
        assert_eq!(hints.supports_async_compute, defaults.supports_async_compute);
        assert_eq!(hints.supports_fast_math, defaults.supports_fast_math);
    }
}
| 800 |