Maybe it works!

This commit is contained in:
Joshua Coles 2025-02-12 12:02:41 +00:00
parent f102e44cc4
commit cd0b4a1bbf
6 changed files with 261 additions and 3 deletions

60
Cargo.lock generated
View File

@ -230,7 +230,9 @@ name = "exo-rs"
version = "0.1.0"
dependencies = [
"network-interface",
"phf",
"prost",
"regex",
"serde",
"serde_json",
"socket2",
@ -241,6 +243,7 @@ dependencies = [
"tonic-build",
"tracing",
"tracing-subscriber",
"uuid",
]
[[package]]
@ -670,6 +673,48 @@ dependencies = [
"indexmap 2.7.1",
]
[[package]]
name = "phf"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
dependencies = [
"phf_macros",
"phf_shared",
]
[[package]]
name = "phf_generator"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
dependencies = [
"phf_shared",
"rand",
]
[[package]]
name = "phf_macros"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
dependencies = [
"phf_generator",
"phf_shared",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "phf_shared"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
dependencies = [
"siphasher",
]
[[package]]
name = "pin-project"
version = "1.1.9"
@ -952,6 +997,12 @@ dependencies = [
"libc",
]
[[package]]
name = "siphasher"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
[[package]]
name = "slab"
version = "0.4.9"
@ -1292,6 +1343,15 @@ version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034"
[[package]]
name = "uuid"
version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ced87ca4be083373936a67f8de945faa23b6b42384bd5b64434850802c6dccd0"
dependencies = [
"getrandom 0.3.1",
]
[[package]]
name = "valuable"
version = "0.1.1"

View File

@ -15,6 +15,9 @@ tracing-subscriber = "0.3"
socket2 = "0.5.8"
system-configuration = "0.6.1"
network-interface = "2.0.0"
uuid = { version = "1.13.1", features = ["v4"] }
regex = "1.11.1"
phf = { version = "0.11.3", features = ["macros"] }
[build-dependencies]
tonic-build = "0.12.3"

View File

@ -0,0 +1,110 @@
use crate::topology::DeviceFlops;
use phf::phf_map;
/// Unit multiplier applied to every figure in [`CHIP_FLOPS`]; with a value of
/// 1.0 the stored numbers are expressed directly in TFLOPS.
const TFLOPS: f64 = 1.00;

/// Compile-time table mapping a device/chip name to its throughput figures.
///
/// Keys are matched exactly (including case) by `phf::Map::get`, so the name
/// passed in must be spelled the same way as the key.
///
/// Only plain `//` comments are used inside the macro body: a `///` doc comment
/// there is handed to `phf_map!` as a `#[doc]` attribute token instead of being
/// discarded by the lexer.
pub static CHIP_FLOPS: phf::Map<&'static str, DeviceFlops> = phf_map! {
    // Source: https://www.cpu-monkey.com
    // Note: currently no distinction between variants of M3 Max and M3 Pro, we pick the lower one to be conservative

    // --- M chips ---
    "Apple M1" => DeviceFlops { fp32: 2.29*TFLOPS, fp16: 4.58*TFLOPS, int8: 9.16*TFLOPS },
    "Apple M1 Pro" => DeviceFlops { fp32: 5.30*TFLOPS, fp16: 10.60*TFLOPS, int8: 21.20*TFLOPS },
    "Apple M1 Max" => DeviceFlops { fp32: 10.60*TFLOPS, fp16: 21.20*TFLOPS, int8: 42.40*TFLOPS },
    "Apple M1 Ultra" => DeviceFlops { fp32: 21.20*TFLOPS, fp16: 42.40*TFLOPS, int8: 84.80*TFLOPS },
    "Apple M2" => DeviceFlops { fp32: 3.55*TFLOPS, fp16: 7.10*TFLOPS, int8: 14.20*TFLOPS },
    "Apple M2 Pro" => DeviceFlops { fp32: 5.68*TFLOPS, fp16: 11.36*TFLOPS, int8: 22.72*TFLOPS },
    "Apple M2 Max" => DeviceFlops { fp32: 13.49*TFLOPS, fp16: 26.98*TFLOPS, int8: 53.96*TFLOPS },
    "Apple M2 Ultra" => DeviceFlops { fp32: 26.98*TFLOPS, fp16: 53.96*TFLOPS, int8: 107.92*TFLOPS },
    "Apple M3" => DeviceFlops { fp32: 3.55*TFLOPS, fp16: 7.10*TFLOPS, int8: 14.20*TFLOPS },
    "Apple M3 Pro" => DeviceFlops { fp32: 4.97*TFLOPS, fp16: 9.94*TFLOPS, int8: 19.88*TFLOPS },
    "Apple M3 Max" => DeviceFlops { fp32: 14.20*TFLOPS, fp16: 28.40*TFLOPS, int8: 56.80*TFLOPS },
    "Apple M4" => DeviceFlops { fp32: 4.26*TFLOPS, fp16: 8.52*TFLOPS, int8: 17.04*TFLOPS },
    "Apple M4 Pro" => DeviceFlops { fp32: 5.72*TFLOPS, fp16: 11.44*TFLOPS, int8: 22.88*TFLOPS },
    "Apple M4 Max" => DeviceFlops { fp32: 18.03*TFLOPS, fp16: 36.07*TFLOPS, int8: 72.14*TFLOPS },

    // --- A chips ---
    "Apple A13 Bionic" => DeviceFlops { fp32: 0.69*TFLOPS, fp16: 1.38*TFLOPS, int8: 2.76*TFLOPS },
    "Apple A14 Bionic" => DeviceFlops { fp32: 0.75*TFLOPS, fp16: 1.50*TFLOPS, int8: 3.00*TFLOPS },
    "Apple A15 Bionic" => DeviceFlops { fp32: 1.37*TFLOPS, fp16: 2.74*TFLOPS, int8: 5.48*TFLOPS },
    "Apple A16 Bionic" => DeviceFlops { fp32: 1.79*TFLOPS, fp16: 3.58*TFLOPS, int8: 7.16*TFLOPS },
    "Apple A17 Pro" => DeviceFlops { fp32: 2.15*TFLOPS, fp16: 4.30*TFLOPS, int8: 8.60*TFLOPS },

    // --- NVIDIA GPUs ---
    // RTX 40 series
    "NVIDIA GEFORCE RTX 4090" => DeviceFlops { fp32: 82.58*TFLOPS, fp16: 165.16*TFLOPS, int8: 330.32*TFLOPS },
    "NVIDIA GEFORCE RTX 4080" => DeviceFlops { fp32: 48.74*TFLOPS, fp16: 97.48*TFLOPS, int8: 194.96*TFLOPS },
    "NVIDIA GEFORCE RTX 4080 SUPER" => DeviceFlops { fp32: 52.0*TFLOPS, fp16: 104.0*TFLOPS, int8: 208.0*TFLOPS },
    "NVIDIA GEFORCE RTX 4070 TI SUPER" => DeviceFlops { fp32: 40.0*TFLOPS, fp16: 80.0*TFLOPS, int8: 160.0*TFLOPS },
    "NVIDIA GEFORCE RTX 4070 TI" => DeviceFlops { fp32: 39.43*TFLOPS, fp16: 78.86*TFLOPS, int8: 157.72*TFLOPS },
    "NVIDIA GEFORCE RTX 4070 SUPER" => DeviceFlops { fp32: 30.0*TFLOPS, fp16: 60.0*TFLOPS, int8: 120.0*TFLOPS },
    "NVIDIA GEFORCE RTX 4070" => DeviceFlops { fp32: 29.0*TFLOPS, fp16: 58.0*TFLOPS, int8: 116.0*TFLOPS },
    "NVIDIA GEFORCE RTX 4060 TI 16GB" => DeviceFlops { fp32: 22.0*TFLOPS, fp16: 44.0*TFLOPS, int8: 88.0*TFLOPS },
    "NVIDIA GEFORCE RTX 4060 TI" => DeviceFlops { fp32: 22.0*TFLOPS, fp16: 44.0*TFLOPS, int8: 88.0*TFLOPS },
    // RTX 30 series
    "NVIDIA GEFORCE RTX 3050" => DeviceFlops { fp32: 9.11*TFLOPS, fp16: 18.22*TFLOPS, int8: 36.44*TFLOPS },
    "NVIDIA GEFORCE RTX 3060" => DeviceFlops { fp32: 13.0*TFLOPS, fp16: 26.0*TFLOPS, int8: 52.0*TFLOPS },
    "NVIDIA GEFORCE RTX 3060 TI" => DeviceFlops { fp32: 16.2*TFLOPS, fp16: 32.4*TFLOPS, int8: 64.8*TFLOPS },
    "NVIDIA GEFORCE RTX 3070" => DeviceFlops { fp32: 20.3*TFLOPS, fp16: 40.6*TFLOPS, int8: 81.2*TFLOPS },
    "NVIDIA GEFORCE RTX 3070 TI" => DeviceFlops { fp32: 21.8*TFLOPS, fp16: 43.6*TFLOPS, int8: 87.2*TFLOPS },
    "NVIDIA GEFORCE RTX 3080 (10 GB)" => DeviceFlops { fp32: 29.8*TFLOPS, fp16: 59.6*TFLOPS, int8: 119.2*TFLOPS },
    "NVIDIA GEFORCE RTX 3080 (12 GB)" => DeviceFlops { fp32: 30.6*TFLOPS, fp16: 61.2*TFLOPS, int8: 122.4*TFLOPS },
    "NVIDIA GEFORCE RTX 3080 TI" => DeviceFlops { fp32: 34.1*TFLOPS, fp16: 68.2*TFLOPS, int8: 136.4*TFLOPS },
    "NVIDIA GEFORCE RTX 3090" => DeviceFlops { fp32: 35.6*TFLOPS, fp16: 71.2*TFLOPS, int8: 142.4*TFLOPS },
    "NVIDIA GEFORCE RTX 3090 TI" => DeviceFlops { fp32: 40.0*TFLOPS, fp16: 80.0*TFLOPS, int8: 160.0*TFLOPS },
    // RTX 20 series
    "NVIDIA GEFORCE RTX 2060" => DeviceFlops { fp32: 6.45*TFLOPS, fp16: 12.9*TFLOPS, int8: 25.8*TFLOPS },
    "NVIDIA GEFORCE RTX 2060 SUPER" => DeviceFlops { fp32: 7.2*TFLOPS, fp16: 14.4*TFLOPS, int8: 28.8*TFLOPS },
    "NVIDIA GEFORCE RTX 2070" => DeviceFlops { fp32: 7.46*TFLOPS, fp16: 14.93*TFLOPS, int8: 29.86*TFLOPS },
    "NVIDIA GEFORCE RTX 2070 SUPER" => DeviceFlops { fp32: 9.06*TFLOPS, fp16: 18.12*TFLOPS, int8: 36.24*TFLOPS },
    "NVIDIA GEFORCE RTX 2080" => DeviceFlops { fp32: 10.07*TFLOPS, fp16: 20.14*TFLOPS, int8: 40.28*TFLOPS },
    // int8 was previously 40.28 (the plain RTX 2080 figure); corrected to 2x fp16
    // like every other row in this series.
    "NVIDIA GEFORCE RTX 2080 TI" => DeviceFlops { fp32: 13.45*TFLOPS, fp16: 26.9*TFLOPS, int8: 53.8*TFLOPS },
    "NVIDIA GEFORCE RTX 2080 SUPER" => DeviceFlops { fp32: 11.15*TFLOPS, fp16: 22.30*TFLOPS, int8: 44.60*TFLOPS },
    "NVIDIA TITAN RTX" => DeviceFlops { fp32: 16.31*TFLOPS, fp16: 32.62*TFLOPS, int8: 65.24*TFLOPS },
    // GTX 10 series
    "NVIDIA GEFORCE GTX 1050 TI" => DeviceFlops { fp32: 2.0*TFLOPS, fp16: 4.0*TFLOPS, int8: 8.0*TFLOPS },
    "NVIDIA GEFORCE GTX 1070" => DeviceFlops { fp32: 6.463*TFLOPS, fp16: 0.101*TFLOPS, int8: 25.852*TFLOPS },
    "NVIDIA GEFORCE GTX 1080" => DeviceFlops { fp32: 8.873*TFLOPS, fp16: 0.138*TFLOPS, int8: 35.492*TFLOPS },
    "NVIDIA GEFORCE GTX 1080 TI" => DeviceFlops { fp32: 11.34*TFLOPS, fp16: 0.177*TFLOPS, int8: 45.36*TFLOPS },
    // GTX 16 series
    // NOTE(review): this key is mixed-case while the other NVIDIA keys are upper-case;
    // confirm which spelling callers actually pass.
    "NVIDIA GeForce GTX 1660 TI" => DeviceFlops { fp32: 4.8*TFLOPS, fp16: 9.6*TFLOPS, int8: 19.2*TFLOPS },
    // QUADRO RTX Ampere series
    "NVIDIA RTX A2000" => DeviceFlops { fp32: 7.99*TFLOPS, fp16: 7.99*TFLOPS, int8: 31.91*TFLOPS },
    "NVIDIA RTX A4000" => DeviceFlops { fp32: 19.17*TFLOPS, fp16: 19.17*TFLOPS, int8: 76.68*TFLOPS },
    "NVIDIA RTX A4500" => DeviceFlops { fp32: 23.65*TFLOPS, fp16: 23.65*TFLOPS, int8: 94.6*TFLOPS },
    "NVIDIA RTX A5000" => DeviceFlops { fp32: 27.8*TFLOPS, fp16: 27.8*TFLOPS, int8: 111.2*TFLOPS },
    "NVIDIA RTX A6000" => DeviceFlops { fp32: 38.71*TFLOPS, fp16: 38.71*TFLOPS, int8: 154.84*TFLOPS },
    // NVIDIA Ada Lovelace Architecture-Based
    "NVIDIA RTX 4000 ADA GENERATION" => DeviceFlops { fp32: 26.7*TFLOPS, fp16: 26.7*TFLOPS, int8: 258.0*TFLOPS },
    // Common Server GPUs
    "NVIDIA A40 48GB PCIE" => DeviceFlops { fp32: 37.4*TFLOPS, fp16: 149.7*TFLOPS, int8: 299.3*TFLOPS },
    "NVIDIA A100 40GB PCIE" => DeviceFlops { fp32: 19.5*TFLOPS, fp16: 312.0*TFLOPS, int8: 624.0*TFLOPS },
    "NVIDIA A800 40GB PCIE" => DeviceFlops { fp32: 19.5*TFLOPS, fp16: 312.0*TFLOPS, int8: 624.0*TFLOPS },
    "NVIDIA A100 80GB PCIE" => DeviceFlops { fp32: 19.5*TFLOPS, fp16: 312.0*TFLOPS, int8: 624.0*TFLOPS },
    "NVIDIA A800 80GB PCIE" => DeviceFlops { fp32: 19.5*TFLOPS, fp16: 312.0*TFLOPS, int8: 624.0*TFLOPS },
    "NVIDIA A100 80GB SXM" => DeviceFlops { fp32: 19.5*TFLOPS, fp16: 312.0*TFLOPS, int8: 624.0*TFLOPS },
    "NVIDIA A800 80GB SXM" => DeviceFlops { fp32: 19.5*TFLOPS, fp16: 312.0*TFLOPS, int8: 624.0*TFLOPS },

    // --- AMD GPUs ---
    // RX 6000 series
    "AMD Radeon RX 6900 XT" => DeviceFlops { fp32: 23.04*TFLOPS, fp16: 46.08*TFLOPS, int8: 92.16*TFLOPS },
    "AMD Radeon RX 6800 XT" => DeviceFlops { fp32: 20.74*TFLOPS, fp16: 41.48*TFLOPS, int8: 82.96*TFLOPS },
    "AMD Radeon RX 6800" => DeviceFlops { fp32: 16.17*TFLOPS, fp16: 32.34*TFLOPS, int8: 64.68*TFLOPS },
    "AMD Radeon RX 6700 XT" => DeviceFlops { fp32: 13.21*TFLOPS, fp16: 26.42*TFLOPS, int8: 52.84*TFLOPS },
    "AMD Radeon RX 6700" => DeviceFlops { fp32: 11.4*TFLOPS, fp16: 22.8*TFLOPS, int8: 45.6*TFLOPS },
    "AMD Radeon RX 6600 XT" => DeviceFlops { fp32: 10.6*TFLOPS, fp16: 21.2*TFLOPS, int8: 42.4*TFLOPS },
    "AMD Radeon RX 6600" => DeviceFlops { fp32: 8.93*TFLOPS, fp16: 17.86*TFLOPS, int8: 35.72*TFLOPS },
    "AMD Radeon RX 6500 XT" => DeviceFlops { fp32: 5.77*TFLOPS, fp16: 11.54*TFLOPS, int8: 23.08*TFLOPS },
    "AMD Radeon RX 6400" => DeviceFlops { fp32: 3.57*TFLOPS, fp16: 7.14*TFLOPS, int8: 14.28*TFLOPS },
    // RX 7000 series
    "AMD Radeon RX 7900 XTX" => DeviceFlops { fp32: 61.4*TFLOPS, fp16: 122.8*TFLOPS, int8: 245.6*TFLOPS },
    "AMD Radeon RX 7900 XT" => DeviceFlops { fp32: 53.4*TFLOPS, fp16: 106.8*TFLOPS, int8: 213.6*TFLOPS },
    "AMD Radeon RX 7800 XT" => DeviceFlops { fp32: 42.6*TFLOPS, fp16: 85.2*TFLOPS, int8: 170.4*TFLOPS },
    "AMD Radeon RX 7700 XT" => DeviceFlops { fp32: 34.2*TFLOPS, fp16: 68.4*TFLOPS, int8: 136.8*TFLOPS },
    "AMD Radeon RX 7600" => DeviceFlops { fp32: 21.5*TFLOPS, fp16: 43.0*TFLOPS, int8: 86.0*TFLOPS },
    "AMD Radeon RX 7500" => DeviceFlops { fp32: 16.2*TFLOPS, fp16: 32.4*TFLOPS, int8: 64.8*TFLOPS },
};
/// Looks up the throughput figures for `chip` in [`CHIP_FLOPS`].
///
/// The name is first tried verbatim. If that misses and the name carries a
/// "Laptop GPU" marker as a prefix or suffix (in either of the two casings
/// below), the marker is stripped and the remaining base name is tried.
///
/// The previous implementation did the reverse: it *added* the marker to the
/// query and looked that up, which could never succeed because no table key
/// contains the marker.
///
/// Returns `None` when neither the name nor its stripped form is a key; the
/// match is case-sensitive.
pub fn look_up(chip: &str) -> Option<DeviceFlops> {
    const LAPTOP_MARKERS: [&str; 2] = ["Laptop GPU", "LAPTOP GPU"];

    CHIP_FLOPS
        .get(chip)
        .or_else(|| {
            LAPTOP_MARKERS.iter().find_map(|marker| {
                let base = chip
                    .strip_suffix(marker)
                    .or_else(|| chip.strip_prefix(marker))?
                    // Drop the space that separated the marker from the base name.
                    .trim();
                CHIP_FLOPS.get(base)
            })
        })
        .cloned()
}

View File

@ -6,6 +6,7 @@ use std::net::SocketAddr;
use std::time::Duration;
use tokio::net::UdpSocket;
use tokio::task::JoinHandle;
use uuid::Uuid;
mod broadcast;
mod udp_listen;
@ -47,6 +48,22 @@ pub struct NodeInfo {
pub device_capabilities: DeviceCapabilities,
}
impl Default for NodeInfo {
    /// Creates a `NodeInfo` with a newly generated v4 UUID as its id, the
    /// capabilities reported by `DeviceCapabilities::determine()`, zeroed
    /// ports, no peer/interface allow-lists, and type-default intervals.
    fn default() -> Self {
        // The two non-trivial values are computed up front, in the same
        // order the original struct literal evaluated them.
        let node_id = Uuid::new_v4().to_string();
        let device_capabilities = DeviceCapabilities::determine();

        Self {
            node_id,
            device_capabilities,
            discovery_listen_port: 0,
            broadcast_port: 0,
            grpc_port: 0,
            broadcast_interval: Default::default(),
            discovery_timeout: Default::default(),
            allowed_peer_ids: None,
            allowed_interfaces: None,
        }
    }
}
pub struct UdpDiscovery {
node_info: NodeInfo,
discovery_handle: JoinHandle<()>,

View File

@ -2,6 +2,7 @@ mod topology;
mod orchestration;
mod discovery;
mod network;
mod device_capability_data;
use serde::{Deserialize, Serialize};
use serde_json::Value;
@ -14,6 +15,7 @@ use crate::node_service::{
use node_service::node_service_server::{NodeService, NodeServiceServer};
use node_service::TensorRequest;
use topology::Topology;
use crate::discovery::{NodeInfo, UdpDiscovery};
pub mod node_service {
tonic::include_proto!("node_service"); // The string specified here must match the proto package name
@ -193,6 +195,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let grpc_addr = "[::1]:50051".parse()?;
let node = Node::default();
let udp_discovery = UdpDiscovery::new(NodeInfo::default());
// TODO: Also implement discovery

View File

@ -1,5 +1,7 @@
use std::collections::HashMap;
use std::process::Command;
use serde::{Deserialize, Serialize};
use crate::device_capability_data;
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct Topology {
@ -27,10 +29,73 @@ pub struct DeviceCapabilities {
}
/// Top-level shape that `DeviceCapabilities::determine` deserializes the
/// output of `system_profiler SPHardwareDataType -json` into.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct SystemProfilerOutputData {
/// Entries stored under the JSON key `SPHardwareDataType`.
#[serde(rename = "SPHardwareDataType")]
hardware: Vec<SPHardwareDataType>
}
/// One hardware entry from the `SPHardwareDataType` array.
///
/// Only `machine_name`, `chip_type` and `physical_memory` are read by
/// `DeviceCapabilities::determine`; the remaining fields are carried along
/// unchanged.
///
/// NOTE(review): every field is a plain `String` with no `#[serde(default)]`,
/// so a JSON entry missing any of these keys will fail to deserialize —
/// confirm all keys are present on every supported machine.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct SPHardwareDataType {
// Serialized under the key `_name`.
#[serde(rename = "_name")]
name: String,
activation_lock_status: String,
boot_rom_version: String,
// Used as the lookup key into the FLOPS table.
chip_type: String,
machine_model: String,
// Becomes `DeviceCapabilities::model`.
machine_name: String,
model_number: String,
number_processors: String,
os_loader_version: String,
// Parsed as "<number> <unit>" to produce `DeviceCapabilities::memory`.
physical_memory: String,
#[serde(rename = "platform_UUID")]
platform_uuid: String,
#[serde(rename = "provisioning_UDID")]
provisioning_udid: String,
serial_number: String
}
impl DeviceCapabilities {
    /// Probes the local machine by running
    /// `system_profiler SPHardwareDataType -json` and builds a
    /// `DeviceCapabilities` from the first hardware entry it reports.
    ///
    /// # Panics
    ///
    /// Panics (with a message naming the failing step) when:
    /// - the `system_profiler` command cannot be run,
    /// - its output is not the expected JSON,
    /// - the output contains no hardware entry, or
    /// - the reported chip has no entry in the FLOPS table.
    pub fn determine() -> DeviceCapabilities {
        let stdout = Command::new("system_profiler")
            .arg("SPHardwareDataType")
            .arg("-json")
            .output()
            .expect("failed to run `system_profiler SPHardwareDataType -json`")
            .stdout;

        let data = serde_json::from_slice::<SystemProfilerOutputData>(&stdout)
            .expect("failed to parse system_profiler JSON output");

        // `into_iter().next()` instead of `remove(0)`: an empty list now yields
        // a descriptive panic rather than an index-out-of-bounds one.
        let hardware = data
            .hardware
            .into_iter()
            .next()
            .expect("system_profiler reported no SPHardwareDataType entries");

        let memory = Self::parse_memory_mb(&hardware.physical_memory);
        let model = hardware.machine_name;
        let chip = hardware.chip_type;
        let flops = device_capability_data::look_up(&chip)
            .unwrap_or_else(|| panic!("Failed to find FLOPS data for chip {:?}", chip));

        DeviceCapabilities {
            flops,
            model,
            chip,
            memory,
        }
    }

    /// Converts a `"<number> <unit>"` string such as `"16 GB"` into a memory
    /// amount where one GB counts as 1024 units.
    ///
    /// `"GB"` is multiplied by 1024 and `"TB"` by 1024 * 1024; any other unit
    /// leaves the number untouched. Returns 0 when the string has fewer than
    /// two whitespace-separated parts or the number is not a valid `u64`.
    fn parse_memory_mb(raw: &str) -> u64 {
        let mut parts = raw.split_ascii_whitespace();
        let (amount, unit) = match (parts.next(), parts.next()) {
            (Some(amount), Some(unit)) => (amount, unit),
            _ => return 0,
        };

        let amount = amount.parse::<u64>().unwrap_or(0);
        // Saturate rather than overflow on absurdly large inputs.
        match unit {
            "TB" => amount.saturating_mul(1024 * 1024),
            "GB" => amount.saturating_mul(1024),
            _ => amount,
        }
    }
}
/// Throughput figures for a single device, one value per numeric precision.
///
/// The chip table builds these from literals multiplied by a `TFLOPS` constant
/// of 1.0, so the stored values are in TFLOPS.
///
/// The fields are public `f64`s: the table constructs this struct from another
/// module using fractional values. The earlier private `u64` declarations of
/// the same three field names are removed, since a struct cannot declare a
/// field twice.
#[derive(Debug, Deserialize, Serialize, Clone, Default)]
pub struct DeviceFlops {
    /// Single-precision (32-bit float) throughput.
    pub fp32: f64,
    /// Half-precision (16-bit float) throughput.
    pub fp16: f64,
    /// 8-bit integer throughput.
    pub int8: f64,
}
#[derive(Debug, Deserialize, Serialize, Clone, Hash, Eq, PartialEq)]