From 8d4c1ed96d137fcb301c23e8fa1e033fa96b5942 Mon Sep 17 00:00:00 2001 From: Kirill Shakirov <38155247+Nyanraltotlapun@users.noreply.github.com> Date: Fri, 16 Jan 2026 16:21:14 +0100 Subject: [PATCH] Work on clinet, OpenCL code enablement and client config stuff. --- nyash_client/Cargo.lock | 63 +++ nyash_client/Cargo.toml | 4 +- nyash_client/src/client_config.rs | 74 +++ nyash_client/src/gen_test_data.py | 18 - nyash_client/src/main.rs | 429 ++++++++++++------ nyash_client/src/num_utils.rs | 2 +- .../src/open_cl/nyash_aes_xts256_plain.cl | 22 +- .../src/open_cl/test_aes_xts256_plain.cl | 2 +- 8 files changed, 456 insertions(+), 158 deletions(-) create mode 100644 nyash_client/src/client_config.rs delete mode 100644 nyash_client/src/gen_test_data.py diff --git a/nyash_client/Cargo.lock b/nyash_client/Cargo.lock index 903a4c9..2a90c69 100644 --- a/nyash_client/Cargo.lock +++ b/nyash_client/Cargo.lock @@ -94,12 +94,24 @@ version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a471a38ef8ed83cd6e40aa59c1ffe17db6855c18e3604d9c4ed8c08ebc28678" +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + [[package]] name = "libc" version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + [[package]] name = "nodrop" version = "0.1.14" @@ -138,6 +150,8 @@ name = "nyash_client" version = "0.1.0" dependencies = [ "ocl", + "serde", + "serde_json", ] [[package]] @@ -222,6 +236,49 @@ version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + [[package]] name = "syn" version = "2.0.111" @@ -258,3 +315,9 @@ name = "unicode-ident" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "zmij" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac93432f5b761b22864c774aac244fa5c0fd877678a4c37ebf6cf42208f9c9ec" diff --git a/nyash_client/Cargo.toml b/nyash_client/Cargo.toml index 890a7eb..31c320a 100644 --- a/nyash_client/Cargo.toml +++ b/nyash_client/Cargo.toml @@ -4,4 +4,6 @@ version = "0.1.0" edition = "2024" [dependencies] -ocl = "0.19" \ No newline at end of file +ocl = "0.19" +serde = { version = "1.0.228", features = ["derive"] } +serde_json = "1.0.149" diff --git a/nyash_client/src/client_config.rs b/nyash_client/src/client_config.rs new file mode 100644 index 0000000..43a3720 --- /dev/null +++ b/nyash_client/src/client_config.rs @@ -0,0 +1,74 @@ +use ocl::{Device, Platform}; +use serde::{Deserialize, Serialize}; +use serde_json; +use std::error::Error; + +#[derive(Clone, Deserialize, Serialize, Debug)] +pub struct DevConf { + pub dev_name: String, + pub platform_name: String, + pub id: usize, + pub work_size: usize, + pub batch_size: u32, +} + +impl DevConf { + // Constructor with parameters + pub fn from_cl_dev(dev_pl: (Device, Platform), id: usize) -> Self { + Self { + dev_name: dev_pl.0.name().unwrap_or("Noname".to_string()), + platform_name: dev_pl.1.name().unwrap_or("Noname".to_string()), + id: id, + work_size: 0, + batch_size: 0, + } + } +} + +#[derive(Clone, Deserialize, Serialize, Debug)] +pub struct AppConfig { + pub devices: Vec, + pub dev_fill: u8, +} + +impl AppConfig { + // Constructor with parameters + pub fn from_dev_list(all_devices: &Vec<(Device, Platform)>, devs_nums: Vec) -> Self { + Self { + devices: devs_nums + .iter() + .map(|id| DevConf::from_cl_dev(all_devices[*id], *id)) + .collect(), + dev_fill: 100, + } + } + + pub fn device_name_exist(&self, dev_name: &str) -> bool { + match self.devices.iter().find(|d| d.dev_name == dev_name) { + Some(_) => true, + None => false, + } + } + + pub fn device_exist(&self, dev: &Device) -> bool { + match dev.name() { + Ok(dev_name) => match self.devices.iter().find(|d| d.dev_name == dev_name) { + Some(_) => true, + None => false, + }, + Err(_) => false, + } + } +} + +pub fn load_config(file_name: &str) -> Result> { + let file_data = std::fs::read_to_string(file_name)?; + let app_conf: AppConfig = serde_json::from_str(file_data.as_str())?; + return Ok(app_conf); +} + +pub fn save_config(file_name: &str, app_conf: &AppConfig) -> Result<(), Box> { + let conf_str = serde_json::to_string(app_conf)?; + std::fs::write(file_name, conf_str)?; + return Ok(()); +} diff --git a/nyash_client/src/gen_test_data.py b/nyash_client/src/gen_test_data.py deleted file mode 100644 index ce2bcd4..0000000 --- a/nyash_client/src/gen_test_data.py +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -def main(): - start_num = 0x8adb7b7e8a722df091ecea988a4ad2234836636a102ceb688b3985f89bf40002 - num1 = start_num+1 - num2 = start_num+312 - print(start_num.to_bytes(32).hex()) - print(num1.to_bytes(32).hex()) - print(num2.to_bytes(32).hex()) - - -if __name__ == '__main__': - main() diff --git a/nyash_client/src/main.rs b/nyash_client/src/main.rs index 2790d92..cbea613 100644 --- a/nyash_client/src/main.rs +++ b/nyash_client/src/main.rs @@ -2,9 +2,16 @@ extern crate ocl; use ocl::{ Buffer, Context, Device, DeviceType, Kernel, Platform, Program, Queue, SpatialDims, flags, }; -use std::io; -mod num_utils; +use serde::de::value::Error; +use std::{ + io, + str::{self, FromStr}, +}; +use crate::client_config::{AppConfig, DevConf}; + +mod client_config; +mod num_utils; /// Exploded version. Boom! /// /// The functions above use `ProQue` and other abstractions to greatly reduce @@ -98,140 +105,112 @@ fn list_devices(dev_type: DeviceType) -> Vec<(Device, Platform)> { return devices; } -struct CtxBuffers { - start_key: Buffer, - u_data: Buffer, - enc_data: Buffer, - key_found: Buffer, -} +// fn init_devices( +// devices: Vec<(Device, Platform, DevConfig)>, +// kern_name: String, +// prog_src: String, +// inc_dirs: Vec, +// ) -> Vec { +// let mut contexts: Vec = Vec::with_capacity(devices.len()); +// for (dev, plt, dev_cfg) in devices { +// let ctx = match Context::builder() +// .platform(plt) +// .devices(dev.clone()) +// .build() +// { +// Ok(c) => c, +// Err(_) => continue, +// }; -struct DevConfig { - train_work_size: bool, - global_work_size: SpatialDims, -} +// let prg = match Program::builder().devices(dev).src(&prog_src).build(&ctx) { +// Ok(p) => p, +// Err(_) => continue, +// }; -struct ExecContext { - cfg: DevConfig, - ctx: Context, - kernel: Kernel, - prog: Program, - queue: Queue, - buffers: CtxBuffers, -} +// let queue = match Queue::new(&ctx, dev, None) { +// Ok(q) => q, +// Err(_) => continue, +// }; -fn init_devices( - devices: Vec<(Device, Platform, DevConfig)>, - kern_name: String, - prog_src: String, - inc_dirs: Vec, -) -> Vec { +// // Create Buffers: +// let start_key_b = match Buffer::::builder() +// .queue(queue.clone()) +// .flags(flags::MEM_READ_ONLY) +// .len(8) +// .fill_val(0u32) +// .build() +// { +// Ok(buf) => buf, +// Err(_) => continue, +// }; - let mut contexts: Vec = Vec::with_capacity(devices.len()); - for (dev, plt, dev_cfg) in devices { - let ctx = match Context::builder() - .platform(plt) - .devices(dev.clone()) - .build() - { - Ok(c) => c, - Err(_) => continue, - }; +// let u_data_b = match Buffer::::builder() +// .queue(queue.clone()) +// .flags(flags::MEM_READ_ONLY) +// .len(4) +// .fill_val(0u32) +// .build() +// { +// Ok(buf) => buf, +// Err(_) => continue, +// }; - let prg = match Program::builder().devices(dev).src(&prog_src).build(&ctx) { - Ok(p) => p, - Err(_) => continue, - }; +// let enc_data_b = match Buffer::::builder() +// .queue(queue.clone()) +// .flags(flags::MEM_READ_ONLY) +// .len(4) +// .fill_val(0u32) +// .build() +// { +// Ok(buf) => buf, +// Err(_) => continue, +// }; - let queue = match Queue::new(&ctx, dev, None) { - Ok(q) => q, - Err(_) => continue, - }; +// let key_found_b = match Buffer::::builder() +// .queue(queue.clone()) +// .flags(flags::MEM_WRITE_ONLY) +// .len(1) +// .fill_val(0u32) +// .build() +// { +// Ok(buf) => buf, +// Err(_) => continue, +// }; - // Create Buffers: - let start_key_b = match Buffer::::builder() - .queue(queue.clone()) - .flags(flags::MEM_READ_ONLY) - .len(8) - .fill_val(0u32) - .build() - { - Ok(buf) => buf, - Err(_) => continue, - }; +// // (3) Create a kernel with arguments matching those in the source above: +// let kernel = match Kernel::builder() +// .program(&prg) +// .name(&kern_name) +// .queue(queue.clone()) +// .global_work_size(dev_cfg.global_work_size) +// .arg(&start_key_b) +// .arg(&u_data_b) +// .arg(&enc_data_b) +// .arg(&key_found_b) +// .build() +// { +// Ok(kern) => kern, +// Err(_) => continue, +// }; - let u_data_b = match Buffer::::builder() - .queue(queue.clone()) - .flags(flags::MEM_READ_ONLY) - .len(4) - .fill_val(0u32) - .build() - { - Ok(buf) => buf, - Err(_) => continue, - }; - - let enc_data_b = match Buffer::::builder() - .queue(queue.clone()) - .flags(flags::MEM_READ_ONLY) - .len(4) - .fill_val(0u32) - .build() - { - Ok(buf) => buf, - Err(_) => continue, - }; - - let key_found_b = match Buffer::::builder() - .queue(queue.clone()) - .flags(flags::MEM_WRITE_ONLY) - .len(1) - .fill_val(0u32) - .build() - { - Ok(buf) => buf, - Err(_) => continue, - }; - - // (3) Create a kernel with arguments matching those in the source above: - let kernel = match Kernel::builder() - .program(&prg) - .name(&kern_name) - .queue(queue.clone()) - .global_work_size(dev_cfg.global_work_size) - .arg(&start_key_b) - .arg(&u_data_b) - .arg(&enc_data_b) - .arg(&key_found_b) - .build() { - Ok(kern) => kern, - Err(_) => continue, - }; - - contexts.push(ExecContext { - cfg: dev_cfg, - ctx: ctx, - kernel: kernel, - prog: prg, - queue: queue, - buffers: CtxBuffers { - start_key: start_key_b, - u_data: u_data_b, - enc_data: enc_data_b, - key_found: key_found_b, - }, - }); - } - return contexts; -} - -fn main() { - println!("Hello, world nya!"); - //use ocl::{Buffer, Context, Device, Kernel, Platform, Program, Queue, flags}; - let dev_type = dev_type_from_str("GPU").expect("pur"); - - // Get devices to be used for key search - let mut all_devices: Vec<(Device, Platform)> = list_devices(dev_type); +// contexts.push(ExecContext { +// cfg: dev_cfg, +// ctx: ctx, +// kernel: kernel, +// prog: prg, +// queue: queue, +// buffers: CtxBuffers { +// start_key: start_key_b, +// u_data: u_data_b, +// enc_data: enc_data_b, +// key_found: key_found_b, +// }, +// }); +// } +// return contexts; +// } +fn dev_sel_dialog(all_devices: &Vec<(Device, Platform)>) -> Vec { let devs_nums = loop { print_devices(&all_devices); match choose_devices(all_devices.len()) { @@ -241,10 +220,200 @@ fn main() { } } }; - let devices: Vec<(Device, Platform)> = devs_nums.iter().map(|&i| all_devices[i]).collect(); - all_devices.clear(); + return devs_nums; +} + +fn get_devices_conf(file_name: &str) -> Result<(Vec<(Device, Platform)>, AppConfig), String> { + let dev_type = dev_type_from_str("GPU").expect("Unexpected device type!"); + + // Get devices to be used for key search + let all_devices: Vec<(Device, Platform)> = list_devices(dev_type); + if all_devices.len() == 0 { + return Err("Cannot find any usable devices.".to_string()); + }; + + let app_conf = match client_config::load_config(file_name) { + Ok(readed_config) => { + let dev_found = all_devices + .iter() + .filter(|dp| readed_config.device_exist(&dp.0)) + .count(); + if dev_found < readed_config.devices.len() { + println!("Devices from config not found in the system!"); + let devs_nums = dev_sel_dialog(&all_devices); + let res = AppConfig::from_dev_list(&all_devices, devs_nums); + client_config::save_config(file_name, &res); + res + } else { + readed_config + } + } + Err(_) => { + println!("Cannot find config file {}", file_name); + let devs_nums = dev_sel_dialog(&all_devices); + AppConfig::from_dev_list(&all_devices, devs_nums) + } + }; + + let selected_devs = all_devices + .iter() + .filter(|dp| app_conf.device_exist(&dp.0)) + .cloned() + .collect(); + + return Ok((selected_devs, app_conf)); +} + +struct CtxBuffers { + batch_size: u32, + tweak_i: u64, + tweak_j: u32, + start_key: Buffer, + uenc_data: Buffer, + target_data: Buffer, + key_found: Buffer, +} + +struct ExecData { + start_key: Vec, + uenc_data: Vec, + target_data: Vec, + key_found: Vec, + batch_size: u32, + work_size: usize, +} +struct ExecContext { + ctx: Context, + kernel: Kernel, + prog: Program, + queue: Queue, + buffers: CtxBuffers, + exec_data: ExecData, +} + +fn init_program( + cl_device: Device, + cl_platform: Platform, + cl_src: &str, + cl_cmplr_opt: &str, +) -> Result<(Context, Program, Queue), ocl::Error> { + let cl_context = Context::builder() + .platform(cl_platform) + .devices(cl_device.clone()) + .build()?; + + let cl_program = Program::builder() + .devices(cl_device) + .src(cl_src) + .cmplr_opt(cl_cmplr_opt) + .build(&cl_context) + .unwrap(); + + let cl_queue: Queue = Queue::new(&cl_context, cl_device, None)?; + + return Ok((cl_context, cl_program, cl_queue)); +} + +fn init_buffers(cl_queue: Queue) -> Result { + let cl_buffer_start_key = Buffer::::builder() + .queue(cl_queue.clone()) + .flags(flags::MEM_READ_ONLY) + .len(8) + .fill_val(0u32) + .build()?; + + let cl_buffer_uenc_data = Buffer::::builder() + .queue(cl_queue.clone()) + .flags(flags::MEM_READ_ONLY) + .len(4) + .fill_val(0u32) + .build()?; + + let cl_buffer_target_data = Buffer::::builder() + .queue(cl_queue.clone()) + .flags(flags::MEM_READ_ONLY) + .len(4) + .fill_val(0u32) + .build()?; + + let cl_buffer_key_found = Buffer::::builder() + .queue(cl_queue.clone()) + .flags(flags::MEM_WRITE_ONLY) + .len(9) + .fill_val(0u32) + .build()?; + + Ok(CtxBuffers { + batch_size: 0, + tweak_i: 0, + tweak_j: 0, + start_key: cl_buffer_start_key, + uenc_data: cl_buffer_uenc_data, + target_data: cl_buffer_target_data, + key_found: cl_buffer_key_found, + }) +} + +fn init_kernel( + work_size: usize, + cl_program: Program, + cl_queue: Queue, + buffs: &CtxBuffers, +) -> Result { + Kernel::builder() + .program(&cl_program) + .name("search_key") + .queue(cl_queue.clone()) + .global_work_size(work_size) + .arg(&buffs.batch_size) + .arg(&buffs.tweak_i) + .arg(&buffs.tweak_j) + .arg(&buffs.start_key) + .arg(&buffs.uenc_data) + .arg(&buffs.target_data) + .arg(&buffs.key_found) + .build() +} + +fn do_work(ex_ctx: &mut ExecContext) -> Result { + ex_ctx.buffers.batch_size = ex_ctx.exec_data.batch_size; + ex_ctx + .buffers + .start_key + .cmd() + .queue(&ex_ctx.queue) + .offset(0) + .write(&ex_ctx.exec_data.start_key) + .enq()?; + + // (4) Run the kernel + unsafe { + ex_ctx + .kernel + .cmd() + .queue(&ex_ctx.queue) + .global_work_size(ex_ctx.exec_data.work_size) + .enq()?; + } + + ex_ctx + .buffers + .key_found + .cmd() + .queue(&ex_ctx.queue) + .offset(0) + .read(&mut ex_ctx.exec_data.key_found) + .enq()?; + + if ex_ctx.exec_data.key_found[0] == 0{Ok(false)} + else {Ok(true)} +} + +fn main() { + println!("Hello, world nya!"); + //use ocl::{Buffer, Context, Device, Kernel, Platform, Program, Queue, flags}; + let devices = get_devices_conf("test.json"); println!("{:?}", devices); - // let devices: Vec<_> = platforms.iter().flat_map(|p| Device::list(p, Some(dev_type)).iter()).collect(); // let device = Device::first(platform)?; diff --git a/nyash_client/src/num_utils.rs b/nyash_client/src/num_utils.rs index 20e754e..6a153d8 100644 --- a/nyash_client/src/num_utils.rs +++ b/nyash_client/src/num_utils.rs @@ -158,7 +158,7 @@ mod num_utils_tests { .build(&cl_context) .unwrap(); let cl_queue = Queue::new(&cl_context, cl_device, None).unwrap(); - + let cl_buffer_num = Buffer::::builder() .queue(cl_queue.clone()) .flags(flags::MEM_READ_ONLY) diff --git a/nyash_client/src/open_cl/nyash_aes_xts256_plain.cl b/nyash_client/src/open_cl/nyash_aes_xts256_plain.cl index 9cb5d8a..582a0be 100644 --- a/nyash_client/src/open_cl/nyash_aes_xts256_plain.cl +++ b/nyash_client/src/open_cl/nyash_aes_xts256_plain.cl @@ -32,7 +32,7 @@ // g_key_found uint[9] - 0 element - flag that sets to 1 if key found. // Other 8 elements is found key -__kernel void search_key_test(const uint batch_size, const ulong g_Ti, const uint g_Tj, +__kernel void search_key(const uint batch_size, const ulong g_Ti, const uint g_Tj, __global const uint8* g_start_enc_key, __global const uint4* g_uenc_data, __global const uint4* g_target_data, @@ -48,11 +48,12 @@ __kernel void search_key_test(const uint batch_size, const ulong g_Ti, const uin uint d_ks[44]; // data expanded key uint t_ks[44]; // tweak expanded key + // set disk sector number uint sec_n[4] = {0}; sec_n[0] = ((uint*)&g_Ti)[0]; sec_n[1] = ((uint*)&g_Ti)[1]; - uint Tj = g_Tj; + uint Tj = g_Tj; // AES block number vstore4(*g_uenc_data, 0, uenc_data); vstore8(*g_start_enc_key, 0, enc_key); @@ -60,8 +61,9 @@ __kernel void search_key_test(const uint batch_size, const ulong g_Ti, const uin // Set initial start key for every work thread uint k_data_carry = add_uint_to_bigint4_ (enc_key, (g_id*batch_size)); - uint k_tweak_carry = add_one_to_bigint4_ (&enc_key[4]); - if (k_tweak_carry != 0u) return; // if reached max key value exit thread + // uint k_tweak_carry = add_uint_to_bigint4_ (&enc_key[4], k_data_carry); + // No need to store tweak carry + if (add_uint_to_bigint4_ (&enc_key[4], k_data_carry) != 0u) return; // if reached max key value exit thread // Generate tweak aes128_set_encrypt_key (t_ks, &enc_key[4]); @@ -88,14 +90,20 @@ __kernel void search_key_test(const uint batch_size, const ulong g_Ti, const uin // Tweak changes only once in 2^128 times if (k_data_carry != 0u) { + // Increment tweak part - k_tweak_carry = add_one_to_bigint4_ (&enc_key[4]); - if (k_tweak_carry != 0u) return; // if reached max key value exit thread + // k_tweak_carry = add_one_to_bigint4_ (&enc_key[4]); + add_one_to_bigint4_ (&enc_key[4]); // no need to store tweak carry + + // *** I commented next line because, its a really!! lol! rare event, + // *** and in a worse case we just do a bit of noneed work. + // *** But additional check on every itaration actually mesurable cost. + // if (k_tweak_carry != 0u) return; // if reached max key value exit thread + // Gen new tweak aes128_set_encrypt_key (t_ks, &enc_key[4]); aes_xts256_gen_tweak (t_ks, sec_n, Tj, tweak); } - } } diff --git a/nyash_client/src/open_cl/test_aes_xts256_plain.cl b/nyash_client/src/open_cl/test_aes_xts256_plain.cl index 9e95001..b97a829 100644 --- a/nyash_client/src/open_cl/test_aes_xts256_plain.cl +++ b/nyash_client/src/open_cl/test_aes_xts256_plain.cl @@ -27,7 +27,7 @@ __kernel void encrypt_data(__global const ulong* g_Ti, __global const uint* g_Tj uint d_ks[44]; uint t_ks[44]; uint tweak[4]; - uint enc_key[8]; + uint enc_key[8];add_uint_to_bigint4_ uint u_data[4]; uint enc_data[4] = { 0 };