From 8288930872c0d288e4d5af5340ed481978a5eb93 Mon Sep 17 00:00:00 2001 From: Kirill Shakirov <38155247+Nyanraltotlapun@users.noreply.github.com> Date: Sat, 4 Apr 2026 15:27:29 +0200 Subject: [PATCH] Fixes and rework to some ocl code, using u32 for batch size. --- nyash_client/Cargo.lock | 83 +++++++++---------- nyash_client/Cargo.toml | 6 +- nyash_client/src/client.rs | 24 +++--- nyash_client/src/client_config.rs | 2 +- nyash_client/src/ocl_utils.rs | 34 ++++++-- nyash_client/src/open_cl/num_utils.cl | 19 +++++ .../src/open_cl/nyash_aes_xts256_plain.cl | 9 +- nyash_client/src/open_cl/test_num_utils.cl | 3 - 8 files changed, 106 insertions(+), 74 deletions(-) diff --git a/nyash_client/Cargo.lock b/nyash_client/Cargo.lock index ae24894..431365f 100644 --- a/nyash_client/Cargo.lock +++ b/nyash_client/Cargo.lock @@ -403,9 +403,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hyper" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" dependencies = [ "atomic-waker", "bytes", @@ -418,7 +418,6 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "pin-utils", "smallvec", "tokio", "want", @@ -465,9 +464,9 @@ checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" [[package]] name = "indexmap" -version = "2.13.0" +version = "2.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +checksum = "45a8a2b9cb3e0b0c1803dbb0758ffac5de2f425b23c28f518faabd9d805342ff" dependencies = [ "equivalent", "hashbrown 0.16.1", @@ -486,9 +485,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "lazy_static" @@ -504,15 +503,15 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" [[package]] name = "libc" -version = "0.2.178" +version = "0.2.184" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" +checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af" [[package]] name = "linux-raw-sys" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "log" @@ -528,9 +527,9 @@ checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "mime" @@ -550,9 +549,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.1.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" dependencies = [ "libc", "wasi", @@ -665,9 +664,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "percent-encoding" @@ -712,12 +711,6 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - [[package]] name = "prettyplease" version = "0.2.37" @@ -730,9 +723,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.103" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] @@ -792,9 +785,9 @@ dependencies = [ [[package]] name = "pulldown-cmark" -version = "0.13.1" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83c41efbf8f90ac44de7f3a868f0867851d261b56291732d0cbf7cceaaeb55a6" +checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad" dependencies = [ "bitflags 2.11.0", "memchr", @@ -812,9 +805,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.42" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -875,9 +868,9 @@ dependencies = [ [[package]] name = "rustix" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ "bitflags 2.11.0", "errno", @@ -947,9 +940,9 @@ dependencies = [ [[package]] name = "simd-adler32" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" [[package]] name = "slab" @@ -975,9 +968,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.111" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -992,9 +985,9 @@ checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" [[package]] name = "tempfile" -version = "3.25.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", "getrandom", @@ -1025,9 +1018,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.50.0" +version = "1.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" +checksum = "2bd1c4c0fc4a7ab90fc15ef6daaa3ec3b893f004f915f2392557ed23237820cd" dependencies = [ "bytes", "libc", @@ -1041,9 +1034,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.6.1" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" +checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" dependencies = [ "proc-macro2", "quote", @@ -1218,9 +1211,9 @@ checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" [[package]] name = "unicode-ident" -version = "1.0.22" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-xid" @@ -1400,6 +1393,6 @@ dependencies = [ [[package]] name = "zmij" -version = "1.0.13" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac93432f5b761b22864c774aac244fa5c0fd877678a4c37ebf6cf42208f9c9ec" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/nyash_client/Cargo.toml b/nyash_client/Cargo.toml index 5f6629c..a8ea812 100644 --- a/nyash_client/Cargo.toml +++ b/nyash_client/Cargo.toml @@ -10,7 +10,7 @@ path = "src/client.rs" [dependencies] flate2 = "1.1.9" -ocl = { version = "0.19" } +ocl = { version = "0.19", features = ["opencl_version_2_0"]} prost = "0.14.3" serde = { version = "1.0.228", features = ["derive"] } serde_json = "1.0.149" @@ -23,7 +23,3 @@ tonic-prost-build = "0.14.5" ocl-include = "0.6" flate2 = "1.1.9" - -[target.aarch64-unknown-linux-gnu] -linker = "aarch64-linux-gnu-gcc" - diff --git a/nyash_client/src/client.rs b/nyash_client/src/client.rs index ab2f0f9..f4a7076 100644 --- a/nyash_client/src/client.rs +++ b/nyash_client/src/client.rs @@ -100,7 +100,7 @@ async fn commit_work( return Ok(response.into_inner()); } -fn benchmark(exec_context: &mut ocl_utils::ExecContext) -> (u64, usize) { +fn benchmark(exec_context: &mut ocl_utils::ExecContext) -> (u32, usize) { let mut nyan_exec_dat = ocl_utils::ExecData { start_key: vec![1u32; 4], tweak_key: vec![2u32; 4], @@ -114,20 +114,22 @@ fn benchmark(exec_context: &mut ocl_utils::ExecContext) -> (u64, usize) { }; let total_work: u64 = 128000000; - let work_sizes: [usize; 9] = [64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384]; - let mut work_time = [0f64; 9]; + let work_sizes: [usize; 8] = [128, 256, 512, 1024, 2048, 4096, 8192, 16384]; + let mut work_time = [0f64; 8]; ocl_utils::set_target_data(exec_context, &mut nyan_exec_dat).expect("Error set target data!"); let mut preffered_work_size: usize = work_sizes[0]; - let mut preffered_batch_size: u64 = 0; - for i in 0..9 { + let mut preffered_batch_size: u32 = 0; + for i in 0..8 { let test_work_s = work_sizes[i]; - let batch_size: u64 = total_work / test_work_s as u64; + let batch_size: u32 = (total_work / test_work_s as u64) as u32; nyan_exec_dat.work_size = test_work_s; nyan_exec_dat.batch_size = batch_size; println!("Benchmarking work size {}", test_work_s); for _j in 0..3 { + println!("Run number {}", _j); + //exec_context.reinit_kernel(test_work_s).expect("Error reinit kernel!"); let (_, exec_time) = ocl_utils::do_work(exec_context, &mut nyan_exec_dat).expect("Error running tests!"); work_time[i] += exec_time; @@ -142,7 +144,8 @@ fn benchmark(exec_context: &mut ocl_utils::ExecContext) -> (u64, usize) { } preffered_work_size = work_sizes[i]; // calculate batch size so it correspond to 30 sec job - preffered_batch_size = (batch_size as f64 * (20.0 / work_time[i])) as u64; + let p_b_u64 = (batch_size as f64 * (10.0 / work_time[i])) as u64; + preffered_batch_size = p_b_u64.min(u32::MAX as u64) as u32; println!("batch_size {}, work_time {}, preffered_batch_size {}, preffered_work_size {}", batch_size, work_time[i], @@ -290,7 +293,8 @@ async fn main() -> Result<(), Box> { ])) .to_vec(); - let mut batch_size = work_data.work_size / nyan_exec_dat.work_size as u64; + let bs_u64 = work_data.work_size / nyan_exec_dat.work_size as u64; + let mut batch_size = bs_u64.min(u32::MAX as u64) as u32; if (work_data.work_size % nyan_exec_dat.work_size as u64) != 0 { batch_size += 1; } @@ -298,6 +302,7 @@ async fn main() -> Result<(), Box> { nyan_exec_dat.batch_size = batch_size; println!("Crunching numbers..."); + //exec_context.reinit_kernel(nyan_exec_dat.work_size).expect("Error reinit kernel!"); match ocl_utils::do_work(&mut exec_context, &mut nyan_exec_dat) { Err(_) => println!("Error doing work!"), Ok((k_f, work_time)) => { @@ -308,8 +313,7 @@ async fn main() -> Result<(), Box> { let g_k_p_s = (work_data.work_size as f64 / work_time) / 1000000000.0; if giga_keys_per_second != 0f64 { - giga_keys_per_second -= giga_keys_per_second / 10.0; - giga_keys_per_second += g_k_p_s / 10.0; + giga_keys_per_second = giga_keys_per_second*0.9 + g_k_p_s*0.1; } else { giga_keys_per_second = g_k_p_s; } diff --git a/nyash_client/src/client_config.rs b/nyash_client/src/client_config.rs index 2eb8221..f394786 100644 --- a/nyash_client/src/client_config.rs +++ b/nyash_client/src/client_config.rs @@ -10,7 +10,7 @@ pub struct DevConf { pub platform_name: String, pub id: usize, pub work_size: usize, - pub batch_size: u64, + pub batch_size: u32, } impl DevConf { diff --git a/nyash_client/src/ocl_utils.rs b/nyash_client/src/ocl_utils.rs index d30bb7d..773842c 100644 --- a/nyash_client/src/ocl_utils.rs +++ b/nyash_client/src/ocl_utils.rs @@ -7,7 +7,7 @@ use crate::num_utils; pub struct CtxBuffers { tweak_params: Buffer, - batch_size: Buffer, + batch_size: Buffer, start_key: Buffer, tweak_key: Buffer, uenc_data: Buffer, @@ -23,7 +23,7 @@ pub struct ExecData { pub tweak_i: u64, pub tweak_j: u32, pub key_found: Vec, - pub batch_size: u64, + pub batch_size: u32, pub work_size: usize, } impl ExecData { @@ -99,11 +99,11 @@ pub fn init_buffers(cl_queue: &Queue) -> Result { .fill_val(0u32) .build()?; - let cl_buffer_batch_size = Buffer::::builder() + let cl_buffer_batch_size = Buffer::::builder() .queue(cl_queue.clone()) .flags(flags::MEM_READ_ONLY) .len(1) - .fill_val(0u64) + .fill_val(0u32) .build()?; let cl_buffer_start_key = Buffer::::builder() @@ -201,6 +201,16 @@ impl ExecContext { buffers: nya_cl_buffers, }) } + + pub fn reinit_kernel(&mut self, global_work_size: usize) -> Result<(), ocl::Error> { + self.kernel = init_kernel( + global_work_size, + &self._prog, + &self.queue, + &self.buffers, + )?; + Ok(()) + } } pub fn set_target_data(ex_ctx: &mut ExecContext, ex_data: &mut ExecData) -> Result<(), ocl::Error> { @@ -235,7 +245,7 @@ pub fn set_target_data(ex_ctx: &mut ExecContext, ex_data: &mut ExecData) -> Resu .write(&ex_data.target_data) .enq()?; - ex_ctx.queue.finish()?; + //ex_ctx.queue.finish()?; return Ok(()); } @@ -246,6 +256,7 @@ pub fn do_work(ex_ctx: &mut ExecContext, ex_data: &mut ExecData) -> Result<(bool let start_time = std::time::Instant::now(); + //println!("Copy batch_size buffer..."); // tranfer batch_size ex_ctx .buffers @@ -257,6 +268,7 @@ pub fn do_work(ex_ctx: &mut ExecContext, ex_data: &mut ExecData) -> Result<(bool .enq()?; + //println!("Copy start_key buffer..."); // transfer start key to device ex_ctx .buffers @@ -267,6 +279,7 @@ pub fn do_work(ex_ctx: &mut ExecContext, ex_data: &mut ExecData) -> Result<(bool .write(&ex_data.start_key) .enq()?; + //println!("Copy tweak_key buffer..."); // transfet tweak key ex_ctx .buffers @@ -287,7 +300,9 @@ pub fn do_work(ex_ctx: &mut ExecContext, ex_data: &mut ExecData) -> Result<(bool // .fill(0u32, None) // .enq()?; - + //println!("Copy data to GPU..."); + //ex_ctx.queue.finish()?; + //println!("Run kernel..."); // (4) Run the kernel unsafe { ex_ctx @@ -298,6 +313,10 @@ pub fn do_work(ex_ctx: &mut ExecContext, ex_data: &mut ExecData) -> Result<(bool .enq()?; } + //println!("Waiting for kernel to finish work..."); + //ex_ctx.queue.finish()?; + + //println!("Copy data back from GPU..."); // read key_foun buffer ex_ctx .buffers @@ -308,6 +327,9 @@ pub fn do_work(ex_ctx: &mut ExecContext, ex_data: &mut ExecData) -> Result<(bool .read(&mut ex_data.key_found) .enq()?; + //println!("Copy results back..."); + //ex_ctx.queue.finish()?; + let exec_duration = start_time.elapsed().as_secs_f64(); //ex_ctx.queue.finish()?; diff --git a/nyash_client/src/open_cl/num_utils.cl b/nyash_client/src/open_cl/num_utils.cl index c6e1425..c7cdd7e 100644 --- a/nyash_client/src/open_cl/num_utils.cl +++ b/nyash_client/src/open_cl/num_utils.cl @@ -45,6 +45,25 @@ inline uint add_one_to_bigint4_(uint *_n) return t.i[1]; } +inline uint add_ulong_to_bigint4_ (uint* _n, const ulong b) +{ + ul_ui_union bu = {b}; + ul_ui_union t; + t.l = (ulong)_n[0] + (ulong)bu.i[0]; + _n[0] = t.i[0]; + + t.l = (ulong)t.i[1] + (ulong)_n[1] + (ulong)bu.i[1]; + _n[1] = t.i[0]; + + t.l = (ulong)_n[2] + (ulong)t.i[1]; + _n[2] = t.i[0]; + + t.l = (ulong)_n[3] + (ulong)t.i[1]; + _n[3] = t.i[0]; + + return t.i[1]; +} + inline uint add_uint_to_bigint4_ (uint* _n, const uint b) { ul_ui_union t; diff --git a/nyash_client/src/open_cl/nyash_aes_xts256_plain.cl b/nyash_client/src/open_cl/nyash_aes_xts256_plain.cl index 4f5c98d..feb07c3 100644 --- a/nyash_client/src/open_cl/nyash_aes_xts256_plain.cl +++ b/nyash_client/src/open_cl/nyash_aes_xts256_plain.cl @@ -41,7 +41,7 @@ // g_params[1-2] - ulong g_Ti // g_params[3] - g_Tj __kernel void search_key(__global const uint* g_tweak_params, - __global const ulong* g_batch_size, + __global const uint* g_batch_size, __global const uint4* g_start_enc_key, __global const uint4* g_tweak_key, __global const uint4* g_uenc_data, @@ -59,7 +59,7 @@ __kernel void search_key(__global const uint* g_tweak_params, uint t_ks[44]; // tweak expanded key //set batch_size - ulong batch_size = g_batch_size[0]; + uint batch_size = g_batch_size[0]; // set disk sector number uint sec_n[4] = {0}; @@ -73,7 +73,8 @@ __kernel void search_key(__global const uint* g_tweak_params, vstore4(*g_tweak_key, 0, tweak); // Set initial start key for every work thread - uint k_data_carry = add_uint_to_bigint4_ (enc_key, (g_id*batch_size)); + ulong th_shift = (ulong)g_id * (ulong)batch_size; + uint k_data_carry = add_ulong_to_bigint4_ (enc_key, th_shift); if (k_data_carry != 0u) return; // if reached max key value exit thread // Generate tweak @@ -81,7 +82,7 @@ __kernel void search_key(__global const uint* g_tweak_params, aes_xts256_gen_tweak (t_ks, sec_n, Tj, tweak); //if (g_id == 0) g_key_found[1] = 1; - for (ulong batch_id = 0ul; batch_id < batch_size; batch_id++) + for (uint batch_id = 0u; batch_id < batch_size; batch_id++) { //if (g_id == 0) g_key_found[1] = 2; // Set encrypt key diff --git a/nyash_client/src/open_cl/test_num_utils.cl b/nyash_client/src/open_cl/test_num_utils.cl index a8292a0..a6c2db0 100644 --- a/nyash_client/src/open_cl/test_num_utils.cl +++ b/nyash_client/src/open_cl/test_num_utils.cl @@ -22,6 +22,3 @@ __kernel void test_add(__global const uint* g_num_to_add, } - - -