Fixes and rework to some ocl code, using u32 for batch size.

This commit is contained in:
Kirill Shakirov
2026-04-04 15:27:29 +02:00
parent 6297474142
commit 8288930872
8 changed files with 106 additions and 74 deletions
+38 -45
View File
@@ -403,9 +403,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
[[package]]
name = "hyper"
version = "1.8.1"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11"
checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca"
dependencies = [
"atomic-waker",
"bytes",
@@ -418,7 +418,6 @@ dependencies = [
"httpdate",
"itoa",
"pin-project-lite",
"pin-utils",
"smallvec",
"tokio",
"want",
@@ -465,9 +464,9 @@ checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
[[package]]
name = "indexmap"
version = "2.13.0"
version = "2.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017"
checksum = "45a8a2b9cb3e0b0c1803dbb0758ffac5de2f425b23c28f518faabd9d805342ff"
dependencies = [
"equivalent",
"hashbrown 0.16.1",
@@ -486,9 +485,9 @@ dependencies = [
[[package]]
name = "itoa"
version = "1.0.17"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
[[package]]
name = "lazy_static"
@@ -504,15 +503,15 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
[[package]]
name = "libc"
version = "0.2.178"
version = "0.2.184"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091"
checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af"
[[package]]
name = "linux-raw-sys"
version = "0.11.0"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
[[package]]
name = "log"
@@ -528,9 +527,9 @@ checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
[[package]]
name = "memchr"
version = "2.7.6"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
[[package]]
name = "mime"
@@ -550,9 +549,9 @@ dependencies = [
[[package]]
name = "mio"
version = "1.1.1"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc"
checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1"
dependencies = [
"libc",
"wasi",
@@ -665,9 +664,9 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.21.3"
version = "1.21.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
[[package]]
name = "percent-encoding"
@@ -712,12 +711,6 @@ version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
[[package]]
name = "pin-utils"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "prettyplease"
version = "0.2.37"
@@ -730,9 +723,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
version = "1.0.103"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
dependencies = [
"unicode-ident",
]
@@ -792,9 +785,9 @@ dependencies = [
[[package]]
name = "pulldown-cmark"
version = "0.13.1"
version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83c41efbf8f90ac44de7f3a868f0867851d261b56291732d0cbf7cceaaeb55a6"
checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad"
dependencies = [
"bitflags 2.11.0",
"memchr",
@@ -812,9 +805,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.42"
version = "1.0.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
dependencies = [
"proc-macro2",
]
@@ -875,9 +868,9 @@ dependencies = [
[[package]]
name = "rustix"
version = "1.1.3"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34"
checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
dependencies = [
"bitflags 2.11.0",
"errno",
@@ -947,9 +940,9 @@ dependencies = [
[[package]]
name = "simd-adler32"
version = "0.3.8"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
[[package]]
name = "slab"
@@ -975,9 +968,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.111"
version = "2.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87"
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
dependencies = [
"proc-macro2",
"quote",
@@ -992,9 +985,9 @@ checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
[[package]]
name = "tempfile"
version = "3.25.0"
version = "3.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1"
checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd"
dependencies = [
"fastrand",
"getrandom",
@@ -1025,9 +1018,9 @@ dependencies = [
[[package]]
name = "tokio"
version = "1.50.0"
version = "1.51.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d"
checksum = "2bd1c4c0fc4a7ab90fc15ef6daaa3ec3b893f004f915f2392557ed23237820cd"
dependencies = [
"bytes",
"libc",
@@ -1041,9 +1034,9 @@ dependencies = [
[[package]]
name = "tokio-macros"
version = "2.6.1"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c"
checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496"
dependencies = [
"proc-macro2",
"quote",
@@ -1218,9 +1211,9 @@ checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
[[package]]
name = "unicode-ident"
version = "1.0.22"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
[[package]]
name = "unicode-xid"
@@ -1400,6 +1393,6 @@ dependencies = [
[[package]]
name = "zmij"
version = "1.0.13"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac93432f5b761b22864c774aac244fa5c0fd877678a4c37ebf6cf42208f9c9ec"
checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
+1 -5
View File
@@ -10,7 +10,7 @@ path = "src/client.rs"
[dependencies]
flate2 = "1.1.9"
ocl = { version = "0.19" }
ocl = { version = "0.19", features = ["opencl_version_2_0"]}
prost = "0.14.3"
serde = { version = "1.0.228", features = ["derive"] }
serde_json = "1.0.149"
@@ -23,7 +23,3 @@ tonic-prost-build = "0.14.5"
ocl-include = "0.6"
flate2 = "1.1.9"
[target.aarch64-unknown-linux-gnu]
linker = "aarch64-linux-gnu-gcc"
+14 -10
View File
@@ -100,7 +100,7 @@ async fn commit_work(
return Ok(response.into_inner());
}
fn benchmark(exec_context: &mut ocl_utils::ExecContext) -> (u64, usize) {
fn benchmark(exec_context: &mut ocl_utils::ExecContext) -> (u32, usize) {
let mut nyan_exec_dat = ocl_utils::ExecData {
start_key: vec![1u32; 4],
tweak_key: vec![2u32; 4],
@@ -114,20 +114,22 @@ fn benchmark(exec_context: &mut ocl_utils::ExecContext) -> (u64, usize) {
};
let total_work: u64 = 128000000;
let work_sizes: [usize; 9] = [64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384];
let mut work_time = [0f64; 9];
let work_sizes: [usize; 8] = [128, 256, 512, 1024, 2048, 4096, 8192, 16384];
let mut work_time = [0f64; 8];
ocl_utils::set_target_data(exec_context, &mut nyan_exec_dat).expect("Error set target data!");
let mut preffered_work_size: usize = work_sizes[0];
let mut preffered_batch_size: u64 = 0;
for i in 0..9 {
let mut preffered_batch_size: u32 = 0;
for i in 0..8 {
let test_work_s = work_sizes[i];
let batch_size: u64 = total_work / test_work_s as u64;
let batch_size: u32 = (total_work / test_work_s as u64) as u32;
nyan_exec_dat.work_size = test_work_s;
nyan_exec_dat.batch_size = batch_size;
println!("Benchmarking work size {}", test_work_s);
for _j in 0..3 {
println!("Run number {}", _j);
//exec_context.reinit_kernel(test_work_s).expect("Error reinit kernel!");
let (_, exec_time) =
ocl_utils::do_work(exec_context, &mut nyan_exec_dat).expect("Error running tests!");
work_time[i] += exec_time;
@@ -142,7 +144,8 @@ fn benchmark(exec_context: &mut ocl_utils::ExecContext) -> (u64, usize) {
}
preffered_work_size = work_sizes[i];
// calculate batch size so it correspond to 30 sec job
preffered_batch_size = (batch_size as f64 * (20.0 / work_time[i])) as u64;
let p_b_u64 = (batch_size as f64 * (10.0 / work_time[i])) as u64;
preffered_batch_size = p_b_u64.min(u32::MAX as u64) as u32;
println!("batch_size {}, work_time {}, preffered_batch_size {}, preffered_work_size {}",
batch_size,
work_time[i],
@@ -290,7 +293,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
]))
.to_vec();
let mut batch_size = work_data.work_size / nyan_exec_dat.work_size as u64;
let bs_u64 = work_data.work_size / nyan_exec_dat.work_size as u64;
let mut batch_size = bs_u64.min(u32::MAX as u64) as u32;
if (work_data.work_size % nyan_exec_dat.work_size as u64) != 0 {
batch_size += 1;
}
@@ -298,6 +302,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
nyan_exec_dat.batch_size = batch_size;
println!("Crunching numbers...");
//exec_context.reinit_kernel(nyan_exec_dat.work_size).expect("Error reinit kernel!");
match ocl_utils::do_work(&mut exec_context, &mut nyan_exec_dat) {
Err(_) => println!("Error doing work!"),
Ok((k_f, work_time)) => {
@@ -308,8 +313,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let g_k_p_s = (work_data.work_size as f64 / work_time) / 1000000000.0;
if giga_keys_per_second != 0f64 {
giga_keys_per_second -= giga_keys_per_second / 10.0;
giga_keys_per_second += g_k_p_s / 10.0;
giga_keys_per_second = giga_keys_per_second*0.9 + g_k_p_s*0.1;
} else {
giga_keys_per_second = g_k_p_s;
}
+1 -1
View File
@@ -10,7 +10,7 @@ pub struct DevConf {
pub platform_name: String,
pub id: usize,
pub work_size: usize,
pub batch_size: u64,
pub batch_size: u32,
}
impl DevConf {
+28 -6
View File
@@ -7,7 +7,7 @@ use crate::num_utils;
pub struct CtxBuffers {
tweak_params: Buffer<u32>,
batch_size: Buffer<u64>,
batch_size: Buffer<u32>,
start_key: Buffer<u32>,
tweak_key: Buffer<u32>,
uenc_data: Buffer<u32>,
@@ -23,7 +23,7 @@ pub struct ExecData {
pub tweak_i: u64,
pub tweak_j: u32,
pub key_found: Vec<u32>,
pub batch_size: u64,
pub batch_size: u32,
pub work_size: usize,
}
impl ExecData {
@@ -99,11 +99,11 @@ pub fn init_buffers(cl_queue: &Queue) -> Result<CtxBuffers, ocl::Error> {
.fill_val(0u32)
.build()?;
let cl_buffer_batch_size = Buffer::<u64>::builder()
let cl_buffer_batch_size = Buffer::<u32>::builder()
.queue(cl_queue.clone())
.flags(flags::MEM_READ_ONLY)
.len(1)
.fill_val(0u64)
.fill_val(0u32)
.build()?;
let cl_buffer_start_key = Buffer::<u32>::builder()
@@ -201,6 +201,16 @@ impl ExecContext {
buffers: nya_cl_buffers,
})
}
pub fn reinit_kernel(&mut self, global_work_size: usize) -> Result<(), ocl::Error> {
self.kernel = init_kernel(
global_work_size,
&self._prog,
&self.queue,
&self.buffers,
)?;
Ok(())
}
}
pub fn set_target_data(ex_ctx: &mut ExecContext, ex_data: &mut ExecData) -> Result<(), ocl::Error> {
@@ -235,7 +245,7 @@ pub fn set_target_data(ex_ctx: &mut ExecContext, ex_data: &mut ExecData) -> Resu
.write(&ex_data.target_data)
.enq()?;
ex_ctx.queue.finish()?;
//ex_ctx.queue.finish()?;
return Ok(());
}
@@ -246,6 +256,7 @@ pub fn do_work(ex_ctx: &mut ExecContext, ex_data: &mut ExecData) -> Result<(bool
let start_time = std::time::Instant::now();
//println!("Copy batch_size buffer...");
// tranfer batch_size
ex_ctx
.buffers
@@ -257,6 +268,7 @@ pub fn do_work(ex_ctx: &mut ExecContext, ex_data: &mut ExecData) -> Result<(bool
.enq()?;
//println!("Copy start_key buffer...");
// transfer start key to device
ex_ctx
.buffers
@@ -267,6 +279,7 @@ pub fn do_work(ex_ctx: &mut ExecContext, ex_data: &mut ExecData) -> Result<(bool
.write(&ex_data.start_key)
.enq()?;
//println!("Copy tweak_key buffer...");
// transfet tweak key
ex_ctx
.buffers
@@ -287,7 +300,9 @@ pub fn do_work(ex_ctx: &mut ExecContext, ex_data: &mut ExecData) -> Result<(bool
// .fill(0u32, None)
// .enq()?;
//println!("Copy data to GPU...");
//ex_ctx.queue.finish()?;
//println!("Run kernel...");
// (4) Run the kernel
unsafe {
ex_ctx
@@ -298,6 +313,10 @@ pub fn do_work(ex_ctx: &mut ExecContext, ex_data: &mut ExecData) -> Result<(bool
.enq()?;
}
//println!("Waiting for kernel to finish work...");
//ex_ctx.queue.finish()?;
//println!("Copy data back from GPU...");
// read key_foun buffer
ex_ctx
.buffers
@@ -308,6 +327,9 @@ pub fn do_work(ex_ctx: &mut ExecContext, ex_data: &mut ExecData) -> Result<(bool
.read(&mut ex_data.key_found)
.enq()?;
//println!("Copy results back...");
//ex_ctx.queue.finish()?;
let exec_duration = start_time.elapsed().as_secs_f64();
//ex_ctx.queue.finish()?;
+19
View File
@@ -45,6 +45,25 @@ inline uint add_one_to_bigint4_(uint *_n)
return t.i[1];
}
inline uint add_ulong_to_bigint4_ (uint* _n, const ulong b)
{
ul_ui_union bu = {b};
ul_ui_union t;
t.l = (ulong)_n[0] + (ulong)bu.i[0];
_n[0] = t.i[0];
t.l = (ulong)t.i[1] + (ulong)_n[1] + (ulong)bu.i[1];
_n[1] = t.i[0];
t.l = (ulong)_n[2] + (ulong)t.i[1];
_n[2] = t.i[0];
t.l = (ulong)_n[3] + (ulong)t.i[1];
_n[3] = t.i[0];
return t.i[1];
}
inline uint add_uint_to_bigint4_ (uint* _n, const uint b)
{
ul_ui_union t;
@@ -41,7 +41,7 @@
// g_params[1-2] - ulong g_Ti
// g_params[3] - g_Tj
__kernel void search_key(__global const uint* g_tweak_params,
__global const ulong* g_batch_size,
__global const uint* g_batch_size,
__global const uint4* g_start_enc_key,
__global const uint4* g_tweak_key,
__global const uint4* g_uenc_data,
@@ -59,7 +59,7 @@ __kernel void search_key(__global const uint* g_tweak_params,
uint t_ks[44]; // tweak expanded key
//set batch_size
ulong batch_size = g_batch_size[0];
uint batch_size = g_batch_size[0];
// set disk sector number
uint sec_n[4] = {0};
@@ -73,7 +73,8 @@ __kernel void search_key(__global const uint* g_tweak_params,
vstore4(*g_tweak_key, 0, tweak);
// Set initial start key for every work thread
uint k_data_carry = add_uint_to_bigint4_ (enc_key, (g_id*batch_size));
ulong th_shift = (ulong)g_id * (ulong)batch_size;
uint k_data_carry = add_ulong_to_bigint4_ (enc_key, th_shift);
if (k_data_carry != 0u) return; // if reached max key value exit thread
// Generate tweak
@@ -81,7 +82,7 @@ __kernel void search_key(__global const uint* g_tweak_params,
aes_xts256_gen_tweak (t_ks, sec_n, Tj, tweak);
//if (g_id == 0) g_key_found[1] = 1;
for (ulong batch_id = 0ul; batch_id < batch_size; batch_id++)
for (uint batch_id = 0u; batch_id < batch_size; batch_id++)
{
//if (g_id == 0) g_key_found[1] = 2;
// Set encrypt key
@@ -22,6 +22,3 @@ __kernel void test_add(__global const uint* g_num_to_add,
}