Add an experimental CUDA program to find the hash (HEAD, master)

Since the CPU can only do things so fast, I'd like to use GPU acceleration to speed up the process. This simple CUDA program is able to find hashes up to about 5 characters long, but doesn't seem to work after that. Eventually I'd like to integrate this with the main program instead of compiling a separate executable.
author: cflip <cflip@cflip.net> 2022-12-11 19:50:23 -0700
committer: cflip <cflip@cflip.net> 2022-12-11 19:50:23 -0700
commit: 4be0c03f018337ab0b4c4cd48ddd940fac72f229 (patch)
tree: 6b471707df04b17c56b8a42aebc3e0e8bbeb9857
parent: cfa10e706bb86f846ff3c827614d5a32af5a91fd (diff)
1 files changed, 69 insertions, 0 deletions
diff --git a/hashstrings.cu b/hashstrings.cu
new file mode 100644
index 0000000..6c72170
--- /dev/null
+++ b/hashstrings.cu
@@ -0,0 +1,69 @@
+#include <stdio.h>
+#include <math.h>
+
+/*
+ * Length of every candidate string. The candidate count below and the
+ * kernel's thread index are plain ints, so n^STRING_LENGTH has to fit in one.
+ */
+#define STRING_LENGTH 4
+
+#if STRING_LENGTH > 6
+#error "STRING_LENGTH > 6 is unsupported: 26^7 does not fit in a 32-bit int"
+#endif
+
+/* Alphabet size: candidates are built from the letters 'a' + 0 .. 'a' + (n - 1). */
+const int n = 26;
+/*
+ * Total number of candidate strings, n^STRING_LENGTH. pow() works in double,
+ * so round to nearest instead of truncating in case the result is a hair low.
+ */
+const int num_elements = (int)(pow((double)n, (double)STRING_LENGTH) + 0.5);
+/* Threads launched per block. */
+const int threads_per_block = 256;
+
+/*
+ * Polynomial string hash: result = 31 * result + str[i] over the first n
+ * bytes of str, starting from 0.
+ *
+ * str: bytes to hash (not required to be NUL-terminated).
+ * n:   number of bytes of str to hash; n <= 0 yields 0.
+ *
+ * The sum is accumulated in unsigned arithmetic so that it wraps around
+ * instead of hitting signed-overflow undefined behaviour on long inputs.
+ * Results are identical to the plain signed recurrence whenever that
+ * recurrence does not overflow.
+ *
+ * NOTE(review): the width of long is platform dependent (64-bit on LP64
+ * Linux, 32-bit on Windows), so the wrap point differs between platforms.
+ * The recurrence looks like Java's String.hashCode, which wraps at 32 bits;
+ * if the target hashes come from such a source, values for longer strings
+ * will not match on LP64 -- confirm against the main program.
+ */
+__device__ long compute_hash_code(const char* str, int n)
+{
+	unsigned long acc = 0;
+	for (int i = 0; i < n; i++)
+		acc = 31UL * acc + (unsigned long)(long)str[i];
+	return (long)acc;
+}
+
+/*
+ * Kernel: brute-force search over all STRING_LENGTH-letter strings.
+ *
+ * Launch layout: 1D grid of 1D blocks; the global thread index selects one
+ * candidate. Threads whose index is >= total_elements do nothing.
+ *
+ * hash_to_find:   device pointer to the hash value being searched for.
+ * result_str:     device buffer of at least STRING_LENGTH + 1 bytes; a
+ *                 matching candidate is written here including its NUL
+ *                 terminator. Left untouched when nothing matches.
+ * total_elements: number of candidates to test.
+ *
+ * The thread index is read as a STRING_LENGTH-digit number in base n, most
+ * significant digit first, and each digit d becomes the letter 'a' + d.
+ *
+ * If more than one candidate matches, the matching threads write result_str
+ * without any synchronization.
+ */
+__global__ void find_from_generated_strings(long* hash_to_find, char* result_str,
+		int total_elements)
+{
+	char str[STRING_LENGTH + 1];
+
+	int i = threadIdx.x + blockIdx.x * blockDim.x;
+	if (i >= total_elements)
+		return;
+
+	/*
+	 * Peel the base-n digits off with integer division, filling the string
+	 * from its last character backwards. This avoids (int)pow(n, j), whose
+	 * double result could truncate to the wrong divisor if it came out
+	 * slightly low.
+	 */
+	int remaining = i;
+	for (int pos = STRING_LENGTH - 1; pos >= 0; pos--) {
+		str[pos] = 'a' + remaining % n;
+		remaining /= n;
+	}
+	str[STRING_LENGTH] = '\0';
+
+	/* Copy the terminator too so the host can print the result with %s. */
+	if (compute_hash_code(str, STRING_LENGTH) == *hash_to_find)
+		memcpy(result_str, str, STRING_LENGTH + 1);
+}
+
+/* Reports a failed CUDA runtime call on stderr; returns nonzero on success. */
+static int cuda_ok(cudaError_t status, const char* what)
+{
+	if (status == cudaSuccess)
+		return 1;
+	fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
+	return 0;
+}
+
+/*
+ * Uploads the target hash, launches the search kernel over all num_elements
+ * candidates, then downloads and prints the matching string.
+ *
+ * Returns 0 when the search ran (whether or not a match was found) and 1
+ * when any CUDA call failed.
+ */
+int main()
+{
+	long hash_to_find = 3446974;
+	char result[STRING_LENGTH + 1] = { 0 };
+
+	long* to_find_on_device = NULL;
+	char* result_string_on_device = NULL;
+
+	/* Ceiling division so a partial final block still covers the tail. */
+	const int num_blocks = (num_elements + threads_per_block - 1) /
+		threads_per_block;
+	int exit_code = 1;
+
+	if (!cuda_ok(cudaMalloc(&to_find_on_device, sizeof(long)),
+				"cudaMalloc (hash)"))
+		goto cleanup;
+	if (!cuda_ok(cudaMalloc(&result_string_on_device, STRING_LENGTH + 1),
+				"cudaMalloc (result)"))
+		goto cleanup;
+
+	/*
+	 * cudaMalloc does not clear memory. Zero the result buffer so that a
+	 * search without a match yields an empty string instead of garbage.
+	 */
+	if (!cuda_ok(cudaMemset(result_string_on_device, 0, STRING_LENGTH + 1),
+				"cudaMemset (result)"))
+		goto cleanup;
+
+	/* Copy the whole long; sizeof(int) would leave its upper bytes unset. */
+	if (!cuda_ok(cudaMemcpy(to_find_on_device, &hash_to_find, sizeof(long),
+					cudaMemcpyHostToDevice), "cudaMemcpy (hash)"))
+		goto cleanup;
+
+	printf("We have %d items to compute\n", num_elements);
+	printf("Running with %d blocks and %d threads per block\n", num_blocks,
+			threads_per_block);
+	find_from_generated_strings<<<num_blocks, threads_per_block>>>(to_find_on_device,
+			result_string_on_device, num_elements);
+	/* A launch returns no status itself; bad configurations show up here. */
+	if (!cuda_ok(cudaGetLastError(), "kernel launch"))
+		goto cleanup;
+
+	/* Blocking copy: waits for the kernel and reports errors raised in it. */
+	if (!cuda_ok(cudaMemcpy(result, result_string_on_device, STRING_LENGTH + 1,
+					cudaMemcpyDeviceToHost), "cudaMemcpy (result)"))
+		goto cleanup;
+	result[STRING_LENGTH] = '\0';
+
+	if (result[0] != '\0')
+		printf("%s\n", result);
+	else
+		printf("No matching string found\n");
+
+	exit_code = 0;
+
+cleanup:
+	/* cudaFree accepts a null pointer, so partial setups are handled too. */
+	cudaFree(to_find_on_device);
+	cudaFree(result_string_on_device);
+
+	return exit_code;
+}
author	cflip <cflip@cflip.net>	2022-12-11 19:50:23 -0700
committer	cflip <cflip@cflip.net>	2022-12-11 19:50:23 -0700
commit	4be0c03f018337ab0b4c4cd48ddd940fac72f229 (patch)
tree	6b471707df04b17c56b8a42aebc3e0e8bbeb9857
parent	cfa10e706bb86f846ff3c827614d5a32af5a91fd (diff)