summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author cflip <cflip@cflip.net> 2022-12-11 19:50:23 -0700
committer cflip <cflip@cflip.net> 2022-12-11 19:50:23 -0700
commit 4be0c03f018337ab0b4c4cd48ddd940fac72f229 (patch)
tree 6b471707df04b17c56b8a42aebc3e0e8bbeb9857
parent cfa10e706bb86f846ff3c827614d5a32af5a91fd (diff)
Add an experimental CUDA program to find the hash (HEAD, master)
Since the CPU can only do things so fast, I'd like to use GPU acceleration to speed up the process. This simple CUDA program is able to find strings of up to about 5 characters from their hash, but doesn't seem to work for longer strings. Eventually I'd like to integrate this with the main program instead of compiling a separate executable.
-rw-r--r-- hashstrings.cu 69
1 files changed, 69 insertions, 0 deletions
diff --git a/hashstrings.cu b/hashstrings.cu
new file mode 100644
index 0000000..6c72170
--- /dev/null
+++ b/hashstrings.cu
@@ -0,0 +1,69 @@
+#include <stdio.h>
+#include <math.h>
+
+// Length, in characters, of every candidate string generated by the kernel.
+#define STRING_LENGTH 4
+
+// The candidate count and the kernel's thread index are plain ints. With an
+// alphabet of 26 letters, 26^6 = 308,915,776 still fits in a 32-bit int but
+// 26^7 = 8,031,810,176 does not, so refuse to build instead of overflowing.
+// (This bound is tied to n == 26 below; revisit it if n changes.)
+#if STRING_LENGTH > 6
+#error "26^STRING_LENGTH overflows int; widen num_elements and the kernel index first"
+#endif
+
+// Size of the candidate alphabet; the kernel maps 0..n-1 onto 'a' + index.
+const int n = 26;
+
+// Returns base^exponent using integer multiplication only, so the result is
+// exact (no floating-point pow followed by an implicit truncation to int).
+static int integer_power(int base, int exponent)
+{
+    int value = 1;
+    for (int k = 0; k < exponent; k++)
+        value *= base;
+    return value;
+}
+
+// Total number of candidate strings: n^STRING_LENGTH.
+const int num_elements = integer_power(n, STRING_LENGTH);
+
+// Number of threads in each block of the kernel launch.
+const int threads_per_block = 256;
+
+// Polynomial hash with multiplier 31 over the first n bytes of str: starting
+// from 0, every byte updates the value as hash = 31 * hash + byte.
+// Returns 0 when n <= 0.
+//
+// NOTE(review): the accumulator is a long. Where long is 64 bits the value
+// does not wrap at 32 bits, so it will differ from a hash produced with
+// 32-bit arithmetic once the string is long enough to exceed that range.
+// Confirm which width the target hashes were generated with.
+__device__ long compute_hash_code(const char* str, int n)
+{
+    long hash = 0;
+    for (int remaining = n; remaining > 0; remaining--) {
+        hash = 31 * hash + *str;
+        str++;
+    }
+    return hash;
+}
+
+// Brute-force search kernel. Each thread turns its global thread index into
+// one candidate string of STRING_LENGTH letters drawn from 'a' .. 'a' + n - 1,
+// hashes it with compute_hash_code and, on a match with *hash_to_find, copies
+// the candidate into result_str.
+//
+//   hash_to_find   - device pointer to the hash value being searched for
+//   result_str     - device buffer that receives the STRING_LENGTH bytes of a
+//                    matching candidate; no terminating '\0' is written and
+//                    the buffer is left untouched when nothing matches
+//   total_elements - number of candidates; threads whose index is at or
+//                    beyond this value exit immediately
+//
+// The index is built from the x dimension only, so launch with a 1D grid of
+// 1D blocks that supplies at least total_elements threads.
+__global__ void find_from_generated_strings(long* hash_to_find, char* result_str,
+        int total_elements)
+{
+    int i = threadIdx.x + blockIdx.x * blockDim.x;
+    if (i >= total_elements)
+        return;
+
+    // Write the index as a base-n number, least significant digit in the last
+    // character. Repeated integer division replaces (int)pow(n, j): a
+    // floating-point pow is not guaranteed to return an exact integer, and a
+    // result just below the true power would be truncated to the wrong divisor.
+    char str[STRING_LENGTH];
+    int remaining = i;
+    for (int pos = STRING_LENGTH - 1; pos >= 0; pos--) {
+        str[pos] = 'a' + remaining % n;
+        remaining /= n;
+    }
+
+    if (compute_hash_code(str, STRING_LENGTH) == *hash_to_find)
+        memcpy(result_str, str, STRING_LENGTH);
+}
+
+// Reports a failed CUDA runtime call on stderr.
+// Returns true when status is cudaSuccess, false otherwise.
+static bool cuda_ok(cudaError_t status, const char* what)
+{
+    if (status == cudaSuccess)
+        return true;
+    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
+    return false;
+}
+
+// Searches all num_elements candidate strings on the GPU for one whose hash
+// equals hash_to_find and prints the string that was found (an empty line
+// when no candidate matched).
+// Returns 0 on success and 1 when any CUDA call or the kernel launch failed.
+int main()
+{
+    long hash_to_find = 3446974;
+    // Zero-filled so the buffer is a valid empty string on every path.
+    char result[STRING_LENGTH + 1] = {0};
+
+    long* to_find_on_device = NULL;
+    char* result_string_on_device = NULL;
+
+    // Ceiling division: enough blocks to give every candidate its own thread.
+    const int num_blocks = (num_elements + threads_per_block - 1) /
+        threads_per_block;
+
+    bool ok = cuda_ok(cudaMalloc(&to_find_on_device, sizeof(hash_to_find)),
+            "cudaMalloc (hash)");
+    if (ok)
+        ok = cuda_ok(cudaMalloc(&result_string_on_device, sizeof(result)),
+                "cudaMalloc (result)");
+
+    // Copy the whole long. The previous sizeof(int) left the remaining bytes
+    // of the device value uninitialised wherever long is wider than int.
+    if (ok)
+        ok = cuda_ok(cudaMemcpy(to_find_on_device, &hash_to_find,
+                    sizeof(hash_to_find), cudaMemcpyHostToDevice),
+                "cudaMemcpy (hash to device)");
+
+    // The kernel writes the buffer only on a match and never writes a
+    // terminator, so clear it first.
+    if (ok)
+        ok = cuda_ok(cudaMemset(result_string_on_device, 0, sizeof(result)),
+                "cudaMemset (result)");
+
+    if (ok) {
+        printf("We have %d items to compute\n", num_elements);
+        printf("Running with %d blocks and %d threads per block\n", num_blocks,
+                threads_per_block);
+        find_from_generated_strings<<<num_blocks, threads_per_block>>>(to_find_on_device,
+                result_string_on_device, num_elements);
+        // A launch does not return a status; a bad configuration shows up here.
+        ok = cuda_ok(cudaGetLastError(), "kernel launch");
+    }
+
+    // This blocking copy also waits for the kernel and reports any error
+    // raised while it was running.
+    if (ok)
+        ok = cuda_ok(cudaMemcpy(result, result_string_on_device, sizeof(result),
+                    cudaMemcpyDeviceToHost),
+                "cudaMemcpy (result to host)");
+
+    if (ok) {
+        result[STRING_LENGTH] = '\0';
+        printf("%s\n", result);
+    }
+
+    // cudaFree accepts a null pointer, so this is safe after an early failure.
+    cudaFree(to_find_on_device);
+    cudaFree(result_string_on_device);
+
+    return ok ? 0 : 1;
+}