diff options
author | cflip <cflip@cflip.net> | 2022-12-11 19:50:23 -0700 |
---|---|---|
committer | cflip <cflip@cflip.net> | 2022-12-11 19:50:23 -0700 |
commit | 4be0c03f018337ab0b4c4cd48ddd940fac72f229 (patch) | |
tree | 6b471707df04b17c56b8a42aebc3e0e8bbeb9857 /hashstrings.cu | |
parent | cfa10e706bb86f846ff3c827614d5a32af5a91fd (diff) |
Since the CPU can only do things so fast, I'd like to use GPU
acceleration to speed up the process. This simple CUDA program is able
to find hashes up to about 5 characters long, but doesn't seem to work
after that.
Eventually I'd like to integrate this with the main program instead of
compiling a separate executable.
Diffstat (limited to 'hashstrings.cu')
-rw-r--r-- | hashstrings.cu | 69 |
1 files changed, 69 insertions, 0 deletions
// hashstrings.cu
//
// Brute-force search on the GPU: enumerate every lowercase string of
// STRING_LENGTH characters, hash each one, and report the string whose hash
// equals a target value.
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#define STRING_LENGTH 4

// Abort with a readable message when a CUDA runtime call fails. Errors are
// sticky: without this check an early failure only shows up later as
// unrelated garbage output.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t cuda_check_err_ = (call);                              \
        if (cuda_check_err_ != cudaSuccess) {                              \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,         \
                    __LINE__, cudaGetErrorString(cuda_check_err_));        \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// Size of the alphabet ('a'..'z').
const int n = 26;
const int threads_per_block = 256;
// Upper bound on the grid size; the kernel strides over the candidate range,
// so a capped grid still visits every candidate.
const int max_blocks = 65535;

// Computes result = 31 * result + str[i] over the first `n` bytes of `str`,
// starting from 0.
//
// NOTE(review): the accumulator is a `long` and is not wrapped to 32 bits.
// If the target hashes come from a 32-bit implementation, long strings will
// not match -- confirm against the producer of the hashes.
__host__ __device__ long compute_hash_code(const char* str, int n)
{
    long result = 0;
    for (int i = 0; i < n; i++)
        result = 31 * result + str[i];
    return result;
}

// Writes the `index`-th candidate string into `str` and NUL-terminates it.
// Candidates are the base-26 representation of `index` using the digits
// 'a'..'z', most significant digit first, so index 0 is "aaa...a".
// `str` must have room for STRING_LENGTH + 1 bytes.
__host__ __device__ void decode_candidate(long long index, char* str)
{
    // Peel off base-26 digits from the least significant end using integer
    // arithmetic only.
    for (int pos = STRING_LENGTH - 1; pos >= 0; pos--) {
        str[pos] = (char)('a' + (int)(index % n));
        index /= n;
    }
    str[STRING_LENGTH] = '\0';
}

// Returns alphabet_size raised to `length`, or -1 if that does not fit in a
// long long.
static long long count_candidates(int alphabet_size, int length)
{
    long long total = 1;
    for (int i = 0; i < length; i++) {
        if (total > LLONG_MAX / alphabet_size)
            return -1;
        total *= alphabet_size;
    }
    return total;
}

// Searches candidates [0, total_elements) for one whose hash equals
// *hash_to_find. On a match, the NUL-terminated string (STRING_LENGTH + 1
// bytes) is written to result_str; otherwise result_str is left untouched.
//
// Launch layout: 1-D grid of 1-D blocks, any size. Each thread starts at its
// global index and advances by the total thread count until the range is
// exhausted. No shared memory is used.
__global__ void find_from_generated_strings(const long* hash_to_find,
                                            char* result_str,
                                            long long total_elements)
{
    char str[STRING_LENGTH + 1];

    const long target = *hash_to_find;
    // Index math is widened to 64 bits so large candidate counts do not
    // overflow.
    const long long stride = (long long)gridDim.x * blockDim.x;
    long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;

    for (; i < total_elements; i += stride) {
        decode_candidate(i, str);
        if (compute_hash_code(str, STRING_LENGTH) == target) {
            // Copy the terminator too so the host can print the result
            // directly.
            memcpy(result_str, str, STRING_LENGTH + 1);
            return;
        }
    }
}

int main()
{
    const long hash_to_find = 3446974;
    char result[STRING_LENGTH + 1] = {0};

    const long long num_elements = count_candidates(n, STRING_LENGTH);
    if (num_elements < 0) {
        fprintf(stderr, "Too many candidates for STRING_LENGTH=%d\n",
                STRING_LENGTH);
        return 1;
    }

    long* to_find_on_device = NULL;
    char* result_string_on_device = NULL;

    CUDA_CHECK(cudaMalloc(&to_find_on_device, sizeof(hash_to_find)));
    CUDA_CHECK(cudaMalloc(&result_string_on_device, sizeof(result)));

    // Copy the whole `long`; copying only sizeof(int) bytes leaves part of
    // the device value uninitialised wherever long is wider than int.
    CUDA_CHECK(cudaMemcpy(to_find_on_device, &hash_to_find,
                          sizeof(hash_to_find), cudaMemcpyHostToDevice));
    // Start from an empty string so "no match" is detectable and the buffer
    // is always NUL-terminated.
    CUDA_CHECK(cudaMemset(result_string_on_device, 0, sizeof(result)));

    const long long blocks_needed =
        (num_elements + threads_per_block - 1) / threads_per_block;
    const int num_blocks =
        blocks_needed > max_blocks ? max_blocks : (int)blocks_needed;

    printf("We have %lld items to compute\n", num_elements);
    printf("Running with %d blocks and %d threads per block\n", num_blocks,
           threads_per_block);

    find_from_generated_strings<<<num_blocks, threads_per_block>>>(
        to_find_on_device, result_string_on_device, num_elements);
    // A kernel launch returns no status; bad launch configurations surface
    // here.
    CUDA_CHECK(cudaGetLastError());

    // Blocking copy on the default stream: it waits for the kernel and
    // reports any error raised while it ran.
    CUDA_CHECK(cudaMemcpy(result, result_string_on_device, sizeof(result),
                          cudaMemcpyDeviceToHost));

    int exit_code = 0;
    if (result[0] != '\0') {
        printf("%s\n", result);
    } else {
        fprintf(stderr, "No %d-character string hashes to %ld\n",
                STRING_LENGTH, hash_to_find);
        exit_code = 1;
    }

    CUDA_CHECK(cudaFree(to_find_on_device));
    CUDA_CHECK(cudaFree(result_string_on_device));

    return exit_code;
}