hashstrings.cu


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69

#include <stdio.h>
#include <math.h>

#define STRING_LENGTH 4

const int n = 26;
const int num_elements = pow(n, STRING_LENGTH);
const int threads_per_block = 256;

__device__ long compute_hash_code(const char* str, int n)
{
	long result = 0;
	for (int i = 0; i < n; i++)
		result = 31 * result + str[i];
	return result;
}

__global__ void find_from_generated_strings(long* hash_to_find, char* result_str,
		int total_elements)
{
	char str[STRING_LENGTH + 1];

	int i = threadIdx.x + blockIdx.x * blockDim.x;
	if (i >= total_elements)
		return;

	for (int j = 0; j < STRING_LENGTH; j++) {
		int string_index = STRING_LENGTH - (j + 1);
		int char_index = (i / (int)pow(n, j)) % n;
		str[string_index] = 'a' + char_index;
	}

	if (compute_hash_code(str, STRING_LENGTH) == *hash_to_find) {
		memcpy(result_str, str, STRING_LENGTH);
		return;
	}
}

int main()
{
	long hash_to_find = 3446974;
	char result[STRING_LENGTH + 1];

	long* to_find_on_device;
	char* result_string_on_device;

	cudaMalloc(&to_find_on_device, sizeof(long));
	cudaMalloc(&result_string_on_device, STRING_LENGTH + 1);

	cudaMemcpy(to_find_on_device, &hash_to_find, sizeof(int),
			cudaMemcpyHostToDevice);

	const int num_blocks = (num_elements + threads_per_block - 1) /
		threads_per_block;
	printf("We have %d items to compute\n", num_elements);
	printf("Running with %d blocks and %d threads per block\n", num_blocks,
			threads_per_block);
	find_from_generated_strings<<<num_blocks, threads_per_block>>>(to_find_on_device,
			result_string_on_device, num_elements);

	cudaMemcpy(result, result_string_on_device, STRING_LENGTH + 1, cudaMemcpyDeviceToHost);

	printf("%s\n", result);

	cudaFree(to_find_on_device);
	cudaFree(result_string_on_device);

	return 0;
}