Skip to content
Snippets Groups Projects
Commit ac8038ae authored by Brandon Rodriguez's avatar Brandon Rodriguez
Browse files

Add some initial CUDA image manipulation test functions

parent c9680c19
Branches
No related merge requests found
......@@ -31,6 +31,137 @@ const Image::PixelStruct Image::pixelPink = Image::PixelStruct(1,0,1);
const Image::PixelStruct Image::pixelLiBlue = Image::PixelStruct(0,1,1);
//region CUDA Methods.
/**
* Displays various properties of all CUDA devices found on machine.
**/
void display_cuda_device_properties() {
// Check available CUDA devices.
cudaDeviceProp cuda_device_properties;
int cuda_device_count;
cudaGetDeviceCount(&cuda_device_count);
for (int index = 0; index < cuda_device_count; index++) {
cudaGetDeviceProperties(&cuda_device_properties, index);
printf("\n\n\n\n");
printf("==== General Information for CUDA Device %d ====\n", index);
printf("Name: %s\n", cuda_device_properties.name);
printf("Compute Capabilities: %d.%d\n", cuda_device_properties.major, cuda_device_properties.minor);
printf("Clock Rate: %d\n", cuda_device_properties.clockRate);
printf("\n\n");
printf("==== Memory Information for CUDA Device %d ====\n", index);
printf("Total Global Memory: %ld\n", cuda_device_properties.totalGlobalMem);
printf("Total Constant Memory: %ld\n", cuda_device_properties.totalConstMem);
printf("Max Memory Pitch: %ld\n", cuda_device_properties.memPitch);
printf("Texture Alignment: %ld\n", cuda_device_properties.textureAlignment);
printf("\n\n");
printf("==== Multiprocessing Information for CUDA Device %d ====\n", index);
printf("Multiprocessor Count: %d\n", cuda_device_properties.multiProcessorCount);
printf("Shared Memory Per MP: %ld\n", cuda_device_properties.sharedMemPerBlock);
printf("Registers Per MP: %d\n", cuda_device_properties.regsPerBlock);
printf("Threads in Warp: %d\n", cuda_device_properties.warpSize);
printf("Max Threads Per Block: %d\n", cuda_device_properties.maxThreadsPerBlock);
printf("Max Thread Dimensions: (%d, %d, %d)\n",
cuda_device_properties.maxThreadsDim[0],
cuda_device_properties.maxThreadsDim[1],
cuda_device_properties.maxThreadsDim[2]
);
printf("Max Grid Dimensions: (%d, %d, %d)\n",
cuda_device_properties.maxGridSize[0],
cuda_device_properties.maxGridSize[1],
cuda_device_properties.maxGridSize[2]
);
printf("\n\n\n\n");
}
}
/**
* Updates image so that every other pixel alternates between pure black and pure white.
* Purely for testing purposes.
*/
// __global__ void cuda_pixel_alternating(Image::PixelStruct* pixel_arr, int pixel_index) {
__global__ void cuda_alternating_pixels(unsigned long total_pixels, unsigned long chunk_size, int chunk_counter, Image::PixelStruct* pixel_arr) {
// Calculate current index.
unsigned long pixel_index = blockIdx.x * blockDim.x + threadIdx.x;
// Adjust index for current chunk.
pixel_index += (chunk_size * chunk_counter);
// Only proceed if GPU thread corresponds to a pixel in the image. Ignore all extra threads.
if (pixel_index <= total_pixels) {
// Manipulate pixel data based on index.
if (pixel_index % 2 == 0) {
// Set to black.
pixel_arr[pixel_index].r = 0.f;
pixel_arr[pixel_index].g = 0.f;
pixel_arr[pixel_index].b = 0.f;
} else {
// Set to white.
pixel_arr[pixel_index].r = 255.f;
pixel_arr[pixel_index].g = 255.f;
pixel_arr[pixel_index].b = 255.f;
}
}
}
/**
* Updates image so that blocks of pixels alternate between pure black and pure white.
* Effectively, it's similar to cuda_alternating_pixels, except that all image sizes should look roughly the same.
* Purely for testing purposes.
*/
// __global__ void cuda_pixel_alternating(Image::PixelStruct* pixel_arr, int pixel_index) {
__global__ void cuda_alternating_blocks(unsigned long total_pixels, unsigned long chunk_size, int chunk_counter, int pixel_block_size, int image_width, int image_height, Image::PixelStruct* pixel_arr) {
// Calculate current index.
unsigned long pixel_index = blockIdx.x * blockDim.x + threadIdx.x;
// Adjust index for current chunk.
pixel_index += (chunk_size * chunk_counter);
// Only proceed if GPU thread corresponds to a pixel in the image. Ignore all extra threads.
if (pixel_index <= total_pixels) {
// Calculate if falls under black or white pixel block.
int row = pixel_index / image_width;
int col = pixel_index % image_width;
int row_grouping = row / pixel_block_size;
int col_grouping = col / pixel_block_size;
float color;
if (row_grouping % 2 == 0) {
if ((col_grouping % 2) == 0) {
color = 255.f;
} else {
color = 0.f;
}
} else {
if ((col_grouping % 2) == 0) {
color = 0.f;
} else {
color = 255.f;
}
}
// Set pixel color.
pixel_arr[pixel_index].r = color;
pixel_arr[pixel_index].g = color;
pixel_arr[pixel_index].b = color;
// int color = row * col * 2;
// printf("Width: %d Height: %d Index: %ld Row: %d Col: %d Color: %d\n", image_width, image_height, pixel_index, row, col, color);
// pixel_arr[pixel_index].r = static_cast<float>(color) / 255.f;
// pixel_arr[pixel_index].g = static_cast<float>(color) / 255.f;
// pixel_arr[pixel_index].b = static_cast<float>(color) / 255.f;
}
}
//endregion CUDA Methods.
//region Constructors.
/**
......@@ -293,4 +424,76 @@ void Image::save() {
}
}
/**
* Method to actually do work on image.
*/
void Image::compute() {
logger.debug("Image::compute():");
logger.debug("");
// Get required CUDA device data.
// For now, lazily gets first CUDA device and assumes it's the desired device to use.
// TODO: Handle multiple CUDA devices.
cudaDeviceProp cuda_device_properties;
cudaGetDeviceProperties(&cuda_device_properties, 0);
int cuda_device_max_blocks = cuda_device_properties.maxThreadsDim[0];
int cuda_device_max_threads = cuda_device_properties.maxThreadsPerBlock;
unsigned long cuda_chunk_size = cuda_device_max_blocks * cuda_device_max_threads;
logger.info("CUDA Max Blocks: " + std::to_string(cuda_device_max_blocks));
logger.info("CUDA Max Threads: " + std::to_string(cuda_device_max_threads));
logger.info("CUDA Chunk Size: " + std::to_string(cuda_chunk_size));
logger.info("CUDA Global Mem: " + std::to_string(cuda_device_properties.totalGlobalMem));
// Calculate some image data.
int pixel_struct_size = sizeof(PixelStruct);
unsigned long total_pixels = width * height;
unsigned long struct_byte_count = total_pixels * pixel_struct_size;
logger.info("Total Image Size: " + std::to_string(total_pixels));
logger.info("Pixel Struct Size: " + std::to_string(struct_byte_count));
// We handle image in chunks, in case we're dealing with large images with more pixels than CUDA device blocks/threads.
int total_image_chunks = (total_pixels / cuda_chunk_size) + 1;
logger.info("Total Image Chunks: " + std::to_string(total_image_chunks));
// Calculate desired blocks and threads.
int cuda_device_desired_block_count = total_pixels / cuda_device_max_threads;
if (cuda_device_desired_block_count <= 0) {
cuda_device_desired_block_count = 1;
}
// TODO: Handle if desired block count exceeds device maximum count.
// Process image by iterating over all chunks.
for (int chunk_index = 0; chunk_index < total_image_chunks; chunk_index++) {
logger.info("Iterating over image chunk " + std::to_string(chunk_index));
logger.info("Accounts for pixels " + std::to_string(cuda_chunk_size * chunk_index) + " through " + std::to_string((cuda_chunk_size * chunk_index) + cuda_chunk_size) );
// Allocate memory on GPU.
PixelStruct* gpu_pixel_arr;
cudaMalloc((void**) &gpu_pixel_arr, struct_byte_count);
cudaMemcpy(gpu_pixel_arr, pixel_arr, struct_byte_count, cudaMemcpyHostToDevice);
// Run GPU kernel logic.
// cuda_alternating_pixels<<<cuda_device_desired_block_count, cuda_device_max_threads>>>(total_pixels, cuda_chunk_size, chunk_index, gpu_pixel_arr);
int pixel_block_size = width / 10;
if (pixel_block_size <= 1) {
pixel_block_size = 2;
}
cuda_alternating_blocks<<<cuda_device_desired_block_count, cuda_device_max_threads>>>(total_pixels, cuda_chunk_size, chunk_index, pixel_block_size, width, height, gpu_pixel_arr);
// Synchronize all GPU computations before iterating over next chunk.
cudaDeviceSynchronize();
// Retrieve results.
cudaMemcpy(pixel_arr, gpu_pixel_arr, struct_byte_count, cudaMemcpyDeviceToHost);
cudaFree(gpu_pixel_arr);
}
logger.debug("Image::compute() finished.");
logger.debug("");
}
//endregion Public Methods.
......@@ -21,7 +21,7 @@
*/
class Image {
private:
public:
/**
* Struct to act as a "container" of sorts for each individual rgb pixel.
* An array of these creates our entire image.
......@@ -46,6 +46,7 @@ class Image {
};
private:
// Private Variables.
std::string input_path;
std::string output_path;
......@@ -107,6 +108,12 @@ class Image {
*/
void save();
/**
* Method to actually do work on image.
*/
void compute();
//endregion Public Methods.
};
......@@ -161,8 +161,12 @@ void process_file(std::string path_str) {
// Track time to process.
time_t start_time = time(0);
// Import image data to class.
Image image(path_str);
image.display_properties();
// Manipulate and save image data.
image.compute();
image.save();
// Calculate time processing file.
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment