Giovedì 7 dicembre 2017
Compito B del 10/2/2016: mirror
Codice host:
#include "mirror.h"
#include <stdlib.h>
#include <assert.h>
#define LOCAL_SIZE 8
#define KERNEL_NAME "mirror"
// ---------------------------------------------------------------------
// mirror
// ---------------------------------------------------------------------
// data-parallel GPU version
/*
 * Host-side driver for the "mirror" OpenCL kernel.
 *
 * Uploads the w x h byte matrix `in` to the device, runs the kernel over a
 * 2D range covering the input, and reads back a w x 2h byte matrix.
 *
 * Parameters:
 *   in      input matrix on the host, h*w bytes, indexed as y*w + x by the
 *           kernel
 *   w, h    input width and height
 *   out     receives a malloc'ed buffer of 2*h*w bytes holding the result;
 *           this function never frees it
 *   ow, oh  receive the output width (w) and height (2*h)
 *   dev     supplies the OpenCL context, program and command queue
 *   td      receives the value of clut_get_duration() for the kernel event
 *
 * Failures are reported through clut_panic() / clut_check_err().
 */
void mirror(unsigned char* in, int w, int h,
            unsigned char** out, int* ow, int* oh,
            clut_device* dev, double* td) {

    int       err;     // error code
    cl_kernel kernel;  // execution kernel
    cl_mem    din;     // input matrix on device
    cl_mem    dout;    // output matrix on device
    cl_event  evt;     // performance measurement event

    // Byte counts are computed in size_t: the plain int product w*2*h can
    // overflow (undefined behavior) before it is widened.
    size_t in_bytes  = (size_t)w * (size_t)h * sizeof(unsigned char);
    size_t out_bytes = 2 * in_bytes;

    // allocate output matrix in host
    *out = malloc(out_bytes);
    if (*out == NULL)
        clut_panic("failed to allocate output matrix on host memory");

    *ow = w;
    *oh = 2*h;

    // create the compute kernel
    kernel = clCreateKernel(dev->program, KERNEL_NAME, &err);
    clut_check_err(err, "failed to create kernel");

    // allocate input matrix on device as a copy of input matrix on host
    din = clCreateBuffer(dev->context,
                         CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                         in_bytes, in, NULL);
    if (!din) clut_panic("failed to allocate input matrix on device memory");

    // allocate output matrix on device
    dout = clCreateBuffer(dev->context,
                          CL_MEM_WRITE_ONLY,
                          out_bytes, NULL, NULL);
    if (!dout) clut_panic("failed to allocate output matrix on device memory");

    // set the arguments to our compute kernel
    // (the kernel takes the height before the width)
    err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &din);
    err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &dout);
    err |= clSetKernelArg(kernel, 2, sizeof(int), &h);
    err |= clSetKernelArg(kernel, 3, sizeof(int), &w);
    clut_check_err(err, "failed to set kernel arguments");

    // execute the kernel over the range of our 2D input data set;
    // each global dimension is rounded up to a multiple of LOCAL_SIZE
    size_t local_dim[]  = { LOCAL_SIZE, LOCAL_SIZE };
    size_t global_dim[] = { (size_t)w, (size_t)h };
    global_dim[0] = ((global_dim[0]+LOCAL_SIZE-1)/LOCAL_SIZE)*LOCAL_SIZE;
    global_dim[1] = ((global_dim[1]+LOCAL_SIZE-1)/LOCAL_SIZE)*LOCAL_SIZE;

    err = clEnqueueNDRangeKernel(dev->queue, kernel, 2,
                                 NULL, global_dim, local_dim, 0, NULL, &evt);
    clut_check_err(err, "failed to execute kernel");

    // copy result from device to host (blocking read)
    err = clEnqueueReadBuffer(dev->queue, dout, CL_TRUE, 0,
                              out_bytes, *out, 0, NULL, NULL);
    clut_check_err(err, "failed to read output result");

    // return kernel execution time
    *td = clut_get_duration(evt);

    // cleanup; the event is released too, otherwise one leaks per call
    clReleaseEvent(evt);
    clReleaseMemObject(din);
    clReleaseMemObject(dout);
    clReleaseKernel(kernel);
}
Codice guest:
#define IDX(x,y,w) ((y)*(w)+(x))
// Each work-item handles one input element at (column, row) = global ids
// (0, 1). It copies the element to the same position in O and to the
// vertically mirrored row 2*h - row - 1, so O holds the h input rows
// followed by the same rows in reverse order. Both matrices are addressed
// as row*w + column. Work-items outside the w x h input do nothing.
__kernel void mirror(__global unsigned char* I,
                     __global unsigned char* O,
                     int h, int w) {
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (col < w && row < h) {
        const unsigned char value = I[IDX(col, row, w)];
        const int mirrored_row = 2*h - row - 1;

        O[IDX(col, row, w)] = value;
        O[IDX(col, mirrored_row, w)] = value;
    }
}