Programmazione Funzionale e Parallela

Corso di Laurea in Ingegneria Informatica e Automatica - A.A. 2017-2018

HomePage | Avvisi | Diario lezioni | Materiale didattico | Esami | Forum | Login

Giovedì 7 dicembre 2017


Compito B del 10/2/2016: mirror

Codice host:
mirror.c
#include "mirror.h"
#include <stdlib.h>
#include <assert.h>

// Side length of the 2D work-group (LOCAL_SIZE x LOCAL_SIZE work-items);
// the global work size is rounded up to a multiple of this value.
#define LOCAL_SIZE  8
// Name of the kernel function looked up in the compiled device program.
#define KERNEL_NAME "mirror"

// ---------------------------------------------------------------------
// mirror
// ---------------------------------------------------------------------
// data-parallel GPU version

/*
 * Runs the KERNEL_NAME kernel on the device to build, from a w x h
 * matrix of bytes, an output matrix that is w wide and 2*h tall.
 *
 * in   : input matrix on host (h*w bytes are copied to the device)
 * w, h : width and height of the input matrix
 * out  : receives a pointer to the output matrix, allocated here with
 *        malloc and not freed by this function
 * ow   : receives the output width  (w)
 * oh   : receives the output height (2*h)
 * dev  : clut device providing program, context and command queue
 * td   : receives the kernel duration as reported by clut_get_duration
 *
 * Any failure is reported through clut_panic / clut_check_err.
 */
void mirror(unsigned char* in, int w, int h,
            unsigned char** out, int* ow, int* oh,
            clut_device* dev, double* td) {

    int       err;      // error code
    cl_kernel kernel;   // execution kernel
    cl_mem    din;      // input matrix on device
    cl_mem    dout;     // output matrix on device
    cl_event  evt;      // performance measurement event

    // Buffer sizes in bytes, computed in size_t so that the product of
    // the dimensions is not evaluated (and possibly overflowed) in int.
    size_t in_bytes  = (size_t)w * (size_t)h * sizeof(unsigned char);
    size_t out_bytes = 2 * in_bytes;

    // allocate output matrix in host
    *out = malloc(out_bytes);
    if (*out == NULL)
        clut_panic("failed to allocate output matrix on host memory");
    *ow = w;
    *oh = 2*h;

    // create the compute kernel
    kernel = clCreateKernel(dev->program, KERNEL_NAME, &err);
    clut_check_err(err, "failed to create kernel");

    // allocate input matrix on device as a copy of input matrix on host
    din = clCreateBuffer(dev->context,
                         CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                         in_bytes, in, NULL);
    if (!din) clut_panic("failed to allocate input matrix on device memory");

    // allocate output matrix on device
    dout = clCreateBuffer(dev->context,
                          CL_MEM_WRITE_ONLY,
                          out_bytes, NULL, NULL);
    if (!dout) clut_panic("failed to allocate output matrix on device memory");

    // set the arguments to our compute kernel (order: in, out, h, w)
    err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &din);
    err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &dout);
    err |= clSetKernelArg(kernel, 2, sizeof(int), &h);
    err |= clSetKernelArg(kernel, 3, sizeof(int), &w);
    clut_check_err(err, "failed to set kernel arguments");

    // execute the kernel over the range of our 2D input data set:
    // one work-item per input element, with each dimension rounded up
    // to a multiple of LOCAL_SIZE
    size_t local_dim[]  = { LOCAL_SIZE, LOCAL_SIZE };
    size_t global_dim[] = { (size_t)w, (size_t)h };
    global_dim[0] = ((global_dim[0]+LOCAL_SIZE-1)/LOCAL_SIZE)*LOCAL_SIZE; // round up
    global_dim[1] = ((global_dim[1]+LOCAL_SIZE-1)/LOCAL_SIZE)*LOCAL_SIZE; // round up

    err = clEnqueueNDRangeKernel(dev->queue, kernel, 2,
                                 NULL, global_dim, local_dim, 0, NULL, &evt);
    clut_check_err(err, "failed to execute kernel");

    // copy result from device to host (blocking read)
    err = clEnqueueReadBuffer(dev->queue, dout, CL_TRUE, 0,
                              out_bytes, *out, 0, NULL, NULL);
    clut_check_err(err, "failed to read output result");

    // return kernel execution time
    *td = clut_get_duration(evt);

    // cleanup
    // NOTE(review): assumes clut_get_duration does not release the event
    // itself -- confirm against the clut implementation.
    clReleaseEvent(evt);
    clReleaseMemObject(din);
    clReleaseMemObject(dout);
    clReleaseKernel(kernel);
}


Codice guest:
mirror.cl
// Linear offset of element (x,y) in a row-major matrix with w columns.
#define IDX(x,y,w) ((y)*(w)+(x))

// One work-item per input element: copies I(x,y) to O(x,y) and to the
// vertically mirrored position O(x, 2*h-1-y). I is read as a w x h
// matrix, O is written as a w x 2*h matrix.
__kernel void mirror(__global unsigned char* I,
                     __global unsigned char* O,
                     int h, int w) {

    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // work-items outside the w x h input do nothing
    if (col < w && row < h) {
        const unsigned char value = I[IDX(col,row,w)];
        const int mirrored_row = 2*h-row-1;

        O[IDX(col,row,w)] = value;
        O[IDX(col,mirrored_row,w)] = value;
    }
}

Valid XHTML 1.0 Transitional :: Valid CSS :: Powered by WikkaWiki
Page was generated in 0.0580 seconds