Programmazione Funzionale e Parallela

Corso di Laurea in Ingegneria Informatica e Automatica - A.A. 2017-2018

HomePage | Avvisi | Diario lezioni | Materiale didattico | Esami | Forum | Login

Giovedì 20 dicembre 2017


Esempio reverse a gruppi visto in classe.

reverseloc.c
#include <stdio.h>
#include <stdlib.h>
#include "clut.h"

// Name of the kernel function looked up in the compiled program.
#define KERNEL_NAME "reverseloc"
// Work-group size; also the length of each group that gets reversed.
#define LOCAL_SIZE 64
// Number of vector elements (a multiple of LOCAL_SIZE). Parenthesized so the
// macro expands safely inside larger expressions such as x / N or x % N.
#define N (LOCAL_SIZE*1000000)
// Set to 1 to print the result vector while it is being checked.
#define DEBUG 0

int main() {
    int       err;      // error code
    cl_kernel kernel;   // execution kernel
    cl_mem    dv;       // vector on device
    cl_event  evt;      // performance measurement event

    clut_device dev;
    clut_open_device(&dev, "reverseloc.cl");
   
    int *v = malloc(N*sizeof(int)), i, n = N;
    if (!v) clut_panic("failed to allocate memory on host");
   
    for (i=1; i<=n; ++i) v[i-1] = i;

    // create the compute kernel
    kernel = clCreateKernel(dev.program, KERNEL_NAME, &err);
    clut_check_err(err, "failed to create kernel");

    // allocate ivector on device as a copy of input matrix on host
    dv = clCreateBuffer(dev.context,
                         CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                         n*sizeof(int), v, NULL);
    if (!dv) clut_panic("failed to allocate input matrix on device memory");

    // set the arguments to our compute kernel
    err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &dv);
    err |= clSetKernelArg(kernel, 1, sizeof(int), &n);
    clut_check_err(err, "failed to set kernel arguments");

    // execute the kernel over the range of our 2D input data set
    size_t local_dim[]  = { LOCAL_SIZE };
    size_t global_dim[] = { n };
    global_dim[0] = ((global_dim[0]+LOCAL_SIZE-1)/LOCAL_SIZE)*LOCAL_SIZE; // round up

    err = clEnqueueNDRangeKernel(dev.queue, kernel, 1,
                                 NULL, global_dim, local_dim, 0, NULL, &evt);
    clut_check_err(err, "failed to execute kernel");

    // copy result from device to host
    err = clEnqueueReadBuffer(dev.queue, dv, CL_TRUE, 0,
                             n*sizeof(int), v, 0, NULL, NULL);
    clut_check_err(err, "failed to read output result");

    // return kernel execution time
    double t = clut_get_duration(evt);

    // print & check results
    printf("Duration: %f\n", t);
    for (i=0; i<n; ++i) {
        #if DEBUG
        printf("%d%s", v[i], (i+1)%LOCAL_SIZE == 0 ? "\n\n" : " ");
        #endif
        if (v[i+LOCAL_SIZE-2*(i%LOCAL_SIZE)-1] != i+1) break;
    }
    printf("%s Sayonara.\n", i==n ? "[OK]" : "[Error]");

    // cleanup
    clReleaseMemObject(dv);
    clReleaseKernel(kernel);

    free(v);

    clut_close_device(&dev);
}


reverseloc.cl
/*
 * Reverses, in place, each work-group-sized chunk of v.
 *
 * v: vector in global memory, read and written by the kernel
 * n: number of elements of v
 *
 * Each work item saves its own element, waits at a barrier until the other
 * items of its work group have done the same, then writes the saved value
 * to the mirrored position inside the group's chunk. When n is not a
 * multiple of the work-group size, the last chunk is shorter and is
 * reversed within its own length; items past the end of v neither read nor
 * write, but they still reach the barrier.
 */
__kernel void reverseloc(__global int* v, int n) {

    int g = get_global_id(0);
    int s = get_local_size(0);
    int l = get_local_id(0);       // with no global offset, also l = g % s

    int base  = g - l;                          // first index of this group's chunk
    int count = (n - base < s) ? n - base : s;  // elements of v in the chunk
    int valid = l < count;                      // does this item own an element?

    // No early return before the barrier: every work item of the group has
    // to reach it, so out-of-range items just skip the load and the store.
    int elem = 0;
    if (valid) elem = v[g];
    barrier(CLK_GLOBAL_MEM_FENCE); // synchronization among the work items of the same work group: try removing it to see whether the result is still correct
    if (valid) v[base+count-1-l] = elem;        // full group: same as g+s-2*l-1
}

Valid XHTML 1.0 Transitional :: Valid CSS :: Powered by WikkaWiki
Page was generated in 0.0526 seconds