blob: 8997cd06d60180480221badc6f5e61cefeb97cbe [file] [log] [blame]
//TODO: copyrights
#include "builtins.h"
#include "kernel.h"
#include "buffer.h"
#include "events.h"
#include "memobject.h"
#include <signal.h>
//#include <llvm/Function.h>
#include <iostream>
#include <cstring>
#include <cmath>
#include <windows.h>
#include <stdio.h>
using namespace Devices;
unsigned char *imageData(unsigned char *base, size_t x, size_t y, size_t z,
size_t row_pitch, size_t slice_pitch,
unsigned int bytes_per_pixel)
{
unsigned char *result = base;
result += (z * slice_pitch) +
(y * row_pitch) +
(x * bytes_per_pixel);
return result;
}
/*
* TLS-related functions
*/
//__thread Devices::CPUKernelWorkGroup *g_work_group; /*!< \brief \c Coal::CPUKernelWorkGroup currently running on this thread */
//__thread void *work_items_data; /*!< \brief Space allocated for work-items stacks, see \ref barrier */
//__thread size_t work_items_size; /*!< \brief Size of \c work_items_data, see \ref barrier */
void setThreadLocalWorkGroup(Devices::CPUKernelWorkGroup *current)
{
//g_work_group = current;
}
void *getWorkItemsData(size_t &size)
{
//size = work_items_size;
return 0;// work_items_data;
}
void setWorkItemsData(void *ptr, size_t size)
{
/*work_items_data = ptr;
work_items_size = size;*/
}
/*
* Actual built-ins implementations
*/
cl_uint CPUKernelWorkGroup::getWorkDim() const
{
return p_work_dim;
}
size_t CPUKernelWorkGroup::getGlobalId(cl_uint dimindx) const
{
if(dimindx > p_work_dim)
return 0;
return p_global_id_start_offset[dimindx] + p_current_context->local_id[dimindx];
}
size_t CPUKernelWorkGroup::getGlobalSize(cl_uint dimindx) const
{
if(dimindx >p_work_dim)
return 1;
return p_event->global_work_size(dimindx);
}
size_t CPUKernelWorkGroup::getLocalSize(cl_uint dimindx) const
{
if(dimindx > p_work_dim)
return 1;
return p_event->local_work_size(dimindx);
}
size_t CPUKernelWorkGroup::getLocalID(cl_uint dimindx) const
{
if(dimindx > p_work_dim)
return 0;
return p_current_context->local_id[dimindx];
}
size_t CPUKernelWorkGroup::getNumGroups(cl_uint dimindx) const
{
if(dimindx > p_work_dim)
return 1;
return (p_event->global_work_size(dimindx) /
p_event->local_work_size(dimindx));
}
size_t CPUKernelWorkGroup::getGroupID(cl_uint dimindx) const
{
if(dimindx > p_work_dim)
return 0;
return p_index[dimindx];
}
size_t CPUKernelWorkGroup::getGlobalOffset(cl_uint dimindx) const
{
if(dimindx > p_work_dim)
return 0;
return p_event->global_work_offset(dimindx);
}
void CPUKernelWorkGroup::barrier(unsigned int flags)
{
p_had_barrier = true;
// Allocate or reuse TLS memory for the stacks (it isn't freed between
// the work groups, and even the kernels, so if we need less space than
// allocated, it's good)
if(!p_contexts)
{
if(p_current_work_item != 0)
{
// Completely abnormal, it means that not every work-items
// encounter the barrier
/*std::cerr << "*** Not every work-items of "
<< p_kernel->function()->getNameStr()
<< " calls barrier(); !" << std::endl;*/
return;
}
// Allocate or reuse the stacks
size_t contexts_size;
p_contexts = getWorkItemsData(contexts_size);
size_t needed_size = p_num_work_items * (p_stack_size + sizeof(Context));
if(!p_contexts || contexts_size < needed_size)
{
// We must allocate a new space
//if(p_contexts)
// munmap(p_contexts, contexts_size);
//p_contexts = mmap(0, needed_size, PROT_EXEC | PROT_READ | PROT_WRITE, /* People say a stack must be executable */
// MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
setWorkItemsData(p_contexts, contexts_size);
}
// Now that we have a real main context, initialize it
p_current_context = getContextAddr(0);
p_current_context->initialized = 1;
std::memset(p_current_context->local_id, 0, p_work_dim * sizeof(size_t));
//getcontext(&p_current_context->context);
}
// Take the next context
p_current_work_item++;
if(p_current_work_item == p_num_work_items) p_current_work_item = 0;
Context *next = getContextAddr(p_current_work_item);
Context *main = getContextAddr(0); // The context not created with makecontext
// If the next context isn't initialized, initialize it.
// Note: mmap zeroes the memory, so next->initialized == 0 if it isn't initialized
if(next->initialized == 0)
{
next->initialized = 1;
// local-id of next is the one of the current context, but incVec'ed
std::memcpy(next->local_id, p_current_context->local_id,
MAX_WORK_DIMS * sizeof(size_t));
incVec(p_work_dim, next->local_id, p_max_local_id);
// Initialize the next context
/*if(getcontext(&next->context) != 0)
return;*/
// Get its stack. It is located a next + sizeof(Context)
char *stack = (char *)next;
stack += sizeof(Context);
/*next->context.uc_link = &main->context;
next->context.uc_stack.ss_sp = stack;
next->context.uc_stack.ss_size = p_stack_size;*/
// Tell it to run the kernel function
//makecontext(&next->context, (void(*)())p_kernel_func_addr, 1, p_args);
}
// Switch to the next context
/*ucontext_t *cur = &p_current_context->context;
p_current_context = next;
swapcontext(cur, &next->context);*/
// When we return here, it means that all the other work items encountered
// a barrier and that we returned to this one. We can continue.
}
void CPUKernelWorkGroup::builtinNotFound(const std::string &name) const
{
/*std::cout << "OpenCL: Non-existant builtin function " << name
<< " found in kernel " << p_kernel->function()->getNameStr()
<< '.' << std::endl;*/
}
/*
* Built-in functions
*/
static size_t get_global_id(cl_uint dimindx)
{
return 0;// g_work_group->getGlobalId(dimindx);
}
static cl_uint get_work_dim()
{
return 0;// g_work_group->getWorkDim();
}
static size_t get_global_size(unsigned int dimindx)
{
return 0;// g_work_group->getGlobalSize(dimindx);
}
static size_t get_local_size(unsigned int dimindx)
{
return 0;// g_work_group->getLocalSize(dimindx);
}
static size_t get_local_id(unsigned int dimindx)
{
return 0;// g_work_group->getLocalID(dimindx);
}
static size_t get_num_groups(unsigned int dimindx)
{
return 0;// g_work_group->getNumGroups(dimindx);
}
static size_t get_group_id(unsigned int dimindx)
{
return 0;// g_work_group->getGroupID(dimindx);
}
static size_t get_global_offset(unsigned int dimindx)
{
return 0;// g_work_group->getGlobalOffset(dimindx);
}
static void barrier(unsigned int flags)
{
0;// g_work_group->barrier(flags);
}
// Images
static int get_image_width(Image2D *image)
{
return image->width();
}
static int get_image_height(Image2D *image)
{
return image->height();
}
static int get_image_depth(Image3D *image)
{
if(image->type() != MemObject::Image3D)
return 1;
return image->depth();
}
static int get_image_channel_data_type(Image2D *image)
{
return image->format().image_channel_data_type;
}
static int get_image_channel_order(Image2D *image)
{
return image->format().image_channel_order;
}
static void *image_data(Image2D *image, int x, int y, int z, int *order, int *type)
{
*order = image->format().image_channel_order;
*type = image->format().image_channel_data_type;
return 0;// g_work_group->getImageData(image, x, y, z);
}
static bool is_image_3d(Image3D *image)
{
return (image->type() == MemObject::Image3D ? 1 : 0);
}
static void write_imagef(Image2D *image, int x, int y, int z, float *color)
{
0;// g_work_group->writeImage(image, x, y, z, color);
}
static void write_imagei(Image2D *image, int x, int y, int z, int32_t *color)
{
0;// g_work_group->writeImage(image, x, y, z, color);
}
static void write_imageui(Image2D *image, int x, int y, int z, uint32_t *color)
{
0;// g_work_group->writeImage(image, x, y, z, color);
}
static void read_imagefi(float *result, Image2D *image, int x, int y, int z,
int32_t sampler)
{
0;// g_work_group->readImage(result, image, x, y, z, sampler);
}
static void read_imageii(int32_t *result, Image2D *image, int x, int y, int z,
int32_t sampler)
{
0;// g_work_group->readImage(result, image, x, y, z, sampler);
}
static void read_imageuii(uint32_t *result, Image2D *image, int x, int y, int z,
int32_t sampler)
{
0;// g_work_group->readImage(result, image, x, y, z, sampler);
}
static void read_imageff(float *result, Image2D *image, float x, float y,
float z, int32_t sampler)
{
0;// g_work_group->readImage(result, image, x, y, z, sampler);
}
static void read_imageif(int32_t *result, Image2D *image, float x, float y,
float z, int32_t sampler)
{
0;// g_work_group->readImage(result, image, x, y, z, sampler);
}
static void read_imageuif(uint32_t *result, Image2D *image, float x, float y,
float z, int32_t sampler)
{
0;// g_work_group->readImage(result, image, x, y, z, sampler);
}
/*
* Built-in functions generated by src/runtime/builtins.py
*/
#define REPL(x) for (unsigned int i=0; i<x; ++i)
//#include <runtime/builtins_impl.h>
/*
* Bridge between LLVM and us
*/
static void unimplemented_stub()
{
}
void *getBuiltin(const std::string &name)
{
if(name == "get_global_id")
return (void *)&get_global_id;
else if(name == "get_work_dim")
return (void *)&get_work_dim;
else if(name == "get_global_size")
return (void *)&get_global_size;
else if(name == "get_local_size")
return (void *)&get_local_size;
else if(name == "get_local_id")
return (void *)&get_local_id;
else if(name == "get_num_groups")
return (void *)&get_num_groups;
else if(name == "get_group_id")
return (void *)&get_group_id;
else if(name == "get_global_offset")
return (void *)&get_global_offset;
else if(name == "barrier")
return (void *)&barrier;
else if(name == "__cpu_get_image_width")
return (void *)&get_image_width;
else if(name == "__cpu_get_image_height")
return (void *)&get_image_height;
else if(name == "__cpu_get_image_depth")
return (void *)&get_image_depth;
else if(name == "__cpu_get_image_channel_data_type")
return (void *)&get_image_channel_data_type;
else if(name == "__cpu_get_image_channel_order")
return (void *)&get_image_channel_order;
else if(name == "__cpu_image_data")
return (void *)&image_data;
else if(name == "__cpu_is_image_3d")
return (void *)&is_image_3d;
else if(name == "__cpu_write_imagef")
return (void *)&write_imagef;
else if(name == "__cpu_write_imagei")
return (void *)&write_imagei;
else if(name == "__cpu_write_imageui")
return (void *)&write_imageui;
else if(name == "__cpu_read_imagefi")
return (void *)&read_imagefi;
else if(name == "__cpu_read_imageii")
return (void *)&read_imageii;
else if(name == "__cpu_read_imageuii")
return (void *)&read_imageuii;
else if(name == "__cpu_read_imageff")
return (void *)&read_imageff;
else if(name == "__cpu_read_imageif")
return (void *)&read_imageif;
else if(name == "__cpu_read_imageuif")
return (void *)&read_imageuif;
// Built-in functions generated by src/runtime/builtins.py
//#include <runtime/builtins_def.h>
else if(name == "debug")
return (void *)&printf;
// Function not found
//g_work_group->builtinNotFound(name);
return (void *)&unimplemented_stub;
}