OpenCL ray-triangle intersection kernel.

by syoyo

openclraycastsc.png

I’ve wrote possibly World’s first ray-triangle intersection kernel in OpenCL.
I am now working with porting full raytracing kernel(traversal, shader, isector, etc) into OpenCL.
This article is just exposing the small part of my professional(?) OpenCL work to you.

Here’s the sample code of ray-triangle intersection kernel in OpenCL.


//
// Copyright 2009 Syoyo Fujita.
//
typedef struct _ray4_t
{
    float4 rox, roy, roz;
    float4 rdx, rdy, rdz;
    float4 t;
} ray4_t;

typedef struct _triangle4_t
{
    float4 v0x, v0y, v0z;
    float4 e1x, e1y, e1z;
    float4 e2x, e2y, e2z;
} triangle4_t;

typedef struct _triangle_t
{
    float v[3][4];  // (x,y,z,w) * 3
} triangle_t;

static inline float4
mycross4(float4 a, float4 b, float4 c, float4 d)
{
    return ((a * c) - (b * d));
}

static inline float4
mydot4(float4 ax, float4 ay, float4 az, float4 bx, float4 by, float4 bz)
{
    return (ax * bx + ay * by + az * bz);
}

static int4
isect4(
    float4 *t_out,
    float4 *u_out,
    float4 *v_out,
    ray4_t ray,
    triangle4_t tri)
{
    const float4 px = mycross4(tri.e2z, tri.e2y, ray.rdy, ray.rdz);
    const float4 py = mycross4(tri.e2x, tri.e2z, ray.rdz, ray.rdx);
    const float4 pz = mycross4(tri.e2y, tri.e2x, ray.rdx, ray.rdy);

    const float4 sx = ray.rox - tri.v0x;
    const float4 sy = ray.roy - tri.v0y;
    const float4 sz = ray.roz - tri.v0z;

    const float4 vone  = (float4)(1.0f);
    const float4 vzero = (float4)(0.0f);
    const float4 veps  = (float4)(1.0e-6f);

    const float4 det = mydot4(px, py, pz, tri.e1x, tri.e1y, tri.e1z);
    const float4 invdet = (float4)(1.0f) / det;


    const float4 qx = mycross4(tri.e1z, tri.e1y, sy, sz);
    const float4 qy = mycross4(tri.e1x, tri.e1z, sz, sx);
    const float4 qz = mycross4(tri.e1y, tri.e1y, sx, sy);

    const float4 u = mydot4(sx, sy, sz, px, py, pz) * invdet;
    const float4 v = mydot4(ray.rdx, ray.rdy, ray.rdz, qx, qy, qz) * invdet;
    const float4 t = mydot4(tri.e2x, tri.e2y, tri.e2z, qx, qy, qz) * invdet;

    int4 mask = (fabs(det) > veps) & ((u+v)  vzero) & (v > vzero)
              & (t > vzero) & (t < ray.t);


    (*t_out) = t;
    (*u_out) = u;
    (*v_out) = v;

    return mask;

}

__kernel
void
main(
    __global uint *out)
{
    int x = get_global_id(0);
    int y = get_global_id(1);
    int w = get_global_size(0);
    int h = get_global_size(1);

    ray4_t ray4;

    ray4.rox = 2.0f * ((x - w) / (float)w) + 1.0f;
    ray4.roy = 2.0f * ((y - h) / (float)h) + 1.0f;
    ray4.roz = (float4)(0.0f);

    ray4.rdx = (float4)(0.0f);
    ray4.rdy = (float4)(0.0f);
    ray4.rdz = (float4)(-1.0f);

    ray4.t = (float4)(1.0e+30f);



    triangle_t tri;
    triangle4_t tri4;

    tri.v[0][0] = -0.5f;
    tri.v[0][1] = -0.5f;
    tri.v[0][2] = -1.0f;
    tri.v[1][0] =  0.1f;
    tri.v[1][1] =  0.75f;
    tri.v[1][2] = -1.0f;
    tri.v[2][0] =  0.5f;
    tri.v[2][1] = -0.75f;
    tri.v[2][2] = -1.0f;

    tri4.v0x = (float4)(tri.v[0][0]);
    tri4.v0y = (float4)(tri.v[0][1]);
    tri4.v0z = (float4)(tri.v[0][2]);
    tri4.e1x = (float4)(tri.v[1][0] - tri.v[0][0]);
    tri4.e1y = (float4)(tri.v[1][1] - tri.v[0][1]);
    tri4.e1z = (float4)(tri.v[1][2] - tri.v[0][2]);
    tri4.e2x = (float4)(tri.v[2][0] - tri.v[0][0]);
    tri4.e2y = (float4)(tri.v[2][1] - tri.v[0][1]);
    tri4.e2z = (float4)(tri.v[2][2] - tri.v[0][2]);

    float4 t, u, v;
    int4 mask = isect4(&t, &u, &v, ray4, tri4);

    //int r = mask.x & 0xff;
    //int g = mask.y & 0xff;

    int r = (int)(t.x * 255);
    int g = (int)(u.x * 255);
    int b = (int)(v.x * 255);

    r = mask.x & r;
    g = mask.x & g;
    b = mask.x & b;

    out[x+y*get_global_size(0)] = (uint)(r | (g<< 8) | (b << 16) | (255<<24));
}

Similar articles

World’s first SIMD ray-triangle intersection in AVX?
http://lucille.atso-net.jp/blog/?p=649

Advertisements