Monday, March 17, 2008

Entity Occlusion

It's there and it works. Pretty damn beautifully, even if I do say so myself.

I was able to get the Z Buffer updates down to 10 times per second, which gave another handy speed boost. I could have got away with 5, but the difference in speed wasn't much, so I decided to run with the extra accuracy.

Here's the code: a lot of it is engine-specific, but if you can pull anything useful from it, it's my pleasure.

// side length of the square, low resolution depth capture
#define Z_UPDATE_SIZE 64

// minimum accumulated frame time between two depth captures
#define Z_UPDATE_INTERVAL 0.1f

// frame time accumulated since the last depth capture
float r_z_update_time = 0.0f;

// software z buffer: one float per pixel, Z_UPDATE_SIZE rows of Z_UPDATE_SIZE values
float zBuf[Z_UPDATE_SIZE * Z_UPDATE_SIZE];


/*
==================
R_ProjectPoint

project a point from world co-ordinates to depth capture buffer coordinates

vin is transformed by r_world_matrix and then by r_world_project.  on return:
  vout[0], vout[1]  x and y scaled by Z_UPDATE_SIZE and clamped to 0 .. Z_UPDATE_SIZE - 1
  vout[2]           normalized z mapped as z * 0.25 + 0.75 (not clamped)

a point whose clip-space w is <= 0 (on or behind the eye plane, assuming
r_world_project is a perspective projection) has no valid projection: dividing
by a negative w mirrors x and y and sends the depth out of range.  such a point
is reported at buffer position 0,0 with a depth of -1, which is below any depth
value glReadPixels stores in zBuf.  R_ProjectBBox keeps the minimum of each
component, so a box with any such corner always passes the "zBuf > nearest depth"
test in R_RunOccludeEntityTest and is treated as visible rather than being
culled on a bogus bound.
==================
*/
void R_ProjectPoint (vec3_t vin, vec3_t vout)
{
	float fvin[4] = {vin[0], vin[1], vin[2], 1};
	float fvout[4];
	float *mm = r_world_matrix;
	float *mp = r_world_project;

	// modelview transform (column-major matrix): fvout holds the intermediate result
	fvout[0] = mm[0x0] * fvin[0] + mm[0x4] * fvin[1] + mm[0x8] * fvin[2] + mm[0xc] * fvin[3];
	fvout[1] = mm[0x1] * fvin[0] + mm[0x5] * fvin[1] + mm[0x9] * fvin[2] + mm[0xd] * fvin[3];
	fvout[2] = mm[0x2] * fvin[0] + mm[0x6] * fvin[1] + mm[0xa] * fvin[2] + mm[0xe] * fvin[3];
	fvout[3] = mm[0x3] * fvin[0] + mm[0x7] * fvin[1] + mm[0xb] * fvin[2] + mm[0xf] * fvin[3];

	// projection transform: fvin is reused to hold the final clip-space result
	fvin[0] = mp[0x0] * fvout[0] + mp[0x4] * fvout[1] + mp[0x8] * fvout[2] + mp[0xc] * fvout[3];
	fvin[1] = mp[0x1] * fvout[0] + mp[0x5] * fvout[1] + mp[0x9] * fvout[2] + mp[0xd] * fvout[3];
	fvin[2] = mp[0x2] * fvout[0] + mp[0x6] * fvout[1] + mp[0xa] * fvout[2] + mp[0xe] * fvout[3];
	fvin[3] = mp[0x3] * fvout[0] + mp[0x7] * fvout[1] + mp[0xb] * fvout[2] + mp[0xf] * fvout[3];

	// no valid projection (this also covers the division by 0 case):
	// report a conservative "nearer than everything" result instead
	if (fvin[3] <= 0.0)
	{
		vout[0] = 0;
		vout[1] = 0;
		vout[2] = -1;
		return;
	}

	// perspective divide
	fvin[0] /= fvin[3];
	fvin[1] /= fvin[3];
	fvin[2] /= fvin[3];

	// map x and y to range 0..1, then scale to buffer dimensions
	vout[0] = (fvin[0] * 0.5 + 0.5) * Z_UPDATE_SIZE;
	vout[1] = (fvin[1] * 0.5 + 0.5) * Z_UPDATE_SIZE;

	// scale to the depth range used for the capture (glDepthRange 0.5 .. 1.0)
	vout[2] = (fvin[2] * 0.25 + 0.75);

	// move points outside the image into the image
	if (vout[0] < 0) vout[0] = 0;
	if (vout[0] >= Z_UPDATE_SIZE) vout[0] = Z_UPDATE_SIZE - 1;
	if (vout[1] < 0) vout[1] = 0;
	if (vout[1] >= Z_UPDATE_SIZE) vout[1] = Z_UPDATE_SIZE - 1;
}


/*
==================
R_ProjectBBox

run all 8 corners of a world space bounding box through R_ProjectPoint and
return the per-component minimum (minsout) and maximum (maxsout) of the
projected results, i.e. a 2D "bounding box of the bounding box" plus a depth
range, for use in the occlusion culling tests
==================
*/
void R_ProjectBBox (float *mins, float *maxs, float *minsout, float *maxsout)
{
	int corner;
	int axis;

	// start from extreme sentinel values so that projected corners replace them
	for (axis = 0; axis < 3; axis++)
	{
		minsout[axis] = 999999999;
		maxsout[axis] = -999999999;
	}

	for (corner = 0; corner < 8; corner++)
	{
		vec3_t world_pt;
		vec3_t screen_pt;

		// bit N of the corner index picks mins (set) or maxs (clear) on axis N
		for (axis = 0; axis < 3; axis++)
		{
			if (corner & (1 << axis))
				world_pt[axis] = mins[axis];
			else
				world_pt[axis] = maxs[axis];
		}

		// project to screen
		R_ProjectPoint (world_pt, screen_pt);

		// grow the output extents to include this corner
		for (axis = 0; axis < 3; axis++)
		{
			if (screen_pt[axis] < minsout[axis]) minsout[axis] = screen_pt[axis];
			if (screen_pt[axis] > maxsout[axis]) maxsout[axis] = screen_pt[axis];
		}
	}
}


// forward declaration of the frustum test; R_RunOcclusionTest compares its result against FRUSTUM_OUTSIDE
int R_BoxInFrustum (vec3_t mins, vec3_t maxs);


/*
==================
R_RunOccludeEntityTest

project the world space box mins/maxs into the depth capture buffer and compare
the nearest projected depth of the box against every captured depth value inside
its 2D extents.  ent->occluded is set to false as soon as one captured value is
greater than that depth, and to true if none is.
==================
*/
void R_RunOccludeEntityTest (entity_t *ent, vec3_t mins, vec3_t maxs)
{
	vec3_t lo, hi;
	int row;
	int col;

	R_ProjectBBox (mins, maxs, lo, hi);

	row = lo[1];

	while (row <= hi[1])
	{
		// start of this row in the software z buffer
		float *zrow = &zBuf[row * Z_UPDATE_SIZE];

		for (col = lo[0]; col <= hi[0]; col++)
		{
			if (zrow[col] > lo[2])
			{
				// captured depth is greater than the box's nearest depth: not occluded
				ent->occluded = false;
				return;
			}
		}

		row++;
	}

	// nothing in the box's extents passed the test: occluded
	ent->occluded = true;
}


/*
==================
R_RunOcclusionTest

set the occluded flag of every entity in cl_visedicts.  brush, alias and sprite
models are marked occluded when their box (origin + model mins/maxs) is outside
the frustum; otherwise the flag is decided by R_RunOccludeEntityTest.  every
other model type is left marked as not occluded.
==================
*/
void R_RunOcclusionTest (void)
{
	int n;

	// nothing to do unless both the world entity's model and the client world model are set
	if (!(r_worldentity.model && cl.worldmodel)) return;

	for (n = 0; n < cl_numvisedicts; n++)
	{
		entity_t *e = cl_visedicts[n];
		vec3_t box_min, box_max;

		// default: not occluded
		e->occluded = false;

		// only these model types are tested
		if (e->model->type != mod_brush &&
			e->model->type != mod_alias &&
			e->model->type != mod_sprite)
		{
			continue;
		}

		// offset the model's box by the entity origin
		VectorAdd (e->origin, e->model->mins, box_min);
		VectorAdd (e->origin, e->model->maxs, box_max);

		if (R_BoxInFrustum (box_min, box_max) != FRUSTUM_OUTSIDE)
		{
			// at least partly in the frustum: test for regular occlusion
			R_RunOccludeEntityTest (e, box_min, box_max);
			continue;
		}

		// failed the bbox cull: flag it as occluded
		e->occluded = true;
	}
}


/*
==================
R_CaptureDepth

at most once per Z_UPDATE_INTERVAL (and unconditionally while r_framecount is
5 or less), draw the polys on the texture chains, depth only, into a
Z_UPDATE_SIZE x Z_UPDATE_SIZE viewport and read the resulting depth values back
into zBuf.  the modelview and projection matrixes in effect for the capture are
saved in r_world_matrix and r_world_project.
==================
*/
void R_CaptureDepth (void)
{
	extern texture_t *texturelist;
	extern float r_farclip;
	texture_t *tex;

	// the timer runs whether or not we capture this frame
	r_z_update_time += r_frametime;

	// past the first few frames, wait until the interval has elapsed
	if (r_framecount > 5 && r_z_update_time < Z_UPDATE_INTERVAL) return;

	// restart the timer
	r_z_update_time = 0;

	// create a Z_UPDATE_SIZE x Z_UPDATE_SIZE viewport for the capture,
	// offset from the right edge of the GL window
	R_SetupGLViewport (vid.glwidth - (Z_UPDATE_SIZE * 2), Z_UPDATE_SIZE, Z_UPDATE_SIZE, Z_UPDATE_SIZE, r_refdef.fov_y, 4, r_farclip);

	// keep the modelview and projection matrixes so R_ProjectPoint can reuse them
	// fixme - do this in software to prevent a sync-wait
	glGetFloatv (GL_MODELVIEW_MATRIX, r_world_matrix);
	glGetFloatv (GL_PROJECTION_MATRIX, r_world_project);

	// depth state for the capture: a good chunk of the depth buffer is available here
	glDepthFunc (GL_LEQUAL);
	glDepthRange (0.5f, 1.0f);
	glDepthMask (GL_TRUE);

	// only depth is wanted, so switch off texturing and colour writes
	glDisable (GL_TEXTURE_2D);
	glColorMask (GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);

	// base vertex arrays
	vaEnableVertexArray (3);

	for (tex = texturelist; tex; tex = tex->texturelist)
	{
		// head of this texture's surface chain
		msurface_t *s = tex->texturechain;

		// no surfs in use
		if (!s) continue;

		// the flags of the head surface decide for the whole chain:
		// turbulent surfs are only drawn when also flagged opaque, sky is never drawn
		if ((s->flags & SURF_DRAWTURB) && !(s->flags & SURF_DRAWOPAQUE)) continue;
		if (s->flags & SURF_DRAWSKY) continue;

		// walk the chain
		while (s)
		{
			// draw per poly as some liquids are sent through here too
			glpoly_t *poly = s->polys;

			while (poly)
			{
				glvertex_t *vert;
				int k;

				vaBegin (GL_TRIANGLE_FAN);

				for (k = 0, vert = poly->verts; k < poly->numverts; k++, vert++)
				{
					vaVertex3fv (vert->tv);
				}

				vaEnd ();

				poly = poly->next;
			}

			s = s->texturechain;
		}
	}

	// done with the render
	vaDisableArrays ();

	// read the depth values of the capture viewport back into the software z buffer
	// per the spec, this scales to a 0..1 range, irrespective of the actual depth range
	// (http://www.opengl.org/documentation/specs/man_pages/hardcopy/GL/html/gl/readpixels.html)
	// but, according to the original author, this is a lie...
	glReadPixels (vid.glwidth - (Z_UPDATE_SIZE * 2), Z_UPDATE_SIZE, Z_UPDATE_SIZE, Z_UPDATE_SIZE, GL_DEPTH_COMPONENT, GL_FLOAT, zBuf);

	// switch texturing and colour writes back on
	glEnable (GL_TEXTURE_2D);
	glColorMask (GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);

	// glColorMask leaves the current colour state undefined
	glColor3f (1, 1, 1);
}

0 comments: