Notes:
    calculating -d1 for flags causes GLITCHES

/*
    nVerts = inputVertCount;
ClipLoop:
    do {
        // fill clipping buffer as much as we can
        nClipped = 0;
ClipPrimLoop:
        do {
            n = clipPoly(verts[0], verts[1], verts[2])
            nClipped += n;
            verts += 3;
            if(n == 0)      // tri completely outside
                goto ClipSkip;
            if(n == 3){
                // tri completely inside
                copy verts;
            }else{
                // this is the ugly case
                insertPolygon(...);
            }
            // buffer may overflow with next tri, render it
            if(nClipped > clipVertLimit){
                nVerts -= 3;
                goto ClipRender;
            }
ClipSkip:
            nVerts -= 3;
            if(nVerts <= 0)
                goto ClipRender;
        }// goto ClipPrimLoop;
ClipRender:
        // this can only happen if the input buffer
        // has no more vertices, no more rendering at all now
        if(nClipped < 3)
            goto ClipEnd;
        render();
        if(nVerts <= 0)
            goto ClipEnd;
    }// goto ClipLoop;
ClipEnd:
*/

/*
plane distances, all left in vf07.w,
also negative distances just for sign flag!
vf24 = [1.0, 1.0, near, far]

w = near
    w - near        subz.w vf07,vf01,vf24
    near - w        subw.z vf00,vf24,vf01
w = far
    far - w         sub.w vf07,vf24,vf01
    w - far         sub.w vf00,vf01,vf24
x = -w
    x + w           addx.w vf07,vf01,vf01
    - x - w         suba.x acc,vf00,vf01; msubw.x vf00,vf24,vf01
x = w
    w - x           subx.w vf07,vf01,vf01
    x - w           subw.x vf00,vf01,vf01
y = -w
    y + w           addy.w vf07,vf01,vf01
    - y - w         suba.y acc,vf00,vf01; msubw.y vf00,vf24,vf01
y = w
    w - y           suby.w vf07,vf01,vf01
    y - w           subw.y vf00,vf01,vf01

now d1-d2 for all the planes so we can divide as early as possible:
w = near
    (p1.w - near) - (p2.w - near)
    p1.w - p2.w
        sub.w vfxx,vf03,vf01
w = far
    (far - p1.w) - (far - p2.w)
    p2.w - p1.w
        sub.w vfxx,vf01,vf03
x = -w
    (p1.x + p1.w) - (p2.x + p2.w)
    p1.x + p1.w - p2.x - p2.w
        suba.x acc,vf03,vf01
        maddaw.x acc,vf24,vf03
        msubw.x vfxx,vf24,vf01
x = w
    (p1.w - p1.x) - (p2.w - p2.x)
    p2.x + p1.w - p1.x - p2.w
        suba.x acc,vf01,vf03
        maddaw.x acc,vf24,vf03
        msubw.x vfxx,vf24,vf01
y = -w
    (p1.y + p1.w) - (p2.y + p2.w)
    p1.y + p1.w - p2.y - p2.w
        suba.y acc,vf03,vf01
        maddaw.y acc,vf24,vf03
        msubw.y vfxx,vf24,vf01
y = w
    (p1.w - p1.y) - (p2.w - p2.y)
    p2.y + p1.w - p1.y - p2.w
        suba.y acc,vf01,vf03
        maddaw.y acc,vf24,vf03
        msubw.y vfxx,vf24,vf01

hack idea for incrementing polyOutPtr without a branch:
we have to increment by 2, which is exactly the sign flag of the status flags.
increment if p1's plane distance is NOT negative, so the flag is the wrong way around.
idea 1: do second test with flipped planes, but -x-w and -y-w take one instruction and one cycle more
    fsand vi01,2
    iadd vi07,vi07,vi01
idea 2: flip bit like this, but 3 extra lower instructions
    fsand vi01,2
    iaddiu vi15,vi00,2
    iadd vi01,vi01,vi15
    iand vi01,vi01,vi15
    iadd vi07,vi07,vi01
*/

/*
p1 = last inserted (=> keep vert around after last sq)
polyInPtr = bufA;
polyOutPtr = bufB;
do {
    p2 = *polyInPtr++;
    d1 = dist(p1, plane)
    d2 = dist(p2, plane)
    *polyOutPtr = p1;
    if(d1 >= 0) {
        last = p1;          >:U this completely fucks us over
        polyOutPtr++;       <- see hack above
    }
    if(d1*d2 < 0){
        // interpolate
        t = d1/(d1-d2);
        s = 1.0f-t;
        last = p1*s + p2*t;
        *polyOutPtr++ = last;
    }
    p1 = p2;
} while(polyInPtr != polyInEnd);

Interpolation:
note s = -d2/(d1-d2) and hence
p = p1*(-d2/(d1-d2)) + p2*(d1/(d1-d2)) =
    (p2*d1 - p1*d2) / (d1-d2)
i.e. it's enough to get the division result relatively late
and the denominator is a simple msub
*/