/* ***************************************************************************** * * Copyright (c) 2012-2017 Alexis Naveros. * Portions developed under contract to the SURVICE Engineering Company. * * This software is provided 'as-is', without any express or implied * warranty. In no event will the authors be held liable for any damages * arising from the use of this software. * * Permission is granted to anyone to use this software for any purpose, * including commercial applications, and to alter it and redistribute it * freely, subject to the following restrictions: * * 1. The origin of this software must not be misrepresented; you must not * claim that you wrote the original software. If you use this software * in a product, an acknowledgment in the product documentation would be * appreciated but is not required. * 2. Altered source versions must be plainly marked as such, and must not be * misrepresented as being the original software. * 3. This notice may not be removed or altered from any source distribution. * * ***************************************************************************** */ #include #include #include #include #include #include #include #include #include "cc.h" #include "mm.h" #include "mmatomic.h" #include "mmhash.h" #include "mmbinsort.h" #include "meshdecimation.h" //// #if __SSE__ || _M_X64 || _M_IX86_FP >= 1 || CPU_ENABLE_SSE #include #define CPU_SSE_SUPPORT (1) #endif #if __SSE2__ || _M_X64 || _M_IX86_FP >= 2 || CPU_ENABLE_SSE2 #include #define CPU_SSE2_SUPPORT (1) #endif #if __SSE3__ || __AVX__ || CPU_ENABLE_SSE3 #include #define CPU_SSE3_SUPPORT (1) #endif #if __SSSE3__ || __AVX__ || CPU_ENABLE_SSSE3 #include #define CPU_SSSE3_SUPPORT (1) #endif #if __SSE4_1__ || __AVX__ || CPU_ENABLE_SSE4_1 #include #define CPU_SSE4_1_SUPPORT (1) #endif #if defined(__GNUC__) || defined(__INTEL_COMPILER) #define CPU_ALIGN16 __attribute__((aligned(16))) #define CPU_ALIGN32 __attribute__((aligned(32))) #define CPU_ALIGN64 __attribute__((aligned(64))) #elif defined(_MSC_VER) #define CPU_ALIGN16 __declspec(align(16)) #define CPU_ALIGN64 __declspec(align(64)) #else #define CPU_ALIGN16 #define CPU_ALIGN32 #define CPU_ALIGN64 #warning "SSE/AVX Disabled: Unsupported Compiler." #undef CPU_SSE_SUPPORT #undef CPU_SSE2_SUPPORT #undef CPU_SSE3_SUPPORT #undef CPU_SSSE3_SUPPORT #undef CPU_SSE4_1_SUPPORT #endif //// /* Define to use double floating point precision */ #define MD_CONF_DOUBLE_PRECISION (1) /* Define to use double precision just quadric maths. Very strongly recommended. */ #define MD_CONF_QUADRICS_DOUBLE_PRECISION (1) /* Enable progress report callback */ #define MD_CONF_ENABLE_PROGRESS (1) /* Align all ops on 64 bytes to reduce cache line fetches */ #define MD_CONF_OP_ALIGNMENT (0x40) /* Required with multithreading! */ #define MD_CONFIG_DELAYED_OP_REDIRECT (1) /* Quick hash function instead of cc* */ #define MD_CONFIG_FAST_HASH (1) /* Try to use some crazy __float128 precision to track the d^2 accumulated error, if available */ #define MD_CONFIG_HIGH_QUADRICS (0) #if 1 #define MD_COLLAPSE_COST_COMPACT_TARGET (0.25) #define MD_COLLAPSE_COST_COMPACT_FACTOR (0.00125) #else #define MD_COLLAPSE_COST_COMPACT_TARGET (0.4) #define MD_COLLAPSE_COST_COMPACT_FACTOR (0.025) #endif #define MD_BOUNDARY_WEIGHT (5.0) #define MD_SYNC_STEP_COUNT (32) #define MD_QUADRIC_DETERMINANT_MIN (0.0000000001) #define MD_GLOBAL_LOCK_THRESHOLD (16) #define MD_THREAD_COUNT_DEFAULT (4) #define MD_THREAD_COUNT_MAX (64) //// #if 1 #define DEBUG_VERBOSE (0) #define DEBUG_VERBOSE_QUADRIC (0) #define DEBUG_VERBOSE_BOUNDARY (0) #define DEBUG_VERBOSE_COLLISION (0) #define DEBUG_VERBOSE_COST (0) #define DEBUG_VERBOSE_COLLAPSE (0) #define DEBUG_VERBOSE_EVALUATE (0) #define DEBUG_VERBOSE_MEMORY (0) #else #define DEBUG_VERBOSE (0) #define DEBUG_VERBOSE_QUADRIC (1) #define DEBUG_VERBOSE_BOUNDARY (1) #define DEBUG_VERBOSE_COLLISION (1) #define DEBUG_VERBOSE_COST (1) #define DEBUG_VERBOSE_COLLAPSE (1) #define DEBUG_VERBOSE_EVALUATE (1) #define DEBUG_VERBOSE_MEMORY (0) #endif /* #define DEBUG_LIMIT (1) */ #define DEBUG_DEBUG_CHECK_SOMETHING (0) #if DEBUG_VERBOSE || DEBUG_VERBOSE_COLLISION || DEBUG_VERBOSE_COLLAPSE #if 0 #define MD_ERROR(s,f,...) ({fprintf(stderr,s,__VA_ARGS__);if(f) exit(1);}) #else #define MD_ERROR(s,f,...) ({fprintf(stdout,s,__VA_ARGS__);fflush(stdout);}) #endif #else #define MD_ERROR(s,f,...) ({fprintf(stderr,s,__VA_ARGS__);}) #endif //// #if MM_ATOMIC_SUPPORT #define MD_CONFIG_ATOMIC_SUPPORT (1) #endif //// #if MD_CONF_DOUBLE_PRECISION typedef double mdf; #define mdfmin(x,y) fmin((x),(y)) #define mdfmax(x,y) fmax((x),(y)) #define mdffloor(x) floor(x) #define mdfceil(x) ceil(x) #define mdfround(x) round(x) #define mdfsqrt(x) sqrt(x) #define mdfcbrt(x) cbrt(x) #define mdfabs(x) fabs(x) #define mdflog2(x) log2(x) #define mdfacos(x) acos(x) #else typedef float mdf; #define mdfmin(x,y) fminf((x),(y)) #define mdfmax(x,y) fmaxf((x),(y)) #define mdffloor(x) floorf(x) #define mdfceil(x) ceilf(x) #define mdfround(x) roundf(x) #define mdfsqrt(x) sqrtf(x) #define mdfcbrt(x) cbrtf(x) #define mdfabs(x) fabsf(x) #define mdflog2(x) log2f(x) #define mdfacos(x) acosf(x) #endif #if MD_CONF_DOUBLE_PRECISION #if !MD_CONF_QUADRICS_DOUBLE_PRECISION #define MD_CONF_QUADRICS_DOUBLE_PRECISION (1) #endif #endif #if MD_CONF_QUADRICS_DOUBLE_PRECISION typedef double mdqf; #else typedef float mdqf; #endif #if MD_CONFIG_HIGH_QUADRICS #if ( __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 6 ) ) && !defined(__INTEL_COMPILER) && ( defined(__i386__) || defined(__x86_64__) || defined(__ia64__) ) typedef __float128 mdqfhigh; #else #undef MD_CONFIG_HIGH_QUADRICS typedef double mdqfhigh; #endif #elif MD_CONF_QUADRICS_DOUBLE_PRECISION typedef double mdqfhigh; #else typedef float mdqfhigh; #endif #define MD_SIZEOF_MDI (4) typedef int32_t mdi; //// #ifndef M_PI #define M_PI (3.14159265358979323846264338327) #endif #define MD_VectorSubStore(x,y,z) (x)[0]=(y)[0]-(z)[0];(x)[1]=(y)[1]-(z)[1];(x)[2]=(y)[2]-(z)[2] #define MD_VectorCrossProduct(x,y,z) ({(x)[0]=((y)[1]*(z)[2])-((y)[2]*(z)[1]);(x)[1]=((y)[2]*(z)[0])-((y)[0]*(z)[2]);(x)[2]=((y)[0]*(z)[1])-((y)[1]*(z)[0]);}) #define MD_VectorDotProduct(x,y) ((x)[0]*(y)[0]+(x)[1]*(y)[1]+(x)[2]*(y)[2]) #define MD_VectorCopy(x,y) ({(x)[0]=(y)[0];(x)[1]=(y)[1];(x)[2]=(y)[2];}) #define MD_VectorMulScalar(x,y) (x)[0]*=(y);(x)[1]*=(y);(x)[2]*=(y) #define MD_VectorNormalize(x) ({mdf _f;_f=1.0/sqrt((x)[0]*(x)[0]+(x)[1]*(x)[1]+(x)[2]*(x)[2]);(x)[0]*=_f;(x)[1]*=_f;(x)[2]*=_f;}) #define MD_VectorZero(x) ({(x)[0]=0.0;(x)[1]=0.0;(x)[2]=0.0;}) #define MD_VectorMagnitude(x) (sqrt((x)[0]*(x)[0]+(x)[1]*(x)[1]+(x)[2]*(x)[2])) #define MD_VectorAddMulScalar(x,y,z) (x)[0]+=(y)[0]*(z);(x)[1]+=(y)[1]*(z);(x)[2]+=(y)[2]*(z) //// typedef struct { mdqf area; mdqf a2, ab, ac, ad; mdqf b2, bc, bd; mdqf c2, cd; mdqfhigh d2; } mathQuadric; static inline void mathQuadricInit( mathQuadric *q, mdqf a, mdqf b, mdqf c, mdqf d, mdqf area ) { q->area = area; q->a2 = a * a; q->ab = a * b; q->ac = a * c; q->ad = a * d; q->b2 = b * b; q->bc = b * c; q->bd = b * d; q->c2 = c * c; q->cd = c * d; q->d2 = (mdqfhigh)d * (mdqfhigh)d; #if DEBUG_VERBOSE_QUADRIC printf( " Q Init %f ; %f %f %f %f %f %f %f %f %f %f\n", (double)q->area, (double)q->a2, (double)q->ab, (double)q->ac, (double)q->ad, (double)q->b2, (double)q->bc, (double)q->bd, (double)q->c2, (double)q->cd, (double)q->d2 ); #endif return; } //// static inline mdqf mathMatrix3x3Determinant( mdqf *m ) { mdqf det; det = m[0*3+0] * ( ( m[2*3+2] * m[1*3+1] ) - ( m[2*3+1] * m[1*3+2] ) ); det -= m[1*3+0] * ( ( m[2*3+2] * m[0*3+1] ) - ( m[2*3+1] * m[0*3+2] ) ); det += m[2*3+0] * ( ( m[1*3+2] * m[0*3+1] ) - ( m[1*3+1] * m[0*3+2] ) ); return det; } static inline void mathMatrix3x3MulVector( mdqf *vdst, mdqf *m, mdqf *v ) { vdst[0] = ( v[0] * m[0*3+0] ) + ( v[1] * m[1*3+0] ) + ( v[2] * m[2*3+0] ); vdst[1] = ( v[0] * m[0*3+1] ) + ( v[1] * m[1*3+1] ) + ( v[2] * m[2*3+1] ); vdst[2] = ( v[0] * m[0*3+2] ) + ( v[1] * m[1*3+2] ) + ( v[2] * m[2*3+2] ); return; } static inline void mathQuadricToMatrix3x3( mdqf *m, mathQuadric *q ) { m[0*3+0] = q->a2; m[0*3+1] = q->ab; m[0*3+2] = q->ac; m[1*3+0] = q->ab; m[1*3+1] = q->b2; m[1*3+2] = q->bc; m[2*3+0] = q->ac; m[2*3+1] = q->bc; m[2*3+2] = q->c2; return; } static inline void mathMatrix3x3Invert( mdqf *mdst, mdqf *m, mdqf det ) { mdf detinv; detinv = 1.0 / det; mdst[0*3+0] = ( ( m[2*3+2] * m[1*3+1] ) - ( m[2*3+1] * m[1*3+2] ) ) * detinv; mdst[0*3+1] = -( ( m[2*3+2] * m[0*3+1] ) - ( m[2*3+1] * m[0*3+2] ) ) * detinv; mdst[0*3+2] = ( ( m[1*3+2] * m[0*3+1] ) - ( m[1*3+1] * m[0*3+2] ) ) * detinv; mdst[1*3+0] = -( ( m[2*3+2] * m[1*3+0] ) - ( m[2*3+0] * m[1*3+2] ) ) * detinv; mdst[1*3+1] = ( ( m[2*3+2] * m[0*3+0] ) - ( m[2*3+0] * m[0*3+2] ) ) * detinv; mdst[1*3+2] = -( ( m[1*3+2] * m[0*3+0] ) - ( m[1*3+0] * m[0*3+2] ) ) * detinv; mdst[2*3+0] = ( ( m[2*3+1] * m[1*3+0] ) - ( m[2*3+0] * m[1*3+1] ) ) * detinv; mdst[2*3+1] = -( ( m[2*3+1] * m[0*3+0] ) - ( m[2*3+0] * m[0*3+1] ) ) * detinv; mdst[2*3+2] = ( ( m[1*3+1] * m[0*3+0] ) - ( m[1*3+0] * m[0*3+1] ) ) * detinv; return; } //// static inline int mathQuadricSolve( mathQuadric *q, mdf *v ) { mdqf det, m[9], minv[9], vector[3], vres[3]; mdqf areascale; mathQuadricToMatrix3x3( m, q ); det = mathMatrix3x3Determinant( m ); /* Det scale diagnotic Scale 10x QAdd Sr0 1.120477 ; 0.022271 0.030416 -0.065258 -1.217629 0.042473 -0.090558 -1.704271 0.196298 3.621082 68.438599 QAdd Sr0 112.048033 ; 222.705246 304.155192 -652.582478 -121762.889543 424.729952 -905.579057 -170427.095051 1962.994524 362109.207338 68438554.421304 Det 0.000000440467 Det 440472.329126436263 If scale is 10x, then: q0->area : 100x (10^2) q0->a2 : 10000x (10^4) q0->ab : 10000x (10^4) q0->ac : 10000x (10^4) q0->ad : 100000x (10^5) q0->b2 : 10000x (10^4) q0->bc : 10000x (10^4) q0->bd : 100000x (10^5) q0->c2 : 10000x (10^4) q0->cd : 100000x (10^5) q0->d2 : 1000000x (10^6) det : 1000000000000x (10^12) */ areascale = q->area * q->area; areascale = areascale * areascale * areascale; if( mdfabs( det ) <= ( MD_QUADRIC_DETERMINANT_MIN * areascale ) ) { #if DEBUG_VERBOSE_QUADRIC printf( " Det : %.12f ; Fail\n", (double)det ); #endif return 0; } #if DEBUG_VERBOSE_QUADRIC printf( " Det : %.12f ; Pass\n", (double)det ); #endif mathMatrix3x3Invert( minv, m, det ); vector[0] = -q->ad; vector[1] = -q->bd; vector[2] = -q->cd; mathMatrix3x3MulVector( vres, minv, vector ); v[0] = (mdf)vres[0]; v[1] = (mdf)vres[1]; v[2] = (mdf)vres[2]; #if DEBUG_VERBOSE_QUADRIC printf( " Vector ; %f %f %f ; %f %f %f ( Det %.12f )\n", (double)vector[0], (double)vector[1], (double)vector[2], (double)v[0], (double)v[1], (double)v[2], det ); #endif return 1; } static inline void mathQuadricZero( mathQuadric *q ) { q->area = 0.0; q->a2 = 0.0; q->ab = 0.0; q->ac = 0.0; q->ad = 0.0; q->b2 = 0.0; q->bc = 0.0; q->bd = 0.0; q->c2 = 0.0; q->cd = 0.0; q->d2 = 0.0; return; } static inline void mathQuadricAddStoreQuadric( mathQuadric *qdst, mathQuadric *q0, mathQuadric *q1 ) { #if DEBUG_VERBOSE_QUADRIC printf( " QAdd Sr0 %f ; %f %f %f %f %f %f %f %f %f %f\n", (double)q0->area, (double)q0->a2, (double)q0->ab, (double)q0->ac, (double)q0->ad, (double)q0->b2, (double)q0->bc, (double)q0->bd, (double)q0->c2, (double)q0->cd, (double)q0->d2 ); printf( " QAdd Sr1 %f ; %f %f %f %f %f %f %f %f %f %f\n", (double)q1->area, (double)q1->a2, (double)q1->ab, (double)q1->ac, (double)q1->ad, (double)q1->b2, (double)q1->bc, (double)q1->bd, (double)q1->c2, (double)q1->cd, (double)q1->d2 ); #endif qdst->area = q0->area + q1->area; qdst->a2 = q0->a2 + q1->a2; qdst->ab = q0->ab + q1->ab; qdst->ac = q0->ac + q1->ac; qdst->ad = q0->ad + q1->ad; qdst->b2 = q0->b2 + q1->b2; qdst->bc = q0->bc + q1->bc; qdst->bd = q0->bd + q1->bd; qdst->c2 = q0->c2 + q1->c2; qdst->cd = q0->cd + q1->cd; qdst->d2 = q0->d2 + q1->d2; #if DEBUG_VERBOSE_QUADRIC printf( " QSum %f ; %f %f %f %f %f %f %f %f %f %f\n", (double)qdst->area, (double)qdst->a2, (double)qdst->ab, (double)qdst->ac, (double)qdst->ad, (double)qdst->b2, (double)qdst->bc, (double)qdst->bd, (double)qdst->c2, (double)qdst->cd, (double)qdst->d2 ); #endif return; } static inline void mathQuadricAddQuadric( mathQuadric *qdst, mathQuadric *q ) { #if DEBUG_VERBOSE_QUADRIC printf( " QAdd Src %f ; %f %f %f %f %f %f %f %f %f %f\n", (double)q->area, (double)q->a2, (double)q->ab, (double)q->ac, (double)q->ad, (double)q->b2, (double)q->bc, (double)q->bd, (double)q->c2, (double)q->cd, (double)q->d2 ); printf( " QAdd Dst %f ; %f %f %f %f %f %f %f %f %f %f\n", (double)qdst->area, (double)qdst->a2, (double)qdst->ab, (double)qdst->ac, (double)qdst->ad, (double)qdst->b2, (double)qdst->bc, (double)qdst->bd, (double)qdst->c2, (double)qdst->cd, (double)qdst->d2 ); #endif qdst->area += q->area; qdst->a2 += q->a2; qdst->ab += q->ab; qdst->ac += q->ac; qdst->ad += q->ad; qdst->b2 += q->b2; qdst->bc += q->bc; qdst->bd += q->bd; qdst->c2 += q->c2; qdst->cd += q->cd; qdst->d2 += q->d2; #if DEBUG_VERBOSE_QUADRIC printf( " QSum %f ; %f %f %f %f %f %f %f %f %f %f\n", (double)qdst->area, (double)qdst->a2, (double)qdst->ab, (double)qdst->ac, (double)qdst->ad, (double)qdst->b2, (double)qdst->bc, (double)qdst->bd, (double)qdst->c2, (double)qdst->cd, (double)qdst->d2 ); #endif return; } /* A volatile variable is used to force the compiler to do the math strictly in the order specified. */ static mdf mathQuadricEvaluate( mathQuadric *q, mdf *v ) { volatile mdqfhigh d; mdqfhigh vh[3]; vh[0] = v[0]; vh[1] = v[1]; vh[2] = v[2]; d = ( vh[0] * vh[0] * (mdqfhigh)q->a2 ) + ( vh[1] * vh[1] * (mdqfhigh)q->b2 ) + ( vh[2] * vh[2] * (mdqfhigh)q->c2 ); d += 2.0 * ( ( vh[0] * vh[1] * (mdqfhigh)q->ab ) + ( vh[0] * vh[2] * (mdqfhigh)q->ac ) + ( vh[1] * vh[2] * (mdqfhigh)q->bc ) ); d += 2.0 * ( ( vh[0] * (mdqfhigh)q->ad ) + ( vh[1] * (mdqfhigh)q->bd ) + ( vh[2] * (mdqfhigh)q->cd ) ); d += q->d2; #if DEBUG_VERBOSE_QUADRIC printf( " Q Eval %f ; %f %f %f %f %f %f %f %f %f %f : %.12f\n", (double)q->area, (double)q->a2, (double)q->ab, (double)q->ac, (double)q->ad, (double)q->b2, (double)q->bc, (double)q->bd, (double)q->c2, (double)q->cd, (double)q->d2, (double)d ); #endif return (mdf)d; } static inline void mathQuadricMul( mathQuadric *qdst, mdf f ) { qdst->a2 *= f; qdst->ab *= f; qdst->ac *= f; qdst->ad *= f; qdst->b2 *= f; qdst->bc *= f; qdst->bd *= f; qdst->c2 *= f; qdst->cd *= f; qdst->d2 *= f; return; } ////// typedef struct { mtMutex mutex; mtSignal signal; int resetcount; volatile int index; volatile int count[2]; } mdBarrier; static void mdBarrierInit( mdBarrier *barrier, int count ) { mtMutexInit( &barrier->mutex ); mtSignalInit( &barrier->signal ); barrier->resetcount = count; barrier->index = 0; barrier->count[0] = count; barrier->count[1] = count; return; } static void mdBarrierDestroy( mdBarrier *barrier ) { mtMutexDestroy( &barrier->mutex ); mtSignalDestroy( &barrier->signal ); return; } static int mdBarrierSync( mdBarrier *barrier ) { int index, ret; mtMutexLock( &barrier->mutex ); index = barrier->index; ret = 0; if( !( --barrier->count[index] ) ) { ret = 1; mtSignalBroadcast( &barrier->signal ); index ^= 1; barrier->index = index; barrier->count[index] = barrier->resetcount; } else { for( ; barrier->count[index] ; ) mtSignalWait( &barrier->signal, &barrier->mutex ); } mtMutexUnlock( &barrier->mutex ); return ret; } static int mdBarrierSyncTimeout( mdBarrier *barrier, long milliseconds ) { int index, ret; mtMutexLock( &barrier->mutex ); index = barrier->index; ret = 0; if( !( --barrier->count[index] ) ) { ret = 1; mtSignalBroadcast( &barrier->signal ); index ^= 1; barrier->index = index; barrier->count[index] = barrier->resetcount; } else { mtSignalWaitTimeout( &barrier->signal, &barrier->mutex, milliseconds ); if( !( barrier->count[index] ) ) ret = 1; else barrier->count[index]++; } mtMutexUnlock( &barrier->mutex ); return ret; } ////// /* 16 bytes ; OK ~ Custom tridata packed after it, kept aligned on 8 bytes */ typedef struct { mdi v[3]; union { int edgeflags; mdi redirectindex; } u; } mdTriangle; typedef struct { mdi v[2]; mdi triindex; void *op; } mdEdge; /* 76 bytes, try packing to 64 bytes? */ typedef struct CPU_ALIGN16 { #if CPU_SSE_SUPPORT mdf CPU_ALIGN16 point[4]; #else mdf point[3]; #endif #if MD_CONFIG_ATOMIC_SUPPORT mmAtomic32 atomicowner; #else int owner; mtSpin ownerspinlock; #endif mdi trirefbase; mdi trirefcount; mdi redirectindex; mathQuadric quadric; } mdVertex; typedef struct { int threadcount; uint32_t operationflags; int updatestatusflag; /* User supplied raw data */ void *point; size_t pointstride; void *indices; size_t indicesstride; void *tridata; size_t tridatasize; void (*indicesUserToNative)( mdi *dst, void *src ); void (*indicesNativeToUser)( void *dst, mdi *src ); void (*vertexUserToNative)( mdf *dst, void *src ); void (*vertexNativeToUser)( void *dst, mdf *src ); double (*edgeweight)( void *tridata0, void *tridata1 ); void (*vertexmerge)( void *mergecontext, int dstindex, int srcindex, double dstfactor, double srcfactor ); void *mergecontext; void (*vertexcopy)( void *copycontext, int dstindex, int srcindex ); void *copycontext; void (*writenormal)( void *dst, mdf *src ); /* Per-vertex triangle references */ mdi *trireflist; mdi trirefcount; long trirefalloc; char paddingA[64]; #if MD_CONFIG_ATOMIC_SUPPORT mmAtomic32 trireflock; #else mtSpin trirefspinlock; #endif char paddingB[64]; /* Synchronization stuff */ mdBarrier workbarrier; mdBarrier globalbarrier; int updatebuffercount; int updatebuffershift; /* List of triangles */ void *trilist; long tricount; long tripackcount; size_t trisize; /* List of vertices */ mdVertex *vertexlist; long vertexcount; long vertexalloc; long vertexpackcount; /* Hash table to locate edges from their vertex indices */ void *edgehashtable; /* Collapse penalty function */ mdf (*collapsepenalty)( mdf *newpoint, mdf *oldpoint, mdf *leftpoint, mdf *rightpoint, int *denyflag, mdf compactnesstarget ); /* To compute vertex normals */ void *normalbase; int normalformat; size_t normalstride; /* Decimation strength, max cost */ mdf maxcollapsecost; char paddingC[64]; #if MD_CONFIG_ATOMIC_SUPPORT mmAtomic32 globalvertexlock; #else mtSpin globalvertexspinlock; #endif char paddingD[64]; /* Advanced configuration options */ mdf compactnesstarget; mdf compactnesspenalty; mdf boundaryweight; int syncstepcount; mdf normalsearchangle; /* Normal recomputation buffers */ int clonesearchindex; void *vertexnormal; void *trinormal; } mdMesh; //// static void mdIndicesCharToNative( mdi *dst, void *src ) { unsigned char *s; s = src; dst[0] = s[0]; dst[1] = s[1]; dst[2] = s[2]; return; } static void mdIndicesShortToNative( mdi *dst, void *src ) { unsigned short *s; s = src; dst[0] = s[0]; dst[1] = s[1]; dst[2] = s[2]; return; } static void mdIndicesIntToNative( mdi *dst, void *src ) { unsigned int *s; s = src; dst[0] = s[0]; dst[1] = s[1]; dst[2] = s[2]; return; } static void mdIndicesInt8ToNative( mdi *dst, void *src ) { uint8_t *s; s = src; dst[0] = s[0]; dst[1] = s[1]; dst[2] = s[2]; return; } static void mdIndicesInt16ToNative( mdi *dst, void *src ) { uint16_t *s; s = src; dst[0] = s[0]; dst[1] = s[1]; dst[2] = s[2]; return; } static void mdIndicesInt32ToNative( mdi *dst, void *src ) { uint32_t *s; s = src; dst[0] = s[0]; dst[1] = s[1]; dst[2] = s[2]; return; } static void mdIndicesInt64ToNative( mdi *dst, void *src ) { uint64_t *s; s = src; dst[0] = s[0]; dst[1] = s[1]; dst[2] = s[2]; return; } static void mdIndicesNativeToChar( void *dst, mdi *src ) { unsigned char *d; d = dst; d[0] = src[0]; d[1] = src[1]; d[2] = src[2]; return; } static void mdIndicesNativeToShort( void *dst, mdi *src ) { unsigned short *d; d = dst; d[0] = src[0]; d[1] = src[1]; d[2] = src[2]; return; } static void mdIndicesNativeToInt( void *dst, mdi *src ) { unsigned int *d; d = dst; d[0] = src[0]; d[1] = src[1]; d[2] = src[2]; return; } static void mdIndicesNativeToInt8( void *dst, mdi *src ) { uint8_t *d; d = dst; d[0] = src[0]; d[1] = src[1]; d[2] = src[2]; return; } static void mdIndicesNativeToInt16( void *dst, mdi *src ) { uint16_t *d; d = dst; d[0] = src[0]; d[1] = src[1]; d[2] = src[2]; return; } static void mdIndicesNativeToInt32( void *dst, mdi *src ) { uint32_t *d; d = dst; d[0] = src[0]; d[1] = src[1]; d[2] = src[2]; return; } static void mdIndicesNativeToInt64( void *dst, mdi *src ) { uint64_t *d; d = dst; d[0] = src[0]; d[1] = src[1]; d[2] = src[2]; return; } static void mdVertexFloatToNative( mdf *dst, void *src ) { float *s; s = src; dst[0] = s[0]; dst[1] = s[1]; dst[2] = s[2]; return; } static void mdVertexDoubleToNative( mdf *dst, void *src ) { double *s; s = src; dst[0] = s[0]; dst[1] = s[1]; dst[2] = s[2]; return; } static void mdVertexShortToNative( mdf *dst, void *src ) { short *s; s = src; dst[0] = s[0]; dst[1] = s[1]; dst[2] = s[2]; return; } static void mdVertexIntToNative( mdf *dst, void *src ) { int *s; s = src; dst[0] = s[0]; dst[1] = s[1]; dst[2] = s[2]; return; } static void mdVertexInt16ToNative( mdf *dst, void *src ) { int16_t *s; s = src; dst[0] = s[0]; dst[1] = s[1]; dst[2] = s[2]; return; } static void mdVertexInt32ToNative( mdf *dst, void *src ) { int32_t *s; s = src; dst[0] = s[0]; dst[1] = s[1]; dst[2] = s[2]; return; } static void mdVertexNativeToFloat( void *dst, mdf *src ) { float *d; d = dst; d[0] = src[0]; d[1] = src[1]; d[2] = src[2]; return; } static void mdVertexNativeToDouble( void *dst, mdf *src ) { double *d; d = dst; d[0] = src[0]; d[1] = src[1]; d[2] = src[2]; return; } static void mdVertexNativeToShort( void *dst, mdf *src ) { short *d; d = dst; d[0] = src[0]; d[1] = src[1]; d[2] = src[2]; return; } static void mdVertexNativeToInt( void *dst, mdf *src ) { int *d; d = dst; d[0] = src[0]; d[1] = src[1]; d[2] = src[2]; return; } static void mdVertexNativeToInt16( void *dst, mdf *src ) { int16_t *d; d = dst; d[0] = src[0]; d[1] = src[1]; d[2] = src[2]; return; } static void mdVertexNativeToInt32( void *dst, mdf *src ) { int32_t *d; d = dst; d[0] = src[0]; d[1] = src[1]; d[2] = src[2]; return; } static void mdNormalNativeToChar( void *dst, mdf *src ) { mdf v; char *d; d = dst; v = mdfmin( 1.0, mdfmax( -1.0, src[0] ) ); if( v > 0.0 ) d[0] = (char)( ( v * (mdf)((1< 0.0 ) d[1] = (char)( ( v * (mdf)((1< 0.0 ) d[2] = (char)( ( v * (mdf)((1< 0.0 ) d[0] = (short)( ( v * (mdf)(((1< 0.0 ) d[1] = (short)( ( v * (mdf)(((1< 0.0 ) d[2] = (short)( ( v * (mdf)(((1< 0.0 ) d[0] = (int8_t)( ( v * 127.0 ) + 0.5 ); else d[0] = (int8_t)( ( v * 128.0 ) - 0.5 ); v = mdfmin( 1.0, mdfmax( -1.0, src[1] ) ); if( v > 0.0 ) d[1] = (int8_t)( ( v * 127.0 ) + 0.5 ); else d[1] = (int8_t)( ( v * 128.0 ) - 0.5 ); v = mdfmin( 1.0, mdfmax( -1.0, src[2] ) ); if( v > 0.0 ) d[2] = (int8_t)( ( v * 127.0 ) + 0.5 ); else d[2] = (int8_t)( ( v * 128.0 ) - 0.5 ); return; } static void mdNormalNativeToInt16( void *dst, mdf *src ) { mdf v; int16_t *d; d = dst; v = mdfmin( 1.0, mdfmax( -1.0, src[0] ) ); if( v > 0.0 ) d[0] = (int8_t)( ( v * 32767.0 ) + 0.5 ); else d[0] = (int8_t)( ( v * 32768.0 ) - 0.5 ); v = mdfmin( 1.0, mdfmax( -1.0, src[1] ) ); if( v > 0.0 ) d[1] = (int8_t)( ( v * 32767.0 ) + 0.5 ); else d[1] = (int8_t)( ( v * 32768.0 ) - 0.5 ); v = mdfmin( 1.0, mdfmax( -1.0, src[2] ) ); if( v > 0.0 ) d[2] = (int8_t)( ( v * 32767.0 ) + 0.5 ); else d[2] = (int8_t)( ( v * 32768.0 ) - 0.5 ); return; } static void mdNormalNativeTo10_10_10_2( void *dst, mdf *src ) { int part; uint32_t sum; mdf v; sum = 0; v = mdfmin( 1.0, mdfmax( -1.0, src[0] ) ); if( v > 0.0 ) part = (int)( ( v * 511.0 ) + 0.5 ); else part = (int)( ( v * 512.0 ) - 0.5 ); sum |= ( (uint32_t)part & 1023 ) << 0; v = mdfmin( 1.0, mdfmax( -1.0, src[1] ) ); if( v > 0.0 ) part = (int)( ( v * 511.0 ) + 0.5 ); else part = (int)( ( v * 512.0 ) - 0.5 ); sum |= ( (uint32_t)part & 1023 ) << 10; v = mdfmin( 1.0, mdfmax( -1.0, src[2] ) ); if( v > 0.0 ) part = (int)( ( v * 511.0 ) + 0.5 ); else part = (int)( ( v * 512.0 ) - 0.5 ); sum |= ( (uint32_t)part & 1023 ) << 20; #if 0 v = mdfmin( 1.0, mdfmax( -1.0, src[3] ) ); if( v > 0.0 ) part = (int)( ( v * 1.0 ) + 0.5 ); else part = (int)( ( v * 2.0 ) - 0.5 ); sum |= ( (uint32_t)part & 3 ) << 30; #endif *(uint32_t *)dst = sum; return; } //// static void mdTriangleComputeQuadric( mdMesh *mesh, mdTriangle *tri, mathQuadric *q ) { mdf area, norm, vecta[3], vectb[3], plane[4]; mdVertex *vertex0, *vertex1, *vertex2; vertex0 = &mesh->vertexlist[ tri->v[0] ]; vertex1 = &mesh->vertexlist[ tri->v[1] ]; vertex2 = &mesh->vertexlist[ tri->v[2] ]; MD_VectorSubStore( vecta, vertex1->point, vertex0->point ); MD_VectorSubStore( vectb, vertex2->point, vertex0->point ); MD_VectorCrossProduct( plane, vectb, vecta ); norm = mdfsqrt( MD_VectorDotProduct( plane, plane ) ); if( norm ) { area = 0.5 * norm; plane[0] *= 0.5; plane[1] *= 0.5; plane[2] *= 0.5; plane[3] = -MD_VectorDotProduct( plane, vertex0->point ); } else { area = 0.0; plane[0] = 0.0; plane[1] = 0.0; plane[2] = 0.0; plane[3] = 0.0; } #if DEBUG_VERBOSE_QUADRIC printf( " Plane %f %f %f %f : Area %f\n", plane[0], plane[1], plane[2], plane[3], area ); #endif mathQuadricInit( q, plane[0], plane[1], plane[2], plane[3], area ); return; } static mdf mdEdgeSolvePoint( mdVertex *vertex0, mdVertex *vertex1, mdf *point ) { mdf cost, bestcost; mdf midpoint[3]; mdf *bestpoint; mathQuadric q; mathQuadricAddStoreQuadric( &q, &vertex0->quadric, &vertex1->quadric ); midpoint[0] = 0.5 * ( vertex0->point[0] + vertex1->point[0] ); midpoint[1] = 0.5 * ( vertex0->point[1] + vertex1->point[1] ); midpoint[2] = 0.5 * ( vertex0->point[2] + vertex1->point[2] ); bestcost = mathQuadricEvaluate( &q, midpoint ); #if DEBUG_VERBOSE_QUADRIC printf( " MidCost %f %f %f : %f (%g)\n", midpoint[0], midpoint[1], midpoint[2], bestcost, bestcost ); #endif if( mathQuadricSolve( &q, point ) ) { /* In _theory_, solving the quadric should always provide the optimal cost solution */ /* In practice, rare floating point cases can screw things up, so we compare against the midpoint for safety */ cost = mathQuadricEvaluate( &q, point ); #if DEBUG_VERBOSE_QUADRIC >= 2 printf( " Quadric Eval Check ; Quadric %.12f ; Mid %.12f ; Ratio %f\n", cost, bestcost, cost / bestcost ); #endif if( cost < bestcost ) bestcost = cost; else MD_VectorCopy( point, midpoint ); } else { bestpoint = midpoint; cost = mathQuadricEvaluate( &q, vertex0->point ); #if DEBUG_VERBOSE_QUADRIC printf( " Vx0Cost %f %f %f : %f (%g)\n", vertex0->point[0], vertex0->point[1], vertex0->point[2], cost, cost ); #endif if( cost < bestcost ) { bestcost = cost; bestpoint = vertex0->point; } cost = mathQuadricEvaluate( &q, vertex1->point ); #if DEBUG_VERBOSE_QUADRIC printf( " Vx1Cost %f %f %f : %f (%g)\n", vertex1->point[0], vertex1->point[1], vertex1->point[2], cost, cost ); #endif if( cost < bestcost ) { bestcost = cost; bestpoint = vertex1->point; } #if DEBUG_VERBOSE_QUADRIC printf( " Vector : %f %f %f\n", (double)point[0], (double)point[1], (double)point[2] ); #endif MD_VectorCopy( point, bestpoint ); } return bestcost; } //// static void mdMeshAccumulateBoundary( mdVertex *vertex0, mdVertex *vertex1, mdVertex *vertex2, mdf boundaryweight ) { mdf normal[3], sideplane[4], vecta[3], vectb[3], norm, norminv, area; mathQuadric q; MD_VectorSubStore( vecta, vertex1->point, vertex0->point ); MD_VectorSubStore( vectb, vertex2->point, vertex0->point ); MD_VectorCrossProduct( normal, vectb, vecta ); norm = mdfsqrt( normal[0]*normal[0] + normal[1]*normal[1] + normal[2]*normal[2] ); area = 0.5 * norm; norminv = 1.0 / norm; MD_VectorMulScalar( normal, norminv ); #if DEBUG_VERBOSE_BOUNDARY printf( " Normal %f %f %f\n", normal[0], normal[1], normal[2] ); #endif MD_VectorCrossProduct( sideplane, vecta, normal ); MD_VectorNormalize( sideplane ); MD_VectorMulScalar( sideplane, area ); sideplane[3] = -MD_VectorDotProduct( sideplane, vertex0->point ); #if DEBUG_VERBOSE_BOUNDARY printf( " Boundary plane %f %f %f %f : Area %f\n", sideplane[0], sideplane[1], sideplane[2], sideplane[3], area ); #endif mathQuadricInit( &q, sideplane[0], sideplane[1], sideplane[2], sideplane[3], area ); mathQuadricMul( &q, boundaryweight ); #if MD_CONFIG_ATOMIC_SUPPORT mmAtomicSpin32( &vertex0->atomicowner, -1, 0xffff ); mathQuadricAddQuadric( &vertex0->quadric, &q ); mmAtomicWrite32( &vertex0->atomicowner, -1 ); #else mtSpinLock( &vertex0->ownerspinlock ); mathQuadricAddQuadric( &vertex0->quadric, &q ); mtSpinUnlock( &vertex0->ownerspinlock ); #endif #if MD_CONFIG_ATOMIC_SUPPORT mmAtomicSpin32( &vertex1->atomicowner, -1, 0xffff ); mathQuadricAddQuadric( &vertex1->quadric, &q ); mmAtomicWrite32( &vertex1->atomicowner, -1 ); #else mtSpinLock( &vertex1->ownerspinlock ); mathQuadricAddQuadric( &vertex1->quadric, &q ); mtSpinUnlock( &vertex1->ownerspinlock ); #endif return; } //// static void mdEdgeHashClearEntry( void *entry ) { mdEdge *edge; edge = entry; edge->v[0] = -1; return; } static int mdEdgeHashEntryValid( void *entry ) { mdEdge *edge; edge = entry; return ( edge->v[0] >= 0 ? 1 : 0 ); } static uint32_t mdEdgeHashEntryKey( void *entry ) { uint32_t hashkey; mdEdge *edge; edge = entry; #if MD_CONFIG_FAST_HASH hashkey = edge->v[0]; hashkey += hashkey << 10; hashkey ^= hashkey >> 6; hashkey += edge->v[1]; hashkey += hashkey << 10; hashkey ^= hashkey >> 6; hashkey += hashkey << 3; hashkey ^= hashkey >> 11; hashkey += hashkey << 15; #else #if MD_SIZEOF_MDI == 4 #if CPUCONF_WORD_SIZE == 64 { uint64_t *v = (uint64_t *)edge->v; hashkey = ccHash32Int64Inline( *v ); } #else hashkey = ccHash32Data4Inline( (void *)edge->v ); #endif #elif MD_SIZEOF_MDI == 8 #if CPUCONF_WORD_SIZE == 64 hashkey = ccHash32Array64( (uint64_t *)edge->v, 2 ); #else hashkey = ccHash32Array32( (uint32_t *)edge->v, 4 ); #endif #else hashkey = ccHash32Data( edge->v, 2*sizeof(mdi) ); #endif #endif return hashkey; } static int mdEdgeHashEntryCmp( void *entry, void *entryref ) { mdEdge *edge, *edgeref; edge = entry; edgeref = entryref; if( edge->v[0] == -1 ) return MM_HASH_ENTRYCMP_INVALID; if( ( edge->v[0] == edgeref->v[0] ) && ( edge->v[1] == edgeref->v[1] ) ) return MM_HASH_ENTRYCMP_FOUND; return MM_HASH_ENTRYCMP_SKIP; } static mmHashAccess mdEdgeHashEdge = { .clearentry = mdEdgeHashClearEntry, .entryvalid = mdEdgeHashEntryValid, .entrykey = mdEdgeHashEntryKey, .entrycmp = mdEdgeHashEntryCmp }; static int mdMeshHashInit( mdMesh *mesh, size_t trianglecount, mdf hashextrabits, uint32_t lockpageshift, size_t maxmemorysize ) { size_t edgecount, hashmemsize, meshmemsize, totalmemorysize; uint32_t hashbits, hashbitsmin, hashbitsmax; edgecount = trianglecount * 3; /* Hard minimum count of hash table bits */ hashbitsmin = ccLog2Int64( edgecount ) + 1; hashbitsmax = hashbitsmin + 4; hashbits = (uint32_t)mdfround( mdflog2( (mdf)edgecount ) + hashextrabits ); if( hashbits < hashbitsmin ) hashbits = hashbitsmin; else if( hashbits > hashbitsmax ) hashbits = hashbitsmax; if( hashbits < 12 ) hashbits = 12; /* lockpageshift = 7; works great, 128 hash entries per lock page */ if( lockpageshift < 3 ) lockpageshift = 3; else if( lockpageshift > 16 ) lockpageshift = 16; meshmemsize = ( mesh->tricount * mesh->trisize ) + ( mesh->vertexcount * sizeof(mdVertex) ); for( ; ; hashbits-- ) { if( hashbits < hashbitsmin ) return 0; hashmemsize = mmHashRequiredSize( sizeof(mdEdge), hashbits, lockpageshift ); totalmemorysize = hashmemsize + meshmemsize; /* Increase estimate of memory consumption by 25% to account for extra stuff not counted here */ totalmemorysize += totalmemorysize >> 2; #if DEBUG_VERBOSE_MEMORY printf( " Hash bits : %d (%d)\n", hashbits, hashbitsmin ); printf( " Estimated Memory Requirements : %lld bytes (%lld MB)\n", (long long)totalmemorysize, (long long)totalmemorysize >> 20 ); printf( " Memory Hard Limit : %lld bytes (%lld MB)\n", (long long)maxmemorysize, (long long)maxmemorysize >> 20 ); #endif if( ( maxmemorysize ) && ( totalmemorysize > maxmemorysize ) ) continue; mesh->edgehashtable = malloc( hashmemsize ); if( mesh->edgehashtable ) break; } mmHashInit( mesh->edgehashtable, &mdEdgeHashEdge, sizeof(mdEdge), hashbits, lockpageshift, MM_HASH_FLAGS_NO_COUNT ); return 1; } static void mdMeshHashEnd( mdMesh *mesh ) { free( mesh->edgehashtable ); return; } //// typedef struct CPU_ALIGN64 { void **opbuffer; int opcount; int opalloc; #if MD_CONFIG_ATOMIC_SUPPORT mmAtomic32 atomlock; #else mtSpin spinlock; #endif } mdUpdateBuffer; typedef struct { #if MD_CONFIG_ATOMIC_SUPPORT mmAtomic32 flags; #else int flags; mtSpin spinlock; #endif mdUpdateBuffer *updatebuffer; mdi v0, v1; #if CPU_SSE_SUPPORT mdf CPU_ALIGN16 collapsepoint[4]; #else mdf collapsepoint[3]; #endif mdf collapsecost; mdf value; mdf penalty; mmListNode list; } mdOp; #define MD_OP_FLAGS_DETACHED (0x1) #define MD_OP_FLAGS_DELETION_PENDING (0x2) #define MD_OP_FLAGS_UPDATE_QUEUED (0x4) #define MD_OP_FLAGS_UPDATE_NEEDED (0x8) #define MD_OP_FLAGS_DELETED (0x10) /* If threadcount exceeds this number, updatebuffers will be shared by nearby cores */ #define MD_THREAD_UPDATE_BUFFER_COUNTMAX (8) typedef struct CPU_ALIGN64 { int threadid; /* Memory block for ops */ mmBlockHead opblock; /* Hierarchical bucket sort of ops */ void *binsort; /* List of ops flagged by other threads in need of update */ mdUpdateBuffer updatebuffer[MD_THREAD_UPDATE_BUFFER_COUNTMAX]; /* Per-thread status trackers */ volatile long statusbuildtricount; volatile long statusbuildrefcount; volatile long statuspopulatecount; volatile long statusdeletioncount; volatile long statuscollisioncount; } mdThreadData; static void mdUpdateBufferInit( mdUpdateBuffer *updatebuffer, int opalloc ) { updatebuffer->opbuffer = malloc( opalloc * sizeof(mdOp *) ); updatebuffer->opcount = 0; updatebuffer->opalloc = opalloc; #if MD_CONFIG_ATOMIC_SUPPORT mmAtomicWrite32( &updatebuffer->atomlock, 0x0 ); #else mtSpinInit( &updatebuffer->spinlock ); #endif return; } static void mdUpdateBufferEnd( mdUpdateBuffer *updatebuffer ) { #ifndef MD_CONFIG_ATOMIC_SUPPORT mtSpinDestroy( &updatebuffer->spinlock ); #endif free( updatebuffer->opbuffer ); return; } static void mdUpdateBufferAdd( mdUpdateBuffer *updatebuffer, mdOp *op, int orflags ) { int32_t flags; #if MD_CONFIG_ATOMIC_SUPPORT for( ; ; ) { flags = mmAtomicRead32( &op->flags ); if( flags & MD_OP_FLAGS_UPDATE_QUEUED ) { if( !( flags & MD_OP_FLAGS_UPDATE_NEEDED ) || ( orflags ) ) mmAtomicOr32( &op->flags, orflags | MD_OP_FLAGS_UPDATE_NEEDED ); return; } if( mmAtomicCmpReplace32( &op->flags, flags, flags | orflags | MD_OP_FLAGS_UPDATE_QUEUED | MD_OP_FLAGS_UPDATE_NEEDED ) ) break; } /* TODO: Avoid spin lock, use atomic increment for write offset? Careful with this realloc, pointer could become invalid */ mmAtomicSpin32( &updatebuffer->atomlock, 0x0, 0x1 ); if( updatebuffer->opcount >= updatebuffer->opalloc ) { updatebuffer->opalloc <<= 1; updatebuffer->opbuffer = realloc( updatebuffer->opbuffer, updatebuffer->opalloc * sizeof(mdOp *) ); } updatebuffer->opbuffer[ updatebuffer->opcount++ ] = op; mmAtomicWrite32( &updatebuffer->atomlock, 0x0 ); #else mtSpinLock( &op->spinlock ); op->flags |= orflags; flags = op->flags; if( flags & MD_OP_FLAGS_UPDATE_QUEUED ) { if( !( flags & MD_OP_FLAGS_UPDATE_NEEDED ) ) op->flags |= MD_OP_FLAGS_UPDATE_NEEDED; mtSpinUnlock( &op->spinlock ); return; } op->flags |= MD_OP_FLAGS_UPDATE_QUEUED | MD_OP_FLAGS_UPDATE_NEEDED; mtSpinUnlock( &op->spinlock ); mtSpinLock( &updatebuffer->spinlock ); if( updatebuffer->opcount >= updatebuffer->opalloc ) { updatebuffer->opalloc <<= 1; updatebuffer->opbuffer = realloc( updatebuffer->opbuffer, updatebuffer->opalloc * sizeof(mdOp *) ); } updatebuffer->opbuffer[ updatebuffer->opcount++ ] = op; mtSpinUnlock( &updatebuffer->spinlock ); #endif return; } //// #define MD_COMPACTNESS_NORMALIZATION_FACTOR (0.5*4.0*1.732050808) static mdf mdEdgeCollapsePenaltyTriangle( mdf *newpoint, mdf *oldpoint, mdf *leftpoint, mdf *rightpoint, int *denyflag, mdf compactnesstarget ) { mdf penalty, compactness, oldcompactness, newcompactness, vecta2, norm; mdf vecta[3], oldvectb[3], oldvectc[3], newvectb[3], newvectc[3], oldnormal[3], newnormal[3]; /* Normal of old triangle */ MD_VectorSubStore( vecta, rightpoint, leftpoint ); MD_VectorSubStore( oldvectb, oldpoint, leftpoint ); MD_VectorCrossProduct( oldnormal, vecta, oldvectb ); /* Normal of new triangle */ MD_VectorSubStore( newvectb, newpoint, leftpoint ); MD_VectorCrossProduct( newnormal, vecta, newvectb ); /* Detect normal inversion */ if( MD_VectorDotProduct( oldnormal, newnormal ) < 0.0 ) { *denyflag = 1; return 0.0; } /* Penalize long thin triangles */ penalty = 0.0; vecta2 = MD_VectorDotProduct( vecta, vecta ); MD_VectorSubStore( newvectc, newpoint, rightpoint ); newcompactness = MD_COMPACTNESS_NORMALIZATION_FACTOR * mdfsqrt( MD_VectorDotProduct( newnormal, newnormal ) ); norm = vecta2 + MD_VectorDotProduct( newvectb, newvectb ) + MD_VectorDotProduct( newvectc, newvectc ); if( newcompactness < ( compactnesstarget * norm ) ) { newcompactness /= norm; MD_VectorSubStore( oldvectc, oldpoint, rightpoint ); oldcompactness = ( MD_COMPACTNESS_NORMALIZATION_FACTOR * mdfsqrt( MD_VectorDotProduct( oldnormal, oldnormal ) ) ) / ( vecta2 + MD_VectorDotProduct( oldvectb, oldvectb ) + MD_VectorDotProduct( oldvectc, oldvectc ) ); compactness = fmin( compactnesstarget, oldcompactness ) - newcompactness; if( compactness > 0.0 ) penalty = compactness; } return penalty; } #if CPU_SSE4_1_SUPPORT #if !MD_CONF_DOUBLE_PRECISION static float mdEdgeCollapsePenaltyTriangleSSE4p1f( float *newpoint, float *oldpoint, float *leftpoint, float *rightpoint, int *denyflag, float compactnesstarget ) { float penalty, compactness, oldcompactness, newcompactness; static float zero = 0.0; __m128 left, vecta, oldvectb, oldvectc, newvectb, newvectc, oldnormal, newnormal; /* Normal of old triangle */ left = _mm_load_ps( leftpoint ); vecta = _mm_sub_ps( _mm_load_ps( rightpoint ), left ); oldvectb = _mm_sub_ps( _mm_load_ps( oldpoint ), left ); oldnormal = _mm_sub_ps( _mm_mul_ps( _mm_shuffle_ps( vecta, vecta, _MM_SHUFFLE(3,0,2,1) ), _mm_shuffle_ps( oldvectb, oldvectb, _MM_SHUFFLE(3,1,0,2) ) ), _mm_mul_ps( _mm_shuffle_ps( vecta, vecta, _MM_SHUFFLE(3,1,0,2) ), _mm_shuffle_ps( oldvectb, oldvectb, _MM_SHUFFLE(3,0,2,1) ) ) ); /* Normal of new triangle */ newvectb = _mm_sub_ps( _mm_load_ps( newpoint ), left ); newnormal = _mm_sub_ps( _mm_mul_ps( _mm_shuffle_ps( vecta, vecta, _MM_SHUFFLE(3,0,2,1) ), _mm_shuffle_ps( newvectb, newvectb, _MM_SHUFFLE(3,1,0,2) ) ), _mm_mul_ps( _mm_shuffle_ps( vecta, vecta, _MM_SHUFFLE(3,1,0,2) ), _mm_shuffle_ps( newvectb, newvectb, _MM_SHUFFLE(3,0,2,1) ) ) ); /* Detect normal inversion */ if( _mm_comilt_ss( _mm_dp_ps( oldnormal, newnormal, 0x1 | 0x70 ), _mm_load_ss( &zero ) ) ) { *denyflag = 1; return 0.0; } /* Penalize long thin triangles */ penalty = 0.0; vecta = _mm_dp_ps( vecta, vecta, 0x1 | 0x70 ); newvectc = _mm_sub_ps( _mm_load_ps( newpoint ), _mm_load_ps( rightpoint ) ); newnormal = _mm_dp_ps( newnormal, newnormal, 0x1 | 0x70 ); newvectb = _mm_dp_ps( newvectb, newvectb, 0x1 | 0x70 ); newvectc = _mm_dp_ps( newvectc, newvectc, 0x1 | 0x70 ); #ifdef MD_CONFIG_SSE_APPROX newcompactness = _mm_cvtss_f32( _mm_mul_ss( _mm_set_ss( MD_COMPACTNESS_NORMALIZATION_FACTOR ), _mm_rcp_ss( _mm_mul_ss( _mm_rsqrt_ss( newnormal ), _mm_add_ss( _mm_add_ss( vecta, newvectb ), newvectc ) ) ) ) ); #else newcompactness = _mm_cvtss_f32( _mm_div_ss( _mm_mul_ss( _mm_set_ss( MD_COMPACTNESS_NORMALIZATION_FACTOR ), _mm_sqrt_ss( newnormal ) ), _mm_add_ss( _mm_add_ss( vecta, newvectb ), newvectc ) ) ); #endif if( newcompactness < compactnesstarget ) { oldvectc = _mm_sub_ps( _mm_load_ps( oldpoint ), _mm_load_ps( rightpoint ) ); oldnormal = _mm_dp_ps( oldnormal, oldnormal, 0x1 | 0x70 ); oldvectb = _mm_dp_ps( oldvectb, oldvectb, 0x1 | 0x70 ); oldvectc = _mm_dp_ps( oldvectc, oldvectc, 0x1 | 0x70 ); #ifdef MD_CONFIG_SSE_APPROX oldcompactness = _mm_cvtss_f32( _mm_mul_ss( _mm_set_ss( MD_COMPACTNESS_NORMALIZATION_FACTOR ), _mm_rcp_ss( _mm_mul_ss( _mm_rsqrt_ss( oldnormal ), _mm_add_ss( _mm_add_ss( vecta, oldvectb ), oldvectc ) ) ) ) ); #else oldcompactness = _mm_cvtss_f32( _mm_div_ss( _mm_mul_ss( _mm_set_ss( MD_COMPACTNESS_NORMALIZATION_FACTOR ), _mm_sqrt_ss( oldnormal ) ), _mm_add_ss( _mm_add_ss( vecta, oldvectb ), oldvectc ) ) ); #endif compactness = fmin( compactnesstarget, oldcompactness ) - newcompactness; if( compactness > 0.0 ) penalty = compactness; } return penalty; } #else static double mdEdgeCollapsePenaltyTriangleSSE4p1d( double *newpoint, double *oldpoint, double *leftpoint, double *rightpoint, int *denyflag, double compactnesstarget ) { __m128d vecta0, vecta1, oldvectb0, oldvectb1, oldvectc0, oldvectc1, newvectb0, newvectb1, newvectc0, newvectc1; __m128d oldnormal0, oldnormal1, newnormal0, newnormal1; __m128d dotprsum; __m128d left0, left1; double newcompactness, oldcompactness, compactness, penalty, norm; static double zero = 0.0; /* Normal of old triangle */ left0 = _mm_load_pd( leftpoint+0 ); left1 = _mm_load_sd( leftpoint+2 ); vecta0 = _mm_sub_pd( _mm_load_pd( rightpoint+0 ), left0 ); vecta1 = _mm_sub_pd( _mm_load_pd( rightpoint+2 ), left1 ); oldvectb0 = _mm_sub_pd( _mm_load_pd( oldpoint+0 ), left0 ); oldvectb1 = _mm_sub_pd( _mm_load_pd( oldpoint+2 ), left1 ); oldnormal0 = _mm_sub_pd( _mm_mul_pd( _mm_shuffle_pd( vecta0, vecta1, _MM_SHUFFLE2(0,1) ), _mm_unpacklo_pd( oldvectb1, oldvectb0 ) ), _mm_mul_pd( _mm_unpacklo_pd( vecta1, vecta0 ), _mm_shuffle_pd( oldvectb0, oldvectb1, _MM_SHUFFLE2(0,1) ) ) ); oldnormal1 = _mm_sub_sd( _mm_mul_sd( vecta0, _mm_unpackhi_pd( oldvectb0, oldvectb0 ) ), _mm_mul_sd( _mm_unpackhi_pd( vecta0, vecta0 ), oldvectb0 ) ); /* Normal of new triangle */ newvectb0 = _mm_sub_pd( _mm_load_pd( newpoint+0 ), left0 ); newvectb1 = _mm_sub_pd( _mm_load_pd( newpoint+2 ), left1 ); newnormal0 = _mm_sub_pd( _mm_mul_pd( _mm_shuffle_pd( vecta0, vecta1, _MM_SHUFFLE2(0,1) ), _mm_unpacklo_pd( newvectb1, newvectb0 ) ), _mm_mul_pd( _mm_unpacklo_pd( vecta1, vecta0 ), _mm_shuffle_pd( newvectb0, newvectb1, _MM_SHUFFLE2(0,1) ) ) ); newnormal1 = _mm_sub_sd( _mm_mul_sd( vecta0, _mm_unpackhi_pd( newvectb0, newvectb0 ) ), _mm_mul_sd( _mm_unpackhi_pd( vecta0, vecta0 ), newvectb0 ) ); /* Detect normal inversion */ dotprsum = _mm_add_sd( _mm_dp_pd( oldnormal0, oldnormal0, 0x1 | 0x30 ), _mm_mul_sd( oldnormal1, newnormal1 ) ); if( _mm_comilt_sd( dotprsum, _mm_load_sd( &zero ) ) ) { *denyflag = 1; return 0.0; } /* Penalize long thin triangles */ penalty = 0.0; vecta0 = _mm_add_sd( _mm_dp_pd( vecta0, vecta0, 0x1 | 0x30 ), _mm_mul_sd( vecta1, vecta1 ) ); newvectc0 = _mm_sub_pd( _mm_load_pd( newpoint+0 ), _mm_load_pd( rightpoint+0 ) ); newvectc1 = _mm_sub_sd( _mm_load_sd( newpoint+2 ), _mm_load_sd( rightpoint+2 ) ); newnormal0 = _mm_add_sd( _mm_dp_pd( newnormal0, newnormal0, 0x1 | 0x30 ), _mm_mul_sd( newnormal1, newnormal1 ) ); newvectb0 = _mm_add_sd( _mm_dp_pd( newvectb0, newvectb0, 0x1 | 0x30 ), _mm_mul_sd( newvectb1, newvectb1 ) ); newvectc0 = _mm_add_sd( _mm_dp_pd( newvectc0, newvectc0, 0x1 | 0x30 ), _mm_mul_sd( newvectc1, newvectc1 ) ); norm = _mm_cvtsd_f64( _mm_add_sd( vecta0, _mm_add_sd( newvectb0, newvectc0 ) ) ); newcompactness = _mm_cvtsd_f64( _mm_mul_sd( _mm_set_sd( MD_COMPACTNESS_NORMALIZATION_FACTOR ), _mm_sqrt_sd( newnormal0, newnormal0 ) ) ); if( newcompactness < ( compactnesstarget * norm ) ) { newcompactness /= norm; oldvectc0 = _mm_sub_pd( _mm_load_pd( oldpoint+0 ), _mm_load_pd( rightpoint+0 ) ); oldvectc1 = _mm_sub_sd( _mm_load_sd( oldpoint+2 ), _mm_load_sd( rightpoint+2 ) ); oldnormal0 = _mm_add_sd( _mm_dp_pd( oldnormal0, oldnormal0, 0x1 | 0x30 ), _mm_mul_sd( oldnormal1, oldnormal1 ) ); oldvectb0 = _mm_add_sd( _mm_dp_pd( oldvectb0, oldvectb0, 0x1 | 0x30 ), _mm_mul_sd( oldvectb1, oldvectb1 ) ); oldvectc0 = _mm_add_sd( _mm_dp_pd( oldvectc0, oldvectc0, 0x1 | 0x30 ), _mm_mul_sd( oldvectc1, oldvectc1 ) ); oldcompactness = _mm_cvtsd_f64( _mm_div_sd( _mm_mul_sd( _mm_set_sd( MD_COMPACTNESS_NORMALIZATION_FACTOR ), _mm_sqrt_sd( oldnormal0, oldnormal0 ) ), _mm_add_sd( vecta0, _mm_add_sd( oldvectb0, oldvectc0 ) ) ) ); compactness = fmin( compactnesstarget, oldcompactness ) - newcompactness; if( compactness > 0.0 ) penalty = compactness; } return penalty; } #endif #elif CPU_SSE3_SUPPORT #if !MD_CONF_DOUBLE_PRECISION static float mdEdgeCollapsePenaltyTriangleSSE3f( float *newpoint, float *oldpoint, float *leftpoint, float *rightpoint, int *denyflag, float compactnesstarget ) { float penalty, compactness, oldcompactness, newcompactness; static float zero = 0.0; __m128 left, vecta, oldvectb, oldvectc, newvectb, newvectc, oldnormal, newnormal, dotproduct; /* Normal of old triangle */ left = _mm_load_ps( leftpoint ); vecta = _mm_sub_ps( _mm_load_ps( rightpoint ), left ); oldvectb = _mm_sub_ps( _mm_load_ps( oldpoint ), left ); oldnormal = _mm_sub_ps( _mm_mul_ps( _mm_shuffle_ps( vecta, vecta, _MM_SHUFFLE(3,0,2,1) ), _mm_shuffle_ps( oldvectb, oldvectb, _MM_SHUFFLE(3,1,0,2) ) ), _mm_mul_ps( _mm_shuffle_ps( vecta, vecta, _MM_SHUFFLE(3,1,0,2) ), _mm_shuffle_ps( oldvectb, oldvectb, _MM_SHUFFLE(3,0,2,1) ) ) ); /* Normal of new triangle */ newvectb = _mm_sub_ps( _mm_load_ps( newpoint ), left ); newnormal = _mm_sub_ps( _mm_mul_ps( _mm_shuffle_ps( vecta, vecta, _MM_SHUFFLE(3,0,2,1) ), _mm_shuffle_ps( newvectb, newvectb, _MM_SHUFFLE(3,1,0,2) ) ), _mm_mul_ps( _mm_shuffle_ps( vecta, vecta, _MM_SHUFFLE(3,1,0,2) ), _mm_shuffle_ps( newvectb, newvectb, _MM_SHUFFLE(3,0,2,1) ) ) ); /* Detect normal inversion */ dotproduct = _mm_mul_ps( oldnormal, newnormal ); dotproduct = _mm_hadd_ps( dotproduct, dotproduct ); dotproduct = _mm_hadd_ps( dotproduct, dotproduct ); if( _mm_comilt_ss( dotproduct, _mm_load_ss( &zero ) ) ) { *denyflag = 1; return 0.0; } /* Penalize long thin triangles */ penalty = 0.0; vecta = _mm_mul_ps( vecta, vecta ); vecta = _mm_hadd_ps( vecta, vecta ); vecta = _mm_hadd_ps( vecta, vecta ); newvectc = _mm_sub_ps( _mm_load_ps( newpoint ), _mm_load_ps( rightpoint ) ); newnormal = _mm_mul_ps( newnormal, newnormal ); newnormal = _mm_hadd_ps( newnormal, newnormal ); newnormal = _mm_hadd_ps( newnormal, newnormal ); newvectb = _mm_mul_ps( newvectb, newvectb ); newvectb = _mm_hadd_ps( newvectb, newvectb ); newvectb = _mm_hadd_ps( newvectb, newvectb ); newvectc = _mm_mul_ps( newvectc, newvectc ); newvectc = _mm_hadd_ps( newvectc, newvectc ); newvectc = _mm_hadd_ps( newvectc, newvectc ); #ifdef MD_CONFIG_SSE_APPROX newcompactness = _mm_cvtss_f32( _mm_mul_ss( _mm_set_ss( MD_COMPACTNESS_NORMALIZATION_FACTOR ), _mm_rcp_ss( _mm_mul_ss( _mm_rsqrt_ss( newnormal ), _mm_add_ss( _mm_add_ss( vecta, newvectb ), newvectc ) ) ) ) ); #else newcompactness = _mm_cvtss_f32( _mm_div_ss( _mm_mul_ss( _mm_set_ss( MD_COMPACTNESS_NORMALIZATION_FACTOR ), _mm_sqrt_ss( newnormal ) ), _mm_add_ss( _mm_add_ss( vecta, newvectb ), newvectc ) ) ); #endif if( newcompactness < compactnesstarget ) { oldvectc = _mm_sub_ps( _mm_load_ps( oldpoint ), _mm_load_ps( rightpoint ) ); oldnormal = _mm_mul_ps( oldnormal, oldnormal ); oldnormal = _mm_hadd_ps( oldnormal, oldnormal ); oldnormal = _mm_hadd_ps( oldnormal, oldnormal ); oldvectb = _mm_mul_ps( oldvectb, oldvectb ); oldvectb = _mm_hadd_ps( oldvectb, oldvectb ); oldvectb = _mm_hadd_ps( oldvectb, oldvectb ); oldvectc = _mm_mul_ps( oldvectc, oldvectc ); oldvectc = _mm_hadd_ps( oldvectc, oldvectc ); oldvectc = _mm_hadd_ps( oldvectc, oldvectc ); #ifdef MD_CONFIG_SSE_APPROX oldcompactness = _mm_cvtss_f32( _mm_mul_ss( _mm_set_ss( MD_COMPACTNESS_NORMALIZATION_FACTOR ), _mm_rcp_ss( _mm_mul_ss( _mm_rsqrt_ss( oldnormal ), _mm_add_ss( _mm_add_ss( vecta, oldvectb ), oldvectc ) ) ) ) ); #else oldcompactness = _mm_cvtss_f32( _mm_div_ss( _mm_mul_ss( _mm_set_ss( MD_COMPACTNESS_NORMALIZATION_FACTOR ), _mm_sqrt_ss( oldnormal ) ), _mm_add_ss( _mm_add_ss( vecta, oldvectb ), oldvectc ) ) ); #endif compactness = fmin( compactnesstarget, oldcompactness ) - newcompactness; if( compactness > 0.0 ) penalty = compactness; } return penalty; } #else static double mdEdgeCollapsePenaltyTriangleSSE3d( double *newpoint, double *oldpoint, double *leftpoint, double *rightpoint, int *denyflag, double compactnesstarget ) { __m128d vecta0, vecta1, oldvectb0, oldvectb1, oldvectc0, oldvectc1, newvectb0, newvectb1, newvectc0, newvectc1; __m128d oldnormal0, oldnormal1, newnormal0, newnormal1; __m128d dotprsum; __m128d left0, left1; double newcompactness, oldcompactness, compactness, penalty, norm; static double zero = 0.0; /* Normal of old triangle */ left0 = _mm_load_pd( leftpoint+0 ); left1 = _mm_load_sd( leftpoint+2 ); vecta0 = _mm_sub_pd( _mm_load_pd( rightpoint+0 ), left0 ); vecta1 = _mm_sub_pd( _mm_load_pd( rightpoint+2 ), left1 ); oldvectb0 = _mm_sub_pd( _mm_load_pd( oldpoint+0 ), left0 ); oldvectb1 = _mm_sub_pd( _mm_load_pd( oldpoint+2 ), left1 ); oldnormal0 = _mm_sub_pd( _mm_mul_pd( _mm_shuffle_pd( vecta0, vecta1, _MM_SHUFFLE2(0,1) ), _mm_unpacklo_pd( oldvectb1, oldvectb0 ) ), _mm_mul_pd( _mm_unpacklo_pd( vecta1, vecta0 ), _mm_shuffle_pd( oldvectb0, oldvectb1, _MM_SHUFFLE2(0,1) ) ) ); oldnormal1 = _mm_sub_sd( _mm_mul_sd( vecta0, _mm_unpackhi_pd( oldvectb0, oldvectb0 ) ), _mm_mul_sd( _mm_unpackhi_pd( vecta0, vecta0 ), oldvectb0 ) ); /* Normal of new triangle */ newvectb0 = _mm_sub_pd( _mm_load_pd( newpoint+0 ), left0 ); newvectb1 = _mm_sub_pd( _mm_load_pd( newpoint+2 ), left1 ); newnormal0 = _mm_sub_pd( _mm_mul_pd( _mm_shuffle_pd( vecta0, vecta1, _MM_SHUFFLE2(0,1) ), _mm_unpacklo_pd( newvectb1, newvectb0 ) ), _mm_mul_pd( _mm_unpacklo_pd( vecta1, vecta0 ), _mm_shuffle_pd( newvectb0, newvectb1, _MM_SHUFFLE2(0,1) ) ) ); newnormal1 = _mm_sub_sd( _mm_mul_sd( vecta0, _mm_unpackhi_pd( newvectb0, newvectb0 ) ), _mm_mul_sd( _mm_unpackhi_pd( vecta0, vecta0 ), newvectb0 ) ); /* Detect normal inversion */ dotprsum = _mm_mul_pd( oldnormal0, newnormal0 ); dotprsum = _mm_add_sd( _mm_hadd_pd( dotprsum, dotprsum ), _mm_mul_sd( oldnormal1, newnormal1 ) ); if( _mm_comilt_sd( dotprsum, _mm_load_sd( &zero ) ) ) { *denyflag = 1; return 0.0; } /* Penalize long thin triangles */ penalty = 0.0; vecta0 = _mm_mul_pd( vecta0, vecta0 ); vecta0 = _mm_add_sd( _mm_hadd_pd( vecta0, vecta0 ), _mm_mul_sd( vecta1, vecta1 ) ); newvectc0 = _mm_sub_pd( _mm_load_pd( newpoint+0 ), _mm_load_pd( rightpoint+0 ) ); newvectc1 = _mm_sub_sd( _mm_load_sd( newpoint+2 ), _mm_load_sd( rightpoint+2 ) ); newnormal0 = _mm_mul_pd( newnormal0, newnormal0 ); newnormal0 = _mm_add_sd( _mm_hadd_pd( newnormal0, newnormal0 ), _mm_mul_sd( newnormal1, newnormal1 ) ); newvectb0 = _mm_mul_pd( newvectb0, newvectb0 ); newvectb0 = _mm_add_sd( _mm_hadd_pd( newvectb0, newvectb0 ), _mm_mul_sd( newvectb1, newvectb1 ) ); newvectc0 = _mm_mul_pd( newvectc0, newvectc0 ); newvectc0 = _mm_add_sd( _mm_hadd_pd( newvectc0, newvectc0 ), _mm_mul_sd( newvectc1, newvectc1 ) ); norm = _mm_cvtsd_f64( _mm_add_sd( vecta0, _mm_add_sd( newvectb0, newvectc0 ) ) ); newcompactness = _mm_cvtsd_f64( _mm_mul_sd( _mm_set_sd( MD_COMPACTNESS_NORMALIZATION_FACTOR ), _mm_sqrt_sd( newnormal0, newnormal0 ) ) ); if( newcompactness < ( compactnesstarget * norm ) ) { newcompactness /= norm; oldvectc0 = _mm_sub_pd( _mm_load_pd( oldpoint+0 ), _mm_load_pd( rightpoint+0 ) ); oldvectc1 = _mm_sub_sd( _mm_load_sd( oldpoint+2 ), _mm_load_sd( rightpoint+2 ) ); oldnormal0 = _mm_mul_pd( oldnormal0, oldnormal0 ); oldnormal0 = _mm_add_sd( _mm_hadd_pd( oldnormal0, oldnormal0 ), _mm_mul_sd( oldnormal1, oldnormal1 ) ); oldvectb0 = _mm_mul_pd( oldvectb0, oldvectb0 ); oldvectb0 = _mm_add_sd( _mm_hadd_pd( oldvectb0, oldvectb0 ), _mm_mul_sd( oldvectb1, oldvectb1 ) ); oldvectc0 = _mm_mul_pd( oldvectc0, oldvectc0 ); oldvectc0 = _mm_add_sd( _mm_hadd_pd( oldvectc0, oldvectc0 ), _mm_mul_sd( oldvectc1, oldvectc1 ) ); oldcompactness = _mm_cvtsd_f64( _mm_div_sd( _mm_mul_sd( _mm_set_sd( MD_COMPACTNESS_NORMALIZATION_FACTOR ), _mm_sqrt_sd( oldnormal0, oldnormal0 ) ), _mm_add_sd( vecta0, _mm_add_sd( oldvectb0, oldvectc0 ) ) ) ); compactness = fmin( compactnesstarget, oldcompactness ) - newcompactness; if( compactness > 0.0 ) penalty = compactness; } return penalty; } #endif #elif CPU_SSE2_SUPPORT #if !MD_CONF_DOUBLE_PRECISION static float mdEdgeCollapsePenaltyTriangleSSE2f( float *newpoint, float *oldpoint, float *leftpoint, float *rightpoint, int *denyflag, float compactnesstarget ) { float penalty, compactness, oldcompactness, newcompactness, vectadp, norm; __m128 left; m128f vecta, oldvectb, oldvectc, newvectb, newvectc, oldnormal, newnormal; /* Normal of old triangle */ left = _mm_load_ps( leftpoint ); vecta.v = _mm_sub_ps( _mm_load_ps( rightpoint ), left ); oldvectb.v = _mm_sub_ps( _mm_load_ps( oldpoint ), left ); oldnormal.v = _mm_sub_ps( _mm_mul_ps( _mm_shuffle_ps( vecta.v, vecta.v, _MM_SHUFFLE(3,0,2,1) ), _mm_shuffle_ps( oldvectb.v, oldvectb.v, _MM_SHUFFLE(3,1,0,2) ) ), _mm_mul_ps( _mm_shuffle_ps( vecta.v, vecta.v, _MM_SHUFFLE(3,1,0,2) ), _mm_shuffle_ps( oldvectb.v, oldvectb.v, _MM_SHUFFLE(3,0,2,1) ) ) ); /* Normal of new triangle */ newvectb.v = _mm_sub_ps( _mm_load_ps( newpoint ), left ); newnormal.v = _mm_sub_ps( _mm_mul_ps( _mm_shuffle_ps( vecta.v, vecta.v, _MM_SHUFFLE(3,0,2,1) ), _mm_shuffle_ps( newvectb.v, newvectb.v, _MM_SHUFFLE(3,1,0,2) ) ), _mm_mul_ps( _mm_shuffle_ps( vecta.v, vecta.v, _MM_SHUFFLE(3,1,0,2) ), _mm_shuffle_ps( newvectb.v, newvectb.v, _MM_SHUFFLE(3,0,2,1) ) ) ); /* Detect normal inversion */ if( MD_VectorDotProduct( oldnormal.f, newnormal.f ) < 0.0 ) { *denyflag = 1; return 0.0; } /* Penalize long thin triangles */ penalty = 0.0; vectadp = MD_VectorDotProduct( vecta.f, vecta.f ); newvectc.v = _mm_sub_ps( _mm_load_ps( newpoint ), _mm_load_ps( rightpoint ) ); #ifdef MD_CONFIG_SSE_APPROX norm = vectadp + MD_VectorDotProduct( newvectb.f, newvectb.f ) + MD_VectorDotProduct( newvectc.f, newvectc.f ); newcompactness = _mm_cvtss_f32( _mm_mul_ss( _mm_set_ss( MD_COMPACTNESS_NORMALIZATION_FACTOR ), _mm_rcp_ss( _mm_mul_ss( _mm_rsqrt_ss( _mm_set_ss( MD_VectorDotProduct( newnormal.f, newnormal.f ) ) ), _mm_set_ss( norm ) ) ) ) ); if( newcompactness < compactnesstarget ) { #else newcompactness = MD_COMPACTNESS_NORMALIZATION_FACTOR * sqrtf( MD_VectorDotProduct( newnormal.f, newnormal.f ) ); norm = vectadp + MD_VectorDotProduct( newvectb.f, newvectb.f ) + MD_VectorDotProduct( newvectc.f, newvectc.f ); if( newcompactness < ( compactnesstarget * norm ) ) { newcompactness /= norm; #endif oldvectc.v = _mm_sub_ps( _mm_load_ps( oldpoint ), _mm_load_ps( rightpoint ) ); #ifdef MD_CONFIG_SSE_APPROX norm = vectadp + MD_VectorDotProduct( oldvectb.f, oldvectb.f ) + MD_VectorDotProduct( oldvectc.f, oldvectc.f ); oldcompactness = _mm_cvtss_f32( _mm_mul_ss( _mm_set_ss( MD_COMPACTNESS_NORMALIZATION_FACTOR ), _mm_rcp_ss( _mm_mul_ss( _mm_rsqrt_ss( _mm_set_ss( MD_VectorDotProduct( oldnormal.f, oldnormal.f ) ) ), _mm_set_ss( norm ) ) ) ) ); #else oldcompactness = ( MD_COMPACTNESS_NORMALIZATION_FACTOR * sqrtf( MD_VectorDotProduct( oldnormal.f, oldnormal.f ) ) ) / ( vectadp + MD_VectorDotProduct( oldvectb.f, oldvectb.f ) + MD_VectorDotProduct( oldvectc.f, oldvectc.f ) ); #endif compactness = fmin( compactnesstarget, oldcompactness ) - newcompactness; if( compactness > 0.0 ) penalty = compactness; } return penalty; } #else static double mdEdgeCollapsePenaltyTriangleSSE2d( double *newpoint, double *oldpoint, double *leftpoint, double *rightpoint, int *denyflag, double compactnesstarget ) { __m128d vecta0, vecta1, oldvectb0, oldvectb1, oldvectc0, oldvectc1, newvectb0, newvectb1, newvectc0, newvectc1; __m128d oldnormal0, oldnormal1, newnormal0, newnormal1; __m128d dotprsum; __m128d left0, left1; double newcompactness, oldcompactness, compactness, penalty, norm; static double zero = 0.0; /* Normal of old triangle */ left0 = _mm_load_pd( leftpoint+0 ); left1 = _mm_load_sd( leftpoint+2 ); vecta0 = _mm_sub_pd( _mm_load_pd( rightpoint+0 ), left0 ); vecta1 = _mm_sub_pd( _mm_load_pd( rightpoint+2 ), left1 ); oldvectb0 = _mm_sub_pd( _mm_load_pd( oldpoint+0 ), left0 ); oldvectb1 = _mm_sub_pd( _mm_load_pd( oldpoint+2 ), left1 ); oldnormal0 = _mm_sub_pd( _mm_mul_pd( _mm_shuffle_pd( vecta0, vecta1, _MM_SHUFFLE2(0,1) ), _mm_unpacklo_pd( oldvectb1, oldvectb0 ) ), _mm_mul_pd( _mm_unpacklo_pd( vecta1, vecta0 ), _mm_shuffle_pd( oldvectb0, oldvectb1, _MM_SHUFFLE2(0,1) ) ) ); oldnormal1 = _mm_sub_sd( _mm_mul_sd( vecta0, _mm_unpackhi_pd( oldvectb0, oldvectb0 ) ), _mm_mul_sd( _mm_unpackhi_pd( vecta0, vecta0 ), oldvectb0 ) ); /* Normal of new triangle */ newvectb0 = _mm_sub_pd( _mm_load_pd( newpoint+0 ), left0 ); newvectb1 = _mm_sub_pd( _mm_load_pd( newpoint+2 ), left1 ); newnormal0 = _mm_sub_pd( _mm_mul_pd( _mm_shuffle_pd( vecta0, vecta1, _MM_SHUFFLE2(0,1) ), _mm_unpacklo_pd( newvectb1, newvectb0 ) ), _mm_mul_pd( _mm_unpacklo_pd( vecta1, vecta0 ), _mm_shuffle_pd( newvectb0, newvectb1, _MM_SHUFFLE2(0,1) ) ) ); newnormal1 = _mm_sub_sd( _mm_mul_sd( vecta0, _mm_unpackhi_pd( newvectb0, newvectb0 ) ), _mm_mul_sd( _mm_unpackhi_pd( vecta0, vecta0 ), newvectb0 ) ); /* Detect normal inversion */ dotprsum = _mm_mul_pd( oldnormal0, newnormal0 ); dotprsum = _mm_add_sd( _mm_add_sd( dotprsum, _mm_unpackhi_pd( dotprsum, dotprsum ) ), _mm_mul_sd( oldnormal1, newnormal1 ) ); if( _mm_comilt_sd( dotprsum, _mm_load_sd( &zero ) ) ) { *denyflag = 1; return 0.0; } /* Penalize long thin triangles */ penalty = 0.0; vecta0 = _mm_mul_pd( vecta0, vecta0 ); vecta0 = _mm_add_sd( _mm_add_sd( vecta0, _mm_unpackhi_pd( vecta0, vecta0 ) ), _mm_mul_sd( vecta1, vecta1 ) ); newvectc0 = _mm_sub_pd( _mm_load_pd( newpoint+0 ), _mm_load_pd( rightpoint+0 ) ); newvectc1 = _mm_sub_sd( _mm_load_sd( newpoint+2 ), _mm_load_sd( rightpoint+2 ) ); newnormal0 = _mm_mul_pd( newnormal0, newnormal0 ); newnormal0 = _mm_add_sd( _mm_add_sd( newnormal0, _mm_unpackhi_pd( newnormal0, newnormal0 ) ), _mm_mul_sd( newnormal1, newnormal1 ) ); newvectb0 = _mm_mul_pd( newvectb0, newvectb0 ); newvectb0 = _mm_add_sd( _mm_add_sd( newvectb0, _mm_unpackhi_pd( newvectb0, newvectb0 ) ), _mm_mul_sd( newvectb1, newvectb1 ) ); newvectc0 = _mm_mul_pd( newvectc0, newvectc0 ); newvectc0 = _mm_add_sd( _mm_add_sd( newvectc0, _mm_unpackhi_pd( newvectc0, newvectc0 ) ), _mm_mul_sd( newvectc1, newvectc1 ) ); norm = _mm_cvtsd_f64( _mm_add_sd( vecta0, _mm_add_sd( newvectb0, newvectc0 ) ) ); newcompactness = _mm_cvtsd_f64( _mm_mul_sd( _mm_set_sd( MD_COMPACTNESS_NORMALIZATION_FACTOR ), _mm_sqrt_sd( newnormal0, newnormal0 ) ) ); if( newcompactness < ( compactnesstarget * norm ) ) { newcompactness /= norm; oldvectc0 = _mm_sub_pd( _mm_load_pd( oldpoint+0 ), _mm_load_pd( rightpoint+0 ) ); oldvectc1 = _mm_sub_sd( _mm_load_sd( oldpoint+2 ), _mm_load_sd( rightpoint+2 ) ); oldnormal0 = _mm_mul_pd( oldnormal0, oldnormal0 ); oldnormal0 = _mm_add_sd( _mm_add_sd( oldnormal0, _mm_unpackhi_pd( oldnormal0, oldnormal0 ) ), _mm_mul_sd( oldnormal1, oldnormal1 ) ); oldvectb0 = _mm_mul_pd( oldvectb0, oldvectb0 ); oldvectb0 = _mm_add_sd( _mm_add_sd( oldvectb0, _mm_unpackhi_pd( oldvectb0, oldvectb0 ) ), _mm_mul_sd( oldvectb1, oldvectb1 ) ); oldvectc0 = _mm_mul_pd( oldvectc0, oldvectc0 ); oldvectc0 = _mm_add_sd( _mm_add_sd( oldvectc0, _mm_unpackhi_pd( oldvectc0, oldvectc0 ) ), _mm_mul_sd( oldvectc1, oldvectc1 ) ); oldcompactness = _mm_cvtsd_f64( _mm_div_sd( _mm_mul_sd( _mm_set_sd( MD_COMPACTNESS_NORMALIZATION_FACTOR ), _mm_sqrt_sd( oldnormal0, oldnormal0 ) ), _mm_add_sd( vecta0, _mm_add_sd( oldvectb0, oldvectc0 ) ) ) ); compactness = fmin( compactnesstarget, oldcompactness ) - newcompactness; if( compactness > 0.0 ) penalty = compactness; } return penalty; } #endif #endif //// static mdf mdEdgeCollapsePenaltyAll( mdMesh *mesh, mdThreadData *tdata, mdi *trireflist, mdi trirefcount, mdi pivotindex, mdi skipindex, mdf *collapsepoint, int *denyflag ) { int index; mdf penalty; mdi triindex; mdTriangle *tri; mdf (*collapsepenalty)( mdf *newpoint, mdf *oldpoint, mdf *leftpoint, mdf *rightpoint, int *denyflag, mdf compactnesstarget ); collapsepenalty = mesh->collapsepenalty; penalty = 0.0; for( index = 0 ; index < trirefcount ; index++ ) { if( *denyflag ) break; triindex = trireflist[ index ]; tri = ADDRESS( mesh->trilist, triindex * mesh->trisize ); if( tri->v[0] == -1 ) continue; #if DEBUG_VERBOSE_COST >= 2 printf( " Penalty Tri %d,%d,%d ( Pivot %d ; Skip %d )\n", tri->v[0], tri->v[1], tri->v[2], pivotindex, skipindex ); #endif #if DEBUG_DEBUG_CHECK_SOMETHING mdi triv[3]; triv[0] = tri->v[0]; triv[1] = tri->v[1]; triv[2] = tri->v[2]; #endif if( tri->v[0] == pivotindex ) { if( ( tri->v[1] == skipindex ) || ( tri->v[2] == skipindex ) ) continue; penalty += collapsepenalty( collapsepoint, mesh->vertexlist[ tri->v[0] ].point, mesh->vertexlist[ tri->v[2] ].point, mesh->vertexlist[ tri->v[1] ].point, denyflag, mesh->compactnesstarget ); } else if( tri->v[1] == pivotindex ) { if( ( tri->v[2] == skipindex ) || ( tri->v[0] == skipindex ) ) continue; penalty += collapsepenalty( collapsepoint, mesh->vertexlist[ tri->v[1] ].point, mesh->vertexlist[ tri->v[0] ].point, mesh->vertexlist[ tri->v[2] ].point, denyflag, mesh->compactnesstarget ); } else if( tri->v[2] == pivotindex ) { if( ( tri->v[0] == skipindex ) || ( tri->v[1] == skipindex ) ) continue; penalty += collapsepenalty( collapsepoint, mesh->vertexlist[ tri->v[2] ].point, mesh->vertexlist[ tri->v[1] ].point, mesh->vertexlist[ tri->v[0] ].point, denyflag, mesh->compactnesstarget ); } else { #if DEBUG_DEBUG_CHECK_SOMETHING MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); printf( "CopyV : %d %d %d (%d)\n", triv[0], triv[1], triv[2], pivotindex ); printf( "TriV : %d %d %d (%d)\n", tri->v[0], tri->v[1], tri->v[2], pivotindex ); sleep( 1 ); printf( "CopyV : %d %d %d (%d)\n", triv[0], triv[1], triv[2], pivotindex ); printf( "TriV : %d %d %d (%d)\n", tri->v[0], tri->v[1], tri->v[2], pivotindex ); #endif } } penalty *= mesh->compactnesspenalty; #if DEBUG_VERBOSE_COST printf( " Penalty Sum : %.12f\n", penalty ); #endif return penalty; } static mdf mdEdgeCollapsePenalty( mdMesh *mesh, mdThreadData *tdata, mdi v0, mdi v1, mdf *collapsepoint, int *denyflag ) { mdf penalty, collapsearea, penaltyfactor; mdVertex *vertex0, *vertex1; vertex0 = &mesh->vertexlist[ v0 ]; vertex1 = &mesh->vertexlist[ v1 ]; collapsearea = vertex0->quadric.area + vertex1->quadric.area; penaltyfactor = collapsearea * collapsearea / ( vertex0->trirefcount + vertex1->trirefcount ); #if DEBUG_VERBOSE_COST printf( " Compute Penalty Edge %d,%d\n", v0, v1 ); #endif *denyflag = 0; penalty = mdEdgeCollapsePenaltyAll( mesh, tdata, &mesh->trireflist[ vertex0->trirefbase ], vertex0->trirefcount, v0, v1, collapsepoint, denyflag ); penalty += mdEdgeCollapsePenaltyAll( mesh, tdata, &mesh->trireflist[ vertex1->trirefbase ], vertex1->trirefcount, v1, v0, collapsepoint, denyflag ); #if DEBUG_VERBOSE_COST printf( " Penalty Total : %.12f * %.12f -> %.12f\n", penalty, penaltyfactor, penalty * penaltyfactor ); #endif penalty *= penaltyfactor; return penalty; } //// static void mdMeshEdgeOpCallback( void *opaque, void *entry, int newflag ) { mdEdge *edge; edge = entry; edge->op = opaque; return; } static double mdMeshOpValueCallback( void *item ) { mdOp *op; op = item; return (double)op->collapsecost; } static void mdMeshAddOp( mdMesh *mesh, mdThreadData *tdata, mdi v0, mdi v1 ) { int denyflag, opflags; mdOp *op; mdEdge edge; op = mmBlockAlloc( &tdata->opblock ); opflags = 0x0; op->updatebuffer = tdata->updatebuffer; op->v0 = v0; op->v1 = v1; op->value = mdEdgeSolvePoint( &mesh->vertexlist[v0], &mesh->vertexlist[v1], op->collapsepoint ); #if CPU_SSE_SUPPORT op->collapsepoint[3] = 0.0; #endif op->penalty = mdEdgeCollapsePenalty( mesh, tdata, v0, v1, op->collapsepoint, &denyflag ); op->collapsecost = op->value + op->penalty; if( ( denyflag ) || ( op->collapsecost > mesh->maxcollapsecost ) ) opflags |= MD_OP_FLAGS_DETACHED; else mmBinSortAdd( tdata->binsort, op, op->collapsecost ); #if MD_CONFIG_ATOMIC_SUPPORT mmAtomicWrite32( &op->flags, opflags ); #else op->flags = opflags; mtSpinInit( &op->spinlock ); #endif edge.v[0] = v0; edge.v[1] = v1; if( mmHashLockCallEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge, mdMeshEdgeOpCallback, op, 0 ) != MM_HASH_SUCCESS ) MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); #if DEBUG_VERBOSE_COLLAPSE printf( " Added Edge Op %d,%d ; Point %f %f %f ; Value %f ; Penalty %f ; Cost %f (%g, %f)\n", (int)op->v0, (int)op->v1, op->collapsepoint[0], op->collapsepoint[1], op->collapsepoint[2], op->value, op->penalty, op->collapsecost, op->collapsecost, op->collapsecost / mesh->maxcollapsecost ); #endif return; } static void mdMeshPopulateOpList( mdMesh *mesh, mdThreadData *tdata, mdi tribase, mdi tricount ) { mdTriangle *tri, *tristart, *triend; long populatecount; tristart = ADDRESS( mesh->trilist, tribase * mesh->trisize ); triend = ADDRESS( tristart, tricount * mesh->trisize ); populatecount = 0; for( tri = tristart ; tri < triend ; tri = ADDRESS( tri, mesh->trisize ) ) { #if DEBUG_VERBOSE_COLLAPSE printf( "Triangle Edges %d,%d,%d\n", tri->v[0], tri->v[1], tri->v[2] ); #endif if( ( ( tri->v[0] < tri->v[1] ) || ( tri->u.edgeflags & 0x1 ) ) && !( tri->u.edgeflags & 0x10 ) ) mdMeshAddOp( mesh, tdata, tri->v[0], tri->v[1] ); if( ( ( tri->v[1] < tri->v[2] ) || ( tri->u.edgeflags & 0x2 ) ) && !( tri->u.edgeflags & 0x20 ) ) mdMeshAddOp( mesh, tdata, tri->v[1], tri->v[2] ); if( ( ( tri->v[2] < tri->v[0] ) || ( tri->u.edgeflags & 0x4 ) ) && !( tri->u.edgeflags & 0x40 ) ) mdMeshAddOp( mesh, tdata, tri->v[2], tri->v[0] ); populatecount++; tdata->statuspopulatecount = populatecount; } return; } //// /* Merge vertex attributes of v0 and v1, write to v0 */ static inline void mdEdgeCollapseMergeVertexAttribs( mdMesh *mesh, mdThreadData *tdata, mdi v0, mdi v1, mdf *collapsepoint ) { mdVertex *vertex0, *vertex1; mdf dist[3], dist0, dist1, weightsum, weightsuminv; mdf weight0, weight1; vertex0 = &mesh->vertexlist[ v0 ]; MD_VectorSubStore( dist, collapsepoint, vertex0->point ); dist0 = MD_VectorMagnitude( dist ); vertex1 = &mesh->vertexlist[ v1 ]; MD_VectorSubStore( dist, collapsepoint, vertex1->point ); dist1 = MD_VectorMagnitude( dist ); weight0 = dist1 * vertex0->quadric.area; weight1 = dist0 * vertex1->quadric.area; weightsum = weight0 + weight1; if( weightsum ) { weightsuminv = 1.0 / weightsum; weight0 *= weightsuminv; weight1 *= weightsuminv; } else { weight0 = 0.5; weight1 = 0.5; } mesh->vertexmerge( mesh->mergecontext, v0, v1, weight0, weight1 ); return; } /* Delete triangle and return outer vertex */ static mdi mdEdgeCollapseDeleteTriangle( mdMesh *mesh, mdThreadData *tdata, mdi v0, mdi v1, int *retdelflags ) { int delflags; mdi outer; mdEdge edge; mdTriangle *tri; mdOp *op; *retdelflags = 0x0; edge.v[0] = v0; edge.v[1] = v1; if( mmHashLockDeleteEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge, 1 ) != MM_HASH_SUCCESS ) return -1; op = edge.op; if( op ) mdUpdateBufferAdd( &op->updatebuffer[ tdata->threadid >> mesh->updatebuffershift ], op, MD_OP_FLAGS_DELETION_PENDING ); tri = ADDRESS( mesh->trilist, edge.triindex * mesh->trisize ); #if DEBUG_VERBOSE_COLLAPSE printf( " Delete Triangle %d,%d,%d\n", tri->v[0], tri->v[1], tri->v[2] ); #endif if( tri->v[0] != v0 ) { edge.v[0] = tri->v[0]; edge.v[1] = tri->v[1]; if( mmHashLockDeleteEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge, 1 ) == MM_HASH_SUCCESS ) { op = edge.op; if( op ) mdUpdateBufferAdd( &op->updatebuffer[ tdata->threadid >> mesh->updatebuffershift ], op, MD_OP_FLAGS_DELETION_PENDING ); } else { #if 0 /* Shouldn't happen with a proper watertight mesh, but it can happen if edges are reused... */ MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); #endif } } if( tri->v[1] != v0 ) { edge.v[0] = tri->v[1]; edge.v[1] = tri->v[2]; if( mmHashLockDeleteEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge, 1 ) == MM_HASH_SUCCESS ) { op = edge.op; if( op ) mdUpdateBufferAdd( &op->updatebuffer[ tdata->threadid >> mesh->updatebuffershift ], op, MD_OP_FLAGS_DELETION_PENDING ); } else { #if 0 /* Shouldn't happen with a proper watertight mesh, but it can happen if edges are reused... */ MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); #endif } } if( tri->v[2] != v0 ) { edge.v[0] = tri->v[2]; edge.v[1] = tri->v[0]; if( mmHashLockDeleteEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge, 1 ) == MM_HASH_SUCCESS ) { op = edge.op; if( op ) mdUpdateBufferAdd( &op->updatebuffer[ tdata->threadid >> mesh->updatebuffershift ], op, MD_OP_FLAGS_DELETION_PENDING ); } else { #if 0 /* Shouldn't happen with a proper watertight mesh, but it can happen if edges are reused... */ MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); #endif } } /* Determine outer vertex */ /* TODO: Replace branches with bitwise arithmetics */ delflags = 0x0; if( tri->v[0] == v0 ) { outer = tri->v[2]; if( tri->u.edgeflags & 0x2 ) delflags |= 0x1; if( tri->u.edgeflags & 0x4 ) delflags |= 0x2; } else if( tri->v[1] == v0 ) { outer = tri->v[0]; if( tri->u.edgeflags & 0x4 ) delflags |= 0x1; if( tri->u.edgeflags & 0x1 ) delflags |= 0x2; } else if( tri->v[2] == v0 ) { outer = tri->v[1]; if( tri->u.edgeflags & 0x1 ) delflags |= 0x1; if( tri->u.edgeflags & 0x2 ) delflags |= 0x2; } else { MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); outer = tri->v[0]; } *retdelflags = delflags; /* Invalidate triangle */ tri->v[0] = -1; return outer; } static void mdEdgeCollapseUpdateTriangle( mdMesh *mesh, mdThreadData *tdata, mdTriangle *tri, mdi newv, int pivot, int left, int right ) { mdEdge edge; mdOp *op; #if DEBUG_VERBOSE_COLLAPSE printf( " Collapse Update %d : Tri %d,%d,%d\n", newv, tri->v[pivot], tri->v[right], tri->v[left] ); #endif edge.v[0] = tri->v[pivot]; edge.v[1] = tri->v[right]; edge.op = 0; if( edge.v[0] == newv ) { if( mmHashLockReadEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge ) != MM_HASH_SUCCESS ) { #if 0 /* Shouldn't happen with a proper watertight mesh, but it can happen if edges are reused... */ MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); #endif } } else { if( mmHashLockDeleteEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge, 1 ) == MM_HASH_SUCCESS ) { edge.v[0] = newv; if( mmHashLockAddEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge, 1 ) != MM_HASH_SUCCESS ) MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); } else { #if 0 /* Shouldn't happen with a proper watertight mesh, but it can happen if edges are reused... */ MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); #endif } } op = edge.op; if( op ) { #if DEBUG_VERBOSE_COLLAPSE printf( " Update Edge %d,%d Before ; Point %f %f %f ; Cost %f\n", op->v0, op->v1, op->collapsepoint[0], op->collapsepoint[1], op->collapsepoint[2], op->collapsecost ); #endif #if MD_CONFIG_DELAYED_OP_REDIRECT op->value = mdEdgeSolvePoint( &mesh->vertexlist[edge.v[0]], &mesh->vertexlist[edge.v[1]], op->collapsepoint ); #else op->v0 = newv; op->value = mdEdgeSolvePoint( &mesh->vertexlist[op->v0], &mesh->vertexlist[op->v1], op->collapsepoint ); #endif #if CPU_SSE_SUPPORT op->collapsepoint[3] = 0.0; #endif mdUpdateBufferAdd( &op->updatebuffer[ tdata->threadid >> mesh->updatebuffershift ], op, 0x0 ); #if DEBUG_VERBOSE_COLLAPSE printf( " Update Edge %d,%d After ; Point %f %f %f ; Cost %f\n", op->v0, op->v1, op->collapsepoint[0], op->collapsepoint[1], op->collapsepoint[2], op->collapsecost ); printf( " Edge %d,%d ; Value %f ; Penalty %f ; Cost %f\n", op->v0, op->v1, op->value, op->penalty, op->collapsecost ); #endif } edge.v[0] = tri->v[left]; edge.v[1] = tri->v[pivot]; edge.op = 0; if( edge.v[1] == newv ) { if( mmHashLockReadEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge ) != MM_HASH_SUCCESS ) { #if 0 /* Shouldn't happen with a proper watertight mesh, but it can happen if edges are reused... */ MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); #endif } } else { if( mmHashLockDeleteEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge, 1 ) == MM_HASH_SUCCESS ) { edge.v[1] = newv; if( mmHashLockAddEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge, 1 ) != MM_HASH_SUCCESS ) MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); } else { #if 0 /* Shouldn't happen with a proper watertight mesh, but it can happen if edges are reused... */ MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); #endif } } op = edge.op; if( op ) { #if DEBUG_VERBOSE_COLLAPSE printf( " Update Edge %d,%d Before ; Point %f %f %f ; Cost %f\n", op->v0, op->v1, op->collapsepoint[0], op->collapsepoint[1], op->collapsepoint[2], op->collapsecost ); #endif #if MD_CONFIG_DELAYED_OP_REDIRECT op->value = mdEdgeSolvePoint( &mesh->vertexlist[edge.v[0]], &mesh->vertexlist[edge.v[1]], op->collapsepoint ); #else op->v1 = newv; op->value = mdEdgeSolvePoint( &mesh->vertexlist[op->v0], &mesh->vertexlist[op->v1], op->collapsepoint ); #endif #if CPU_SSE_SUPPORT op->collapsepoint[3] = 0.0; #endif mdUpdateBufferAdd( &op->updatebuffer[ tdata->threadid >> mesh->updatebuffershift ], op, 0x0 ); #if DEBUG_VERBOSE_COLLAPSE printf( " Update Edge %d,%d After ; Point %f %f %f ; Cost %f\n", op->v0, op->v1, op->collapsepoint[0], op->collapsepoint[1], op->collapsepoint[2], op->collapsecost ); printf( " Edge %d,%d ; Value %f ; Penalty %f ; Cost %f\n", op->v0, op->v1, op->value, op->penalty, op->collapsecost ); #endif } tri->v[pivot] = newv; return; } /* Walk through the list of all triangles attached to a vertex - Update cost of collapse for other edges of triangles - Build up the updated list of triangle references for new vertex */ static mdi *mdEdgeCollapseUpdateAll( mdMesh *mesh, mdThreadData *tdata, mdi *trireflist, mdi trirefcount, mdi oldv, mdi newv, mdi *trirefstore ) { int index; mdi triindex; mdTriangle *tri; for( index = 0 ; index < trirefcount ; index++ ) { triindex = trireflist[ index ]; tri = ADDRESS( mesh->trilist, triindex * mesh->trisize ); if( tri->v[0] == -1 ) continue; #if DEBUG_VERBOSE_COLLAPSE printf( " Tri %d : %d %d %d\n", index, tri->v[0], tri->v[1], tri->v[2] ); #endif *trirefstore = triindex; trirefstore++; if( tri->v[0] == oldv ) mdEdgeCollapseUpdateTriangle( mesh, tdata, tri, newv, 0, 2, 1 ); else if( tri->v[1] == oldv ) mdEdgeCollapseUpdateTriangle( mesh, tdata, tri, newv, 1, 0, 2 ); else if( tri->v[2] == oldv ) mdEdgeCollapseUpdateTriangle( mesh, tdata, tri, newv, 2, 1, 0 ); else MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); } return trirefstore; } static void mdVertexInvalidateTri( mdMesh *mesh, mdThreadData *tdata, mdi v0, mdi v1 ) { mdEdge edge; mdOp *op; edge.v[0] = v0; edge.v[1] = v1; if( mmHashLockReadEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge ) == MM_HASH_SUCCESS ) { op = edge.op; if( op ) mdUpdateBufferAdd( &op->updatebuffer[ tdata->threadid >> mesh->updatebuffershift ], op, 0x0 ); } #if 0 /* Shouldn't happen with a proper watertight mesh, but it can happen if edges are reused... */ else MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); #endif return; } static void mdVertexInvalidate( mdMesh *mesh, mdThreadData *tdata, mdi vertexindex ) { mdi index, triindex, trirefcount; mdi *trireflist; mdTriangle *tri; mdVertex *vertex; /* Vertices of the collapsed edge */ vertex = &mesh->vertexlist[ vertexindex ]; trireflist = &mesh->trireflist[ vertex->trirefbase ]; trirefcount = vertex->trirefcount; for( index = 0 ; index < trirefcount ; index++ ) { triindex = trireflist[ index ]; tri = ADDRESS( mesh->trilist, triindex * mesh->trisize ); if( tri->v[0] == -1 ) continue; if( tri->v[0] == vertexindex ) { mdVertexInvalidateTri( mesh, tdata, tri->v[0], tri->v[1] ); mdVertexInvalidateTri( mesh, tdata, tri->v[2], tri->v[0] ); } else if( tri->v[1] == vertexindex ) { mdVertexInvalidateTri( mesh, tdata, tri->v[1], tri->v[2] ); mdVertexInvalidateTri( mesh, tdata, tri->v[0], tri->v[1] ); } else if( tri->v[2] == vertexindex ) { mdVertexInvalidateTri( mesh, tdata, tri->v[2], tri->v[0] ); mdVertexInvalidateTri( mesh, tdata, tri->v[1], tri->v[2] ); } else MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); } return; } static void mdVertexInvalidateAll( mdMesh *mesh, mdThreadData *tdata, mdi *trireflist, mdi trirefcount, mdi pivotindex ) { int index; mdi triindex; mdTriangle *tri; for( index = 0 ; index < trirefcount ; index++ ) { triindex = trireflist[ index ]; tri = ADDRESS( mesh->trilist, triindex * mesh->trisize ); if( tri->v[0] == -1 ) continue; if( tri->v[0] == pivotindex ) { mdVertexInvalidate( mesh, tdata, tri->v[1] ); mdVertexInvalidate( mesh, tdata, tri->v[2] ); } else if( tri->v[1] == pivotindex ) { mdVertexInvalidate( mesh, tdata, tri->v[2] ); mdVertexInvalidate( mesh, tdata, tri->v[0] ); } else if( tri->v[2] == pivotindex ) { mdVertexInvalidate( mesh, tdata, tri->v[0] ); mdVertexInvalidate( mesh, tdata, tri->v[1] ); } else MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); } return; } static void mdEdgeCollapseLinkOuter( mdMesh *mesh, mdThreadData *tdata, mdi newv, mdi outer ) { int sideflags; mdEdge edge; mdOp *op; if( outer == -1 ) return; sideflags = 0x0; edge.v[0] = newv; edge.v[1] = outer; if( mmHashLockReadEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge ) == MM_HASH_SUCCESS ) { sideflags |= 0x1; op = edge.op; if( op ) return; } edge.v[0] = outer; edge.v[1] = newv; if( mmHashLockReadEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge ) == MM_HASH_SUCCESS ) { sideflags |= 0x2; op = edge.op; if( op ) return; } if( ( newv < outer ) && ( sideflags & 0x1 ) ) mdMeshAddOp( mesh, tdata, newv, outer ); else if( sideflags & 0x2 ) mdMeshAddOp( mesh, tdata, outer, newv ); return; } static void mdEdgeCollapsePropagateBoundary( mdMesh *mesh, mdi v0, mdi v1 ) { mdEdge edge; mdTriangle *tri; edge.v[0] = v1; edge.v[1] = v0; if( mmHashLockReadEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge ) == MM_HASH_SUCCESS ) return; edge.v[0] = v0; edge.v[1] = v1; if( mmHashLockReadEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge ) != MM_HASH_SUCCESS ) return; tri = ADDRESS( mesh->trilist, edge.triindex * mesh->trisize ); if( tri->v[0] == v0 ) tri->u.edgeflags |= 0x1; else if( tri->v[1] == v0 ) tri->u.edgeflags |= 0x2; else if( tri->v[2] == v0 ) tri->u.edgeflags |= 0x4; else MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); /* TODO: Grow the boundary for affected vertex quadrics */ return; } #define MD_LOCK_BUFFER_STATIC (512) typedef struct { mdi vertexstatic[MD_LOCK_BUFFER_STATIC]; mdi *vertexlist; int vertexcount; int vertexalloc; } mdLockBuffer; static inline void mdLockBufferInit( mdLockBuffer *buffer, int maxvertexcount ) { buffer->vertexlist = buffer->vertexstatic; buffer->vertexalloc = MD_LOCK_BUFFER_STATIC; if( maxvertexcount > MD_LOCK_BUFFER_STATIC ) { buffer->vertexlist = malloc( maxvertexcount * sizeof(mdi) ); buffer->vertexalloc = maxvertexcount; } buffer->vertexcount = 0; return; } static inline void mdLockBufferResize( mdLockBuffer *buffer, int maxvertexcount ) { int index; mdi *vertexlist; if( maxvertexcount > buffer->vertexalloc ) { vertexlist = malloc( maxvertexcount * sizeof(mdi) ); for( index = 0 ; index < buffer->vertexcount ; index++ ) vertexlist[index] = buffer->vertexlist[index]; if( buffer->vertexlist != buffer->vertexstatic ) free( buffer->vertexlist ); buffer->vertexlist = vertexlist; buffer->vertexalloc = maxvertexcount; } return; } static inline void mdLockBufferEnd( mdLockBuffer *buffer ) { if( buffer->vertexlist != buffer->vertexstatic ) free( buffer->vertexlist ); buffer->vertexlist = buffer->vertexstatic; buffer->vertexalloc = MD_LOCK_BUFFER_STATIC; buffer->vertexcount = 0; return; } void mdLockBufferUnlockAll( mdMesh *mesh, mdThreadData *tdata, mdLockBuffer *buffer ) { int index; mdVertex *vertex; for( index = 0 ; index < buffer->vertexcount ; index++ ) { vertex = &mesh->vertexlist[ buffer->vertexlist[index] ]; #if MD_CONFIG_ATOMIC_SUPPORT if( mmAtomicRead32( &vertex->atomicowner ) == tdata->threadid ) mmAtomicCmpXchg32( &vertex->atomicowner, tdata->threadid, -1 ); #else mtSpinLock( &vertex->ownerspinlock ); if( vertex->owner == tdata->threadid ) vertex->owner = -1; mtSpinUnlock( &vertex->ownerspinlock ); #endif } buffer->vertexcount = 0; return; } /* If it fails, release all locks */ int mdLockBufferTryLock( mdMesh *mesh, mdThreadData *tdata, mdLockBuffer *buffer, mdi vertexindex ) { int32_t owner; mdVertex *vertex; vertex = &mesh->vertexlist[ vertexindex ]; #if MD_CONFIG_ATOMIC_SUPPORT owner = mmAtomicRead32( &vertex->atomicowner ); if( owner == tdata->threadid ) return 1; if( ( owner != -1 ) || !( mmAtomicCmpReplace32( &vertex->atomicowner, -1, tdata->threadid ) ) ) { mdLockBufferUnlockAll( mesh, tdata, buffer ); return 0; } #else mtSpinLock( &vertex->ownerspinlock ); owner = vertex->owner; if( owner == tdata->threadid ) { mtSpinUnlock( &vertex->ownerspinlock ); return 1; } if( owner != -1 ) { mtSpinUnlock( &vertex->ownerspinlock ); mdLockBufferUnlockAll( mesh, tdata, buffer ); return 0; } vertex->owner = tdata->threadid; mtSpinUnlock( &vertex->ownerspinlock ); #endif buffer->vertexlist[buffer->vertexcount++] = vertexindex; return 1; } /* If it fails, release all locks then wait for the desired lock to become available */ int mdLockBufferLock( mdMesh *mesh, mdThreadData *tdata, mdLockBuffer *buffer, mdi vertexindex ) { int32_t owner; mdVertex *vertex; vertex = &mesh->vertexlist[ vertexindex ]; #if MD_CONFIG_ATOMIC_SUPPORT owner = mmAtomicRead32( &vertex->atomicowner ); if( owner == tdata->threadid ) return 1; if( ( owner == -1 ) && mmAtomicCmpReplace32( &vertex->atomicowner, -1, tdata->threadid ) ) { buffer->vertexlist[buffer->vertexcount++] = vertexindex; return 1; } /* Lock failed, release all locks and wait until we get the lock we got stuck on */ mdLockBufferUnlockAll( mesh, tdata, buffer ); mmAtomicSpinWaitEq32( &vertex->atomicowner, -1 ); #else mtSpinLock( &vertex->ownerspinlock ); owner = vertex->owner; if( owner == tdata->threadid ) { mtSpinUnlock( &vertex->ownerspinlock ); return 1; } if( owner == -1 ) { vertex->owner = tdata->threadid; mtSpinUnlock( &vertex->ownerspinlock ); buffer->vertexlist[buffer->vertexcount++] = vertexindex; return 1; } mtSpinUnlock( &vertex->ownerspinlock ); /* Lock failed, release all locks */ mdLockBufferUnlockAll( mesh, tdata, buffer ); #endif return 0; } static int mdPivotLockRefs( mdMesh *mesh, mdThreadData *tdata, mdLockBuffer *buffer, mdi vertexindex ) { int index; mdi triindex, iav, ibv, trirefcount; mdi *trireflist; mdTriangle *tri; mdVertex *vertex; vertex = &mesh->vertexlist[ vertexindex ]; trireflist = &mesh->trireflist[ vertex->trirefbase ]; trirefcount = vertex->trirefcount; for( index = 0 ; index < trirefcount ; index++ ) { triindex = trireflist[ index ]; tri = ADDRESS( mesh->trilist, triindex * mesh->trisize ); if( tri->v[0] == -1 ) continue; if( tri->v[0] == vertexindex ) { iav = tri->v[1]; ibv = tri->v[2]; } else if( tri->v[1] == vertexindex ) { iav = tri->v[2]; ibv = tri->v[0]; } else if( tri->v[2] == vertexindex ) { iav = tri->v[0]; ibv = tri->v[1]; } else { MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); continue; } if( !( mdLockBufferLock( mesh, tdata, buffer, iav ) ) ) return 0; if( !( mdLockBufferLock( mesh, tdata, buffer, ibv ) ) ) return 0; } return 1; } static void mdOpResolveLockEdge( mdMesh *mesh, mdThreadData *tdata, mdLockBuffer *lockbuffer, mdOp *op ) { int failcount, globalflag; mdVertex *vertex0, *vertex1; failcount = 0; globalflag = 0; for( ; ; ) { if( ( failcount > MD_GLOBAL_LOCK_THRESHOLD ) && !( globalflag ) ) { mdLockBufferUnlockAll( mesh, tdata, lockbuffer ); #if MD_CONFIG_ATOMIC_SUPPORT mmAtomicSpin32( &mesh->globalvertexlock, 0x0, 0x1 ); #else mtSpinLock( &mesh->globalvertexspinlock ); #endif globalflag = 1; } if( !( mdLockBufferLock( mesh, tdata, lockbuffer, op->v0 ) ) || !( mdLockBufferLock( mesh, tdata, lockbuffer, op->v1 ) ) ) { failcount++; continue; } vertex0 = &mesh->vertexlist[ op->v0 ]; vertex1 = &mesh->vertexlist[ op->v1 ]; #if MD_CONFIG_DELAYED_OP_REDIRECT if( vertex0->redirectindex != -1 ) op->v0 = vertex0->redirectindex; else if( vertex1->redirectindex != -1 ) op->v1 = vertex1->redirectindex; else break; #else break; #endif } #if MD_CONFIG_ATOMIC_SUPPORT if( globalflag ) mmAtomicWrite32( &mesh->globalvertexlock, 0x0 ); #else if( globalflag ) mtSpinUnlock( &mesh->globalvertexspinlock ); #endif return; } static int mdOpResolveLockEdgeTry( mdMesh *mesh, mdThreadData *tdata, mdLockBuffer *lockbuffer, mdOp *op ) { mdVertex *vertex0, *vertex1; for( ; ; ) { if( !( mdLockBufferTryLock( mesh, tdata, lockbuffer, op->v0 ) ) || !( mdLockBufferTryLock( mesh, tdata, lockbuffer, op->v1 ) ) ) return 0; vertex0 = &mesh->vertexlist[ op->v0 ]; vertex1 = &mesh->vertexlist[ op->v1 ]; #if MD_CONFIG_DELAYED_OP_REDIRECT if( vertex0->redirectindex != -1 ) op->v0 = vertex0->redirectindex; else if( vertex1->redirectindex != -1 ) op->v1 = vertex1->redirectindex; else break; #else break; #endif } return 1; } static void mdOpResolveLockFull( mdMesh *mesh, mdThreadData *tdata, mdLockBuffer *lockbuffer, mdOp *op ) { int failcount, globalflag; mdVertex *vertex0, *vertex1; failcount = 0; globalflag = 0; for( ; ; ) { if( ( failcount > MD_GLOBAL_LOCK_THRESHOLD ) && !( globalflag ) ) { mdLockBufferUnlockAll( mesh, tdata, lockbuffer ); #if MD_CONFIG_ATOMIC_SUPPORT mmAtomicSpin32( &mesh->globalvertexlock, 0x0, 0x1 ); #else mtSpinLock( &mesh->globalvertexspinlock ); #endif globalflag = 1; } if( !( mdLockBufferLock( mesh, tdata, lockbuffer, op->v0 ) ) || !( mdLockBufferLock( mesh, tdata, lockbuffer, op->v1 ) ) ) { failcount++; continue; } vertex0 = &mesh->vertexlist[ op->v0 ]; vertex1 = &mesh->vertexlist[ op->v1 ]; #if MD_CONFIG_DELAYED_OP_REDIRECT if( vertex0->redirectindex != -1 ) op->v0 = vertex0->redirectindex; else if( vertex1->redirectindex != -1 ) op->v1 = vertex1->redirectindex; else { mdLockBufferResize( lockbuffer, 2 + ( ( vertex0->trirefcount + vertex1->trirefcount ) << 1 ) ); if( !( mdPivotLockRefs( mesh, tdata, lockbuffer, op->v0 ) ) || !( mdPivotLockRefs( mesh, tdata, lockbuffer, op->v1 ) ) ) { failcount++; continue; } break; } #else mdLockBufferResize( lockbuffer, 2 + ( ( vertex0->trirefcount + vertex1->trirefcount ) << 1 ) ); if( !( mdPivotLockRefs( mesh, tdata, lockbuffer, op->v0 ) ) || !( mdPivotLockRefs( mesh, tdata, lockbuffer, op->v1 ) ) ) { failcount++; continue; } break; #endif } #if MD_CONFIG_ATOMIC_SUPPORT if( globalflag ) mmAtomicWrite32( &mesh->globalvertexlock, 0x0 ); #else if( globalflag ) mtSpinUnlock( &mesh->globalvertexspinlock ); #endif return; } #define MD_EDGE_COLLAPSE_TRIREF_STATIC (512) static void mdEdgeCollapse( mdMesh *mesh, mdThreadData *tdata, mdi v0, mdi v1, mdf *collapsepoint ) { int index, delflags0, delflags1; long deletioncount; mdi newv, trirefcount, trirefmax, outer0, outer1; mdi *trireflist, *trirefstore; mdi trirefstatic[MD_EDGE_COLLAPSE_TRIREF_STATIC]; mdVertex *vertex0, *vertex1; /* Vertices of the collapsed edge */ vertex0 = &mesh->vertexlist[ v0 ]; vertex1 = &mesh->vertexlist[ v1 ]; /* Collapse other custom vertex attributes */ if( mesh->vertexmerge ) mdEdgeCollapseMergeVertexAttribs( mesh, tdata, v0, v1, collapsepoint ); #if DEBUG_VERBOSE_COLLAPSE printf( "Collapse %d,%d ; Point %f %f %f ( Overwrite %d ; Delete %d )\n", (int)v0, (int)v1, collapsepoint[0], collapsepoint[1], collapsepoint[2], (int)v0, (int)v1 ); #endif /* New vertex overwriting v0 */ newv = v0; /* Delete the triangles on both sides of the edge and all associated edges */ outer0 = mdEdgeCollapseDeleteTriangle( mesh, tdata, v0, v1, &delflags0 ); outer1 = mdEdgeCollapseDeleteTriangle( mesh, tdata, v1, v0, &delflags1 ); /* Track count of deletions */ deletioncount = tdata->statusdeletioncount; if( outer0 != -1 ) deletioncount++; if( outer1 != -1 ) deletioncount++; tdata->statusdeletioncount = deletioncount; #if DEBUG_VERBOSE_COLLAPSE printf( " Redirect %d -> %d ; %d -> %d\n", (int)v0, (int)vertex0->redirectindex, (int)v1, (int)vertex1->redirectindex ); #endif #if DEBUG_VERBOSE_COLLAPSE printf( " Move Point %f %f %f ( %f %f %f ) -> %f %f %f\n", vertex0->point[0], vertex0->point[1], vertex0->point[2], vertex1->point[0], vertex1->point[1], vertex1->point[2], collapsepoint[0], collapsepoint[1], collapsepoint[2] ); #endif /* Set up new vertex over v0 */ MD_VectorCopy( vertex0->point, collapsepoint ); mathQuadricAddQuadric( &vertex0->quadric, &vertex1->quadric ); /* Propagate boundaries from deleted triangles */ if( delflags0 ) { if( delflags0 & 0x1 ) mdEdgeCollapsePropagateBoundary( mesh, newv, outer0 ); if( delflags0 & 0x2 ) mdEdgeCollapsePropagateBoundary( mesh, outer0, newv ); } if( delflags1 ) { if( delflags1 & 0x1 ) mdEdgeCollapsePropagateBoundary( mesh, newv, outer1 ); if( delflags1 & 0x2 ) mdEdgeCollapsePropagateBoundary( mesh, outer1, newv ); } /* Redirect vertex1 to vertex0 */ vertex1->redirectindex = newv; /* Maximum theoritical count of triangle references for our new vertex, we need a chunk of memory that big */ trirefmax = vertex0->trirefcount + vertex1->trirefcount; /* Buffer to temporarily store our new trirefs */ trireflist = trirefstatic; if( trirefmax > MD_EDGE_COLLAPSE_TRIREF_STATIC ) trireflist = malloc( trirefmax * sizeof(mdi) ); /* Update all triangles connected to vertex0 and vertex1 */ trirefstore = trireflist; trirefstore = mdEdgeCollapseUpdateAll( mesh, tdata, &mesh->trireflist[ vertex0->trirefbase ], vertex0->trirefcount, v0, newv, trirefstore ); trirefstore = mdEdgeCollapseUpdateAll( mesh, tdata, &mesh->trireflist[ vertex1->trirefbase ], vertex1->trirefcount, v1, newv, trirefstore ); /* Find where to store the trirefs */ trirefcount = (int)( trirefstore - trireflist ); #if DEBUG_VERBOSE_COLLAPSE printf( "TriRefCount %d ; Alloc %d\n", trirefcount, trirefmax ); #endif if( trirefcount > vertex0->trirefcount ) { if( trirefcount <= vertex1->trirefcount ) vertex0->trirefbase = vertex1->trirefbase; else { /* Multithreading, acquire lock */ #if MD_CONFIG_ATOMIC_SUPPORT mmAtomicSpin32( &mesh->trireflock, 0x0, 0x1 ); #else mtSpinLock( &mesh->trirefspinlock ); #endif vertex0->trirefbase = mesh->trirefcount; mesh->trirefcount += trirefcount; if( mesh->trirefcount >= mesh->trirefalloc ) MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); #if MD_CONFIG_ATOMIC_SUPPORT mmAtomicWrite32( &mesh->trireflock, 0x0 ); #else mtSpinUnlock( &mesh->trirefspinlock ); #endif } } /* Mark vertex1 as unused */ vertex1->trirefcount = 0; /* Store trirefs */ vertex0->trirefcount = trirefcount; trirefstore = &mesh->trireflist[ vertex0->trirefbase ]; for( index = 0 ; index < trirefcount ; index++ ) { #if DEBUG_VERBOSE_COLLAPSE mdTriangle *tri; tri = ADDRESS( mesh->trilist, trireflist[index] * mesh->trisize ); printf( " Triref %d : %d,%d,%d\n", index, tri->v[0], tri->v[1], tri->v[2] ); #endif trirefstore[index] = trireflist[index]; } /* Invalidate all cost calculations in neighborhood of pivot vertex */ mdVertexInvalidateAll( mesh, tdata, trireflist, trirefcount, newv ); /* If buffer wasn't static, free it */ if( trireflist != trirefstatic ) free( trireflist ); /* Verify if we should create new ops between newv and outer vertices of deleted triangles */ mdEdgeCollapseLinkOuter( mesh, tdata, newv, outer0 ); mdEdgeCollapseLinkOuter( mesh, tdata, newv, outer1 ); #if DEBUG_VERBOSE_COLLAPSE printf( "Collapse End %d,%d\n", (int)v0, (int)v1 ); #endif return; } //// typedef struct { int collisionflag; mdi trileft; mdi triright; } mdEdgeCollisionData; static void mdEdgeCollisionCallback( void *opaque, void *entry, int newflag ) { mdEdge *edge; mdEdgeCollisionData *ecd; edge = entry; ecd = opaque; if( ( edge->triindex != ecd->trileft ) && ( edge->triindex != ecd->triright ) ) ecd->collisionflag = 1; return; } /* Prevent 2D collapses Check all triangles attached to v1 that would have to attach back to v0 If any of the edge is already present in the hash table, deny the collapse */ static int mdEdgeCollisionCheck( mdMesh *mesh, mdThreadData *tdata, mdi v0, mdi v1 ) { int index, trirefcount, left, right; mdi triindex, vsrc, vdst; mdi *trireflist; mdEdge edge; mdTriangle *tri; mdVertex *vertex0, *vertex1; mdEdgeCollisionData ecd; vertex0 = &mesh->vertexlist[ v0 ]; vertex1 = &mesh->vertexlist[ v1 ]; if( vertex0->trirefcount < vertex1->trirefcount ) { vsrc = v0; vdst = v1; trireflist = &mesh->trireflist[ vertex0->trirefbase ]; trirefcount = vertex0->trirefcount; } else { vsrc = v1; vdst = v0; trireflist = &mesh->trireflist[ vertex1->trirefbase ]; trirefcount = vertex1->trirefcount; } #if DEBUG_VERBOSE_COLLISION printf( "Collision Check %d,%d\n", (int)v0, (int)v1 ); printf( " Src %d ; Dst %d\n", vsrc, vdst ); #endif /* Find the triangles that would be deleted so that we don't detect false collisions with them */ ecd.collisionflag = 0; ecd.trileft = -1; edge.v[0] = v0; edge.v[1] = v1; if( mmHashLockReadEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge ) == MM_HASH_SUCCESS ) ecd.trileft = edge.triindex; ecd.triright = -1; edge.v[0] = v1; edge.v[1] = v0; if( mmHashLockReadEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge ) == MM_HASH_SUCCESS ) ecd.triright = edge.triindex; /* Check all trirefs for collision */ for( index = 0 ; index < trirefcount ; index++ ) { triindex = trireflist[ index ]; if( ( triindex == ecd.trileft ) || ( triindex == ecd.triright ) ) continue; tri = ADDRESS( mesh->trilist, triindex * mesh->trisize ); if( tri->v[0] == -1 ) continue; #if DEBUG_VERBOSE_COLLISION printf( " Tri %d : %d,%d,%d\n", index, tri->v[0], tri->v[1], tri->v[2] ); #endif if( tri->v[0] == vsrc ) { left = 2; right = 1; } else if( tri->v[1] == vsrc ) { left = 0; right = 2; } else if( tri->v[2] == vsrc ) { left = 1; right = 0; } else { MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); continue; } edge.v[0] = vdst; edge.v[1] = tri->v[right]; mmHashLockCallEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge, mdEdgeCollisionCallback, &ecd, 0 ); if( ecd.collisionflag ) return 0; edge.v[0] = tri->v[left]; edge.v[1] = vdst; mmHashLockCallEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge, mdEdgeCollisionCallback, &ecd, 0 ); if( ecd.collisionflag ) return 0; } return 1; } //// /* Mesh init step 0, allocate, NOT threaded */ static int mdMeshInit( mdMesh *mesh, size_t maxmemorysize ) { int retval; /* Allocate vertices, no extra room for vertices, we overwrite existing ones as we decimate */ mesh->vertexlist = mmAlignAlloc( mesh->vertexalloc * sizeof(mdVertex), 0x40 ); /* Allocate space for per-vertex lists of face references, including future vertices */ mesh->trirefcount = 0; mesh->trirefalloc = 2 * 6 * mesh->tricount; mesh->trireflist = malloc( mesh->trirefalloc * sizeof(mdi) ); /* Allocate triangles */ mesh->trisize = ( sizeof(mdTriangle) + mesh->tridatasize + 0x7 ) & ~0x7; mesh->trilist = malloc( mesh->tricount * mesh->trisize ); /* Allocate edge hash table */ retval = 1; if( !( mesh->operationflags & MD_FLAGS_NO_DECIMATION ) ) retval = mdMeshHashInit( mesh, mesh->tricount, 2.0, 7, maxmemorysize ); #if MD_CONFIG_ATOMIC_SUPPORT mmAtomicWrite32( &mesh->trireflock, 0x0 ); mmAtomicWrite32( &mesh->globalvertexlock, 0x0 ); #else mtSpinInit( &mesh->trirefspinlock ); mtSpinInit( &mesh->globalvertexspinlock ); #endif return retval; } /* Mesh init step 1, initialize vertices, threaded */ static void mdMeshInitVertices( mdMesh *mesh, mdThreadData *tdata, int threadcount ) { int vertexindex, vertexindexmax, vertexperthread; mdVertex *vertex; void *point; vertexperthread = ( mesh->vertexcount / threadcount ) + 1; vertexindex = tdata->threadid * vertexperthread; vertexindexmax = vertexindex + vertexperthread; if( vertexindexmax > mesh->vertexcount ) vertexindexmax = mesh->vertexcount; vertex = &mesh->vertexlist[vertexindex]; point = ADDRESS( mesh->point, vertexindex * mesh->pointstride ); for( ; vertexindex < vertexindexmax ; vertexindex++, vertex++ ) { #if MD_CONFIG_ATOMIC_SUPPORT mmAtomicWrite32( &vertex->atomicowner, -1 ); #else vertex->owner = -1; mtSpinInit( &vertex->ownerspinlock ); #endif mesh->vertexUserToNative( vertex->point, point ); #if CPU_SSE_SUPPORT vertex->point[3] = 0.0; #endif vertex->trirefcount = 0; vertex->redirectindex = -1; mathQuadricZero( &vertex->quadric ); point = ADDRESS( point, mesh->pointstride ); } return; } static inline void mdMeshForbidEdge( mdMesh *mesh, int v0, int v1 ) { int edgeflags; mdEdge edge; mdTriangle *tri; edge.v[0] = v0; edge.v[1] = v1; if( mmHashLockReadEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge ) == MM_HASH_SUCCESS ) { tri = ADDRESS( mesh->trilist, edge.triindex * mesh->trisize ); edgeflags = 0; if( tri->v[0] == v0 ) edgeflags = ( tri->v[1] == v1 ? 0x10 : 0x40 ); else if( tri->v[1] == v0 ) edgeflags = ( tri->v[2] == v1 ? 0x20 : 0x10 ); else if( tri->v[2] == v0 ) edgeflags = ( tri->v[0] == v1 ? 0x40 : 0x20 ); tri->u.edgeflags |= edgeflags; } return; } /* Mesh init step 2, initialize triangles, threaded */ static void mdMeshInitTriangles( mdMesh *mesh, mdThreadData *tdata, int threadcount ) { int i, triperthread, triindex, triindexmax; long buildtricount; void *indices, *tridata; mdTriangle *tri; mdVertex *vertex; mdEdge edge; mathQuadric q; triperthread = ( mesh->tricount / threadcount ) + 1; triindex = tdata->threadid * triperthread; triindexmax = triindex + triperthread; if( triindexmax > mesh->tricount ) triindexmax = mesh->tricount; /* Initialize triangles */ buildtricount = 0; indices = ADDRESS( mesh->indices, triindex * mesh->indicesstride ); tridata = ADDRESS( mesh->tridata, triindex * mesh->tridatasize ); tri = ADDRESS( mesh->trilist, triindex * mesh->trisize ); edge.op = 0; for( ; triindex < triindexmax ; triindex++, indices = ADDRESS( indices, mesh->indicesstride ), tri = ADDRESS( tri, mesh->trisize ), tridata = ADDRESS( tridata, mesh->tridatasize ) ) { mesh->indicesUserToNative( tri->v, indices ); #if DEBUG_VERBOSE_QUADRIC printf( "Triangle %d,%d,%d\n", (int)tri->v[0], (int)tri->v[1], (int)tri->v[2] ); #endif mdTriangleComputeQuadric( mesh, tri, &q ); tri->u.edgeflags = 0; for( i = 0 ; i < 3 ; i++ ) { vertex = &mesh->vertexlist[ tri->v[i] ]; #if DEBUG_VERBOSE_QUADRIC printf( " Accum quadric to vertex %d\n", (int)tri->v[i] ); #endif #if MD_CONFIG_ATOMIC_SUPPORT mmAtomicSpin32( &vertex->atomicowner, -1, tdata->threadid ); mathQuadricAddQuadric( &vertex->quadric, &q ); vertex->trirefcount++; mmAtomicWrite32( &vertex->atomicowner, -1 ); #else mtSpinLock( &vertex->ownerspinlock ); mathQuadricAddQuadric( &vertex->quadric, &q ); vertex->trirefcount++; mtSpinUnlock( &vertex->ownerspinlock ); #endif } if( mesh->tridatasize ) memcpy( ADDRESS(tri,sizeof(mdTriangle)), tridata, mesh->tridatasize ); if( !( mesh->operationflags & MD_FLAGS_NO_DECIMATION ) ) { edge.triindex = triindex; edge.v[0] = tri->v[0]; edge.v[1] = tri->v[1]; if( mmHashLockAddEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge, 1 ) != MM_HASH_SUCCESS ) { /* MD_ERROR( "ERROR: Bad geometry, duplicate edge %d %d ; %s:%d\n", 1, (int)edge.v[0], (int)edge.v[1], __FILE__, __LINE__ ); */ tri->u.edgeflags |= 0x10; mdMeshForbidEdge( mesh, edge.v[0], edge.v[1] ); mdMeshForbidEdge( mesh, edge.v[1], edge.v[0] ); tdata->statuscollisioncount++; } edge.v[0] = tri->v[1]; edge.v[1] = tri->v[2]; if( mmHashLockAddEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge, 1 ) != MM_HASH_SUCCESS ) { /* MD_ERROR( "ERROR: Bad geometry, duplicate edge %d %d ; %s:%d\n", 1, (int)edge.v[0], (int)edge.v[1], __FILE__, __LINE__ ); */ tri->u.edgeflags |= 0x20; mdMeshForbidEdge( mesh, edge.v[0], edge.v[1] ); mdMeshForbidEdge( mesh, edge.v[1], edge.v[0] ); tdata->statuscollisioncount++; } edge.v[0] = tri->v[2]; edge.v[1] = tri->v[0]; if( mmHashLockAddEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge, 1 ) != MM_HASH_SUCCESS ) { /* MD_ERROR( "ERROR: Bad geometry, duplicate edge %d %d ; %s:%d\n", 1, (int)edge.v[0], (int)edge.v[1], __FILE__, __LINE__ ); */ tri->u.edgeflags |= 0x40; mdMeshForbidEdge( mesh, edge.v[0], edge.v[1] ); mdMeshForbidEdge( mesh, edge.v[1], edge.v[0] ); tdata->statuscollisioncount++; } } buildtricount++; tdata->statusbuildtricount = buildtricount; } return; } /* Mesh init step 3, initialize vertex trirefbase, NOT threaded */ static void mdMeshInitTrirefs( mdMesh *mesh ) { mdi vertexindex, trirefcount; mdVertex *vertex; /* Compute base of vertex triangle references */ trirefcount = 0; vertex = mesh->vertexlist; for( vertexindex = 0 ; vertexindex < mesh->vertexcount ; vertexindex++, vertex++ ) { vertex->trirefbase = trirefcount; trirefcount += vertex->trirefcount; vertex->trirefcount = 0; } mesh->trirefcount = trirefcount; return; } /* Accumulate quadrics from boundaries or weighted edges as returned by user callback */ static inline void mdMeshAccumBoundaryEdges( mdMesh *mesh, mdTriangle *tri, mdVertex **trivertex ) { int hashread; mdf edgeweight; mdEdge edge; mdTriangle *trilink; edge.v[0] = tri->v[1]; edge.v[1] = tri->v[0]; hashread = mmHashLockReadEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge ); if( !( tri->u.edgeflags & 0x10 ) && ( hashread != MM_HASH_SUCCESS ) ) { #if DEBUG_VERBOSE_BOUNDARY printf( "Boundary %d,%d (%d)\n", tri->v[1], tri->v[0], tri->v[2] ); #endif edgeweight = mesh->boundaryweight; tri->u.edgeflags |= 0x1; } else if( ( mesh->edgeweight ) && ( hashread == MM_HASH_SUCCESS ) ) { trilink = ADDRESS( mesh->trilist, edge.triindex * mesh->trisize ); edgeweight = mesh->edgeweight( ADDRESS( tri, sizeof(mdTriangle) ), ADDRESS( trilink, sizeof(mdTriangle) ) ); if( edgeweight <= 0.0 ) goto skip01; #if DEBUG_VERBOSE_BOUNDARY printf( "Edge weight %d,%d (%d) ; %f\n", tri->v[1], tri->v[0], tri->v[2], edgeweight ); #endif } else goto skip01; mdMeshAccumulateBoundary( trivertex[0], trivertex[1], trivertex[2], edgeweight ); skip01: edge.v[0] = tri->v[2]; edge.v[1] = tri->v[1]; hashread = mmHashLockReadEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge ); if( !( tri->u.edgeflags & 0x20 ) && ( hashread != MM_HASH_SUCCESS ) ) { #if DEBUG_VERBOSE_BOUNDARY printf( "Boundary %d,%d (%d)\n", tri->v[2], tri->v[1], tri->v[0] ); #endif edgeweight = mesh->boundaryweight; tri->u.edgeflags |= 0x2; } else if( ( mesh->edgeweight ) && ( hashread == MM_HASH_SUCCESS ) ) { trilink = ADDRESS( mesh->trilist, edge.triindex * mesh->trisize ); edgeweight = mesh->edgeweight( ADDRESS( tri, sizeof(mdTriangle) ), ADDRESS( trilink, sizeof(mdTriangle) ) ); if( edgeweight <= 0.0 ) goto skip12; #if DEBUG_VERBOSE_BOUNDARY printf( "Edge weight %d,%d (%d) ; %f\n", tri->v[2], tri->v[1], tri->v[0], edgeweight ); #endif } else goto skip12; mdMeshAccumulateBoundary( trivertex[1], trivertex[2], trivertex[0], mesh->boundaryweight ); skip12: edge.v[0] = tri->v[0]; edge.v[1] = tri->v[2]; hashread = mmHashLockReadEntry( mesh->edgehashtable, &mdEdgeHashEdge, &edge ); if( !( tri->u.edgeflags & 0x40 ) && ( hashread != MM_HASH_SUCCESS ) ) { #if DEBUG_VERBOSE_BOUNDARY printf( "Boundary %d,%d (%d)\n", tri->v[0], tri->v[2], tri->v[1] ); #endif edgeweight = mesh->boundaryweight; tri->u.edgeflags |= 0x4; } else if( ( mesh->edgeweight ) && ( hashread == MM_HASH_SUCCESS ) ) { trilink = ADDRESS( mesh->trilist, edge.triindex * mesh->trisize ); edgeweight = mesh->edgeweight( ADDRESS( tri, sizeof(mdTriangle) ), ADDRESS( trilink, sizeof(mdTriangle) ) ); if( edgeweight <= 0.0 ) goto skip20; #if DEBUG_VERBOSE_BOUNDARY printf( "Edge weight %d,%d (%d) ; %f\n", tri->v[0], tri->v[2], tri->v[1], edgeweight ); #endif } else goto skip20; mdMeshAccumulateBoundary( trivertex[2], trivertex[0], trivertex[1], mesh->boundaryweight ); skip20: return; } /* Mesh init step 4, store vertex trirefs and accumulate boundary quadrics, threaded */ static void mdMeshBuildTrirefs( mdMesh *mesh, mdThreadData *tdata, int threadcount ) { int i, triperthread, triindex, triindexmax; long buildrefcount; mdTriangle *tri; mdVertex *vertex, *trivertex[3]; triperthread = ( mesh->tricount / threadcount ) + 1; triindex = tdata->threadid * triperthread; triindexmax = triindex + triperthread; if( triindexmax > mesh->tricount ) triindexmax = mesh->tricount; /* Store vertex triangle references and accumulate boundary quadrics */ buildrefcount = 0; tri = ADDRESS( mesh->trilist, triindex * mesh->trisize ); for( ; triindex < triindexmax ; triindex++, tri = ADDRESS( tri, mesh->trisize ) ) { for( i = 0 ; i < 3 ; i++ ) { vertex = &mesh->vertexlist[ tri->v[i] ]; #if MD_CONFIG_ATOMIC_SUPPORT mmAtomicSpin32( &vertex->atomicowner, -1, tdata->threadid ); mesh->trireflist[ vertex->trirefbase + vertex->trirefcount++ ] = triindex; mmAtomicWrite32( &vertex->atomicowner, -1 ); #else mtSpinLock( &vertex->ownerspinlock ); mesh->trireflist[ vertex->trirefbase + vertex->trirefcount++ ] = triindex; mtSpinUnlock( &vertex->ownerspinlock ); #endif trivertex[i] = vertex; } if( !( mesh->operationflags & MD_FLAGS_NO_DECIMATION ) ) mdMeshAccumBoundaryEdges( mesh, tri, trivertex ); buildrefcount++; tdata->statusbuildrefcount = buildrefcount; } return; } /* Mesh clean up */ static void mdMeshEnd( mdMesh *mesh ) { #ifndef MD_CONFIG_ATOMIC_SUPPORT mdi index; mdVertex *vertex; vertex = mesh->vertexlist; for( index = 0 ; index < mesh->vertexcount ; index++, vertex++ ) mtSpinDestroy( &vertex->ownerspinlock ); mtSpinDestroy( &mesh->trirefspinlock ); mtSpinDestroy( &mesh->globalvertexspinlock ); #endif mmAlignFree( mesh->vertexlist ); free( mesh->trireflist ); free( mesh->trilist ); return; } //// static void mdSortOp( mdMesh *mesh, mdThreadData *tdata, mdOp *op, int denyflag ) { mdf collapsecost; collapsecost = op->value + op->penalty; if( ( denyflag ) || ( collapsecost > mesh->maxcollapsecost ) ) { #if MD_CONFIG_ATOMIC_SUPPORT if( !( mmAtomicRead32( &op->flags ) & MD_OP_FLAGS_DETACHED ) ) { mmBinSortRemove( tdata->binsort, op, op->collapsecost ); mmAtomicOr32( &op->flags, MD_OP_FLAGS_DETACHED ); } #else mtSpinLock( &op->spinlock ); if( !( op->flags & MD_OP_FLAGS_DETACHED ) ) { mmBinSortRemove( tdata->binsort, op, op->collapsecost ); op->flags |= MD_OP_FLAGS_DETACHED; } mtSpinUnlock( &op->spinlock ); #endif } else { #if MD_CONFIG_ATOMIC_SUPPORT if( mmAtomicRead32( &op->flags ) & MD_OP_FLAGS_DETACHED ) { mmBinSortAdd( tdata->binsort, op, collapsecost ); mmAtomicAnd32( &op->flags, ~MD_OP_FLAGS_DETACHED ); } else if( op->collapsecost != collapsecost ) mmBinSortUpdate( tdata->binsort, op, op->collapsecost, collapsecost ); #else mtSpinLock( &op->spinlock ); if( op->flags & MD_OP_FLAGS_DETACHED ) { mmBinSortAdd( tdata->binsort, op, collapsecost ); op->flags &= ~MD_OP_FLAGS_DETACHED; } else if( op->collapsecost != collapsecost ) mmBinSortUpdate( tdata->binsort, op, op->collapsecost, collapsecost ); mtSpinUnlock( &op->spinlock ); #endif op->collapsecost = collapsecost; } return; } static void mdUpdateOp( mdMesh *mesh, mdThreadData *tdata, mdOp *op, int32_t opflagsmask ) { int denyflag, flags; #if MD_CONFIG_ATOMIC_SUPPORT for( ; ; ) { flags = mmAtomicRead32( &op->flags ); if( mmAtomicCmpReplace32( &op->flags, flags, flags & opflagsmask ) ) break; } #else mtSpinLock( &op->spinlock ); flags = op->flags; op->flags &= opflagsmask; mtSpinUnlock( &op->spinlock ); #endif if( !( flags & MD_OP_FLAGS_UPDATE_NEEDED ) ) return; if( flags & MD_OP_FLAGS_DELETED ) return; if( flags & MD_OP_FLAGS_DELETION_PENDING ) { if( !( flags & MD_OP_FLAGS_DETACHED ) ) mmBinSortRemove( tdata->binsort, op, op->collapsecost ); /* Race condition, flag the op as deleted but don't free it ~ Free them all at the end with FreeAll(). */ /* mmBlockFree( &tdata->opblock, op ); */ #if MD_CONFIG_ATOMIC_SUPPORT mmAtomicOr32( &op->flags, MD_OP_FLAGS_DELETED ); #else mtSpinLock( &op->spinlock ); op->flags |= MD_OP_FLAGS_DELETED; mtSpinUnlock( &op->spinlock ); #endif } else { op->penalty = mdEdgeCollapsePenalty( mesh, tdata, op->v0, op->v1, op->collapsepoint, &denyflag ); mdSortOp( mesh, tdata, op, denyflag ); } return; } static void mdUpdateBufferOps( mdMesh *mesh, mdThreadData *tdata, mdUpdateBuffer *updatebuffer, mdLockBuffer *lockbuffer ) { int index; mdOp *op; #if MD_CONFIG_ATOMIC_SUPPORT mmAtomicSpin32( &updatebuffer->atomlock, 0x0, 0x1 ); #else mtSpinLock( &updatebuffer->spinlock ); #endif for( index = 0 ; index < updatebuffer->opcount ; index++ ) { op = updatebuffer->opbuffer[index]; if( mdOpResolveLockEdgeTry( mesh, tdata, lockbuffer, op ) ) { mdUpdateOp( mesh, tdata, op, ~( MD_OP_FLAGS_UPDATE_QUEUED | MD_OP_FLAGS_UPDATE_NEEDED ) ); mdLockBufferUnlockAll( mesh, tdata, lockbuffer ); } else { #if MD_CONFIG_ATOMIC_SUPPORT mmAtomicWrite32( &updatebuffer->atomlock, 0x0 ); #else mtSpinUnlock( &updatebuffer->spinlock ); #endif mdOpResolveLockEdge( mesh, tdata, lockbuffer, op ); mdUpdateOp( mesh, tdata, op, ~( MD_OP_FLAGS_UPDATE_QUEUED | MD_OP_FLAGS_UPDATE_NEEDED ) ); mdLockBufferUnlockAll( mesh, tdata, lockbuffer ); #if MD_CONFIG_ATOMIC_SUPPORT mmAtomicSpin32( &updatebuffer->atomlock, 0x0, 0x1 ); #else mtSpinLock( &updatebuffer->spinlock ); #endif } } updatebuffer->opcount = 0; #if MD_CONFIG_ATOMIC_SUPPORT mmAtomicWrite32( &updatebuffer->atomlock, 0x0 ); #else mtSpinUnlock( &updatebuffer->spinlock ); #endif return; } static int mdMeshProcessQueue( mdMesh *mesh, mdThreadData *tdata ) { int index, decimationcount, stepindex; int32_t opflags; mdf maxcost; mdOp *op; mdLockBuffer lockbuffer; #if DEBUG_LIMIT int limit = DEBUG_LIMIT; #endif mdLockBufferInit( &lockbuffer, 2 ); stepindex = 0; maxcost = 0.0; decimationcount = 0; for( ; ; ) { /* Update all ops flagged as requiring update */ if( mesh->operationflags & MD_FLAGS_CONTINUOUS_UPDATE ) { for( index = 0 ; index < mesh->updatebuffercount ; index++ ) mdUpdateBufferOps( mesh, tdata, &tdata->updatebuffer[index], &lockbuffer ); } /* Acquire first op */ op = mmBinSortGetFirstItem( tdata->binsort, maxcost ); if( !( op ) ) { if( ++stepindex >= mesh->syncstepcount ) break; maxcost = mesh->maxcollapsecost * ( (mdf)stepindex / (mdf)mesh->syncstepcount ); mdBarrierSync( &mesh->workbarrier ); /* Update all ops flagged as requiring update */ if( !( mesh->operationflags & MD_FLAGS_CONTINUOUS_UPDATE ) ) { for( index = 0 ; index < mesh->updatebuffercount ; index++ ) mdUpdateBufferOps( mesh, tdata, &tdata->updatebuffer[index], &lockbuffer ); } continue; } #if DEBUG_VERBOSE || DEBUG_VERBOSE_COST #if MD_CONFIG_ATOMIC_SUPPORT printf( "Op %p ; Edge %d,%d (0x%x) ; Point %f %f %f ; Value %f ; Penalty %f ; Cost %f\n", op, op->v0, op->v1, mmAtomicRead32( &op->flags ), op->collapsepoint[0], op->collapsepoint[1], op->collapsepoint[2], op->value, op->penalty, op->collapsecost ); #else printf( "Op %p ; Edge %d,%d (0x%x) ; Point %f %f %f ; Value %f ; Penalty %f ; Cost %f\n", op, op->v0, op->v1, op->flags, op->collapsepoint[0], op->collapsepoint[1], op->collapsepoint[2], op->value, op->penalty, op->collapsecost ); #endif #endif /* Acquire lock for op edge and all trirefs vertices */ mdOpResolveLockFull( mesh, tdata, &lockbuffer, op ); /* If our op was flagged for update between mdUpdateBufferOps() and before we acquired lock, no big deal, catch the update */ #if MD_CONFIG_ATOMIC_SUPPORT opflags = mmAtomicRead32( &op->flags ); #else mtSpinLock( &op->spinlock ); opflags = op->flags; mtSpinUnlock( &op->spinlock ); #endif if( opflags & MD_OP_FLAGS_UPDATE_NEEDED ) { mdLockBufferUnlockAll( mesh, tdata, &lockbuffer ); mdUpdateOp( mesh, tdata, op, ~MD_OP_FLAGS_UPDATE_NEEDED ); continue; } #if DEBUG_PENALTY_CHECK int denyflag; mdf penalty; penalty = mdEdgeCollapsePenalty( mesh, tdata, op->v0, op->v1, op->collapsepoint, &denyflag ); if( fabs( penalty - op->penalty ) > 0.001 * fmax( penalty, op->penalty ) ) printf( "CRAP : %f %f\n", penalty, op->penalty ); #endif if( !( mdEdgeCollisionCheck( mesh, tdata, op->v0, op->v1 ) ) ) { #if MD_CONFIG_ATOMIC_SUPPORT if( mmAtomicRead32( &op->flags ) & MD_OP_FLAGS_DETACHED ) MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); mmAtomicOr32( &op->flags, MD_OP_FLAGS_DETACHED ); mmBinSortRemove( tdata->binsort, op, op->collapsecost ); #else mtSpinLock( &op->spinlock ); if( op->flags & MD_OP_FLAGS_DETACHED ) MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); op->flags |= MD_OP_FLAGS_DETACHED; mtSpinUnlock( &op->spinlock ); mmBinSortRemove( tdata->binsort, op, op->collapsecost ); #endif goto opdone; } mdEdgeCollapse( mesh, tdata, op->v0, op->v1, op->collapsepoint ); decimationcount++; #if DEBUG_LIMIT if( decimationcount >= limit ) break; #endif opdone: /* Release all locks for op */ mdLockBufferUnlockAll( mesh, tdata, &lockbuffer ); } mdLockBufferEnd( &lockbuffer ); #if DEBUG_VERBOSE printf( "Final Count of Collapses : %d\n", decimationcount ); #endif return decimationcount; } ////// typedef struct { mdf normal[3]; mdf factor[3]; } mdTriNormal; static mdi mdMeshPackCountTriangles( mdMesh *mesh ) { mdi tricount; mdTriangle *tri, *triend; tricount = 0; tri = mesh->trilist; triend = ADDRESS( tri, mesh->tricount * mesh->trisize ); for( ; tri < triend ; tri = ADDRESS( tri, mesh->trisize ) ) { if( tri->v[0] == -1 ) continue; tri->u.redirectindex = tricount; tricount++; } mesh->tripackcount = tricount; return tricount; } static mdf mdMeshAngleFactor( mdf dotangle ) { mdf factor; if( dotangle >= 1.0 ) factor = 0.0; else if( dotangle <= -1.0 ) factor = 0.5 * M_PI; else { factor = mdfacos( dotangle ); if( isnan( factor ) ) factor = 0.0; } return factor; } static void mdMeshBuildTriangleNormals( mdMesh *mesh ) { mdTriangle *tri, *triend; mdVertex *vertex0, *vertex1, *vertex2; mdf vecta[3], vectb[3], vectc[3], normalfactor, magna, magnb, magnc, norm, norminv; mdTriNormal *trinormal; trinormal = mesh->trinormal; normalfactor = 1.0; if( mesh->operationflags & MD_FLAGS_TRIANGLE_WINDING_CCW ) normalfactor = -1.0; tri = mesh->trilist; triend = ADDRESS( tri, mesh->tricount * mesh->trisize ); for( ; tri < triend ; tri = ADDRESS( tri, mesh->trisize ) ) { if( tri->v[0] == -1 ) continue; /* Compute triangle normal */ vertex0 = &mesh->vertexlist[ tri->v[0] ]; vertex1 = &mesh->vertexlist[ tri->v[1] ]; vertex2 = &mesh->vertexlist[ tri->v[2] ]; MD_VectorSubStore( vecta, vertex1->point, vertex0->point ); MD_VectorSubStore( vectb, vertex2->point, vertex0->point ); MD_VectorCrossProduct( trinormal->normal, vectb, vecta ); norm = mdfsqrt( MD_VectorDotProduct( trinormal->normal, trinormal->normal ) ); if( norm ) { norminv = normalfactor / norm; trinormal->normal[0] *= norminv; trinormal->normal[1] *= norminv; trinormal->normal[2] *= norminv; } MD_VectorSubStore( vectc, vertex2->point, vertex1->point ); magna = MD_VectorMagnitude( vecta ); magnb = MD_VectorMagnitude( vectb ); magnc = MD_VectorMagnitude( vectc ); trinormal->factor[0] = norm * mdMeshAngleFactor( MD_VectorDotProduct( vecta, vectb ) / ( magna * magnb ) ); trinormal->factor[1] = norm * mdMeshAngleFactor( -MD_VectorDotProduct( vecta, vectc ) / ( magna * magnc ) ); trinormal->factor[2] = norm * mdMeshAngleFactor( MD_VectorDotProduct( vectb, vectc ) / ( magnb * magnc ) ); trinormal++; } return; } static int mdMeshVertexComputeNormal( mdMesh *mesh, mdi vertexindex, mdi *trireflist, int trirefcount, mdf *normal ) { int index, pivot, validflag; mdi triindex; mdf norm, norminv; mdTriangle *tri; mdTriNormal *trinormal, *tn; trinormal = mesh->trinormal; /* Loop through all trirefs associated with the vertex */ validflag = 0; MD_VectorZero( normal ); for( index = 0 ; index < trirefcount ; index++ ) { triindex = trireflist[ index ]; if( triindex == -1 ) continue; tri = ADDRESS( mesh->trilist, triindex * mesh->trisize ); if( tri->v[0] == -1 ) continue; if( tri->v[0] == vertexindex ) pivot = 0; else if( tri->v[1] == vertexindex ) pivot = 1; else if( tri->v[2] == vertexindex ) pivot = 2; else { MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); continue; } tn = &trinormal[ tri->u.redirectindex ]; MD_VectorAddMulScalar( normal, tn->normal, tn->factor[pivot] ); validflag = 1; } if( !( validflag ) ) return 0; norm = mdfsqrt( MD_VectorDotProduct( normal, normal ) ); if( norm ) { norminv = 1.0 / norm; normal[0] *= norminv; normal[1] *= norminv; normal[2] *= norminv; } return 1; } static mdi mdMeshCloneVertex( mdMesh *mesh, mdi cloneindex, mdf *point ) { mdi vertexindex, retindex; mdVertex *vertex; retindex = -1; vertex = &mesh->vertexlist[ mesh->clonesearchindex ]; for( vertexindex = mesh->clonesearchindex ; vertexindex < mesh->vertexalloc ; vertexindex++, vertex++ ) { if( ( vertexindex < mesh->vertexcount ) && ( vertex->trirefcount ) ) continue; vertex->trirefcount = -1; vertex->redirectindex = -1; /* Copy the point from the cloned vertex */ MD_VectorCopy( vertex->point, point ); /* Copy custom vertex attributes, if any */ if( mesh->vertexcopy ) mesh->vertexcopy( mesh->copycontext, vertexindex, cloneindex ); retindex = vertexindex; if( vertexindex >= mesh->vertexcount ) mesh->vertexcount = vertexindex+1; break; } mesh->clonesearchindex = vertexindex; return retindex; } static void mdMeshVertexRedirectTriRefs( mdMesh *mesh, mdi vertexindex, mdi newvertexindex, mdi *trireflist, int trirefcount ) { int index; mdi triindex; mdTriangle *tri; for( index = 0 ; index < trirefcount ; index++ ) { triindex = trireflist[ index ]; if( triindex == -1 ) continue; tri = ADDRESS( mesh->trilist, triindex * mesh->trisize ); if( tri->v[0] == -1 ) continue; if( tri->v[0] == vertexindex ) tri->v[0] = newvertexindex; else if( tri->v[1] == vertexindex ) tri->v[1] = newvertexindex; else if( tri->v[2] == vertexindex ) tri->v[2] = newvertexindex; else MD_ERROR( "SHOULD NOT HAPPEN %s:%d\n", 1, __FILE__, __LINE__ ); } return; } /* Find a target normal */ static int mdMeshVertexFindTarget( mdMesh *mesh, mdi *trireflist, int trirefcount, mdf **targetnormal ) { int i0, i1; mdi triindex0, triindex1; mdf dotangle, bestdotangle; mdTriangle *tri0, *tri1; mdTriNormal *trinormal, *tn0, *tn1; /* Of all triangles, find the most diverging pair, pick one */ targetnormal[0] = 0; targetnormal[1] = 0; trinormal = mesh->trinormal; bestdotangle = mesh->normalsearchangle; for( i0 = 0 ; i0 < trirefcount ; i0++ ) { triindex0 = trireflist[ i0 ]; if( triindex0 == -1 ) continue; tri0 = ADDRESS( mesh->trilist, triindex0 * mesh->trisize ); if( tri0->v[0] == -1 ) continue; tn0 = &trinormal[ tri0->u.redirectindex ]; for( i1 = i0+1 ; i1 < trirefcount ; i1++ ) { triindex1 = trireflist[ i1 ]; if( triindex1 == -1 ) continue; tri1 = ADDRESS( mesh->trilist, triindex1 * mesh->trisize ); if( tri1->v[0] == -1 ) continue; tn1 = &trinormal[ tri1->u.redirectindex ]; dotangle = MD_VectorDotProduct( tn0->normal, tn1->normal ); if( dotangle < bestdotangle ) { bestdotangle = dotangle; targetnormal[0] = tn0->normal; targetnormal[1] = tn1->normal; } } } return ( targetnormal[0] != 0 ); } #define MD_MESH_TRIREF_MAX (256) static int mdMeshVertexBuildNormal( mdMesh *mesh, mdi vertexindex, mdi *trireflist, int trirefcount, mdf *point, mdf *normal ) { int index, trirefbuffercount; mdi triindex, newvertexindex; mdi trirefbuffer[MD_MESH_TRIREF_MAX]; mdf dotangle0, dotangle1; mdf *newnormal, *targetnormal[2]; mdTriangle *tri; mdTriNormal *trinormal, *tn; if( trirefcount > MD_MESH_TRIREF_MAX ) return 1; /* Loop to repeat as we retire trirefs from the list */ trinormal = mesh->trinormal; for( ; ; ) { /* Compute normal for vertex */ if( !( mdMeshVertexComputeNormal( mesh, vertexindex, trireflist, trirefcount, normal ) ) ) return 0; /* If user doesn't allow vertex splitting, take the normal as it is */ if( !( mesh->operationflags & MD_FLAGS_NORMAL_VERTEX_SPLITTING ) ) break; /* Find a pair of target normals */ if( !( mdMeshVertexFindTarget( mesh, trireflist, trirefcount, targetnormal ) ) ) break; /* Find all trirefs that agree with targetnormal[1] and store them independently */ trirefbuffercount = 0; for( index = 0 ; index < trirefcount ; index++ ) { triindex = trireflist[ index ]; if( triindex == -1 ) continue; tri = ADDRESS( mesh->trilist, triindex * mesh->trisize ); if( tri->v[0] == -1 ) continue; tn = &trinormal[ tri->u.redirectindex ]; dotangle1 = MD_VectorDotProduct( targetnormal[1], tn->normal ); if( dotangle1 < mesh->normalsearchangle ) continue; dotangle0 = MD_VectorDotProduct( targetnormal[0], tn->normal ); if( dotangle0 > dotangle1 ) continue; trirefbuffer[trirefbuffercount++] = triindex; trireflist[ index ] = -1; } if( !( trirefbuffercount ) ) break; /* Find an unused vertex, bail out if none can be found */ newvertexindex = mdMeshCloneVertex( mesh, vertexindex, point ); if( newvertexindex == -1 ) break; /* Correct all trirefs to new vertex */ mdMeshVertexRedirectTriRefs( mesh, vertexindex, newvertexindex, trirefbuffer, trirefbuffercount ); /* Spawn a new vertex */ newnormal = ADDRESS( mesh->vertexnormal, newvertexindex * 3 * sizeof(mdf) ); mdMeshVertexBuildNormal( mesh, newvertexindex, trirefbuffer, trirefbuffercount, point, newnormal ); } return 1; } /* In some rare circumstances, a vertex can be unused even with redirectindex == -1 and trirefs leading to deleted triangles */ static int mdMeshVertexCheckUse( mdMesh *mesh, mdi *trireflist, int trirefcount ) { int index; mdi triindex; mdTriangle *tri; for( index = 0 ; index < trirefcount ; index++ ) { triindex = trireflist[ index ]; if( triindex == -1 ) continue; tri = ADDRESS( mesh->trilist, triindex * mesh->trisize ); if( tri->v[0] == -1 ) continue; return 1; } return 0; } static void mdMeshWriteVertices( mdMesh *mesh ) { mdi vertexindex, writeindex; mdf *point; mdVertex *vertex; point = mesh->point; writeindex = 0; vertex = mesh->vertexlist; for( vertexindex = 0 ; vertexindex < mesh->vertexcount ; vertexindex++, vertex++ ) { if( !( mesh->operationflags & MD_FLAGS_NO_VERTEX_PACKING ) ) { if( vertex->redirectindex != -1 ) continue; if( !( vertex->trirefcount ) ) continue; if( ( vertex->trirefcount != -1 ) && !( mdMeshVertexCheckUse( mesh, &mesh->trireflist[ vertex->trirefbase ], vertex->trirefcount ) ) ) continue; } vertex->redirectindex = writeindex; mesh->vertexNativeToUser( point, vertex->point ); if( ( mesh->vertexcopy ) && ( writeindex != vertexindex ) ) mesh->vertexcopy( mesh->copycontext, writeindex, vertexindex ); point = ADDRESS( point, mesh->pointstride ); writeindex++; } mesh->vertexpackcount = writeindex; if( mesh->operationflags & MD_FLAGS_NO_VERTEX_PACKING ) mesh->vertexpackcount = mesh->vertexcount; return; } static void mdMeshWriteIndices( mdMesh *mesh ) { mdi finaltricount, v[3]; mdTriangle *tri, *triend; mdVertex *vertex0, *vertex1, *vertex2; void *indices, *tridata; indices = mesh->indices; finaltricount = 0; tri = mesh->trilist; triend = ADDRESS( tri, mesh->tricount * mesh->trisize ); tridata = mesh->tridata; for( ; tri < triend ; tri = ADDRESS( tri, mesh->trisize ), tridata = ADDRESS( tridata, mesh->tridatasize ) ) { if( tri->v[0] == -1 ) continue; vertex0 = &mesh->vertexlist[ tri->v[0] ]; v[0] = vertex0->redirectindex; vertex1 = &mesh->vertexlist[ tri->v[1] ]; v[1] = vertex1->redirectindex; vertex2 = &mesh->vertexlist[ tri->v[2] ]; v[2] = vertex2->redirectindex; mesh->indicesNativeToUser( indices, v ); if( mesh->tridatasize ) memcpy( tridata, ADDRESS(tri,sizeof(mdTriangle)), mesh->tridatasize ); indices = ADDRESS( indices, mesh->indicesstride ); finaltricount++; } mesh->tripackcount = finaltricount; return; } /* Write vertices and indices, recompute normals, store them along with vertices and indices at once */ static void mdMeshWriteVerticesAndNormals( mdMesh *mesh ) { mdi vertexindex, writeindex; mdf *point, *normal; mdVertex *vertex; void (*writenormal)( void *dst, mdf *src ); void *normaldst; /* Start search for free vertices to clone at 0 */ mesh->clonesearchindex = 0; mesh->trinormal = malloc( mesh->tripackcount * sizeof(mdTriNormal) ); mesh->vertexnormal = malloc( mesh->vertexalloc * 3 * sizeof(normal) ); /* Count triangles and assign redirectindex to each in sequence */ mdMeshPackCountTriangles( mesh ); /* Build up mesh->trinormal, store normals, area and vertex angles of each triangle */ mdMeshBuildTriangleNormals( mesh ); /* Build each vertex normal */ vertex = mesh->vertexlist; for( vertexindex = 0 ; vertexindex < mesh->vertexcount ; vertexindex++, vertex++ ) { if( !( vertex->trirefcount ) || ( vertex->trirefcount == -1 ) ) continue; normal = ADDRESS( mesh->vertexnormal, vertexindex * 3 * sizeof(mdf) ); if( !( mdMeshVertexBuildNormal( mesh, vertexindex, &mesh->trireflist[ vertex->trirefbase ], vertex->trirefcount, vertex->point, normal ) ) ) vertex->trirefcount = 0; } /* Write vertices along with normals and other attributes */ writenormal = mesh->writenormal; point = mesh->point; writeindex = 0; vertex = mesh->vertexlist; for( vertexindex = 0 ; vertexindex < mesh->vertexcount ; vertexindex++, vertex++ ) { if( !( mesh->operationflags & MD_FLAGS_NO_VERTEX_PACKING ) ) { if( vertex->redirectindex != -1 ) continue; if( !( vertex->trirefcount ) ) continue; } vertex->redirectindex = writeindex; mesh->vertexNativeToUser( point, vertex->point ); normal = ADDRESS( mesh->vertexnormal, vertexindex * 3 * sizeof(mdf) ); normaldst = ADDRESS( mesh->normalbase, writeindex * mesh->normalstride ); writenormal( normaldst, normal ); if( ( mesh->vertexcopy ) && ( writeindex != vertexindex ) ) mesh->vertexcopy( mesh->copycontext, writeindex, vertexindex ); point = ADDRESS( point, mesh->pointstride ); writeindex++; } mesh->vertexpackcount = writeindex; if( mesh->operationflags & MD_FLAGS_NO_VERTEX_PACKING ) mesh->vertexpackcount = mesh->vertexcount; free( mesh->vertexnormal ); free( mesh->trinormal ); return; } ////// typedef struct { int threadid; mdMesh *mesh; long deletioncount; long collisioncount; long decimationcount; mdThreadData *tdata; int stage; } mdThreadInit; #ifndef MD_CONFIG_ATOMIC_SUPPORT int mdFreeOpCallback( void *chunk, void *userpointer ) { mdOp *op; op = chunk; mtSpinDestroy( &op->spinlock ); return 0; } #endif static void *mdThreadMain( void *value ) { int index, tribase, trimax, triperthread, nodeindex; int groupthreshold; mdThreadInit *tinit; mdThreadData tdata; mdMesh *mesh; tinit = value; mesh = tinit->mesh; tinit->tdata = &tdata; /* Thread memory initialization */ tdata.threadid = tinit->threadid; tdata.statusbuildtricount = 0; tdata.statusbuildrefcount = 0; tdata.statuspopulatecount = 0; tdata.statusdeletioncount = 0; tdata.statuscollisioncount = 0; groupthreshold = mesh->tricount >> 9; if( groupthreshold < 256 ) groupthreshold = 256; nodeindex = -1; if( mmcontext.numaflag ) { mmThreadBindToCpu( tdata.threadid ); nodeindex = mmCpuGetNode( tdata.threadid ); mmBlockNodeInit( &tdata.opblock, nodeindex, sizeof(mdOp), 16384, 16384, MD_CONF_OP_ALIGNMENT ); } else mmBlockInit( &tdata.opblock, sizeof(mdOp), 16384, 16384, MD_CONF_OP_ALIGNMENT ); tdata.binsort = mmBinSortInit( offsetof(mdOp,list), 64, 16, -0.2 * mesh->maxcollapsecost, 1.2 * mesh->maxcollapsecost, groupthreshold, mdMeshOpValueCallback, 6, nodeindex ); for( index = 0 ; index < mesh->updatebuffercount ; index++ ) mdUpdateBufferInit( &tdata.updatebuffer[index], 4096 ); /* Wait until all threads have properly initialized */ if( mesh->updatestatusflag ) mdBarrierSync( &mesh->globalbarrier ); /* Build mesh step 1 */ if( !( tdata.threadid ) ) tinit->stage = MD_STATUS_STAGE_BUILDVERTICES; mdMeshInitVertices( mesh, &tdata, mesh->threadcount ); mdBarrierSync( &mesh->workbarrier ); /* Build mesh step 2 */ if( !( tdata.threadid ) ) tinit->stage = MD_STATUS_STAGE_BUILDTRIANGLES; mdMeshInitTriangles( mesh, &tdata, mesh->threadcount ); mdBarrierSync( &mesh->globalbarrier ); /* Build mesh step 3 is not parallel, have the main thread run it */ if( !( tdata.threadid ) ) tinit->stage = MD_STATUS_STAGE_BUILDTRIREFS; mdBarrierSync( &mesh->globalbarrier ); /* Build mesh step 4 */ mdMeshBuildTrirefs( mesh, &tdata, mesh->threadcount ); mdBarrierSync( &mesh->workbarrier ); if( !( mesh->operationflags & MD_FLAGS_NO_DECIMATION ) ) { /* Initialize the thread's op queue */ if( !( tdata.threadid ) ) tinit->stage = MD_STATUS_STAGE_BUILDQUEUE; triperthread = ( mesh->tricount / mesh->threadcount ) + 1; tribase = tdata.threadid * triperthread; trimax = tribase + triperthread; if( trimax > mesh->tricount ) trimax = mesh->tricount; /* Initialize a list of ops for all edges */ mdMeshPopulateOpList( mesh, &tdata, tribase, trimax - tribase ); /* Wait for all threads to reach this point */ mdBarrierSync( &mesh->workbarrier ); /* Process the thread's op queue */ if( !( tdata.threadid ) ) tinit->stage = MD_STATUS_STAGE_DECIMATION; tinit->decimationcount = mdMeshProcessQueue( mesh, &tdata ); } /* Wait for all threads to reach this point */ tinit->deletioncount = tdata.statusdeletioncount; tinit->collisioncount = tdata.statuscollisioncount; mdBarrierSync( &mesh->globalbarrier ); /* If we didn't use atomic operations, we have spinlocks to destroy in each op */ #ifndef MD_CONFIG_ATOMIC_SUPPORT mmBlockProcessList( &tdata.opblock, 0, mdFreeOpCallback ); #endif /* Free thread memory allocations */ mmBlockFreeAll( &tdata.opblock ); for( index = 0 ; index < mesh->updatebuffercount ; index++ ) mdUpdateBufferEnd( &tdata.updatebuffer[index] ); mmBinSortFree( tdata.binsort ); return 0; } ////////// static const char *mdStatusStageName[] = { [MD_STATUS_STAGE_INIT] = "Initializing", [MD_STATUS_STAGE_BUILDVERTICES] = "Building Vertices", [MD_STATUS_STAGE_BUILDTRIANGLES] = "Building Triangles", [MD_STATUS_STAGE_BUILDTRIREFS] = "Building Trirefs", [MD_STATUS_STAGE_BUILDQUEUE] = "Building Queues", [MD_STATUS_STAGE_DECIMATION] = "Decimating Mesh", [MD_STATUS_STAGE_STORE] = "Storing Geometry", [MD_STATUS_STAGE_DONE] = "Done" }; static double mdStatusStageProgress[] = { [MD_STATUS_STAGE_INIT] = 0.0, [MD_STATUS_STAGE_BUILDVERTICES] = 2.0, [MD_STATUS_STAGE_BUILDTRIANGLES] = 6.0, [MD_STATUS_STAGE_BUILDTRIREFS] = 6.0, [MD_STATUS_STAGE_BUILDQUEUE] = 8.0, [MD_STATUS_STAGE_DECIMATION] = 75.0, [MD_STATUS_STAGE_STORE] = 3.0, [MD_STATUS_STAGE_DONE] = 0.0 }; static void mdUpdateStatus( mdMesh *mesh, mdThreadInit *threadinit, int stage, mdStatus *status ) { int threadid, stageindex; long buildtricount, buildrefcount, populatecount, deletioncount; double progress, subprogress; mdThreadInit *tinit; mdThreadData *tdata; subprogress = 0.0; if( !( threadinit ) ) status->stage = stage; else { tinit = &threadinit[0]; status->stage = tinit->stage; tdata = tinit->tdata; if( (unsigned)status->stage >= MD_STATUS_STAGE_COUNT ) return; buildtricount = 0; buildrefcount = 0; populatecount = 0; deletioncount = 0; for( threadid = 0 ; threadid < mesh->threadcount ; threadid++ ) { tinit = &threadinit[threadid]; tdata = tinit->tdata; buildtricount += tdata->statusbuildtricount; buildrefcount += tdata->statusbuildrefcount; populatecount += tdata->statuspopulatecount; deletioncount += tdata->statusdeletioncount; } status->trianglecount = mesh->tricount - deletioncount; if( status->stage == MD_STATUS_STAGE_DECIMATION ) subprogress = 1.0 - ( (double)status->trianglecount / (double)mesh->tricount ); else if( status->stage == MD_STATUS_STAGE_BUILDQUEUE ) subprogress = (double)populatecount / (double)mesh->tricount; else if( status->stage == MD_STATUS_STAGE_BUILDTRIANGLES ) subprogress = (double)buildtricount / (double)mesh->tricount; else if( status->stage == MD_STATUS_STAGE_BUILDTRIREFS ) subprogress = (double)buildrefcount / (double)mesh->tricount; subprogress = fmax( 0.0, fmin( 1.0, subprogress ) ); } progress = 0.0; status->stagename = mdStatusStageName[status->stage]; for( stageindex = 0 ; stageindex < status->stage ; stageindex++ ) progress += mdStatusStageProgress[stageindex]; progress += subprogress * mdStatusStageProgress[status->stage]; if( progress > status->progress ) status->progress = progress; return; } ////////// void mdOperationInit( mdOperation *op ) { /* Input */ memset( op, 0, sizeof(mdOperation) ); /* Advanced settings, default values */ op->compactnesstarget = MD_COLLAPSE_COST_COMPACT_TARGET; op->compactnesspenalty = MD_COLLAPSE_COST_COMPACT_FACTOR; op->boundaryweight = MD_BOUNDARY_WEIGHT; op->syncstepcount = MD_SYNC_STEP_COUNT; op->normalsearchangle = 45.0; mmInit(); if( mmcontext.sysmemory ) { op->maxmemorysize = ( mmcontext.sysmemory >> 1 ) + ( mmcontext.sysmemory >> 2 ); /* By default, allow to allocate up to 75% of system memory */ if( op->maxmemorysize < 1024*1024*1024 ) op->maxmemorysize = 1024*1024*1024; } return; } void mdOperationData( mdOperation *op, size_t vertexcount, void *vertex, int vertexformat, size_t vertexstride, size_t tricount, void *indices, int indicesformat, size_t indicesstride ) { op->vertexcount = vertexcount; op->vertex = vertex; op->vertexformat = vertexformat; op->vertexstride = vertexstride; op->indices = indices; op->indicesformat = indicesformat; op->indicesstride = indicesstride; op->tricount = tricount; return; } void mdOperationStrength( mdOperation *op, double decimationstrength ) { /* pow( decimationstrength/4.0, 6.0 ) */ decimationstrength *= 0.25; decimationstrength *= decimationstrength; op->decimationstrength = decimationstrength * decimationstrength * decimationstrength; return; } void mdOperationTriData( mdOperation *op, void *tridata, size_t tridatasize, double (*edgeweight)( void *tridata0, void *tridata1 ) ) { op->tridata = tridata; op->tridatasize = tridatasize; op->edgeweight = edgeweight; return; } void mdOperationVertexCopy( mdOperation *op, void (*vertexcopy)( void *copycontext, int dstindex, int srcindex ), void *copycontext ) { op->vertexcopy = vertexcopy; op->copycontext = copycontext; return; } void mdOperationVertexMerge( mdOperation *op, void (*vertexmerge)( void *mergecontext, int dstindex, int srcindex, double dstfactor, double srcfactor ), void *mergecontext ) { op->vertexmerge = vertexmerge; op->mergecontext = mergecontext; return; } void mdOperationComputeNormals( mdOperation *op, void *base, int format, size_t stride ) { op->normalbase = base; op->normalformat = format; op->normalstride = stride; return; } void mdOperationStatusCallback( mdOperation *op, void (*statuscallback)( void *statuscontext, const mdStatus *status ), void *statuscontext, long milliseconds ) { op->statusmilliseconds = milliseconds; op->statuscontext = statuscontext; op->statuscallback = statuscallback; return; } ////// int mdMeshDecimation( mdOperation *operation, int threadcount, int flags ) { int threadid, maxthreadcount, retval; long statuswait; mdMesh mesh; mtThread thread[MD_THREAD_COUNT_MAX]; mdThreadInit threadinit[MD_THREAD_COUNT_MAX]; mdThreadInit *tinit; mdStatus status; #if MD_CONF_ENABLE_PROGRESS long deletioncount; #endif retval = 1; operation->decimationcount = 0; operation->msecs = 0; maxthreadcount = operation->tricount / 1024; if( maxthreadcount > MD_THREAD_COUNT_MAX ) maxthreadcount = MD_THREAD_COUNT_MAX; if( maxthreadcount == 0 ) maxthreadcount = 1; if( threadcount <= 0 ) { threadcount = mmcontext.cpucount; if( threadcount <= 0 ) threadcount = MD_THREAD_COUNT_DEFAULT; } if( threadcount > maxthreadcount ) threadcount = maxthreadcount; /* Get operation general settings */ mesh.point = operation->vertex; mesh.pointstride = operation->vertexstride; mesh.vertexcount = operation->vertexcount; mesh.indices = operation->indices; mesh.indicesstride = operation->indicesstride; mesh.tridata = operation->tridata; mesh.tridatasize = operation->tridatasize; switch( operation->indicesformat ) { case MD_FORMAT_BYTE: case MD_FORMAT_UBYTE: mesh.indicesUserToNative = mdIndicesCharToNative; mesh.indicesNativeToUser = mdIndicesNativeToChar; break; case MD_FORMAT_SHORT: case MD_FORMAT_USHORT: mesh.indicesUserToNative = mdIndicesShortToNative; mesh.indicesNativeToUser = mdIndicesNativeToShort; break; case MD_FORMAT_INT: case MD_FORMAT_UINT: mesh.indicesUserToNative = mdIndicesIntToNative; mesh.indicesNativeToUser = mdIndicesNativeToInt; break; case MD_FORMAT_INT8: case MD_FORMAT_UINT8: mesh.indicesUserToNative = mdIndicesInt8ToNative; mesh.indicesNativeToUser = mdIndicesNativeToInt8; break; case MD_FORMAT_INT16: case MD_FORMAT_UINT16: mesh.indicesUserToNative = mdIndicesInt16ToNative; mesh.indicesNativeToUser = mdIndicesNativeToInt16; break; case MD_FORMAT_INT32: case MD_FORMAT_UINT32: mesh.indicesUserToNative = mdIndicesInt32ToNative; mesh.indicesNativeToUser = mdIndicesNativeToInt32; break; case MD_FORMAT_INT64: case MD_FORMAT_UINT64: mesh.indicesUserToNative = mdIndicesInt64ToNative; mesh.indicesNativeToUser = mdIndicesNativeToInt64; break; default: return 0; } switch( operation->vertexformat ) { case MD_FORMAT_FLOAT: mesh.vertexUserToNative = mdVertexFloatToNative; mesh.vertexNativeToUser = mdVertexNativeToFloat; break; case MD_FORMAT_DOUBLE: mesh.vertexUserToNative = mdVertexDoubleToNative; mesh.vertexNativeToUser = mdVertexNativeToDouble; break; case MD_FORMAT_SHORT: mesh.vertexUserToNative = mdVertexShortToNative; mesh.vertexNativeToUser = mdVertexNativeToShort; break; case MD_FORMAT_INT: mesh.vertexUserToNative = mdVertexIntToNative; mesh.vertexNativeToUser = mdVertexNativeToInt; break; case MD_FORMAT_INT16: mesh.vertexUserToNative = mdVertexInt16ToNative; mesh.vertexNativeToUser = mdVertexNativeToInt16; break; case MD_FORMAT_INT32: mesh.vertexUserToNative = mdVertexInt32ToNative; mesh.vertexNativeToUser = mdVertexNativeToInt32; break; default: return 0; } mesh.edgeweight = operation->edgeweight; mesh.vertexmerge = operation->vertexmerge; mesh.mergecontext = operation->mergecontext; mesh.vertexcopy = operation->vertexcopy; mesh.copycontext = operation->copycontext; mesh.tricount = operation->tricount; if( mesh.tricount < 2 ) return 0; mesh.maxcollapsecost = operation->decimationstrength; /* Record start time */ operation->msecs = mmGetMillisecondsTime(); mesh.threadcount = threadcount; mesh.operationflags = flags; /* To compute vertex normals */ mesh.normalbase = operation->normalbase; mesh.normalformat = operation->normalformat; mesh.normalstride = operation->normalstride; if( mesh.normalbase ) { switch( mesh.normalformat ) { case MD_FORMAT_FLOAT: mesh.writenormal = mdVertexNativeToFloat; break; case MD_FORMAT_DOUBLE: mesh.writenormal = mdVertexNativeToDouble; break; case MD_FORMAT_BYTE: mesh.writenormal = mdNormalNativeToChar; break; case MD_FORMAT_SHORT: mesh.writenormal = mdNormalNativeToShort; break; case MD_FORMAT_INT8: mesh.writenormal = mdNormalNativeToInt8; break; case MD_FORMAT_INT16: mesh.writenormal = mdNormalNativeToInt16; break; case MD_FORMAT_INT_2_10_10_10_REV: mesh.writenormal = mdNormalNativeTo10_10_10_2; break; default: return 0; } } /* Advanced configuration options */ mesh.compactnesstarget = operation->compactnesstarget; mesh.compactnesspenalty = operation->compactnesspenalty; mesh.boundaryweight = operation->boundaryweight; mesh.syncstepcount = operation->syncstepcount; if( mesh.syncstepcount < 1 ) mesh.syncstepcount = 1; if( mesh.syncstepcount > 1024 ) mesh.syncstepcount = 1024; mesh.normalsearchangle = cos( 1.0 * operation->normalsearchangle * (M_PI/180.0) ); if( mesh.normalsearchangle > 0.9 ) mesh.normalsearchangle = 0.9; /* Synchronization */ mdBarrierInit( &mesh.workbarrier, threadcount ); mdBarrierInit( &mesh.globalbarrier, threadcount+1 ); /* Determine update buffer shift required, find the count of updatebuffers */ for( mesh.updatebuffershift = 0 ; ( threadcount >> mesh.updatebuffershift ) > MD_THREAD_UPDATE_BUFFER_COUNTMAX ; mesh.updatebuffershift++ ); mesh.updatebuffercount = ( ( threadcount - 1 ) >> mesh.updatebuffershift ) + 1; /* Runtime picking of collapse penalty computation path */ mesh.collapsepenalty = mdEdgeCollapsePenaltyTriangle; #if CPU_SSE_SUPPORT #if !MD_CONF_DOUBLE_PRECISION #if CPU_SSE4_1_SUPPORT mesh.collapsepenalty = mdEdgeCollapsePenaltyTriangleSSE4p1f; #elif CPU_SSE3_SUPPORT mesh.collapsepenalty = mdEdgeCollapsePenaltyTriangleSSE3f; #elif CPU_SSE2_SUPPORT mesh.collapsepenalty = mdEdgeCollapsePenaltyTriangleSSE2f; #endif #else #if CPU_SSE4_1_SUPPORT mesh.collapsepenalty = mdEdgeCollapsePenaltyTriangleSSE4p1d; #elif CPU_SSE3_SUPPORT mesh.collapsepenalty = mdEdgeCollapsePenaltyTriangleSSE3d; #elif CPU_SSE2_SUPPORT mesh.collapsepenalty = mdEdgeCollapsePenaltyTriangleSSE2d; #endif #endif #endif /* Initialize entire mesh storage */ mesh.vertexalloc = operation->vertexalloc; if( mesh.vertexalloc < mesh.vertexcount ) mesh.vertexalloc = mesh.vertexcount; if( !( mdMeshInit( &mesh, operation->maxmemorysize ) ) ) { retval = 0; goto error; } mesh.updatestatusflag = 0; status.progress = 0.0; statuswait = ( operation->statusmilliseconds > 10 ? operation->statusmilliseconds : 10 ); status.trianglecount = 0; if( operation->statuscallback ) { mesh.updatestatusflag = 1; mdUpdateStatus( &mesh, 0, MD_STATUS_STAGE_INIT, &status ); operation->statuscallback( operation->statuscontext, &status ); } /* Launch threads! */ tinit = threadinit; for( threadid = 0 ; threadid < threadcount ; threadid++, tinit++ ) { tinit->threadid = threadid; tinit->mesh = &mesh; tinit->stage = MD_STATUS_STAGE_INIT; mtThreadCreate( &thread[threadid], mdThreadMain, tinit, MT_THREAD_FLAGS_JOINABLE, 0, 0 ); } /* Wait until all threads have properly initialized */ if( mesh.updatestatusflag ) mdBarrierSync( &mesh.globalbarrier ); /* Wait for all threads to reach step 3 */ #if MD_CONF_ENABLE_PROGRESS if( !( mesh.updatestatusflag ) ) mdBarrierSync( &mesh.globalbarrier ); else { for( ; !( mdBarrierSyncTimeout( &mesh.globalbarrier, statuswait ) ) ; ) { mdUpdateStatus( &mesh, threadinit, 0, &status ); operation->statuscallback( operation->statuscontext, &status ); } } #else mdBarrierSync( &mesh.globalbarrier ); #endif /* Build mesh step 3 is not parallel, have the main thread run it */ mdMeshInitTrirefs( &mesh ); /* Wake up all threads */ mdBarrierSync( &mesh.globalbarrier ); /* Wait for all threads to complete */ #if MD_CONF_ENABLE_PROGRESS if( !( mesh.updatestatusflag ) ) mdBarrierSync( &mesh.globalbarrier ); else { for( ; !( mdBarrierSyncTimeout( &mesh.globalbarrier, statuswait ) ) ; ) { mdUpdateStatus( &mesh, threadinit, 0, &status ); operation->statuscallback( operation->statuscontext, &status ); } } deletioncount = 0; for( threadid = 0 ; threadid < threadcount ; threadid++ ) { deletioncount += threadinit[threadid].deletioncount; mtThreadJoin( &thread[threadid] ); } status.trianglecount = mesh.tricount - deletioncount; #else mdBarrierSync( &mesh.globalbarrier ); for( threadid = 0 ; threadid < threadcount ; threadid++ ) { mtThreadJoin( &thread[threadid] ); } #endif /* Count sums of all threads */ operation->decimationcount = 0; operation->collisioncount = 0; tinit = threadinit; for( threadid = 0 ; threadid < threadcount ; threadid++, tinit++ ) { operation->decimationcount += tinit->decimationcount; operation->collisioncount += tinit->collisioncount; } if( mesh.updatestatusflag ) { mdUpdateStatus( &mesh, 0, MD_STATUS_STAGE_STORE, &status ); operation->statuscallback( operation->statuscontext, &status ); } /* Write out the final mesh */ if( ( mesh.normalbase ) && ( mesh.writenormal ) ) mdMeshWriteVerticesAndNormals( &mesh ); else mdMeshWriteVertices( &mesh ); mdMeshWriteIndices( &mesh ); operation->vertexcount = mesh.vertexpackcount; operation->tricount = mesh.tripackcount; if( mesh.updatestatusflag ) { mdUpdateStatus( &mesh, 0, MD_STATUS_STAGE_DONE, &status ); operation->statuscallback( operation->statuscontext, &status ); } /* Requires mmhash.c compiled with MM_HASH_DEBUG_STATISTICS */ #if MM_HASH_DEBUG_STATISTICS { long accesscount, collisioncount, relocationcount; long entrycount, entrycountmax, hashsizemax; mmHashStatistics( mesh.edgehashtable, &accesscount, &collisioncount, &relocationcount, &entrycount, &entrycountmax, &hashsizemax ); printf( "Hash Access : %ld\n", accesscount ); printf( "Hash Collision : %ld\n", collisioncount ); printf( "Hash Relocation : %ld\n", relocationcount ); printf( "Entry Count : %ld\n", entrycount ); printf( "Entry Count Max : %ld\n", entrycountmax ); printf( "Hash Size Max : %ld\n", hashsizemax ); } #endif /* Free all global data */ error: if( !( mesh.operationflags & MD_FLAGS_NO_DECIMATION ) ) mdMeshHashEnd( &mesh ); mdMeshEnd( &mesh ); mdBarrierDestroy( &mesh.workbarrier ); mdBarrierDestroy( &mesh.globalbarrier ); /* Store total processing time */ operation->msecs = mmGetMillisecondsTime() - operation->msecs; return retval; }