#if __SSE4_2__ /* SSE */ #include /* SSE 2 */ #include /* SSE 3 */ #include /* SSSE 3 */ #include /* SSE 4.1 */ #include /* SSE 4.2 */ #include #define CPU_SSE4_2_SUPPORT (1) #endif //// #if CLP_CONFIG_PATHVLIST_EXTRA_ALLOC < 4 #error clpPointInPolygon() requires CLP_CONFIG_PATHVLIST_EXTRA_ALLOC >=4 (guard values without bound checking) #endif /* vertexlist must have valid memory storage beyond 'vertexcount' by CLP_CONFIG_PATHVLIST_EXTRA_ALLOC vertices */ static inline int clpPointInPolygon( const clpPoint64 *pt, int64_t *vertexlist, size_t vertexcount ) { size_t guardindex; int edgeparity; int abovestateflag; double crsl, crsr; int64_t ptx, pty; int64_t *vcur, *vend, *vprev; #if CPU_SSE4_2_SUPPORT uint32_t cmpmask; __m128i vpty, vload0, vload1, vcmp0, vcmp1; #endif if( vertexcount < 3 ) return CLP_POINTINPOLYRESULT_OUTSIDE; ptx = pt->x; pty = pt->y; edgeparity = 0; /* We insert guard values beyond 'vertexcount', to avoid bound checking in loops */ /* CLP_CONFIG_PATHVLIST_EXTRA_ALLOC is a minimum of extra storage beyond 'vertexcount' for all path vertexlists */ /* ( SSE 4.2 path needs CLP_CONFIG_PATHVLIST_EXTRA_ALLOC>=4, scalar path needs CLP_CONFIG_PATHVLIST_EXTRA_ALLOC>=1 ) */ vend = &vertexlist[ vertexcount * 2 ]; for( guardindex = 0 ; guardindex < CLP_CONFIG_PATHVLIST_EXTRA_ALLOC*2 ; guardindex += 2 ) { vend[ guardindex + 0 ] = ptx; vend[ guardindex + 1 ] = pty; } /* Detect orientation above/below Y of last vertex, iterate until nonequal */ for( vprev = vend - 2 ; vprev[1] == pty ; vprev -= 2 ) { if( vprev == vertexlist ) return CLP_POINTINPOLYRESULT_OUTSIDE; } abovestateflag = ( pty > vprev[1] ); #if CPU_SSE4_2_SUPPORT vpty = _mm_set1_epi64x( pty ); #endif for( vcur = vertexlist ; vcur < vend ; vcur += 2 ) { #if CPU_SSE4_2_SUPPORT /* Iterate until we toggle above/below pty, meaning we have a possible intersection */ if( abovestateflag ) { for( ; ; vcur += 8 ) { vload0 = _mm_unpackhi_epi64( _mm_load_si128( (void *)&vcur[0] ), _mm_load_si128( (void *)&vcur[2] ) ); vload1 = _mm_unpackhi_epi64( _mm_load_si128( (void *)&vcur[4] ), _mm_load_si128( (void *)&vcur[6] ) ); vcmp0 = _mm_cmpgt_epi64( vpty, vload0 ); vcmp1 = _mm_cmpgt_epi64( vpty, vload1 ); cmpmask = _mm_movemask_ps( _mm_shuffle_ps( _mm_castsi128_ps( vcmp0 ), _mm_castsi128_ps( vcmp1 ), 0x88 ) ); if( cmpmask != 0xf ) break; } } else { for( ; ; vcur += 8 ) { vload0 = _mm_unpackhi_epi64( _mm_load_si128( (void *)&vcur[0] ), _mm_load_si128( (void *)&vcur[2] ) ); vload1 = _mm_unpackhi_epi64( _mm_load_si128( (void *)&vcur[4] ), _mm_load_si128( (void *)&vcur[6] ) ); vcmp0 = _mm_cmpgt_epi64( vload0, vpty ); vcmp1 = _mm_cmpgt_epi64( vload1, vpty ); cmpmask = _mm_movemask_ps( _mm_shuffle_ps( _mm_castsi128_ps( vcmp0 ), _mm_castsi128_ps( vcmp1 ), 0x88 ) ); if( cmpmask != 0xf ) break; } } vcur += ( ( 0x484c484 >> ( cmpmask + cmpmask ) ) & 3 ) << 1; #else /* Iterate until we toggle above/below pty, meaning we have a possible intersection */ if( abovestateflag ) { for( ; vcur[1] < pty ; vcur += 2 ); } else { for( ; vcur[1] > pty ; vcur += 2 ); } #endif if( vcur >= vend ) break; vprev = vcur - 2; if( vcur == vertexlist ) vprev = vend - 2; if( vcur[1] == pty ) { /* Point either matches contour point, or between two points of horizontal edge */ if( ( vcur[0] == ptx ) || ( ( vcur[1] == vprev[1] ) && ( ( ptx < vprev[0] ) != ( ptx < vcur[0] ) ) ) ) return CLP_POINTINPOLYRESULT_ON; /* If Y of point and contour matches, continue until we get out of that horizontal collinear special case */ continue; } abovestateflag ^= 1; /* We are only interested in edges crossing on the left */ if( ( ptx >= vcur[0] ) || ( ptx >= vprev[0] ) ) { if( ( ptx > vcur[0] ) && ( ptx > vprev[0] ) ) edgeparity ^= 1; else { /* Compare sides of cross product ~ we have only one flop rounding, correctness can not flip around */ crsl = (double)( vcur[0] - vprev[0] ) * (double)( pty - vcur[1] ); crsr = (double)( vcur[1] - vprev[1] ) * (double)( ptx - vcur[0] ); if( crsl == crsr ) return CLP_POINTINPOLYRESULT_ON; edgeparity ^= ( ( crsl < crsr ) ^ abovestateflag ); } } } return ( edgeparity ? CLP_POINTINPOLYRESULT_INSIDE : CLP_POINTINPOLYRESULT_OUTSIDE ); }