/* ----------------------------------------------------------------------------- *
 *
 * Copyright (c) 2022 Alexis Naveros.
 * Feel free to use however you want.
 *
 * Highly accurate intersection point between two int64_t line segments
 * Input must be within the [-INT64_MAX/2,INT64_MAX/2] range
 *
 * ----------------------------------------------------------------------------- */

#include <stdint.h>
#include <math.h>

#if defined(_MSC_VER)
#include <intrin.h>
#endif


#if defined(__GNUC__) && defined(__SIZEOF_INT128__)
typedef __int128 int128;
typedef unsigned __int128 uint128;
#define MATH128_NATIVE (1)
#elif defined(_MSC_VER) && 0
typedef struct { uint64_t low; uint64_t high; } uint128;
#define MATH128_MSVC (1)
#else
#warning No support path for 128 bits math detected, we will fall back to inaccurate double-based math
#define MATH128_NONE (1)
#endif


#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
#define MATH_LIKELY(x) __builtin_expect(!!(x), 1)
#define MATH_UNLIKELY(x) __builtin_expect(!!(x), 0)
#define MATH_ALWAYSINLINE __attribute__((always_inline))
#elif defined(_MSC_VER)
#define MATH_LIKELY(x) (x)
#define MATH_UNLIKELY(x) (x)
#define MATH_ALWAYSINLINE __forceinline
#else
#define MATH_LIKELY(x) (x)
#define MATH_UNLIKELY(x) (x)
#define MATH_ALWAYSINLINE
#endif


#if defined(_MSC_VER) && ( defined(_M_AMD64) || defined(_M_X64) )
/* Work around MSVC being unable to inline the most trivial standard library calls */
#include <xmmintrin.h>
#include <emmintrin.h>
#define fmin(a,b) _mm_cvtsd_f64(_mm_min_sd(_mm_set_sd(a),_mm_set_sd(b)))
#define fmax(a,b) _mm_cvtsd_f64(_mm_max_sd(_mm_set_sd(a),_mm_set_sd(b)))
#define nearbyint(a) _mm_cvtsd_si64(_mm_set_sd(a)) /* Note: semantics of nearbyint() changed, expression type is (int64_t) */
#endif


/* When right shifting bits out due to operands exceeding 64 bits, perform rounding based on shifted out bits */
/* This is very slightly more accurate */
/* Always automatically enabled when compiling for amd64/x86-64 using GCC/clang/ICC (inline assembly makes it free) */
#define MATH_LINELINE_SHIFT_ROUND (1)


////


/* Signed add64 ~ returns non-zero on success, zero on overflow */
MATH_ALWAYSINLINE static inline int mathOverflow_Add64s( int64_t *dst, int64_t src0, int64_t src1 )
{
#if defined(__GNUC__)
  int retval;
  long long int sum;
  retval = __builtin_saddll_overflow( src0, src1, &sum );
  *dst = (int64_t)sum;
  return !retval;
#else
  uint64_t a, b, sum;
  /* We must use unsigned addition ~ signed overflow is undefined by the C standard, and compilers can optimize it away */
  a = (uint64_t)src0;
  b = (uint64_t)src1;
  sum = a + b;
  *dst = (int64_t)sum;
  return !( ( ~( a ^ b ) & ( a ^ sum ) ) >> 63 );
#endif
}


////


/* We have a decent math128 implementation available */
#ifndef MATH128_NONE

/* Accurate and fast results for the win */

/* Signed mul64 ~ returns non-zero on success, zero on overflow */
MATH_ALWAYSINLINE static inline int mathOverflow_Mul64s( int64_t *dst, int64_t src0, int64_t src1 )
{
#if defined(__GNUC__) && 0
  int retval;
  long long int product;
  retval = !__builtin_smulll_overflow( src0, src1, &product );
  *dst = (int64_t)product;
  return retval;
#elif MATH128_NATIVE
  /* Why is it faster to check the top 65 bits of the product, rather than call __builtin_smulll_overflow()? This is weird */
  /* GCC emits the proper imulq/setno sequence, I guess Ryzen chips don't like using flags after an imulq */
  int128 product;
  int64_t res;
  product = (int128)src0 * (int128)src1;
  res = (int64_t)product;
  *dst = res;
  return ( (int64_t)( product >> 64 ) == ( res >> 63 ) );
#elif MATH128_MSVC
  int64_t low, high;
  low = _mul128( src0, src1, &high );
  *dst = low;
  return ( high == ( low >> 63 ) );
#else
#error
#endif
}


MATH_ALWAYSINLINE static inline uint64_t math128_divmod( uint128 num, uint64_t denom, uint64_t *retrem )
{
#if defined(__GNUC__) && ( defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) )
  uint64_t res, rem;
  __asm__( "divq %2\n\t" : "=a"(res), "=d"(rem) : "rm" (denom), "A" (num) );
  *retrem = rem;
#elif MATH128_NATIVE
  uint64_t res, rem;
  res = (uint128)num / denom;
  rem = (uint128)num % denom;
  *retrem = rem;
#elif MATH128_MSVC
  uint64_t res;
  unsigned __int64 remainder;
  res = _udiv128( num.high, num.low, denom, &remainder );
  *retrem = remainder;
#else
#error
#endif
  return res;
}


MATH_ALWAYSINLINE static inline uint64_t math128_divRound( uint128 num, uint64_t denom )
{
#if defined(__GNUC__) && ( defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) )
  uint64_t res, rem;
  /* On amd64, it's faster to use the remainder than adjust the numerator */
  /* Not touching the numerator allows the CPU to schedule the divq instruction earlier */
  __asm__( "divq %2\n\t" : "=a"(res), "=d"(rem) : "rm" (denom), "A" (num) );
  res += ( rem >= ( (denom+1) >> 1 ) );
#elif MATH128_NATIVE
  uint64_t res, rem;
  /* Friendlier to ARM, AARCH64 */
  num += denom >> 1;
  res = num / denom;
#elif MATH128_MSVC
  uint64_t res;
  unsigned __int64 rem;
  res = _udiv128( num.high, num.low, denom, &rem );
  res += ( rem >= ( (denom+1) >> 1 ) );
#else
#error
#endif
  return res;
}


MATH_ALWAYSINLINE static inline uint128 math128_imul( int64_t a, int64_t b )
{
  uint128 res;
#if defined(__GNUC__) && ( defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) )
  __asm__( "imulq %2\n\t" : "=A"(res) : "a" (a), "rm" (b) );
#elif MATH128_NATIVE
  res = (int128)a * (int128)b;
#elif MATH128_MSVC
  __int64 low, high;
  low = _mul128( a, b, &high );
  res.low = low;
  res.high = high;
#else
#error
#endif
  return res;
}


MATH_ALWAYSINLINE static inline uint128 math128_mul( uint64_t a, uint64_t b )
{
  uint128 res;
#if defined(__GNUC__) && ( defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) )
  __asm__( "mulq %2\n\t" : "=A"(res) : "a" (a), "rm" (b) );
#elif MATH128_NATIVE
  res = (uint128)a * (uint128)b;
#elif MATH128_MSVC
  unsigned __int64 low, high;
  low = _umul128( a, b, &high );
  res.low = low;
  res.high = high;
#else
#error
#endif
  return res;
}


MATH_ALWAYSINLINE static inline uint128 math128_sub( uint128 src, uint128 sub )
{
  uint128 res;
#if MATH128_NATIVE
  res = src - sub;
#elif MATH128_MSVC
  unsigned __int64 low, high;
  unsigned char carry;
  carry = _subborrow_u64( 0, src.low, sub.low, &low );
  _subborrow_u64( carry, src.high, sub.high, &high );
  res.low = low;
  res.high = high;
#else
#error
#endif
  return res;
}


MATH_ALWAYSINLINE static inline uint64_t math128_shrdRoundLow64( uint128 src, int shift )
{
  uint64_t res;
#if defined(__GNUC__) && ( defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) )
  __asm__( "shrdq %2, %0\n" "adcq $0, %0\n" : "=r"(res) : "0" ((uint64_t)src), "r" ((uint64_t)(src>>64)), "c" (shift) );
#elif MATH128_NATIVE
#if MATH_LINELINE_SHIFT_ROUND
  res = (uint64_t)( (uint128)src >> shift ) + ( ( (uint64_t)src >> ( shift - 1 ) ) & 0x1 );
#else
  res = (uint128)src >> shift;
#endif
#elif MATH128_MSVC
#if MATH_LINELINE_SHIFT_ROUND
  res = ( ( (uint64_t)src.high << ( 64 - shift ) ) | ( (uint64_t)src.low >> shift ) ) + ( ( (uint64_t)src.low >> ( shift - 1 ) ) & 0x1 );
#else
  res = ( (uint64_t)src.high << ( 64 - shift ) ) | ( (uint64_t)src.low >> shift );
#endif
#else
#error
#endif
  return res;
}


MATH_ALWAYSINLINE static inline uint128 math128_shrdRound( uint128 src, int shift )
{
  uint128 res;
#if defined(__GNUC__) && ( defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) )
  uint64_t low, high;
  low = (uint64_t)src;
  high = (uint64_t)( src >> 64 );
  __asm__( "shrdq %3, %0\n" "adcq $0, %0\n" "adcq $0, %1\n" : "=&r"(low), "=r"(high) : "0" (low), "r" (high), "1" (high>>shift), "c" (shift) );
  res = (uint128)low | ( (uint128)high << 64 );
#elif MATH128_NATIVE
#if MATH_LINELINE_SHIFT_ROUND
  res = ( (uint128)src + ( (int64_t)1 << ( shift - 1 ) ) ) >> shift;
#else
  res = (uint128)src >> shift;
#endif
#elif MATH128_MSVC
  unsigned __int64 low, high;
  unsigned char carry;
#if MATH_LINELINE_SHIFT_ROUND
  /* Add the shifted-out rounding bit once, with carry propagation into the high word */
  carry = ( (uint64_t)src.low >> ( shift - 1 ) ) & 0x1;
  low = ( (uint64_t)src.high << ( 64 - shift ) ) | ( (uint64_t)src.low >> shift );
  high = src.high >> shift;
  carry = _addcarry_u64( 0, low, carry, &low );
  _addcarry_u64( carry, high, 0, &high );
#else
  low = ( (uint64_t)src.high << ( 64 - shift ) ) | ( (uint64_t)src.low >> shift );
  high = src.high >> shift;
#endif
  res.low = low;
  res.high = high;
#else
#error
#endif
  return res;
}


MATH_ALWAYSINLINE static inline uint128 math128_xorsub( uint128 src, int64_t xormask )
{
  uint128 res;
#if MATH128_NATIVE
  res = ( (uint128)src ^ xormask ) - xormask;
#elif MATH128_MSVC
  unsigned __int64 low, high;
  unsigned char carry;
  low = src.low ^ xormask;
  high = src.high ^ xormask;
  carry = _subborrow_u64( 0, low, xormask, &low );
  _subborrow_u64( carry, high, xormask, &high );
  res.low = low;
  res.high = high;
#else
#error
#endif
  return res;
}


MATH_ALWAYSINLINE static inline int math128_cmpeqzero( uint128 src )
{
#if MATH128_NATIVE
  return ( src == 0 );
#elif MATH128_MSVC
  return ( ( src.high == 0 ) && ( src.low == 0 ) );
#else
#error
#endif
}


MATH_ALWAYSINLINE static inline int math128_ucmpgt( uint128 src, uint128 ref )
{
#if MATH128_NATIVE
  return ( (unsigned __int128)src > (unsigned __int128)ref );
#elif MATH128_MSVC
  return ( ( src.high > ref.high ) || ( ( src.high == ref.high ) && ( src.low > ref.low ) ) );
#else
#error
#endif
}


/* GCC is smart enough to grab the higher half of the uint128 register pair, without any store/load/shift/whatever */
/* Can all compilers do that? Feel free to tweak as necessary for your compiler to emit good code */
#if MATH128_NATIVE
#define MATH_INT128_GET_LOW64(x) (int64_t)(x)
#define MATH_INT128_GET_HIGH64(x) (int64_t)((x)>>64)
#elif MATH128_MSVC
#define MATH_INT128_GET_LOW64(x) (int64_t)((x).low)
#define MATH_INT128_GET_HIGH64(x) (int64_t)((x).high)
#else
#error
#endif


MATH_ALWAYSINLINE static inline int mathLineLine_DoubleFinish( int64_t *hitpt, int64_t l0p0x, int64_t l0p0y, double alphalow, double denomlow, double mulx, double muly, int64_t alphaxormask )
{
  double fdet;
  fdet = (double)alphalow / (double)denomlow;
  hitpt[0] = l0p0x + ( (int64_t)nearbyint( (double)mulx * fdet ) ^ alphaxormask ) - alphaxormask;
  hitpt[1] = l0p0y + ( (int64_t)nearbyint( (double)muly * fdet ) ^ alphaxormask ) - alphaxormask;
  return 1;
}


MATH_ALWAYSINLINE static inline int mathLineLine_DoubleFinish_DetectOverflow( int64_t *hitpt, int64_t l0p0x, int64_t l0p0y, double alphalow, double denomlow, double mulx, double muly, int64_t alphaxormask )
{
  double fdet, fmx, fmy;
  int64_t offx, offy;
  fdet = (double)alphalow / (double)denomlow;
  fmx = (double)mulx * fdet;
  fmy = (double)muly * fdet;
  if( MATH_UNLIKELY( fmin( fmx, fmy ) < (double)INT64_MIN ) )
    return 0;
  if( MATH_UNLIKELY( fmax( fmx, fmy ) > (double)INT64_MAX ) )
    return 0;
  offx = ( (int64_t)nearbyint( fmx ) ^ alphaxormask ) - alphaxormask;
  offy = ( (int64_t)nearbyint( fmy ) ^ alphaxormask ) - alphaxormask;
  if( MATH_UNLIKELY( !mathOverflow_Add64s( &hitpt[0], l0p0x, offx ) ) )
    return 0;
  if( MATH_UNLIKELY( !mathOverflow_Add64s( &hitpt[1], l0p0y, offy ) ) )
    return 0;
  return 1;
}


////


/* Check line intersection: yes */
/* Return non-zero if line segments intersect, don't compute the intersection point */
int mathLineLine_Check( int64_t l0p0x, int64_t l0p0y, int64_t l0p1x, int64_t l0p1y, int64_t l1p0x, int64_t l1p0y, int64_t l1p1x, int64_t l1p1y )
{
  int64_t xormask;
  uint128 denom;
  uint128 alpha;
  uint128 beta;
  denom = math128_sub( math128_imul( l0p1x - l0p0x, l1p1y - l1p0y ), math128_imul( l0p1y - l0p0y, l1p1x - l1p0x ) );
  if( MATH_UNLIKELY( math128_cmpeqzero( denom ) ) )
    return 0;
  else
  {
    alpha = math128_sub( math128_imul( l1p0x - l0p0x, l1p1y - l1p0y ), math128_imul( l1p0y - l0p0y, l1p1x - l1p0x ) );
    xormask = (int64_t)MATH_INT128_GET_HIGH64( denom ) >> 63;
    denom = math128_xorsub( denom, xormask );
    alpha = math128_xorsub( alpha, xormask );
    if( math128_ucmpgt( alpha, denom ) )
      return 0;
    beta = math128_sub( math128_imul( l1p0x - l0p0x, l0p1y - l0p0y ), math128_imul( l1p0y - l0p0y, l0p1x - l0p0x ) );
    beta = math128_xorsub( beta, xormask );
    if( math128_ucmpgt( beta, denom ) )
      return 0;
  }
  return 1;
}


/* Check line intersection: yes */
/* Get intersection hit point: yes */
/* Return non-zero if line segments intersect, intersection point stored in hitpt */
int mathLineLine_CheckHit( int64_t *hitpt, int64_t l0p0x, int64_t l0p0y, int64_t l0p1x, int64_t l0p1y, int64_t l1p0x, int64_t l1p0y, int64_t l1p1x, int64_t l1p1y )
{
  int shift;
  int64_t mulx, muly;
  uint64_t denomlow, alphalow, alphahigh, umulx, umuly;
  int64_t xormask, alphaxormask, highmask, mulxmask, mulymask;
  uint128 denom;
  uint128 alpha;
  uint128 hitx, hity;
  uint128 beta;
  mulx = l0p1x - l0p0x;
  muly = l0p1y - l0p0y;
  denom = math128_sub( math128_imul( mulx, l1p1y - l1p0y ), math128_imul( muly, l1p1x - l1p0x ) );
  if( MATH_UNLIKELY( math128_cmpeqzero( denom ) ) )
  {
    // segments lie on parallel lines
    // special case with segments on the same line -- may be considered if necessary
    return 0;
  }
  else
  {
    alpha = math128_sub( math128_imul( l1p0x - l0p0x, l1p1y - l1p0y ), math128_imul( l1p0y - l0p0y, l1p1x - l1p0x ) );
    xormask = (int64_t)MATH_INT128_GET_HIGH64( denom ) >> 63;
    alphaxormask = (int64_t)MATH_INT128_GET_HIGH64( alpha ) >> 63;
    denom = math128_xorsub( denom, xormask );
    alpha = math128_xorsub( alpha, xormask );
    if( math128_ucmpgt( alpha, denom ) )
      return 0;
    beta = math128_sub( math128_imul( l1p0x - l0p0x, muly ), math128_imul( l1p0y - l0p0y, mulx ) );
    beta = math128_xorsub( beta, xormask );
    if( math128_ucmpgt( beta, denom ) )
      return 0;
    alphaxormask ^= xormask;
    alpha = math128_xorsub( alpha, alphaxormask );
    highmask = MATH_INT128_GET_HIGH64( denom );
    if( highmask )
    {
#if defined(__GNUC__)
      shift = 64 - __builtin_clzll( highmask );
#elif defined(_MSC_VER)
      shift = 64 - __lzcnt64( highmask );
#else
#error
#endif
      denomlow = math128_shrdRoundLow64( denom, shift );
      alphalow = math128_shrdRoundLow64( alpha, shift );
    }
    else
    {
      denomlow = MATH_INT128_GET_LOW64( denom );
      alphalow = MATH_INT128_GET_LOW64( alpha );
      highmask = denomlow | alphalow;
      if( !( ( highmask | ( ( mulx >> 63 ) ^ mulx ) | ( ( muly >> 63 ) ^ muly ) ) & ~(((int64_t)1<<50)-1) ) )
      {
        /* All operands are low, switch to double-based math ~ max error is 0.125 */
        return mathLineLine_DoubleFinish( hitpt, l0p0x, l0p0y, (double)alphalow, (double)denomlow, (double)mulx, (double)muly, alphaxormask );
      }
    }
    mulxmask = mulx >> 63;
    mulymask = muly >> 63;
    umulx = ( mulx ^ mulxmask ) - mulxmask;
    umuly = ( muly ^ mulymask ) - mulymask;
    hitx = math128_mul( alphalow, umulx );
    xormask = alphaxormask ^ mulxmask;
    hitpt[0] = l0p0x + (int64_t)( ( math128_divRound( hitx, denomlow ) ^ xormask ) - xormask );
    hity = math128_mul( alphalow, umuly );
    xormask = alphaxormask ^ mulymask;
    hitpt[1] = l0p0y + (int64_t)( ( math128_divRound( hity, denomlow ) ^ xormask ) - xormask );
  }
  return 1;
}


/* Check line intersection: no */
/* Get intersection hit point: yes */
/* Accurate intersection outside lines: no */
/* SIGFPE protection: yes */
/* Overflow checking: no */
/* Return non-zero on success, intersection point stored in hitpt */
/* This function does NOT check against integer overflow, if the intersection point is out of numerical range */
/* Use only when you know the intersection point is in the neighborhood of the two segments */
int mathLineLine_HitSafe( int64_t *hitpt, int64_t l0p0x, int64_t l0p0y, int64_t l0p1x, int64_t l0p1y, int64_t l1p0x, int64_t l1p0y, int64_t l1p1x, int64_t l1p1y )
{
  int shift;
  int64_t mulx, muly;
  uint64_t denomlow, alphalow, alphahigh, umulx, umuly;
  int64_t xormask, alphaxormask, highmask, mulxmask, mulymask;
  uint128 denom;
  uint128 alpha;
  uint128 hitx, hity;
  uint128 beta;
  mulx = l0p1x - l0p0x;
  muly = l0p1y - l0p0y;
  denom = math128_sub( math128_imul( mulx, l1p1y - l1p0y ), math128_imul( muly, l1p1x - l1p0x ) );
  if( MATH_UNLIKELY( math128_cmpeqzero( denom ) ) )
  {
    // segments lie on parallel lines
    // special case with segments on the same line -- may be considered if necessary
    return 0;
  }
  else
  {
    alpha = math128_sub( math128_imul( l1p0x - l0p0x, l1p1y - l1p0y ), math128_imul( l1p0y - l0p0y, l1p1x - l1p0x ) );
    xormask = (int64_t)MATH_INT128_GET_HIGH64( denom ) >> 63;
    alphaxormask = (int64_t)MATH_INT128_GET_HIGH64( alpha ) >> 63;
    denom = math128_xorsub( denom, xormask );
    alpha = math128_xorsub( alpha, xormask );
    if( math128_ucmpgt( alpha, denom ) )
      return 0;
    beta = math128_sub( math128_imul( l1p0x - l0p0x, muly ), math128_imul( l1p0y - l0p0y, mulx ) );
    beta = math128_xorsub( beta, xormask );
    if( math128_ucmpgt( beta, denom ) )
      return 0;
    alphaxormask ^= xormask;
    alpha = math128_xorsub( alpha, alphaxormask );
    highmask = MATH_INT128_GET_HIGH64( denom );
    if( highmask )
    {
#if defined(__GNUC__)
      shift = 64 - __builtin_clzll( highmask );
#elif defined(_MSC_VER)
      shift = 64 - __lzcnt64( highmask );
#else
#error
#endif
      denomlow = math128_shrdRoundLow64( denom, shift );
      alphalow = math128_shrdRoundLow64( alpha, shift );
    }
    else
    {
      denomlow = MATH_INT128_GET_LOW64( denom );
      alphalow = MATH_INT128_GET_LOW64( alpha );
      highmask = denomlow | alphalow;
      if( !( ( highmask | ( ( mulx >> 63 ) ^ mulx ) | ( ( muly >> 63 ) ^ muly ) ) & ~(((int64_t)1<<50)-1) ) )
      {
        /* All operands are low, switch to double-based math ~ max error is 0.125 */
        return mathLineLine_DoubleFinish( hitpt, l0p0x, l0p0y, (double)alphalow, (double)denomlow, (double)mulx, (double)muly, alphaxormask );
      }
    }
    mulxmask = mulx >> 63;
    mulymask = muly >> 63;
    umulx = ( mulx ^ mulxmask ) - mulxmask;
    umuly = ( muly ^ mulymask ) - mulymask;
    hitx = math128_mul( alphalow, umulx );
    if( MATH_UNLIKELY( (uint64_t)MATH_INT128_GET_HIGH64( hitx ) >= (uint64_t)denomlow ) )
      return 0;
    xormask = alphaxormask ^ mulxmask;
    hitpt[0] = l0p0x + (int64_t)( ( math128_divRound( hitx, denomlow ) ^ xormask ) - xormask );
    hity = math128_mul( alphalow, umuly );
    if( MATH_UNLIKELY( (uint64_t)MATH_INT128_GET_HIGH64( hity ) >= (uint64_t)denomlow ) )
      return 0;
    xormask = alphaxormask ^ mulymask;
    hitpt[1] = l0p0y + (int64_t)( ( math128_divRound( hity, denomlow ) ^ xormask ) - xormask );
  }
  return 1;
}


/* Check line intersection: no */
/* Get intersection hit point: yes */
/* Accurate intersection outside lines: yes */
/* SIGFPE protection: yes */
/* Overflow checking: yes */
/* Return non-zero on success, intersection point stored in hitpt */
/* Return zero if parallel or intersection point outside of numerical range */
int mathLineLine_HitAnySafeRobust( int64_t *hitpt, int64_t l0p0x, int64_t l0p0y, int64_t l0p1x, int64_t l0p1y, int64_t l1p0x, int64_t l1p0y, int64_t l1p1x, int64_t l1p1y )
{
  int shift;
  int64_t mulx, muly;
  uint64_t denomlow, alphalow, alphahigh, umulx, umuly;
  int64_t xormask, alphaxormask, highmask, mulxmask, mulymask;
  uint128 denom;
  uint128 alpha;
  uint128 hitx, hity;
  int64_t offx, offy;
  mulx = l0p1x - l0p0x;
  muly = l0p1y - l0p0y;
  denom = math128_sub( math128_imul( mulx, l1p1y - l1p0y ), math128_imul( muly, l1p1x - l1p0x ) );
  if( MATH_UNLIKELY( math128_cmpeqzero( denom ) ) )
  {
    // segments lie on parallel lines
    // special case with segments on the same line -- may be considered if necessary
    return 0;
  }
  else
  {
    alpha = math128_sub( math128_imul( l1p0x - l0p0x, l1p1y - l1p0y ), math128_imul( l1p0y - l0p0y, l1p1x - l1p0x ) );
    xormask = (int64_t)MATH_INT128_GET_HIGH64( denom ) >> 63;
    alphaxormask = (int64_t)MATH_INT128_GET_HIGH64( alpha ) >> 63;
    denom = math128_xorsub( denom, xormask );
    alpha = math128_xorsub( alpha, alphaxormask );
    alphaxormask ^= xormask;
    /* Reduce denom to significant 64 bits ~ alpha might still have more bits */
    denomlow = MATH_INT128_GET_LOW64( denom );
    highmask = MATH_INT128_GET_HIGH64( denom );
    if( highmask )
    {
#if defined(__GNUC__)
      shift = 64 - __builtin_clzll( highmask );
#elif defined(_MSC_VER)
      shift = 64 - __lzcnt64( highmask );
#else
#error
#endif
      denomlow = math128_shrdRoundLow64( denom, shift );
      alpha = math128_shrdRound( alpha, shift );
    }
    /* If alpha has more than 64 significant bits, reduce */
    alphalow = MATH_INT128_GET_LOW64( alpha );
    alphahigh = MATH_INT128_GET_HIGH64( alpha );
    if( alphahigh )
    {
      uint64_t c, r;
      if( MATH_UNLIKELY( (uint64_t)alphahigh >= (uint64_t)denomlow ) )
        return 0;
      c = math128_divmod( alpha, denomlow, &r );
      if( MATH_UNLIKELY( (int64_t)c < 0 ) )
        return 0;
      if( MATH_UNLIKELY( !mathOverflow_Mul64s( &offx, c, mulx ) ) )
        return 0;
      if( MATH_UNLIKELY( !mathOverflow_Mul64s( &offy, c, muly ) ) )
        return 0;
      if( MATH_UNLIKELY( !mathOverflow_Add64s( &l0p0x, l0p0x, ( offx ^ alphaxormask ) - alphaxormask ) ) )
        return 0;
      if( MATH_UNLIKELY( !mathOverflow_Add64s( &l0p0y, l0p0y, ( offy ^ alphaxormask ) - alphaxormask ) ) )
        return 0;
      alphalow = (uint64_t)r;
    }
    /* Denomlow and alphalow both fit in 64 bits unsigned */
    highmask = denomlow | alphalow;
    if( !( ( highmask | ( ( mulx >> 63 ) ^ mulx ) | ( ( muly >> 63 ) ^ muly ) ) & ~(((int64_t)1<<25)-1) ) )
    {
      /* All operands are low, switch to double-based math ~ max error is 0.125 */
      return mathLineLine_DoubleFinish_DetectOverflow( hitpt, l0p0x, l0p0y, (double)alphalow, (double)denomlow, (double)mulx, (double)muly, alphaxormask );
    }
    mulxmask = mulx >> 63;
    mulymask = muly >> 63;
    umulx = ( mulx ^ mulxmask ) - mulxmask;
    umuly = ( muly ^ mulymask ) - mulymask;
    hitx = math128_mul( alphalow, umulx );
    if( MATH_UNLIKELY( (uint64_t)MATH_INT128_GET_HIGH64( hitx ) >= (uint64_t)denomlow ) )
      return 0;
    xormask = alphaxormask ^ mulxmask;
    offx = (int64_t)math128_divRound( hitx, denomlow );
    if( MATH_UNLIKELY( offx < 0 ) )
      return 0;
    offx = (int64_t)( ( (uint64_t)offx ^ xormask ) - xormask );
    if( MATH_UNLIKELY( !mathOverflow_Add64s( &hitpt[0], l0p0x, offx ) ) )
      return 0;
    hity = math128_mul( alphalow, umuly );
    if( MATH_UNLIKELY( (uint64_t)MATH_INT128_GET_HIGH64( hity ) >= (uint64_t)denomlow ) )
      return 0;
    xormask = alphaxormask ^ mulymask;
    offy = (int64_t)math128_divRound( hity, denomlow );
    if( MATH_UNLIKELY( offy < 0 ) )
      return 0;
    offy = (int64_t)( ( (uint64_t)offy ^ xormask ) - xormask );
    if( MATH_UNLIKELY( !mathOverflow_Add64s( &hitpt[1], l0p0y, offy ) ) )
      return 0;
  }
  return 1;
}


/* Else for #ifndef MATH128_NONE */
#else
/* We do not have a math128 implementation available, fall back to slow double-based math */


/* Fallback inaccurate double-based path */
int mathLineLine_Check( int64_t l0p0x, int64_t l0p0y, int64_t l0p1x, int64_t l0p1y, int64_t l1p0x, int64_t l1p0y, int64_t l1p1x, int64_t l1p1y )
{
  double denom;
  double alpha;
  double beta;
  denom = ( (double)( l0p1x - l0p0x ) * (double)( l1p1y - l1p0y ) ) - ( (double)( l0p1y - l0p0y ) * (double)( l1p1x - l1p0x ) );
  if( ( denom == 0.0 ) )
    return 0;
  else
  {
    alpha = ( (double)( l1p0x - l0p0x ) * (double)( l1p1y - l1p0y ) ) - ( (double)( l1p0y - l0p0y ) * (double)( l1p1x - l1p0x ) );
    beta = ( (double)( l1p0x - l0p0x ) * (double)( l0p1y - l0p0y ) ) - ( (double)( l1p0y - l0p0y ) * (double)( l0p1x - l0p0x ) );
    if( denom < 0.0 )
    {
      denom = -denom;
      alpha = -alpha;
      beta = -beta;
    }
    if( ( fmin( alpha, beta ) < 0.0 ) || ( fmax( alpha, beta ) > denom ) )
      return 0;
  }
  return 1;
}


/* Fallback inaccurate double-based path */
int mathLineLine_CheckHit( int64_t *hitpt, int64_t l0p0x, int64_t l0p0y, int64_t l0p1x, int64_t l0p1y, int64_t l1p0x, int64_t l1p0y, int64_t l1p1x, int64_t l1p1y )
{
  int64_t mulx, muly;
  double denom;
  double alpha;
  double beta;
  double fdet;
  denom = ( (double)( l0p1x - l0p0x ) * (double)( l1p1y - l1p0y ) ) - ( (double)( l0p1y - l0p0y ) * (double)( l1p1x - l1p0x ) );
  if( ( denom == 0.0 ) )
    return 0;
  else
  {
    alpha = ( (double)( l1p0x - l0p0x ) * (double)( l1p1y - l1p0y ) ) - ( (double)( l1p0y - l0p0y ) * (double)( l1p1x - l1p0x ) );
    beta = ( (double)( l1p0x - l0p0x ) * (double)( l0p1y - l0p0y ) ) - ( (double)( l1p0y - l0p0y ) * (double)( l0p1x - l0p0x ) );
    if( denom < 0.0 )
    {
      denom = -denom;
      alpha = -alpha;
      beta = -beta;
    }
    if( ( fmin( alpha, beta ) < 0.0 ) || ( fmax( alpha, beta ) > denom ) )
      return 0;
  }
  mulx = l0p1x - l0p0x;
  muly = l0p1y - l0p0y;
  fdet = (double)alpha / (double)denom;
  hitpt[0] = l0p0x + (int64_t)nearbyint( (double)mulx * fdet );
  hitpt[1] = l0p0y + (int64_t)nearbyint( (double)muly * fdet );
  return 1;
}


/* Fallback inaccurate double-based path */
int mathLineLine_HitSafe( int64_t *hitpt, int64_t l0p0x, int64_t l0p0y, int64_t l0p1x, int64_t l0p1y, int64_t l1p0x, int64_t l1p0y, int64_t l1p1x, int64_t l1p1y )
{
  int64_t mulx, muly;
  double denom;
  double alpha;
  double fdet;
  denom = ( (double)( l0p1x - l0p0x ) * (double)( l1p1y - l1p0y ) ) - ( (double)( l0p1y - l0p0y ) * (double)( l1p1x - l1p0x ) );
  if( ( denom == 0.0 ) )
    return 0;
  else
  {
    alpha = ( (double)( l1p0x - l0p0x ) * (double)( l1p1y - l1p0y ) ) - ( (double)( l1p0y - l0p0y ) * (double)( l1p1x - l1p0x ) );
    if( denom < 0.0 )
    {
      denom = -denom;
      alpha = -alpha;
    }
  }
  mulx = l0p1x - l0p0x;
  muly = l0p1y - l0p0y;
  fdet = (double)alpha / (double)denom;
  hitpt[0] = l0p0x + (int64_t)nearbyint( (double)mulx * fdet );
  hitpt[1] = l0p0y + (int64_t)nearbyint( (double)muly * fdet );
  return 1;
}


/* Fallback inaccurate double-based path */
int mathLineLine_HitAnySafeRobust( int64_t *hitpt, int64_t l0p0x, int64_t l0p0y, int64_t l0p1x, int64_t l0p1y, int64_t l1p0x, int64_t l1p0y, int64_t l1p1x, int64_t l1p1y )
{
  int64_t mulx, muly;
  int64_t offx, offy;
  double denom;
  double alpha;
  double fdet, fmx, fmy;
  denom = ( (double)( l0p1x - l0p0x ) * (double)( l1p1y - l1p0y ) ) - ( (double)( l0p1y - l0p0y ) * (double)( l1p1x - l1p0x ) );
  if( ( denom == 0.0 ) )
    return 0;
  else
  {
    alpha = ( (double)( l1p0x - l0p0x ) * (double)( l1p1y - l1p0y ) ) - ( (double)( l1p0y - l0p0y ) * (double)( l1p1x - l1p0x ) );
    if( denom < 0.0 )
    {
      denom = -denom;
      alpha = -alpha;
    }
  }
  mulx = l0p1x - l0p0x;
  muly = l0p1y - l0p0y;
  fdet = (double)alpha / (double)denom;
  fmx = (double)mulx * fdet;
  fmy = (double)muly * fdet;
  if( MATH_UNLIKELY( fmin( fmx, fmy ) < (double)INT64_MIN ) )
    return 0;
  if( MATH_UNLIKELY( fmax( fmx, fmy ) > (double)INT64_MAX ) )
    return 0;
  offx = (int64_t)nearbyint( fmx );
  offy = (int64_t)nearbyint( fmy );
  if( MATH_UNLIKELY( !mathOverflow_Add64s( &hitpt[0], l0p0x, offx ) ) )
    return 0;
  if( MATH_UNLIKELY( !mathOverflow_Add64s( &hitpt[1], l0p0y, offy ) ) )
    return 0;
  return 1;
}


/* End of #ifndef MATH128_NONE */
#endif
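

/* Minimal usage sketch, not part of the original library: the MATH_LINELINE_EXAMPLE_MAIN
 * guard macro and the sample coordinates below are illustrative assumptions; only the
 * mathLineLine_* functions above are the actual API. Compile this file with
 * -DMATH_LINELINE_EXAMPLE_MAIN to build the small demo driver. */
#if defined(MATH_LINELINE_EXAMPLE_MAIN)

#include <stdio.h>

int main( void )
{
  int64_t hitpt[2];
  /* Two segments crossing at (500,500): (0,0)-(1000,1000) and (0,1000)-(1000,0) */
  if( mathLineLine_Check( 0, 0, 1000, 1000, 0, 1000, 1000, 0 ) )
    printf( "segments intersect\n" );
  if( mathLineLine_CheckHit( hitpt, 0, 0, 1000, 1000, 0, 1000, 1000, 0 ) )
    printf( "intersection at (%lld,%lld)\n", (long long)hitpt[0], (long long)hitpt[1] );
  /* HitAnySafeRobust also returns a hit point when the segments themselves do not overlap,
   * as long as the lines are not parallel and the result fits in int64_t.
   * Here the supporting lines cross at (20,0), outside the first segment. */
  if( mathLineLine_HitAnySafeRobust( hitpt, 0, 0, 10, 0, 20, -5, 20, 5 ) )
    printf( "line intersection at (%lld,%lld)\n", (long long)hitpt[0], (long long)hitpt[1] );
  return 0;
}

#endif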