Euclidean Integer Division – Efficient Implementation of Floored Division

c++divisioninteger-divisionmath

Floored division is when the result is always floored down (towards −∞), not towards 0:

division types

Is it possible to efficiently implement floored or euclidean integer division in C/C++?

(the obvious solution is to check the dividend's sign)

Best Answer

I've written a test program to benchmark the ideas presented here:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <windows.h>

#define N 10000000
#define M 100

int dividends[N], divisors[N], results[N];

__forceinline int floordiv_signcheck(int a, int b)
{
    return (a<0 ? a-(b-1) : a) / b;
}

__forceinline int floordiv_signcheck2(int a, int b)
{
    return (a - (a<0 ? b-1 : 0)) / b;
}

__forceinline int floordiv_signmultiply(int a, int b)
{
    return (a + (a>>(sizeof(a)*8-1))*(b-1)) / b;
}

__forceinline int floordiv_floatingpoint(int a, int b)
{
    // I imagine that the call to floor can be replaced to a cast
    // if you can get FPU rounding control to work (I couldn't).
    return floor((double)a / b);
}

void main()
{
    for (int i=0; i<N; i++)
    {
        dividends[i] = rand();
        do
            divisors[i] = rand();
        while (divisors[i]==0);
    }

    LARGE_INTEGER t0, t1;

    QueryPerformanceCounter(&t0);
    for (int j=0; j<M; j++)
        for (int i=0; i<N; i++)
            results[i] = floordiv_signcheck(dividends[i], divisors[i]);
    QueryPerformanceCounter(&t1);
    printf("signcheck    : %9llu\n", t1.QuadPart-t0.QuadPart);

    QueryPerformanceCounter(&t0);
    for (int j=0; j<M; j++)
        for (int i=0; i<N; i++)
            results[i] = floordiv_signcheck2(dividends[i], divisors[i]);
    QueryPerformanceCounter(&t1);
    printf("signcheck2   : %9llu\n", t1.QuadPart-t0.QuadPart);

    QueryPerformanceCounter(&t0);
    for (int j=0; j<M; j++)
        for (int i=0; i<N; i++)
            results[i] = floordiv_signmultiply(dividends[i], divisors[i]);
    QueryPerformanceCounter(&t1);
    printf("signmultiply : %9llu\n", t1.QuadPart-t0.QuadPart);

    QueryPerformanceCounter(&t0);
    for (int j=0; j<M; j++)
        for (int i=0; i<N; i++)
            results[i] = floordiv_floatingpoint(dividends[i], divisors[i]);
    QueryPerformanceCounter(&t1);
    printf("floatingpoint: %9llu\n", t1.QuadPart-t0.QuadPart);
}

Results:

signcheck    :  61458768
signcheck2   :  61284370
signmultiply :  61625076
floatingpoint: 287315364

So, according to my results, checking the sign is the fastest:

(a - (a<0 ? b-1 : 0)) / b
Related Question