高速剰余演算

ちょっと長いのでコード全文は「続きを読む」のほうで参照して頂きたいのですが...

// Residue table used by chinemod<3>: tbl3[i] == i % 3 for i = 0..61.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 61; with NDEBUG it adds nothing.
const unsigned int tbl3 [] =
{
    0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,
    0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,
    0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,
    0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,
    0,1,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 3 without a division.
// 2^16, 2^8 and 2^4 are each congruent to 1 modulo 3, so adding the high part
// to the low part at those widths keeps the residue. For a 32-bit unsigned int
// the folded value is at most 61, the last residue index of tbl3.
template<> inline unsigned int chinemod<3>(unsigned int a)
{
    unsigned int folded = (a & 0xFFFFu) + (a >> 16);
    folded = (folded & 0xFFu) + (folded >> 8);
    folded = (folded & 0xFu) + (folded >> 4);
    return tbl3[folded];
}

...このようなコードを chinemod<b>(a) の形で利用すると、普通に % 演算子を使って a %b の形で剰余演算するより高速に処理できることがあります。多少パフォーマンスの比較を行ったところでは % 演算子を使った場合に比べ最大で7〜8倍ぐらい高速化されることがありました。

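アルゴリズムとしては、割る側の数 b が 2 の n 乗の場合は単純に a &((2^n) -1) で求めます(ここで 2^n は C++ の XOR 演算ではなく「2 の n 乗」を表します)。それ以外の場合は、2 の n 乗に対し b で剰余演算を行い余りが m であれば、2 の n 乗ひとつぶんに付き余りが m になる、つまり a を b で割った余りは (a >>n) *m と a の下位 n ビットの和を b で割った余りに等しい、といった理屈で...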

a = ((a >>n) *m) +(a &((1u <<n) -1));

...のような形で桁のたたみ込みを行っていきある程度小さくなったところで最後はテーブル参照により解を得ます。

尚、このコードはテーブルのサイズをより大きくすることでたたみ込みの回数を減らしたりだとか、たたみ込みを行う位置(桁)の調整を行うなどしてより高速化を図る余地があると思います。

このやり方で通常の % 演算子より高速に処理されるかどうかは、割られる側の値、割る側の値、コンパイラの種類、最適化の指示、CPUの種類など多様な影響を受けますので、このやり方を試してみようと考えた場合は、利用予定の各種条件で様々なテストを行った上で適用することをオススメします。


どういった条件が整えば高速に処理されるのか調べてみようかとも思いましたが、詳細且つ厳密に調べるのはかな〜り面倒くさそうなんで多分私が実際に調査することはないと思います。

ちなみにちゃんと調査した場合でも予想される結果としては、若干試してみたところでは Intel C++ Compiler とともに % 演算子を使えという結論に落ち着きそうです。

// Debug-only padding for the residue lookup tables.
// With NDEBUG defined the macro expands to nothing. Otherwise it expands to
// 32 copies of CHINEMOD_TABLE_SENTINEL_VALUE, each followed by a comma, so it
// can be written as the last item of a table's initializer list. The value is
// 999 unless CHINEMOD_TABLE_SENTINEL_VALUE was defined before this point.
#ifdef NDEBUG
#define CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
#else
#ifndef CHINEMOD_TABLE_SENTINEL_VALUE
#define CHINEMOD_TABLE_SENTINEL_VALUE 999
#endif
// Four comma-terminated sentinels.
#define CHINEMOD_TABLE_SENTINEL_X4 \
    CHINEMOD_TABLE_SENTINEL_VALUE,CHINEMOD_TABLE_SENTINEL_VALUE,\
    CHINEMOD_TABLE_SENTINEL_VALUE,CHINEMOD_TABLE_SENTINEL_VALUE,
// Sixteen comma-terminated sentinels.
#define CHINEMOD_TABLE_SENTINEL_X16 \
    CHINEMOD_TABLE_SENTINEL_X4 CHINEMOD_TABLE_SENTINEL_X4 \
    CHINEMOD_TABLE_SENTINEL_X4 CHINEMOD_TABLE_SENTINEL_X4
// Thirty-two comma-terminated sentinels.
#define CHINEMOD_TABLE_SENTINEL_FOR_DEBUG \
    CHINEMOD_TABLE_SENTINEL_X16 CHINEMOD_TABLE_SENTINEL_X16
#endif

// Generic fallback: returns a % b using the built-in remainder operator.
// Explicit specializations in this file replace it for particular values of b.
// b must be non-zero, because a % 0 is undefined behaviour.
template<unsigned int b> inline unsigned int chinemod(unsigned int a)
{
    return a %b;
}
// a % 1 is 0 for every a, so the result does not depend on the argument.
template<> inline unsigned int chinemod<1>(unsigned int a)
{
    (void)a; // intentionally unused; kept so the signature matches the primary template
    return 0;
}
// a % 2 is the lowest bit of a.
template<> inline unsigned int chinemod<2>(unsigned int a)
{
    return a &1;
}
// Residue table used by chinemod<3>: tbl3[i] == i % 3 for i = 0..61.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 61; with NDEBUG it adds nothing.
const unsigned int tbl3 [] =
{
    0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,
    0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,
    0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,
    0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,
    0,1,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 3 without a division.
// 2^16, 2^8 and 2^4 are each congruent to 1 modulo 3, so adding the high part
// to the low part at those widths keeps the residue. For a 32-bit unsigned int
// the folded value is at most 61, the last residue index of tbl3.
template<> inline unsigned int chinemod<3>(unsigned int a)
{
    unsigned int folded = (a & 0xFFFFu) + (a >> 16);
    folded = (folded & 0xFFu) + (folded >> 8);
    folded = (folded & 0xFu) + (folded >> 4);
    return tbl3[folded];
}

// Computes a % 4; 4 is a power of two, so the residue is the low two bits of a.
template<> inline unsigned int chinemod<4>(unsigned int a)
{
    const unsigned int low_bits = 0x3u;
    return a & low_bits;
}

// Residue table used by chinemod<5>: tbl5[i] == i % 5 for i = 0..61.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 61; with NDEBUG it adds nothing.
const unsigned int tbl5 [] =
{
    0,1,2,3,4,0,1,2,3,4,0,1,2,3,4,
    0,1,2,3,4,0,1,2,3,4,0,1,2,3,4,
    0,1,2,3,4,0,1,2,3,4,0,1,2,3,4,
    0,1,2,3,4,0,1,2,3,4,0,1,2,3,4,
    0,1,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 5 without a division.
// 2^16, 2^8 and 2^4 are each congruent to 1 modulo 5, so every fold is a plain
// high-part + low-part sum. For a 32-bit unsigned int the folded value is at
// most 61, the last residue index of tbl5.
template<> inline unsigned int chinemod<5>(unsigned int a)
{
    unsigned int folded = (a & 0xFFFFu) + (a >> 16);
    folded = (folded & 0xFFu) + (folded >> 8);
    folded = (folded & 0xFu) + (folded >> 4);
    return tbl5[folded];
}

// Residue table used by chinemod<6>: tbl6[i] == i % 6 for i = 0..91.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 91; with NDEBUG it adds nothing.
const unsigned int tbl6 [] =
{
    0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,
    0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,
    0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,
    0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,
    0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,
    0,1,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 6 without a division.
// 2^16, 2^8 and 2^4 are each congruent to 4 modulo 6, so the part above the
// fold point is weighted by 4 before it is added to the low part. For a 32-bit
// unsigned int the folded value is at most 91, the last residue index of tbl6.
template<> inline unsigned int chinemod<6>(unsigned int a)
{
    unsigned int folded = 4u * (a >> 16) + (a & 0xFFFFu);
    folded = 4u * (folded >> 8) + (folded & 0xFFu);
    folded = 4u * (folded >> 8) + (folded & 0xFFu);
    folded = 4u * (folded >> 4) + (folded & 0xFu);
    return tbl6[folded];
}

// Residue table used by chinemod<7>: tbl7[i] == i % 7 for i = 0..109.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 109; with NDEBUG it adds nothing.
const unsigned int tbl7 [] =
{
    0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,
    0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,
    0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,
    0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,
    0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3,4,5,6,
    0,1,2,3,4,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 7 without a division.
// 2^15, 2^9 and 2^3 are each congruent to 1 modulo 7, so every fold is a plain
// high-part + low-part sum. For a 32-bit unsigned int the folded value is at
// most 109, the last residue index of tbl7.
template<> inline unsigned int chinemod<7>(unsigned int a)
{
    unsigned int folded = (a & 0x7FFFu) + (a >> 15);
    folded = (folded & 0x1FFu) + (folded >> 9);
    folded = (folded & 0x7u) + (folded >> 3);
    return tbl7[folded];
}
// Computes a % 8; 8 is a power of two, so the residue is the low three bits of a.
template<> inline unsigned int chinemod<8>(unsigned int a)
{
    const unsigned int low_bits = 0x7u;
    return a & low_bits;
}
// Residue table used by chinemod<9>: tbl9[i] == i % 9 for i = 0..127.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 127; with NDEBUG it adds nothing.
const unsigned int tbl9 [] =
{
    0,1,2,3,4,5,6,7,8,0,1,2,3,4,5,6,7,8,
    0,1,2,3,4,5,6,7,8,0,1,2,3,4,5,6,7,8,
    0,1,2,3,4,5,6,7,8,0,1,2,3,4,5,6,7,8,
    0,1,2,3,4,5,6,7,8,0,1,2,3,4,5,6,7,8,
    0,1,2,3,4,5,6,7,8,0,1,2,3,4,5,6,7,8,
    0,1,2,3,4,5,6,7,8,0,1,2,3,4,5,6,7,8,
    0,1,2,3,4,5,6,7,8,0,1,2,3,4,5,6,7,8,
    0,1,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 9 without a division.
// 2^18, 2^12 and 2^6 are each congruent to 1 modulo 9, so every fold is a
// plain high-part + low-part sum. For a 32-bit unsigned int the folded value
// is at most 127, the last residue index of tbl9.
template<> inline unsigned int chinemod<9>(unsigned int a)
{
    unsigned int folded = (a & 0x3FFFFu) + (a >> 18);
    folded = (folded & 0xFFFu) + (folded >> 12);
    folded = (folded & 0x3Fu) + (folded >> 6);
    return tbl9[folded];
}

// Residue table used by chinemod<10>: tbl10[i] == i % 10 for i = 0..107.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 107; with NDEBUG it adds nothing.
const unsigned int tbl10 [] =
{
    0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,
    0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,
    0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,
    0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,
    0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,
    0,1,2,3,4,5,6,7,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 10 without a division.
// 2^17, 2^9 and 2^5 are each congruent to 2 modulo 10, so the part above the
// fold point is doubled before it is added to the low part. For a 32-bit
// unsigned int the folded value is at most 107, the last residue index of tbl10.
template<> inline unsigned int chinemod<10>(unsigned int a)
{
    unsigned int folded = 2u * (a >> 17) + (a & 0x1FFFFu);
    folded = 2u * (folded >> 9) + (folded & 0x1FFu);
    folded = 2u * (folded >> 5) + (folded & 0x1Fu);
    return tbl10[folded];
}

// Residue table used by chinemod<11>: tbl11[i] == i % 11 for i = 0..105.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 105; with NDEBUG it adds nothing.
const unsigned int tbl11 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,
    0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,
    0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,
    0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9,10,
    0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4,5,6,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 11 without a division.
// Fold weights are the residues of the fold points modulo 11:
// 2^10 -> 1, 2^8 -> 3, 2^4 -> 5. For a 32-bit unsigned int the folded value
// is at most 105, the last residue index of tbl11.
template<> inline unsigned int chinemod<11>(unsigned int a)
{
    unsigned int folded = (a & 0x3FFu) + (a >> 10);
    folded = (folded & 0x3FFu) + (folded >> 10);
    folded = 3u * (folded >> 8) + (folded & 0xFFu);
    folded = 5u * (folded >> 4) + (folded & 0xFu);
    return tbl11[folded];
}

// Residue table used by chinemod<12>: tbl12[i] == i % 12 for i = 0..107.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 107; with NDEBUG it adds nothing.
const unsigned int tbl12 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,0,1,2,3,4,5,6,7,8,9,10,11,
    0,1,2,3,4,5,6,7,8,9,10,11,0,1,2,3,4,5,6,7,8,9,10,11,
    0,1,2,3,4,5,6,7,8,9,10,11,0,1,2,3,4,5,6,7,8,9,10,11,
    0,1,2,3,4,5,6,7,8,9,10,11,0,1,2,3,4,5,6,7,8,9,10,11,
    0,1,2,3,4,5,6,7,8,9,10,11,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 12 without a division.
// 2^16, 2^8, 2^6 and 2^4 are each congruent to 4 modulo 12, so the part above
// the fold point is weighted by 4 before it is added to the low part. For a
// 32-bit unsigned int the folded value is at most 107, the last residue index
// of tbl12.
template<> inline unsigned int chinemod<12>(unsigned int a)
{
    unsigned int folded = 4u * (a >> 16) + (a & 0xFFFFu);
    folded = 4u * (folded >> 8) + (folded & 0xFFu);
    folded = 4u * (folded >> 6) + (folded & 0x3Fu);
    folded = 4u * (folded >> 4) + (folded & 0xFu);
    return tbl12[folded];
}

// Residue table used by chinemod<13>: tbl13[i] == i % 13 for i = 0..86.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 86; with NDEBUG it adds nothing.
const unsigned int tbl13 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,12,0,1,2,3,4,5,6,7,8,9,10,11,12,
    0,1,2,3,4,5,6,7,8,9,10,11,12,0,1,2,3,4,5,6,7,8,9,10,11,12,
    0,1,2,3,4,5,6,7,8,9,10,11,12,0,1,2,3,4,5,6,7,8,9,10,11,12,
    0,1,2,3,4,5,6,7,8,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 13 without a division.
// Fold weights are the residues of the fold points modulo 13:
// 2^12 -> 1, 2^8 -> 9, 2^4 -> 3. For a 32-bit unsigned int the folded value
// is at most 86, the last residue index of tbl13.
template<> inline unsigned int chinemod<13>(unsigned int a)
{
    unsigned int folded = (a & 0xFFFu) + (a >> 12);
    folded = (folded & 0xFFFu) + (folded >> 12);
    folded = 9u * (folded >> 8) + (folded & 0xFFu);
    folded = 3u * (folded >> 4) + (folded & 0xFu);
    return tbl13[folded];
}

// Residue table used by chinemod<14>: tbl14[i] == i % 14 for i = 0..65.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 65; with NDEBUG it adds nothing.
const unsigned int tbl14 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,0,1,2,3,4,5,6,7,8,9,10,11,12,13,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,0,1,2,3,4,5,6,7,8,9,10,11,12,13,
    0,1,2,3,4,5,6,7,8,9,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 14 without a division.
// Fold weights are the residues of the fold points modulo 14:
// 2^16 -> 2, 2^8 -> 4, 2^4 -> 2. For a 32-bit unsigned int the folded value
// is at most 65, the last residue index of tbl14.
template<> inline unsigned int chinemod<14>(unsigned int a)
{
    unsigned int folded = 2u * (a >> 16) + (a & 0xFFFFu);
    folded = 4u * (folded >> 8) + (folded & 0xFFu);
    folded = 2u * (folded >> 4) + (folded & 0xFu);
    folded = 2u * (folded >> 4) + (folded & 0xFu);
    return tbl14[folded];
}

// Residue table used by chinemod<15>: tbl15[i] == i % 15 for i = 0..61.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 61; with NDEBUG it adds nothing.
const unsigned int tbl15 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
    0,1,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 15 without a division.
// 2^16, 2^8 and 2^4 are each congruent to 1 modulo 15, so every fold is a
// plain high-part + low-part sum. For a 32-bit unsigned int the folded value
// is at most 61, the last residue index of tbl15.
template<> inline unsigned int chinemod<15>(unsigned int a)
{
    unsigned int folded = (a & 0xFFFFu) + (a >> 16);
    folded = (folded & 0xFFu) + (folded >> 8);
    folded = (folded & 0xFu) + (folded >> 4);
    return tbl15[folded];
}

// Computes a % 16; 16 is a power of two, so the residue is the low four bits of a.
template<> inline unsigned int chinemod<16>(unsigned int a)
{
    const unsigned int low_bits = 0xFu;
    return a & low_bits;
}

// Residue table used by chinemod<17>: tbl17[i] == i % 17 for i = 0..170.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 170; with NDEBUG it adds nothing.
const unsigned int tbl17 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
    0,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 17 without a division.
// Fold weights are the residues of the fold points modulo 17:
// 2^16 -> 1, 2^8 -> 1, 2^7 -> 9. For a 32-bit unsigned int the folded value
// is at most 170, the last residue index of tbl17.
template<> inline unsigned int chinemod<17>(unsigned int a)
{
    unsigned int folded = (a & 0xFFFFu) + (a >> 16);
    folded = (folded & 0xFFu) + (folded >> 8);
    folded = 9u * (folded >> 7) + (folded & 0x7Fu);
    return tbl17[folded];
}

// Residue table used by chinemod<18>: tbl18[i] == i % 18 for i = 0..113.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 113; with NDEBUG it adds nothing.
const unsigned int tbl18 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,
    0,1,2,3,4,5,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 18 without a division.
// Fold weights are the residues of the fold points modulo 18:
// 2^13 -> 2, 2^7 -> 2, 2^6 -> 10. For a 32-bit unsigned int the folded value
// is at most 113, the last residue index of tbl18.
template<> inline unsigned int chinemod<18>(unsigned int a)
{
    unsigned int folded = 2u * (a >> 13) + (a & 0x1FFFu);
    folded = 2u * (folded >> 7) + (folded & 0x7Fu);
    folded = 2u * (folded >> 7) + (folded & 0x7Fu);
    folded = 10u * (folded >> 6) + (folded & 0x3Fu);
    return tbl18[folded];
}

// Residue table used by chinemod<19>: tbl19[i] == i % 19 for i = 0..112.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 112; with NDEBUG it adds nothing.
const unsigned int tbl19 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 19 without a division.
// Fold weights are the residues of the fold points modulo 19:
// 2^18 -> 1, 2^13 -> 3, 2^8 -> 9, 2^6 -> 7. For a 32-bit unsigned int the
// folded value is at most 112, the last residue index of tbl19.
template<> inline unsigned int chinemod<19>(unsigned int a)
{
    unsigned int folded = (a & 0x3FFFFu) + (a >> 18);
    folded = 3u * (folded >> 13) + (folded & 0x1FFFu);
    folded = 9u * (folded >> 8) + (folded & 0xFFu);
    folded = 7u * (folded >> 6) + (folded & 0x3Fu);
    return tbl19[folded];
}

// Residue table used by chinemod<20>: tbl20[i] == i % 20 for i = 0..71.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 71; with NDEBUG it adds nothing.
const unsigned int tbl20 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,
    0,1,2,3,4,5,6,7,8,9,10,11,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 20 without a division.
// 2^18, 2^10 and 2^6 are each congruent to 4 modulo 20, so the part above the
// fold point is weighted by 4 before it is added to the low part. For a 32-bit
// unsigned int the folded value is at most 71, the last residue index of tbl20.
template<> inline unsigned int chinemod<20>(unsigned int a)
{
    unsigned int folded = 4u * (a >> 18) + (a & 0x3FFFFu);
    folded = 4u * (folded >> 10) + (folded & 0x3FFu);
    folded = 4u * (folded >> 6) + (folded & 0x3Fu);
    folded = 4u * (folded >> 6) + (folded & 0x3Fu);
    return tbl20[folded];
}

// Residue table used by chinemod<21>: tbl21[i] == i % 21 for i = 0..127.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 127; with NDEBUG it adds nothing.
const unsigned int tbl21 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,
    0,1,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 21 without a division.
// 2^18, 2^12 and 2^6 are each congruent to 1 modulo 21, so every fold is a
// plain high-part + low-part sum. For a 32-bit unsigned int the folded value
// is at most 127, the last residue index of tbl21.
template<> inline unsigned int chinemod<21>(unsigned int a)
{
    unsigned int folded = (a & 0x3FFFFu) + (a >> 18);
    folded = (folded & 0xFFFu) + (folded >> 12);
    folded = (folded & 0x3Fu) + (folded >> 6);
    return tbl21[folded];
}

// Residue table used by chinemod<22>: tbl22[i] == i % 22 for i = 0..321.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 321; with NDEBUG it adds nothing.
const unsigned int tbl22 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 22 without a division.
// Fold weights are the residues of the fold points modulo 22:
// 2^11 -> 2, 2^7 -> 18, 2^5 -> 10. For a 32-bit unsigned int the folded value
// is at most 321, the last residue index of tbl22.
template<> inline unsigned int chinemod<22>(unsigned int a)
{
    unsigned int folded = 2u * (a >> 11) + (a & 0x7FFu);
    folded = 2u * (folded >> 11) + (folded & 0x7FFu);
    folded = 18u * (folded >> 7) + (folded & 0x7Fu);
    folded = 10u * (folded >> 5) + (folded & 0x1Fu);
    // Disabled alternative: repeating the bit-5 fold would narrow the range further.
    //folded = 10u * (folded >> 5) + (folded & 0x1Fu);
    return tbl22[folded];
}

// Residue table used by chinemod<23>: tbl23[i] == i % 23 for i = 0..103.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 103; with NDEBUG it adds nothing.
const unsigned int tbl23 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,
    0,1,2,3,4,5,6,7,8,9,10,11,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 23 without a division.
// Fold weights are the residues of the fold points modulo 23:
// 2^11 -> 1, 2^8 -> 3, 2^5 -> 9. For a 32-bit unsigned int the folded value
// is at most 103, the last residue index of tbl23.
template<> inline unsigned int chinemod<23>(unsigned int a)
{
    unsigned int folded = (a & 0x7FFu) + (a >> 11);
    folded = (folded & 0x7FFu) + (folded >> 11);
    folded = 3u * (folded >> 8) + (folded & 0xFFu);
    folded = 9u * (folded >> 5) + (folded & 0x1Fu);
    return tbl23[folded];
}

// Residue table used by chinemod<24>: tbl24[i] == i % 24 for i = 0..255.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 255; with NDEBUG it adds nothing.
const unsigned int tbl24 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 24 without a division.
// 2^15 and 2^7 are each congruent to 8 modulo 24, so the part above the fold
// point is weighted by 8 before it is added to the low part. For a 32-bit
// unsigned int the folded value is at most 255, the last residue index of tbl24.
template<> inline unsigned int chinemod<24>(unsigned int a)
{
    unsigned int folded = 8u * (a >> 15) + (a & 0x7FFFu);
    folded = 8u * (folded >> 15) + (folded & 0x7FFFu);
    folded = 8u * (folded >> 7) + (folded & 0x7Fu);
    folded = 8u * (folded >> 7) + (folded & 0x7Fu);
    // Disabled alternative: a further fold at bit 5 would narrow the range further.
    //folded = 8u * (folded >> 5) + (folded & 0x1Fu);
    return tbl24[folded];
}

// Residue table used by chinemod<25>: tbl25[i] == i % 25 for i = 0..136.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 136; with NDEBUG it adds nothing.
const unsigned int tbl25 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
    0,1,2,3,4,5,6,7,8,9,10,11,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 25 without a division.
// Fold weights are the residues of the fold points modulo 25:
// 2^20 -> 1, 2^14 -> 9, 2^7 -> 3, 2^5 -> 7. For a 32-bit unsigned int the
// folded value is at most 136, the last residue index of tbl25.
template<> inline unsigned int chinemod<25>(unsigned int a)
{
    unsigned int folded = (a & 0xFFFFFu) + (a >> 20);
    folded = 9u * (folded >> 14) + (folded & 0x3FFFu);
    folded = 3u * (folded >> 7) + (folded & 0x7Fu);
    folded = 7u * (folded >> 5) + (folded & 0x1Fu);
    return tbl25[folded];
}

// Residue table used by chinemod<26>: tbl26[i] == i % 26 for i = 0..325.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 325; with NDEBUG it adds nothing.
const unsigned int tbl26 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 26 without a division.
// Fold weights are the residues of the fold points modulo 26:
// 2^13 -> 2, 2^5 -> 6. For a 32-bit unsigned int the folded value is at most
// 325, the last residue index of tbl26.
template<> inline unsigned int chinemod<26>(unsigned int a)
{
    unsigned int folded = 2u * (a >> 13) + (a & 0x1FFFu);
    folded = 2u * (folded >> 13) + (folded & 0x1FFFu);
    folded = 6u * (folded >> 5) + (folded & 0x1Fu);
    folded = 6u * (folded >> 5) + (folded & 0x1Fu);
    return tbl26[folded];
}

// Residue table used by chinemod<27>: tbl27[i] == i % 27 for i = 0..176.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 176; with NDEBUG it adds nothing.
const unsigned int tbl27 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 27 without a division.
// Fold weights are the residues of the fold points modulo 27:
// 2^18 -> 1, 2^8 -> 13, 2^5 -> 5. For a 32-bit unsigned int the folded value
// is at most 176, the last residue index of tbl27.
template<> inline unsigned int chinemod<27>(unsigned int a)
{
    unsigned int folded = (a & 0x3FFFFu) + (a >> 18);
    folded = 13u * (folded >> 8) + (folded & 0xFFu);
    folded = 13u * (folded >> 8) + (folded & 0xFFu);
    folded = 5u * (folded >> 5) + (folded & 0x1Fu);
    return tbl27[folded];
}

// Residue table used by chinemod<28>: tbl28[i] == i % 28 for i = 0..91.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 91; with NDEBUG it adds nothing.
const unsigned int tbl28 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,
    0,1,2,3,4,5,6,7,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 28 without a division.
// 2^14, 2^8 and 2^5 are each congruent to 4 modulo 28, so the part above the
// fold point is weighted by 4 before it is added to the low part. For a 32-bit
// unsigned int the folded value is at most 91, the last residue index of tbl28.
template<> inline unsigned int chinemod<28>(unsigned int a)
{
    unsigned int folded = 4u * (a >> 14) + (a & 0x3FFFu);
    folded = 4u * (folded >> 8) + (folded & 0xFFu);
    folded = 4u * (folded >> 8) + (folded & 0xFFu);
    folded = 4u * (folded >> 5) + (folded & 0x1Fu);
    return tbl28[folded];
}

// Residue table used by chinemod<29>: tbl29[i] == i % 29 for i = 0..175.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 175; with NDEBUG it adds nothing.
const unsigned int tbl29 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
    0,1,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 29 without a division.
// Fold weights are the residues of the fold points modulo 29:
// 2^12 -> 7, 2^5 -> 3. For a 32-bit unsigned int the folded value is at most
// 175, the last residue index of tbl29.
template<> inline unsigned int chinemod<29>(unsigned int a)
{
    unsigned int folded = 7u * (a >> 12) + (a & 0xFFFu);
    folded = 7u * (folded >> 12) + (folded & 0xFFFu);
    folded = 3u * (folded >> 5) + (folded & 0x1Fu);
    folded = 3u * (folded >> 5) + (folded & 0x1Fu);
    return tbl29[folded];
}

// Residue table used by chinemod<30>: tbl30[i] == i % 30 for i = 0..107.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 107; with NDEBUG it adds nothing.
const unsigned int tbl30 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 30 without a division.
// 2^17, 2^9 and 2^5 are each congruent to 2 modulo 30, so the part above the
// fold point is doubled before it is added to the low part. For a 32-bit
// unsigned int the folded value is at most 107, the last residue index of tbl30.
template<> inline unsigned int chinemod<30>(unsigned int a)
{
    unsigned int folded = 2u * (a >> 17) + (a & 0x1FFFFu);
    folded = 2u * (folded >> 9) + (folded & 0x1FFu);
    folded = 2u * (folded >> 5) + (folded & 0x1Fu);
    return tbl30[folded];
}

// Residue table used by chinemod<31>: tbl31[i] == i % 31 for i = 0..66.
// When NDEBUG is not defined, CHINEMOD_TABLE_SENTINEL_FOR_DEBUG appends
// sentinel entries after index 66; with NDEBUG it adds nothing.
const unsigned int tbl31 [] =
{
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,
    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,
    0,1,2,3,4,
    CHINEMOD_TABLE_SENTINEL_FOR_DEBUG
};
// Computes a % 31 without a division.
// 2^15, 2^10 and 2^5 are each congruent to 1 modulo 31, so every fold is a
// plain high-part + low-part sum. For a 32-bit unsigned int the folded value
// is at most 66, the last residue index of tbl31.
template<> inline unsigned int chinemod<31>(unsigned int a)
{
    unsigned int folded = (a & 0x7FFFu) + (a >> 15);
    folded = (folded & 0x3FFu) + (folded >> 10);
    folded = (folded & 0x1Fu) + (folded >> 5);
    return tbl31[folded];
}

// Computes a % 32; 32 is a power of two, so the residue is the low five bits of a.
template<> inline unsigned int chinemod<32>(unsigned int a)
{
    const unsigned int low_bits = 0x1Fu;
    return a & low_bits;
}