Is this place so dead that a post from January 2021 is still on the first page. TalkChess died today. Maybe this place will liven up a bit.
The current state of the art may have just changed from Black Magic bitboards to Kindergarten Supper SISSY bitboards.
A recent modification and test by martinn at TalkChess has shown a 13% increase in performance of KSS over BM. Of course compiler and cpu can make a huge difference.
Here is working code for KSS if someone would like to investigate further.
Code: Select all
#pragma once
// Datum : 21.01.2023; updated code as of 16.03.2023 (martinn)
// Author : Michael J Sherwin 2023
// Content: The name is Kindergarten Super SISSY Bitboards - https://www.talkchess.com/forum3/viewtopic.php?f=7&t=81234&start=30
// The code can be further "minimized" according to the author
// C++20 constexpr port by Daniel Infuehr
#include <stdint.h>
namespace Chess_Lookup::KGSSB
{
static uint64_t vMask[64];
static uint64_t hMask[64];
static uint64_t dMask[64];
static uint64_t aMask[64];
static uint64_t vSubset[64][64];
static uint64_t hSubset[64][64];
static uint64_t dSubset[64][64];
static uint64_t aSubset[64][64];
// new lookup table for shifts in calculation of hIndex - martinn
static unsigned int shift_horizontal_table[64]; // new lookup table for shifts in calculation of hIndex
static constexpr uint64_t Size = (64 * 64 * 4 + 64 * 4) * sizeof(uint64_t);
enum { FILEa, FILEb, FILEc, FILEd, FILEe, FILEf, FILEg, FILEh };
enum { RANK1, RANK2, RANK3, RANK4, RANK5, RANK6, RANK7, RANK8 };
static void InitSquare(int sq)
{
int ts, dx, dy;
uint64_t occ, index;
int x = sq % 8;
int y = sq / 8;
// Initialize Kindergarten Super SISSY Bitboards
// diagonals
for (ts = sq + 9, dx = x + 1, dy = y + 1; dx < FILEh && dy < RANK8; dMask[sq] |= 1ull << ts, ts += 9, dx++, dy++);
for (ts = sq - 9, dx = x - 1, dy = y - 1; dx > FILEa && dy > RANK1; dMask[sq] |= 1ull << ts, ts -= 9, dx--, dy--);
// anti diagonals
for (ts = sq + 7, dx = x - 1, dy = y + 1; dx > FILEa && dy < RANK8; aMask[sq] |= 1ull << ts, ts += 7, dx--, dy++);
for (ts = sq - 7, dx = x + 1, dy = y - 1; dx < FILEh && dy > RANK1; aMask[sq] |= 1ull << ts, ts -= 7, dx++, dy--);
// diagonal indexes
for (index = 0; index < 64; index++) {
dSubset[sq][index] = 0;
occ = index << 1;
if ((sq & 7) != FILEh && (sq >> 3) != RANK8) {
for (ts = sq + 9; ; ts += 9) {
dSubset[sq][index] |= (1ull << ts);
if (occ & (1ull << (ts & 7))) break;
if ((ts & 7) == FILEh || (ts >> 3) == RANK8) break;
}
}
if ((sq & 7) != FILEa && (sq >> 3) != RANK1) {
for (ts = sq - 9; ; ts -= 9) {
dSubset[sq][index] |= (1ull << ts);
if (occ & (1ull << (ts & 7))) break;
if ((ts & 7) == FILEa || (ts >> 3) == RANK1) break;
}
}
}
// anti diagonal indexes
for (index = 0; index < 64; index++) {
aSubset[sq][index] = 0;
occ = index << 1;
if ((sq & 7) != FILEa && (sq >> 3) != RANK8) {
for (ts = sq + 7; ; ts += 7) {
aSubset[sq][index] |= (1ull << ts);
if (occ & (1ull << (ts & 7))) break;
if ((ts & 7) == FILEa || (ts >> 3) == RANK8) break;
}
}
if ((sq & 7) != FILEh && (sq >> 3) != RANK1) {
for (ts = sq - 7; ; ts -= 7) {
aSubset[sq][index] |= (1ull << ts);
if (occ & (1ull << (ts & 7))) break;
if ((ts & 7) == FILEh || (ts >> 3) == RANK1) break;
}
}
}
// horizontal lines
for (ts = sq + 1, dx = x + 1; dx < FILEh; hMask[sq] |= 1ull << ts, ts += 1, dx++);
for (ts = sq - 1, dx = x - 1; dx > FILEa; hMask[sq] |= 1ull << ts, ts -= 1, dx--);
// vertical indexes
for (index = 0; index < 64; index++) {
vSubset[sq][index] = 0;
uint64_t blockers = 0;
for (int i = 0; i <= 5; i++) {
if (index & (1ull << i)) {
blockers |= (1ull << (((5 - i) << 3) + 8));
}
}
if ((sq >> 3) != RANK8) {
for (ts = sq + 8; ; ts += 8) {
vSubset[sq][index] |= (1ull << ts);
if (blockers & (1ull << (ts - (ts & 7)))) break;
if ((ts >> 3) == RANK8) break;
}
}
if ((sq >> 3) != RANK1) {
for (ts = sq - 8; ; ts -= 8) {
vSubset[sq][index] |= (1ull << ts);
if (blockers & (1ull << (ts - (ts & 7)))) break;
if ((ts >> 3) == RANK1) break;
}
}
}
// horizontal indexes
for (index = 0; index < 64; index++) {
hSubset[sq][index] = 0;
occ = index << 1;
if ((sq & 7) != FILEh) {
for (ts = sq + 1; ; ts += 1) {
hSubset[sq][index] |= (1ull << ts);
if (occ & (1ull << (ts & 7))) break;
if ((ts & 7) == FILEh) break;
}
}
if ((sq & 7) != FILEa) {
for (ts = sq - 1; ; ts -= 1) {
hSubset[sq][index] |= (1ull << ts);
if (occ & (1ull << (ts & 7))) break;
if ((ts & 7) == FILEa) break;
}
}
}
}
static void Init()
{
for (int i = 0; i < 64; i++) {
InitSquare(i);
}
for (int i = 0; i < 64; i++) {
shift_horizontal_table[i] = (i & 56) + 1;
}
}
static constexpr uint64_t file_b2_b7 = 0x0002020202020200ull;
static constexpr uint64_t file_a2_a7 = 0x0001010101010100ull;
static constexpr uint64_t diag_c2h7 = 0x0080402010080400ull;
static uint64_t Bishop(int sq, uint64_t occ)
{
return
dSubset[sq][(((occ & dMask[sq]) * file_b2_b7) >> 58)] |
aSubset[sq][(((occ & aMask[sq]) * file_b2_b7) >> 58)];
}
static uint64_t Rook(int sq, uint64_t occ)
{
return
hSubset[sq][(occ >> shift_horizontal_table[sq]) & 63] |
vSubset[sq][((((occ >> (sq & 7)) & file_a2_a7) * diag_c2h7) >> 58)];
}
static uint64_t Queen(int sq, uint64_t occ)
{
return Rook(sq, occ) | Bishop(sq, occ);
}
}
It appears all that can be done has been done to optimize this code. So for better or worse this ends an ~16 year quest to outperform Magic Bitboards.