lichess.bib

@article{adnan:2024:unleashing-artificial-cognition-integrating-multiple-ai-systems,
  title         = {Unleashing Artificial Cognition: Integrating Multiple {AI} Systems},
  author        = {Adnan, Muntasir and Gamage, Buddhi and Xu, Zhiwei and Herath, Damith and Kuhn, Carlos C. N.},
  year          = {2024},
  journal       = {CoRR},
  volume        = {abs/2408.04910},
  doi           = {10.48550/ARXIV.2408.04910},
  url           = {https://doi.org/10.48550/arXiv.2408.04910},
  eprinttype    = {arXiv},
  eprint        = {2408.04910},
}

@inproceedings{ambrona:2022:practical-algorithm-chess-unwinnability,
  title         = {A Practical Algorithm for Chess Unwinnability},
  author        = {Ambrona, Miguel},
  year          = {2022},
  booktitle     = {11th International Conference on Fun with Algorithms, {FUN} 2022, May 30 to June 3, 2022, Island of Favignana, Sicily, Italy},
  publisher     = {Schloss Dagstuhl - Leibniz-Zentrum f{\"{u}}r Informatik},
  series        = {LIPIcs},
  volume        = {226},
  pages         = {2:1--2:20},
  doi           = {10.4230/LIPICS.FUN.2022.2},
  url           = {https://doi.org/10.4230/LIPIcs.FUN.2022.2},
  editor        = {Fraigniaud, Pierre and Uno, Yushi},
}

@article{banerjee:2024:skill-v-chance-card-board-games,
  title         = {Skill vs. Chance Quantification for Popular Card {\&} Board Games},
  author        = {Banerjee, Tathagata and De, Anushka and Maitra, Subhamoy and Mukherjee, Diganta},
  year          = {2024},
  journal       = {CoRR},
  volume        = {abs/2410.14363},
  doi           = {10.48550/ARXIV.2410.14363},
  url           = {https://doi.org/10.48550/arXiv.2410.14363},
  eprinttype    = {arXiv},
  eprint        = {2410.14363},
}

@inproceedings{barrish:2023:making-superhuman-ai-more-human,
  title         = {Making Superhuman {AI} More Human in Chess},
  author        = {Daniel Barrish and Steve Kroon and Brink van der Merwe},
  year          = {2023},
  booktitle     = {Advances in Computer Games - 18th International Conference, {ACG} 2023, Virtual Event, November 28-30, 2023, Revised Selected Papers},
  publisher     = {Springer},
  series        = {Lecture Notes in Computer Science},
  volume        = {14528},
  pages         = {3--14},
  doi           = {10.1007/978-3-031-54968-7_1},
  url           = {https://doi.org/10.1007/978-3-031-54968-7_1},
  editor        = {Michael Hartisch and Chu{-}Hsuan Hsueh and Jonathan Schaeffer},
  abstract      = {Computer chess research has traditionally focused on creating the strongest possible chess engine. Recently, however, attempts have been made to create engines that mimic the playing strength and style of human players. Our research proposes enhancements of models developed in this vein that more accurately imitate master-level players, as well as improve the prediction accuracy of existing models on weaker players. Our proposed enhancements are simple to apply by post-processing the output of existing chess engines. The performance of our enhancements was evaluated and compared using two metrics, prediction accuracy and average centipawn loss. We found that using an ensemble model over search depths maximised prediction accuracy, while an evaluation window filtering approach was preferable with respect to average centipawn loss.},
  keywords      = {Artificial intelligence, Chess, Action prediction},
}

@article{bart:2023:can-artificial-intelligence-identify-creativity,
  title         = {Can artificial intelligence identify creativity?: An empirical study},
  author        = {William Bart},
  year          = {2023},
  journal       = {Journal of Creativity},
  volume        = {33},
  number        = {2},
  pages         = {100057},
  doi           = {10.1016/j.yjoc.2023.100057},
  issn          = {2713-3745},
  url           = {https://www.sciencedirect.com/science/article/pii/S271337452300016X},
  keywords      = {Chess, Creative move, Creativity, Stockfish 15, Artificial intelligence},
  abstract      = {This article reports an investigation of the extent to which a chess program with an artificial intelligence component (i.e., Stockfish with NNUE) can identify 10 chess moves that are recognized as outstanding chess moves. Stockfish with NNUE was able to identify seven of the ten moves. Although Stockfish with NNUE is a very powerful chess program, it has some limitations in identifying creative chess moves. There is a discussion of those limitations.},
}

@inproceedings{bertrand:2023:limitations-elo-real-world-games-transitive-not-additive,
  title         = {On the Limitations of the {Elo}, Real-World Games are Transitive, not Additive},
  author        = {Quentin Bertrand and Wojciech Marian Czarnecki and Gauthier Gidel},
  year          = {2023},
  booktitle     = {International Conference on Artificial Intelligence and Statistics, 25-27 April 2023, Palau de Congressos, Valencia, Spain},
  publisher     = {{PMLR}},
  series        = {Proceedings of Machine Learning Research},
  volume        = {206},
  pages         = {2905--2921},
  url           = {https://proceedings.mlr.press/v206/bertrand23a.html},
  editor        = {Francisco J. R. Ruiz and Jennifer G. Dy and Jan{-}Willem van de Meent},
}

@article{chowdhary:2023:quantifying-human-performance-chess,
  title         = {Quantifying human performance in chess},
  author        = {Chowdhary, Sandeep and Iacopini, Iacopo and Battiston, Federico},
  year          = {2023},
  month         = feb,
  day           = {06},
  journal       = {Scientific Reports},
  volume        = {13},
  number        = {1},
  pages         = {2113},
  doi           = {10.1038/s41598-023-27735-9},
  issn          = {2045-2322},
  url           = {https://doi.org/10.1038/s41598-023-27735-9},
  abstract      = {From sports to science, the recent availability of large-scale data has allowed to gain insights on the drivers of human innovation and success in a variety of domains. Here we quantify human performance in the popular game of chess by leveraging a very large dataset comprising of over 120 million games between almost 1 million players. We find that individuals encounter hot streaks of repeated success, longer for beginners than for expert players, and even longer cold streaks of unsatisfying performance. Skilled players can be distinguished from the others based on their gaming behaviour. Differences appear from the very first moves of the game, with experts tending to specialize and repeat the same openings while beginners explore and diversify more. However, experts experience a broader response repertoire, and display a deeper understanding of different variations within the same line. Over time, the opening diversity of a player tends to decrease, hinting at the development of individual playing styles. Nevertheless, we find that players are often not able to recognize their most successful openings. Overall, our work contributes to quantifying human performance in competitive settings, providing a first large-scale quantitative analysis of individual careers in chess, helping unveil the determinants separating elite from beginner performance.},
}

@article{chowdhury:2021:predicting-chess-openings-modelling-opponents,
  title         = {Predicting Chess Opening Through Modelling Of Chess Opponents},
  author        = {Chowdhury, Debarpan Bose and Sen, Banashree},
  year          = {2021},
  journal       = {Webology},
  volume        = {18},
  number        = {6},
  issn          = {1735-188X},
}

@inproceedings{comarela:2021:lightweight-approach-prediction-errors-chess,
  title         = {A lightweight approach for predicting errors in chess matches},
  author        = {Comarela, Giovanni and Silva, Davi},
  year          = {2021},
  booktitle     = {Anais do XVIII Encontro Nacional de Intelig{\^{e}}ncia Artificial e Computacional},
  location      = {Evento Online},
  publisher     = {SBC},
  address       = {Porto Alegre, RS, Brasil},
  pages         = {703--714},
  doi           = {10.5753/eniac.2021.18296},
  issn          = {2763-9061},
  url           = {https://sol.sbc.org.br/index.php/eniac/article/view/18296},
}

@article{czech:2020:learning-crazyhouse-above-world-champion-deep-neural-networks-human-data,
  title         = {Learning to Play the Chess Variant {Crazyhouse} Above World Champion Level With Deep Neural Networks and Human Data},
  author        = {Johannes Czech and Moritz Willig and Alena Beyer and Kristian Kersting and Johannes F{\"{u}}rnkranz},
  year          = {2020},
  journal       = {Frontiers in Artificial Intelligence},
  volume        = {3},
  pages         = {24},
  doi           = {10.3389/FRAI.2020.00024},
  url           = {https://doi.org/10.3389/frai.2020.00024},
}

@inproceedings{czech:2021:improving-alphazero-monte-carlo-graph-search,
  title         = {Improving {AlphaZero} Using {Monte-Carlo} Graph Search},
  author        = {Johannes Czech and Patrick Korus and Kristian Kersting},
  year          = {2021},
  booktitle     = {Proceedings of the Thirty-First International Conference on Automated Planning and Scheduling, {ICAPS} 2021, Guangzhou, China (virtual), August 2-13, 2021},
  publisher     = {{AAAI} Press},
  pages         = {103--111},
  url           = {https://ojs.aaai.org/index.php/ICAPS/article/view/15952},
  editor        = {Susanne Biundo and Minh Do and Robert Goldman and Michael Katz and Qiang Yang and Hankz Hankui Zhuo},
}

@inproceedings{das:2020:leveraging-rationales-human-task-performance,
  title         = {Leveraging rationales to improve human task performance},
  author        = {Das, Devleena and Chernova, Sonia},
  year          = {2020},
  booktitle     = {Proceedings of the 25th International Conference on Intelligent User Interfaces},
  location      = {Cagliari, Italy},
  publisher     = {Association for Computing Machinery},
  address       = {New York, NY, USA},
  series        = {IUI '20},
  pages         = {510--518},
  doi           = {10.1145/3377325.3377512},
  isbn          = {9781450371186},
  url           = {https://doi.org/10.1145/3377325.3377512},
  abstract      = {Machine learning (ML) systems across many application areas are increasingly demonstrating performance that is beyond that of humans. In response to the proliferation of such models, the field of Explainable AI (XAI) has sought to develop techniques that enhance the transparency and interpretability of machine learning methods. In this work, we consider a question not previously explored within the XAI and ML communities: Given a computational system whose performance exceeds that of its human user, can explainable AI capabilities be leveraged to improve the performance of the human? We study this question in the context of the game of Chess, for which computational game engines that surpass the performance of the average player are widely available. We introduce the Rationale-Generating Algorithm, an automated technique for generating rationales for utility-based computational methods, which we evaluate with a multi-day user study against two baselines. The results show that our approach produces rationales that lead to statistically significant improvement in human task performance, demonstrating that rationales automatically generated from an AI's internal task model can be used not only to explain what the system is doing, but also to instruct the user and ultimately improve their task performance.},
  numpages      = {9},
  keywords      = {explainable AI, machine learning},
}

@article{de-marzo:2023:complexity-similarity-chess-openings-community-data,
  title         = {Quantifying the complexity and similarity of chess openings using online chess community data},
  author        = {De Marzo, Giordano and Servedio, Vito D. P.},
  year          = {2023},
  month         = apr,
  day           = {01},
  journal       = {Scientific Reports},
  volume        = {13},
  number        = {1},
  pages         = {5327},
  doi           = {10.1038/s41598-023-31658-w},
  issn          = {2045-2322},
  url           = {https://doi.org/10.1038/s41598-023-31658-w},
  abstract      = {Chess is a centuries-old game that continues to be widely played worldwide. Opening Theory is one of the pillars of chess and requires years of study to be mastered. In this paper, we use the games played in an online chess platform to exploit the ``wisdom of the crowd'' and answer questions traditionally tackled only by chess experts. We first define a relatedness network of chess openings that quantifies how similar two openings are to play. Using this network, we identify communities of nodes corresponding to the most common opening choices and their mutual relationships. Furthermore, we demonstrate how the relatedness network can be used to forecast future openings players will start to play, with back-tested predictions outperforming a random predictor. We then apply the Economic Fitness and Complexity algorithm to measure the difficulty of openings and players' skill levels. Our study not only provides a new perspective on chess analysis but also opens the possibility of suggesting personalized opening recommendations using complex network theory.},
}

@misc{deletang:2024:generative-reinforcement-learning-with-transformers,
  title         = {Generative Reinforcement Learning with Transformers},
  author        = {Deletang, Gregoire and Ruoss, Anian and Wenliang, Li Kevin and Catt, Elliot and Genewein, Tim and Grau-Moya, Jordi and Hutter, Marcus and Veness, Joel},
  year          = {2024},
  url           = {https://openreview.net/forum?id=6qtDu7hVPF},
}

@inproceedings{demeter:2021:probing-learning-representation-language-models-closed-domains,
  title         = {Who{'}s on First?: Probing the Learning and Representation Capabilities of Language Models on Deterministic Closed Domains},
  author        = {Demeter, David and Downey, Doug},
  year          = {2021},
  month         = nov,
  booktitle     = {Proceedings of the 25th Conference on Computational Natural Language Learning},
  publisher     = {Association for Computational Linguistics},
  address       = {Online},
  pages         = {210--222},
  doi           = {10.18653/v1/2021.conll-1.16},
  url           = {https://aclanthology.org/2021.conll-1.16},
  editor        = {Bisazza, Arianna and Abend, Omri},
  abstract      = {The capabilities of today{'}s natural language processing systems are typically evaluated using large datasets of curated questions and answers. While these are critical benchmarks of progress, they also suffer from weakness due to artificial distributions and incomplete knowledge. Artifacts arising from artificial distributions can overstate language model performance, while incomplete knowledge limits fine-grained analysis. In this work, we introduce a complementary benchmarking approach based on SimPlified Language Activity Traces (SPLAT). SPLATs are corpora of language encodings of activity in some closed domain (we study traces from chess and baseball games in this work). SPLAT datasets use naturally-arising distributions, allow the generation of question-answer pairs at scale, and afford complete knowledge in their closed domains. We show that language models of three different architectures can answer questions about world states using only verb-like encodings of activity. Our approach is extensible to new language models and additional question-answering tasks.},
}

@inproceedings{ding:2024:easy2hard-bench,
  title         = {{Easy2Hard-Bench}: Standardized Difficulty Labels for Profiling {LLM} Performance and Generalization},
  author        = {Ding, Mucong and Deng, Chenghao and Choo, Jocelyn and Wu, Zichu and Agrawal, Aakriti and Schwarzschild, Avi and Zhou, Tianyi and Goldstein, Tom and Langford, John and Anandkumar, A. and Huang, Furong},
  year          = {2024},
  month         = sep,
  booktitle     = {NeurIPS 2024},
  url           = {https://www.microsoft.com/en-us/research/publication/easy2hard-bench-standardized-difficulty-labels-for-profiling-llm-performance-and-generalization/},
  abstract      = {While generalization over tasks from easy to hard is crucial to profile language models (LLMs), the datasets with fine-grained difficulty annotations for each problem across a broad range of complexity are still blank. Aiming to address this limitation, we present Easy2Hard-Bench, a consistently formatted collection of 6 benchmark datasets spanning various domains, such as mathematics and programming problems, chess puzzles, and reasoning questions. Each problem within these datasets is annotated with numerical difficulty scores. To systematically estimate problem difficulties, we collect abundant performance data on attempts to each problem by humans in the real world or LLMs on the prominent leaderboard. Leveraging the rich performance data, we apply well-established difficulty ranking systems, such as Item Response Theory (IRT) and Glicko-2 models, to uniformly assign numerical difficulty scores to problems. Moreover, datasets in Easy2Hard-Bench distinguish themselves from previous collections by a higher proportion of challenging problems. Through extensive experiments with six state-of-the-art LLMs, we provide a comprehensive analysis of their performance and generalization capabilities across varying levels of difficulty, with the aim of inspiring future research in LLM generalization. The datasets are available at https://huggingface.co/datasets/furonghuang-lab/Easy2Hard-Bench.},
}

@article{eisma:2024:turing-tests-chess-human-subjectivity,
  title         = {{Turing} Tests in Chess: An Experiment Revealing the Role of Human Subjectivity},
  author        = {Yke Bauke Eisma and Robin Koerts and Joost {de Winter}},
  year          = {2024},
  journal       = {Computers in Human Behavior Reports},
  pages         = {100496},
  doi           = {10.1016/j.chbr.2024.100496},
  issn          = {2451-9588},
  url           = {https://www.sciencedirect.com/science/article/pii/S2451958824001295},
  abstract      = {With the growing capabilities of AI, technology is increasingly able to match or even surpass human performance. In the current study, focused on the game of chess, we investigated whether chess players could distinguish if they were playing against a human or a computer, and how they achieved this. A total of 24 chess players each played eight 5+0 Blitz games from different starting positions. They played against (1) a human, (2) Maia, a neural network-based chess engine trained to play in a human-like manner, (3) Stockfish 16, the best chess engine available, downgraded to play at a lower level, and (4) Stockfish 16 at its maximal level. The opponent's move time was fixed at 10 seconds. During the game, participants verbalized their thoughts, and after each game, they indicated by means of a questionnaire whether they thought they had played against a human or a machine and if there were particular moves that revealed the nature of the opponent. The results showed that Stockfish at the highest level was usually correctly identified as an engine, while Maia was often incorrectly identified as a human. The moves of the downgraded Stockfish were relatively often labeled as `strange' by the participants. In conclusion, the Turing test, as applied here in a domain where computers can perform superhumanly, is essentially a test of whether the chess computer can devise suboptimal moves that correspond to human moves, and not necessarily a test of computer intelligence.},
}

@inproceedings{feng:2023:chessgpt-policy-learning-language-modeling,
  title         = {{ChessGPT}: Bridging Policy Learning and Language Modeling},
  author        = {Xidong Feng and Yicheng Luo and Ziyan Wang and Hongrui Tang and Mengyue Yang and Kun Shao and David Mguni and Yali Du and Jun Wang},
  year          = {2023},
  booktitle     = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
  url           = {http://papers.nips.cc/paper_files/paper/2023/hash/16b14e3f288f076e0ca73bdad6405f77-Abstract-Datasets_and_Benchmarks.html},
  editor        = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine},
}

@inproceedings{guntz:2018:role-emotion-problem-solving,
  title         = {The role of emotion in problem solving: first results from observing chess},
  author        = {Thomas Guntz and James L. Crowley and Dominique Vaufreydaz and Raffaella Balzarini and Philippe Dessus},
  year          = {2018},
  booktitle     = {Proceedings of the Workshop on Modeling Cognitive Processes from Multimodal Data, MCPMD@ICMI 2018, Boulder, CO, USA, October 16, 2018},
  publisher     = {{ACM}},
  pages         = {12},
  url           = {http://dl.acm.org/citation.cfm?id=3279846},
}

@article{helfenstein:2024:checkmating-one-many-mixture-of-experts-mcts-improve-chess,
  title         = {Checkmating One, by Using Many: Combining Mixture of Experts with {MCTS} to Improve in Chess},
  author        = {Helfenstein, Felix and Bl{\"{u}}ml, Jannis and Czech, Johannes and Kersting, Kristian},
  year          = {2024},
  journal       = {CoRR},
  volume        = {abs/2401.16852},
  doi           = {10.48550/ARXIV.2401.16852},
  url           = {https://doi.org/10.48550/arXiv.2401.16852},
  eprinttype    = {arXiv},
  eprint        = {2401.16852},
}

@inproceedings{holdaway:2021:risk-taking-adversarial-games-what-billion-chess-games-tell-us,
  title         = {Risk-taking in adversarial games: What can 1 billion online chess games tell us?},
  author        = {Holdaway, Cameron and Vul, Ed},
  year          = {2021},
  booktitle     = {Proceedings of the 43rd Annual Meeting of the Cognitive Science Society, CogSci 2021, virtual, July 26-29, 2021},
  publisher     = {cognitivesciencesociety.org},
  url           = {https://escholarship.org/uc/item/403764rd},
  editor        = {Fitch, W. Tecumseh and Lamm, Claus and Leder, Helmut and Te{\ss}mar{-}Raible, Kristin},
}

@mastersthesis{hoque:2022:classification-anomaly-detection-chess,
  title         = {Classification of Chess Games: An Exploration of Classifiers for Anomaly Detection in Chess},
  author        = {Hoque, Masudul},
  year          = {2021},
  url           = {https://cornerstone.lib.mnsu.edu/etds/1119/},
  school        = {Minnesota State University, Mankato},
  type          = {Master's thesis},
  abstract      = {Chess is a strategy board game with its inception dating back to the 15th century. The Covid-19 pandemic has led to a chess boom online with 95,853,038 chess games being played during January, 2021 on lichess.com. Along with the chess boom, instances of cheating have also become more rampant. Classifications have been used for anomaly detection in different fields and thus it is a natural idea to develop classifiers to detect cheating in chess. However, there are no specific examples of this, and it is difficult to obtain data where cheating has occurred. So, in this paper, we develop 4 machine learning classifiers, Linear Discriminant Analysis, Quadratic Discriminant Analysis, Multinomial Logistic Regression, and K-Nearest Neighbour classifiers to predict chess game results and explore predictors that produce the best accuracy performance. We use Confusion Matrix, K Fold Cross-Validation, and Leave-One-Out Cross-Validation methods to find the accuracy metrics. There are three phases of analysis. In phase I, we train classifiers using 1.94 million over the board game as training data and 20 thousand online games as testing data and obtain accuracy metrics. In phase II, we select a smaller pool of 212 games, select additional predictor variables from chess engine evaluation of the moves played in those games and check whether the inclusion of the variables improve performance. Finally, in phase III, we investigate for patterns in misclassified cases to define anomalies. From phase I, the models are not performing at a utilizable level of accuracy (44-63\%). For all classifiers, it is no better than deciding the class with a coin toss. K-Nearest Neighbour with K = 7 was the best model. In phase II, adding the new predictors improved the performance of all the classifiers significantly across all validation methods. In fact, using only significant variables as predictors produced highly accurate classifiers.
Finally, from phase III, we could not find any patterns or significant differences between the predictors for both correct classifications and misclassifications. In conclusion, machine learning classification is only one useful tool to spot instances that indicates anomalies. However, we cannot simply judge anomalous games using only this method.},
}

@article{jenner:2024:evidence-lookahead-chess-neural-network,
  title         = {Evidence of Learned Look-Ahead in a Chess-Playing Neural Network},
  author        = {Jenner, Erik and Kapur, Shreyas and Georgiev, Vasil and Allen, Cameron and Emmons, Scott and Russell, Stuart},
  year          = {2024},
  journal       = {CoRR},
  volume        = {abs/2406.00877},
  doi           = {10.48550/ARXIV.2406.00877},
  url           = {https://doi.org/10.48550/arXiv.2406.00877},
  eprinttype    = {arXiv},
  eprint        = {2406.00877},
}

@inproceedings{karn:2024:personalized-recommendation-chess-puzzles,
  title         = {Personalized recommendation of chess puzzles},
  author        = {Karn, Aryan and Biradar, Chinmay Anil and Puranik, Aryan and Kireeti, Attili Krishna and Jayashree, R},
  year          = {2024},
  booktitle     = {Computer Science Engineering: Proceedings of the 1st International Conference on Computing and Intelligent Information Systems (ICCIIS 2024), Bangalore, India, 19-20th April, 2024 Volume 1},
  publisher     = {CRC Press},
  pages         = {29},
}

@inproceedings{karvonen:2024:dictionary-learning-board-games,
  title         = {Measuring Progress in Dictionary Learning for Language Model Interpretability with Board Game Models},
  author        = {Karvonen, Adam and Wright, Benjamin and Rager, Can and Angell, Rico and Brinkmann, Jannik and Smith, Logan Riggs and Verdun, Claudio Mayrink and Bau, David and Marks, Samuel},
  year          = {2024},
  booktitle     = {ICML 2024 Workshop on Mechanistic Interpretability},
  url           = {https://openreview.net/forum?id=qzsDKwGJyB},
}

@inproceedings{krishnan:2022:automatic-synthesis-interpretable-chess-tactics,
  title         = {Towards the automatic synthesis of interpretable chess tactics},
  author        = {Krishnan, Abhijeet and Martens, Chris},
  year          = {2022},
  month         = mar,
  booktitle     = {Proceedings of the Explainable Agency in Artificial Intelligence Workshop, 36th AAAI Conference on Artificial Intelligence},
  publisher     = {Association for the Advancement of Artificial Intelligence},
  pages         = {91--97},
}

@inproceedings{krishnan:2022:synthesizing-interpretable-chess-tactics-player-games,
  title         = {Synthesizing interpretable chess tactics from player games},
  author        = {Krishnan, Abhijeet and Martens, Chris},
  year          = {2022},
  month         = oct,
  booktitle     = {Proceedings of the Workshop on Artificial Intelligence for Strategy Games (SG) and Esports Analytics (EA), 18th AAAI Conference on Artificial Intelligence and Interactive Digital Entertainment},
  publisher     = {Association for the Advancement of Artificial Intelligence},
}

@misc{kuperwajs:2024:learning-from-rewards-social-information-strategic-behavior,
  title         = {Learning from rewards and social information in naturalistic strategic behavior},
  author        = {Kuperwajs, Ionatan and van Opheusden, Bas and Russek, Evan and Griffiths, Tom},
  year          = {2024},
  month         = aug,
  howpublished  = {PsyArXiv},
  publisher     = {PsyArXiv},
  doi           = {10.31234/osf.io/d8zje},
  url           = {https://osf.io/preprints/psyarxiv/d8zje},
}

@inproceedings{laarhoven:2022:transparent-cheat-detection-online-chess,
  title         = {Towards Transparent Cheat Detection in Online Chess: An Application of Human and Computer Decision-Making Preferences},
  author        = {Thijs Laarhoven and Aditya Ponukumati},
  year          = {2022},
  booktitle     = {Computers and Games - International Conference, {CG} 2022, Virtual Event, November 22-24, 2022, Revised Selected Papers},
  publisher     = {Springer},
  series        = {Lecture Notes in Computer Science},
  volume        = {13865},
  pages         = {163--180},
  doi           = {10.1007/978-3-031-34017-8_14},
  url           = {https://doi.org/10.1007/978-3-031-34017-8_14},
  editor        = {Cameron Browne and Akihiro Kishimoto and Jonathan Schaeffer},
}

@inproceedings{le-louedec:2019:chess-player-attention-prediction,
  title         = {Deep learning investigation for chess player attention prediction using eye-tracking and game data},
  author        = {Le Louedec, Justin and Guntz, Thomas and Crowley, James L. and Vaufreydaz, Dominique},
  year          = {2019},
  booktitle     = {Proceedings of the 11th {ACM} Symposium on Eye Tracking Research {\&} Applications, {ETRA} 2019, Denver, CO, USA, June 25-28, 2019},
  publisher     = {{ACM}},
  pages         = {1:1--1:9},
  doi           = {10.1145/3314111.3319827},
  url           = {https://doi.org/10.1145/3314111.3319827},
  editor        = {Krzysztof Krejtz and Bonita Sharif},
}

@article{maharaj:2022:gambits-theory-evidence,
  title         = {Gambits: Theory and evidence},
  author        = {Maharaj, Shiva and Polson, Nick and Turk, Christian},
  year          = {2022},
  journal       = {Applied Stochastic Models in Business and Industry},
  volume        = {38},
  number        = {4},
  pages         = {572--589},
  doi           = {10.1002/asmb.2684},
  url           = {https://onlinelibrary.wiley.com/doi/abs/10.1002/asmb.2684},
  keywords      = {adversarial risk analysis, AI, AlphaZero, behavioral economics, behavioral game theory, behavioral science, chess gambits, decision-making, deep learning, neural network, Q learning, rationality, skewness preference, Stafford Gambit, Stockfish 14},
  eprint        = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/asmb.2684},
  abstract      = {Gambits are central to human decision-making. Our goal is to provide a theory of Gambits. A Gambit is a combination of psychological and technical factors designed to disrupt predictable play. Chess provides an environment to study gambits and behavioral game theory. Our theory is based on the Bellman optimality path for sequential decision-making. This allows us to calculate the $Q$-values of a Gambit where material (usually a pawn) is sacrificed for dynamic play. On the empirical side, we study the effectiveness of a number of popular chess Gambits. This is a natural setting as chess Gambits require a sequential assessment of a set of moves (a.k.a. policy) after the Gambit has been accepted. Our analysis uses Stockfish 14.1 to calculate the optimal Bellman $Q$-values, which fundamentally measures if a position is winning or losing. To test whether Bellman's equation holds in play, we estimate the transition probabilities to the next board state via a database of expert human play. This then allows us to test whether the Gambiteer is following the optimal path in his decision-making. Our methodology is applied to the popular Stafford and reverse Stafford (a.k.a. Boden--Kieretsky--Morphy) Gambit and other common ones including the Smith--Morra, Goring, Danish and Halloween Gambits. We build on research in human decision-making by proving an irrational skewness preference within agents in chess. We conclude with directions for future research.},
}

@inproceedings{mcilroy-young:2020:aligning-superhuman-ai-human-behavior,
  title         = {Aligning Superhuman {AI} with Human Behavior: Chess as a Model System},
  author        = {Reid McIlroy{-}Young and Siddhartha Sen and Jon M. Kleinberg and Ashton Anderson},
  year          = {2020},
  booktitle     = {{KDD} '20: The 26th {ACM} {SIGKDD} Conference on Knowledge Discovery and Data Mining, Virtual Event, CA, USA, August 23-27, 2020},
  publisher     = {{ACM}},
  pages         = {1677--1687},
  doi           = {10.1145/3394486.3403219},
  url           = {https://doi.org/10.1145/3394486.3403219},
  editor        = {Rajesh Gupta and Yan Liu and Jiliang Tang and B. Aditya Prakash},
}

@article{mcilroy-young:2020:learning-personalized-models-human-behavior-chess,
  title         = {Learning Personalized Models of Human Behavior in Chess},
  author        = {Reid McIlroy{-}Young and Russell Wang and Siddhartha Sen and Jon M. Kleinberg and Ashton Anderson},
  year          = {2020},
  journal       = {CoRR},
  volume        = {abs/2008.10086},
  url           = {https://arxiv.org/abs/2008.10086},
  eprinttype    = {arXiv},
  eprint        = {2008.10086},
}

@inproceedings{mcilroy-young:2021:chess-stylometry,
  title         = {Detecting Individual Decision-Making Style: Exploring Behavioral Stylometry in Chess},
  author        = {Reid McIlroy{-}Young and Yu Wang and Siddhartha Sen and Jon M. Kleinberg and Ashton Anderson},
  year          = {2021},
  booktitle     = {Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6-14, 2021, virtual},
  pages         = {24482--24497},
  url           = {https://proceedings.neurips.cc/paper/2021/hash/ccf8111910291ba472b385e9c5f59099-Abstract.html},
  editor        = {Marc'Aurelio Ranzato and Alina Beygelzimer and Yann N. Dauphin and Percy Liang and Jennifer Wortman Vaughan},
}

@inproceedings{mcilroy-young:2022:learning-models-individual-behavior-chess,
  title         = {Learning Models of Individual Behavior in Chess},
  author        = {Reid McIlroy{-}Young and Russell Wang and Siddhartha Sen and Jon M. Kleinberg and Ashton Anderson},
  year          = {2022},
  booktitle     = {{KDD} '22: The 28th {ACM} {SIGKDD} Conference on Knowledge Discovery and Data Mining, Washington, DC, USA, August 14 - 18, 2022},
  publisher     = {{ACM}},
  pages         = {1253--1263},
  doi           = {10.1145/3534678.3539367},
  url           = {https://doi.org/10.1145/3534678.3539367},
  editor        = {Aidong Zhang and Huzefa Rangwala},
}

@article{mok:2021:time-online-digital-well-being,
  title         = {The Complementary Nature of Perceived and Actual Time Spent Online in Measuring Digital Well-being},
  author        = {Mok, Lillio and Anderson, Ashton},
  year          = {2021},
  month         = apr,
  journal       = {Proceedings of the {ACM} on Human-Computer Interaction},
  publisher     = {Association for Computing Machinery},
  address       = {New York, NY, USA},
  volume        = {5},
  number        = {CSCW1},
  doi           = {10.1145/3449160},
  url           = {https://doi.org/10.1145/3449160},
  issue_date    = {April 2021},
  abstract      = {As online platforms become ubiquitous, there is growing concern that their use can potentially lead to negative outcomes in users' personal lives, such as disrupted sleep and impacted social relationships. A central question in the literature studying these problematic effects is whether they are associated with the amount of time users spend on online platforms. This is often addressed by either analyzing self-reported measures of time spent online, which are generally inaccurate, or using objective metrics derived from server logs or tracking software. Nonetheless, how the two types of time measures comparatively relate to problematic effects -- whether they complement or are redundant with each other in predicting problematicity -- remains unknown. Additionally, transparent research into this question is hindered by the literature's focus on closed platforms with inaccessible data, as well as selective analytical decisions that may lead to reproducibility issues. In this work, we investigate how both self-reported and data-derived metrics of time spent relate to potentially problematic effects arising from the use of an open, non-profit online chess platform. These effects include disruptions to sleep, relationships, school and work performance, and self-control. To this end, we distributed a gamified survey to players and linked their responses with publicly-available game logs. We find problematic effects to be associated with both self-reported and data-derived usage measures to similar degrees. However, analytical models incorporating both self-reported and actual time explain problematic effects significantly more effectively than models with either type of measure alone. Furthermore, these results persist across thousands of possible analytical decisions when using a robust and transparent statistical framework.
This suggests that the two methods of measuring time spent measure contain distinct, complementary information about problematic usage outcomes and should be used in conjunction with each other.},
  articleno     = {86},
  numpages      = {27},
  keywords      = {online well-being, problematic platform use, specification curve analysis, survey methodology},
}

@phdthesis{mok:2024:measuring-digital-welfare-online-systems,
  title         = {Measuring the Digital Welfare of Online Social Systems},
  author        = {Mok, Lillio},
  year          = {2024},
  note          = {http://hdl.handle.net/1807/140863},
  school        = {University of Toronto},
  keywords      = {Computational Social Science, Data Science, Human-AI Interaction, Human-Computer Interaction, Web Science},
  annote        = {We rely on all manners of digital systems to organize and facilitate our human functions. From social networks connecting us to each other, to content providers keeping us perpetually entertained, to search engines serving each of our informational needs, to computational models informing us how healthy we are, to artificially-intelligent coaches supplementing our natural intelligence, every corner of human existence is permeated by the digital tools we create. Accompanying the boons of these systems, however, are increasingly complex risks to our digital health. Our attention is pulled into cyberspace via algorithms that use billions of datapoints to learn what we like, sometimes to the detriment of our physical wellness. Ideological rifts online threaten our societal harmony as partisans become ever more polarized, whose obsession with political content in turn feeds the underbelly of our social media ecosystem. All the while, the same data underpinning these online interactions also allow others to make finely-optimized decisions about us, often to the detriment of the disadvantaged. This thesis offers a more optimistic vision: that the same computational infrastructure powering our potentially perilous systems can be repurposed to help us understand their perils. We first outline a framework for rigorously assessing the welfare of our digital systems through the well-being of individual users, the cohesion of user communities, and whether the systems themselves deserve trust. We then utilize this framework to conduct four empirical studies measuring the extent to which digital welfare is preserved or endangered by data-driven systems. At the level of individual users, we directly measure how spending time on a large-scale chess platform, Lichess, can be perceived as detrimental to personal well-being. 
We find that perceived harms are explained not only by the time that people believe they spend online, but also the actual time they spend engaging with the platform. For groups of users, we quantify how partisan users on the Reddit platform are selective towards politically-congruent news outlets, thus consuming and disseminating polarized news. Despite the platform appearing polarized on aggregate, we discover that narrow, hyper-partisan communities are responsible for deeply-ingrained ideological segregation. We then extend this result by identifying whether key individuals can influence the news consumption cycle on Reddit. Through an analysis of where news about political figures is shared on Reddit and the language it attracts, we illustrate that nationally-recognizable politicians are selectively discussed more by in-group online communities than they are by in-group news outlets. Out-group communities, on the other hand, generate the most toxic and hateful commentary. At the level of problematic downstream outcomes, we further probe whether people can tell when systems like algorithmic risk assessments harm data subjects in unfair ways. We find that observers are easily distracted by who makes risk assessments rather than how equitable the assessments are, suggesting that the task of welfare measurement itself needs to be made accessible for laypeople at large. This thesis posits that the online social systems jeopardizing our collective welfare can also be used to understand the very dangers they pose. By empirically measuring how well people are doing when they use or are impacted by these systems, we in turn empirically demonstrate the feasibility of this ideal. We conclude by speculating on the imminent ubiquity of artificial intelligence in our cyber-environment and their implications for the work in this thesis.},
  type          = {Doctoral Thesis},
}

@inproceedings{muecke:2022:check-mate-sanity-check-trustworthy-ai,
  title         = {Check Mate: {A} Sanity Check for Trustworthy {AI}},
  author        = {Sascha M{\"{u}}cke and Lukas Pfahler},
  year          = {2022},
  booktitle     = {Proceedings of the {LWDA} 2022 Workshops: FGWM, FGKD, and FGDB, Hildesheim (Germany), Oktober 5-7th, 2022},
  publisher     = {CEUR-WS.org},
  series        = {{CEUR} Workshop Proceedings},
  volume        = {3341},
  pages         = {91--103},
  url           = {https://ceur-ws.org/Vol-3341/KDML-LWDA_2022_CRC_8977.pdf},
  editor        = {Pascal Reuss and Viktor Eisenstadt and Jakob Michael Sch{\"{o}}nborn and Jero Sch{\"{a}}fer},
}

@inproceedings{mujagic:2024:predictive-analysis-chess-player-performance-maching-learning,
  title         = {Predictive Analysis of Chess Player Performance: An Analysis of Factors Influencing Competitive Success Using Machine Learning Techniques},
  author        = {Mujagi{\'{c}}, Amar and Mujagi{\'{c}}, Adnan and Mehanovi{\'{c}}, D{\v{z}}elila},
  year          = {2024},
  booktitle     = {Advanced Technologies, Systems, and Applications IX},
  publisher     = {Springer Nature Switzerland},
  address       = {Cham},
  pages         = {392--408},
  isbn          = {978-3-031-71694-2},
  editor        = {Ademovi{\'{c}}, Naida and Ak{\v{s}}amija, Zlatan and Karabegovi{\'{c}}, Almir},
  abstract      = {The world of competitive chess has long been a captivating arena for intellectual competition, where human intelligence, strategic thinking, and long-term planning converge. This study delves into the intricate web of factors that influence a chess player's competitive success through the lens of predictive modeling and machine learning techniques.},
}

@misc{narayanan:2023:improving-strength-human-models-chess,
  title         = {Improving the Strength of Human-Like Models in Chess},
  author        = {Saumik Narayanan and Kassa Korley and Chien-Ju Ho and Siddhartha Sen},
  year          = {2023},
  url           = {https://openreview.net/forum?id=fJY2iCssvIs},
}

@misc{nie:2024:discovering-high-quality-chess-puzzles-offline-reinforcement-learning,
  title         = {Discovering High-Quality Chess Puzzles Through One Billion Plays with Offline Reinforcement Learning},
  author        = {Allen Nie and Anirudhan Badrinath and Nicholas Tomlin and Timothy Dai and Carissa Yip and Rose E Wang and Emma Brunskill and Christopher J Piech},
  year          = {2024},
  url           = {https://openreview.net/forum?id=YKW98Icu1X},
}

@mastersthesis{o-rourke:2024:alternative-chess-rating-model-latent-variables,
  title         = {An alternative chess rating model based on latent variables},
  author        = {O'Rourke, Patrick},
  year          = {2024},
  note          = {http://dx.doi.org/10.13140/RG.2.2.18931.13604},
  school        = {University College Dublin},
  type          = {Master's thesis},
  annote        = {The ranking of players and particularly of chess players has been a topic of debate throughout the last 80 years. Such exploration spawned what has become the benchmark of evaluating professional chess players since the 1970s: the Elo rating model. The Elo system, the first to have a sound statistical basis, was designed by Elo (1978) from the assumption that the performance of a player in a game is a normally distributed random variable Alliot (2017). However, this ranking model is not without its limitations and such has led to extreme rating deflation of the World Chess Federation (FIDE) Standard Elo rating system Sonas (2023). Such attention on the FIDE's rating mechanism has ignited focus on the Elo system's drawbacks which we will address in this dissertation.},
}

@misc{omori:2024:chess-rating-estimation-moves,
  title         = {Chess Rating Estimation from Moves and Clock Times Using a {CNN-LSTM}},
  author        = {Michael Omori and Prasad Tadepalli},
  year          = {2024},
  url           = {https://arxiv.org/abs/2409.11506},
  eprint        = {2409.11506},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
}

@inproceedings{palsson:2024:empirical-evaluation-concept-probing-game-playing-agents,
  title         = {Empirical Evaluation of Concept Probing for Game-Playing Agents},
  author        = {A{\dh}alsteinn P{\'{a}}lsson and Yngvi Bj{\"{o}}rnsson},
  year          = {2024},
  booktitle     = {{ECAI} 2024 - 27th European Conference on Artificial Intelligence, 19-24 October 2024, Santiago de Compostela, Spain - Including 13th Conference on Prestigious Applications of Intelligent Systems {(PAIS} 2024)},
  publisher     = {{IOS} Press},
  series        = {Frontiers in Artificial Intelligence and Applications},
  volume        = {392},
  pages         = {874--881},
  doi           = {10.3233/FAIA240574},
  url           = {https://doi.org/10.3233/FAIA240574},
  editor        = {Ulle Endriss and Francisco S. Melo and Kerstin Bach and Alberto Jos{\'{e}} Bugar{\'{\i}}n Diz and Jose Maria Alonso{-}Moral and Sen{\'{e}}n Barro and Fredrik Heintz},
}

@inproceedings{patria:2021:cheat-detection-online-chess,
  title         = {Cheat Detection on Online Chess Games using Convolutional and Dense Neural Network},
  author        = {Patria, Reyhan and Favian, Sean and Caturdewa, Anggoro and Suhartono, Derwin},
  year          = {2021},
  booktitle     = {2021 4th International Seminar on Research of Information Technology and Intelligent Systems (ISRITI)},
  pages         = {389--395},
  doi           = {10.1109/ISRITI54043.2021.9702792},
  keywords      = {Seminars;Neural networks;Games;Convolutional neural networks;Intelligent systems;Information technology;Engines;Cheat Detection;Online Chess Games;Convolutional Neural Network;Dense Neural Network;Neural Network},
}

@inproceedings{puri:2020:explain-your-move,
  title         = {Explain Your Move: Understanding Agent Actions Using Specific and Relevant Feature Attribution},
  author        = {Nikaash Puri and Sukriti Verma and Piyush Gupta and Dhruv Kayastha and Shripad V. Deshmukh and Balaji Krishnamurthy and Sameer Singh},
  year          = {2020},
  booktitle     = {8th International Conference on Learning Representations, {ICLR} 2020, Addis Ababa, Ethiopia, April 26-30, 2020},
  publisher     = {OpenReview.net},
  url           = {https://openreview.net/forum?id=SJgzLkBKPB},
}

@inproceedings{qian:2020:comparative-study-online-chess-educational-products,
  title         = {A Comparative Study of Chess Online Educational Products},
  author        = {Dong, Qian and Miao, Rong},
  year          = {2020},
  booktitle     = {Blended Learning. Education in a Smart Learning Environment: 13th International Conference, ICBL 2020, Bangkok, Thailand, August 24--27, 2020, Proceedings},
  location      = {Bangkok, Thailand},
  publisher     = {Springer-Verlag},
  address       = {Berlin, Heidelberg},
  pages         = {101--113},
  doi           = {10.1007/978-3-030-51968-1_9},
  isbn          = {978-3-030-51967-4},
  url           = {https://doi.org/10.1007/978-3-030-51968-1_9},
  abstract      = {With the development of technology, more and more online educational products emerge in chess, which makes it difficult for different users to choose from. It's important to develop methodologies to assist different levels chess players to learn in varies environment. List method and rubric evaluation has been conducted, and advice has been put forward based on this approach. The results show that chess online educational products were rich in content and full featured, which could be divided into four categories: overall ecology, video tutorial, tactical training, live broadcast product. However, products still need to improve in product positioning and user experience to promote the development of chess online education.},
  numpages      = {13},
  keywords      = {Chess, Online education, Products, Comparative study},
}

@inproceedings{rabii:2021:revealing-game-dynamics-word-embeddings,
  title         = {Revealing Game Dynamics via Word Embeddings of Gameplay Data},
  author        = {Youn{\`{e}}s Rabii and Michael Cook},
  year          = {2021},
  booktitle     = {Proceedings of the Seventeenth {AAAI} Conference on Artificial Intelligence and Interactive Digital Entertainment, {AIIDE} 2021, virtual, October 11-15, 2021},
  publisher     = {{AAAI} Press},
  pages         = {187--194},
  url           = {https://ojs.aaai.org/index.php/AIIDE/article/view/18907},
  editor        = {David Thue and Stephen G. Ware},
}

@inproceedings{rosemarin:2019:playing-chess-human-level-style,
  title         = {Playing Chess at a Human Desired Level and Style},
  author        = {Hanan Rosemarin and Ariel Rosenfeld},
  year          = {2019},
  booktitle     = {Proceedings of the 7th International Conference on Human-Agent Interaction, {HAI} 2019, Kyoto, Japan, October 06-10, 2019},
  publisher     = {{ACM}},
  pages         = {76--80},
  doi           = {10.1145/3349537.3351904},
  url           = {https://doi.org/10.1145/3349537.3351904},
  editor        = {Natsuki Oka and Tomoko Koda and Mohammad Obaid and Hideyuki Nakanishi and Omar Mubin and Kazuaki Tanaka},
  abstract      = {Human chess players prefer training with human opponents over chess agents as the latter are distinctively different in level and style than humans. Chess agents designed for human-agent play are capable of adjusting their level, however their style is not aligned with that of human players. In this paper, we propose a novel approach for designing such agents by integrating the theory of chess players' decision-making with a state-of-the-art Monte Carlo Tree Search (MCTS) algorithm. We demonstrate the benefits of our approach using two sets of analyses. Quantitatively, we establish that the agents attain their desired Elo ratings. Qualitatively, through a Turing-inspired test with a human chess expert, we show that our agents are indistinguishable from human players.},
  keywords      = {chess, game playing agents, human-agent play},
}

@inproceedings{ruoss:2024:amortized-planning-transformers-case-study-chess,
  title         = {Amortized Planning with Large-Scale Transformers: A Case Study on Chess},
  author        = {Anian Ruoss and Gregoire Deletang and Sourabh Medapati and Jordi Grau-Moya and Li Kevin Wenliang and Elliot Catt and John Reid and Cannada A. Lewis and Joel Veness and Tim Genewein},
  year          = {2024},
  booktitle     = {The Thirty-eighth Annual Conference on Neural Information Processing Systems},
  url           = {https://openreview.net/forum?id=XlpipUGygX},
}

@article{russel:2022:thinking-online-chess-computation,
  title         = {Time spent thinking in online chess reflects the value of computation},
  author        = {Russek, Evan and Acosta-Kane, Daniel and van Opheusden, Bas and Mattar, Marcelo and Griffiths, Tom},
  year          = {2022},
  journal       = {PsyArXiv},
  doi           = {10.31234/osf.io/8j9zx},
  url           = {https://doi.org/10.31234/osf.io/8j9zx},
  abstract      = {Although artificial intelligence systems can now outperform humans in a variety of domains, they still lag behind in the ability to arrive at good solutions to problems using limited resources. Recent proposals have suggested that the key to this cognitive efficiency is intelligent selection of the situations in which computational resources are spent. We tested this hypothesis in the domain of complex planning by analyzing how humans managed time available for thinking in over 12 million online chess matches. We found that players spent more time thinking in board positions where planning was more beneficial. This effect was greater in stronger players, and additionally strengthened by considering only the information available to the player at the time of choice. Finally, we found that the qualitative features of this relationship were consistent with a policy that considers the empirically-measured cost of spending time in chess. This provides evidence that human efficiency is supported by intelligent selection of when to apply computation.},
}

@techreport{salant:2022:complexity-satisficing-theory-evidence-chess,
  title         = {Complexity and Satisficing: Theory with Evidence from Chess},
  author        = {Salant, Yuval and Spenkuch, Jorg L},
  year          = {2022},
  month         = apr,
  series        = {Working Paper Series},
  number        = {30002},
  doi           = {10.3386/w30002},
  url           = {http://www.nber.org/papers/w30002},
  institution   = {National Bureau of Economic Research},
  type          = {Working Paper},
  abstract      = {We develop a satisficing model of choice in which the available alternatives differ in their inherent complexity. We assume--and experimentally validate--that complexity leads to errors in the perception of alternatives' values. The model yields sharp predictions about the effect of complexity on choice probabilities, some of which qualitatively contrast with those of maximization-based choice models. We confirm the predictions of the satisficing model--and thus reject maximization--in a novel data set with information on hundreds of millions of real-world chess moves by highly experienced players. These findings point to the importance of complexity and satisficing for decision making outside of the laboratory.},
}

@article{sanjaya:2022-non-transitivity-chess,
  title         = {Measuring the Non-Transitivity in Chess},
  author        = {Ricky Sanjaya and Jun Wang and Yaodong Yang},
  year          = {2022},
  journal       = {Algorithms},
  volume        = {15},
  number        = {5},
  pages         = {152},
  doi           = {10.3390/A15050152},
  url           = {https://doi.org/10.3390/a15050152},
}

@misc{schultz:2024:mastering-board-games-external-internal-planning-language-models,
  title         = {Mastering Board Games by External and Internal Planning with Language Models},
  author        = {John Schultz and Jakub Adamek and Matej Jusup and Marc Lanctot and Michael Kaisers and Sarah Perrin and Daniel Hennes and Jeremy Shar and Cannada Lewis and Anian Ruoss and Tom Zahavy and Petar Veli{\v{c}}kovi{\'{c}} and Laurel Prince and Satinder Singh and Eric Malmi and Nenad Toma{\v{s}}ev},
  year          = {2024},
  url           = {https://arxiv.org/abs/2412.12119},
  eprint        = {2412.12119},
  archiveprefix = {arXiv},
  primaryclass  = {cs.AI},
}

@inproceedings{schwarzschild:2021:can-you-learn-algorithm-easy-hard-examples,
  title         = {Can You Learn an Algorithm? Generalizing from Easy to Hard Problems with Recurrent Networks},
  author        = {Avi Schwarzschild and Eitan Borgnia and Arjun Gupta and Furong Huang and Uzi Vishkin and Micah Goldblum and Tom Goldstein},
  year          = {2021},
  booktitle     = {Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6-14, 2021, virtual},
  pages         = {6695--6706},
  url           = {https://proceedings.neurips.cc/paper/2021/hash/3501672ebc68a5524629080e3ef60aef-Abstract.html},
  editor        = {Marc'Aurelio Ranzato and Alina Beygelzimer and Yann N. Dauphin and Percy Liang and Jennifer Wortman Vaughan},
}

@article{schwarzschild:2021:datasets-easy-hard-examples,
  title         = {Datasets for Studying Generalization from Easy to Hard Examples},
  author        = {Avi Schwarzschild and Eitan Borgnia and Arjun Gupta and Arpit Bansal and Zeyad Emam and Furong Huang and Micah Goldblum and Tom Goldstein},
  year          = {2021},
  journal       = {CoRR},
  volume        = {abs/2108.06011},
  url           = {https://arxiv.org/abs/2108.06011},
  eprinttype    = {arXiv},
  eprint        = {2108.06011},
}

@article{setiawan:2018:analysis-chess-skills-mathematics-learning,
  title         = {Analysis of Chess Playing Skills on Mathematics Learning Outcomes Junior Athletes Raja Kombi Trenggalek Chess Club},
  author        = {Setiawan, Andika Yogi and Pratama, Henri Gunawan},
  journal       = {PHEDHERAL},
  volume        = {18},
  number        = {1},
  pages         = {37--46},
  doi           = {10.20961/phduns.v18i1.51318},
  url           = {https://doi.org/10.20961/phduns.v18i1.51318},
  keywords      = {Analysis, Chess Skills, Mathematics Learning Outcomes},
  abstract      = {
    This study aims to determine the results of the analysis of chess playing skills on mathematics learning outcomes for junior athletes of the Raja Kombi Trenggalek chess club. The research method used is a qualitative descriptive method with a quantitative approach. Participants in this study were 8 junior athletes of Raja Kombi Trenggalek chess club. Data collection techniques using interviews, skills results and documentation. The data analysis in this study used the mean and percentage formula.

    After analyzing the data, the results of this study concluded that in this study, the average score of playing chess skills was 85.00, then the average score of mathematics learning outcomes was 86.25. This is of course the higher the level of achievement of skills or intellectual intelligence, the higher the level of problem solving such as in learning mathematics. This is also influenced by motor and psychological aspects as a support for intelligence skills that affect the thinking of athletes. Then from the data analysis it can be said that the higher the level of achievement of chess playing skills, the higher the level of problem solving as in learning mathematics
  },
}

@article{song:2023:investigation-sicilian-defense,
  title         = {Investigation of the Sicilian Defense: Winning rates and strategic discrimination},
  author        = {Song, Ziming},
  year          = {2023},
  journal       = {Interdisciplinary Humanities and Communication Studies},
  volume        = {1},
  number        = {4},
}

@misc{stanek:2024:bad-crypto-chessography-weak-randomness-chess-games,
  title         = {Bad Crypto: Chessography and Weak Randomness of Chess Games},
  author        = {Martin Stanek},
  year          = {2024},
  url           = {https://arxiv.org/abs/2412.09742},
  eprint        = {2412.09742},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CR},
}

@inproceedings{stockl:2021:watching-language-model-learning-chess,
  title         = {Watching a Language Model Learning Chess},
  author        = {St{\"o}ckl, Andreas},
  year          = {2021},
  month         = sep,
  booktitle     = {Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)},
  publisher     = {INCOMA Ltd.},
  address       = {Held Online},
  pages         = {1369--1379},
  url           = {https://aclanthology.org/2021.ranlp-1.153},
  editor        = {Mitkov, Ruslan  and Angelova, Galia},
  abstract      = {We analyse how a transformer-based language model learns the rules of chess from text data of recorded games. We show how it is possible to investigate how the model capacity and the available number of training data influence the learning success of a language model with the help of chess-specific metrics. With these metrics, we show that more games used for training in the studied range offers significantly better results for the same training time. However, model size does not show such a clear influence. It is also interesting to observe that the usual evaluation metrics for language models, predictive accuracy and perplexity, give no indication of this here. Further examination of trained models reveals how they store information about board state in the activations of neuron groups, and how the overall sequence of previous moves influences the newly-generated moves.},
}

@inproceedings{tay:2023:social-status-competitors-cause-decision-maker-errors,
  title         = {Can higher social status of competitors cause decision makers to commit more errors?},
  author        = {Tay, Li Qian},
  year          = {2023},
  booktitle     = {Proceedings of the Annual Meeting of the Cognitive Science Society},
  volume        = {45},
  url           = {https://escholarship.org/uc/item/85d620jz},
}

@inproceedings{tijhuis:2023:predicting-chess-rating-single-game,
  title         = {Predicting Chess Player Rating Based on a Single Game},
  author        = {Tim Tijhuis and Paris Mavromoustakos Blom and Pieter Spronck},
  year          = {2023},
  booktitle     = {{IEEE} Conference on Games, CoG 2023, Boston, MA, USA, August 21-24, 2023},
  publisher     = {{IEEE}},
  pages         = {1--8},
  doi           = {10.1109/COG57401.2023.10333133},
  url           = {https://doi.org/10.1109/CoG57401.2023.10333133},
}

@misc{wang:2024:explore-reasoning-capability-llms-chess-testbed,
  title         = {Explore the Reasoning Capability of {LLMs} in the Chess Testbed},
  author        = {Shu Wang and Lei Ji and Renxi Wang and Wenxiao Zhao and Haokun Liu and Yifan Hou and Ying Nian Wu},
  year          = {2024},
  url           = {https://arxiv.org/abs/2411.06655},
  eprint        = {2411.06655},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
}

@inproceedings{wieczerzak:2022:dataset-experimental-investigation-chess-position-evaluation-neural-network,
  title         = {Dataset Related Experimental Investigation of Chess Position Evaluation Using a Deep Neural Network},
  author        = {Dawid Wieczerzak and Pawel Czarnul},
  year          = {2022},
  booktitle     = {Parallel Processing and Applied Mathematics - 14th International Conference, {PPAM} 2022, Gdansk, Poland, September 11-14, 2022, Revised Selected Papers, Part {I}},
  publisher     = {Springer},
  series        = {Lecture Notes in Computer Science},
  volume        = {13826},
  pages         = {429--440},
  doi           = {10.1007/978-3-031-30442-2_32},
  url           = {https://doi.org/10.1007/978-3-031-30442-2_32},
  editor        = {Roman Wyrzykowski and Jack J. Dongarra and Ewa Deelman and Konrad Karczewski},
}

@inproceedings{yamada:2023:estimating-online-ratings-decision-tree,
  title         = {A Method for Estimating Online Chess Game Player Ratings with Decision Tree},
  author        = {Habuki Yamada and Nobuko Kishi and Masato Oguchi and Miyuki Nakano},
  year          = {2023},
  booktitle     = {{IEEE} International Conference on Big Data and Smart Computing, BigComp 2023, Jeju, Republic of Korea, February 13-16, 2023},
  publisher     = {{IEEE}},
  pages         = {320--321},
  doi           = {10.1109/BIGCOMP57234.2023.00066},
  url           = {https://doi.org/10.1109/BigComp57234.2023.00066},
  editor        = {Hyeran Byun and Beng Chin Ooi and Katsumi Tanaka and Sang{-}Won Lee and Zhixu Li and Akiyo Nadamoto and Giltae Song and Young{-}Guk Ha and Kazutoshi Sumiya and Yuncheng Wu and Hyuk{-}Yoon Kwon and Takehiro Yamamoto},
}

@article{zaidi:2024:predicting-user-perception-move-brilliance-chess,
  title         = {Predicting User Perception of Move Brilliance in Chess},
  author        = {Kamron Zaidi and Michael Guerzhoy},
  year          = {2024},
  journal       = {CoRR},
  volume        = {abs/2406.11895},
  doi           = {10.48550/ARXIV.2406.11895},
  url           = {https://doi.org/10.48550/arXiv.2406.11895},
  eprinttype    = {arXiv},
  eprint        = {2406.11895},
}

@mastersthesis{zelek:2022:topological-data-analysis-chess,
  title         = {Topological Data Analysis in chess},
  author        = {Zelek, Jakub},
  year          = {2022},
  note          = {https://ruj.uj.edu.pl/xmlui/handle/item/295689},
  school        = {Jagiellonian University},
  type          = {Master's thesis},
  keywords      = {Chess, Topological Data Analysis, Design Patterns, Data modeling, Modules, Category theory, Topology},
  annote        = {This thesis uses Topological Data Analysis to examine the data collected from the lichess.org portal. The analysis was based on the games of players playing at different levels. The purpose of the analysis was to distinguish groups of players and players with the highest ranking from each other. Each player's game is represented by a multidimensional vector that encodes information about the course of the game. There are three approaches to creating this vector, allowing us to focus on different aspects of the chess game. The proposed analysis was carried out with the intention of verifying the Topological Data Analysis as a tool for analyzing chess games. As a result, it was shown that Topological Data Analysis can be a potential tool for recognizing the quality of a given player, if we have enough number of his games, and to reconstruct player rankings. A significant result is also the potential for further research for which this thesis could be the foundation.},
}