{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,7]],"date-time":"2026-01-07T20:58:18Z","timestamp":1767819498751,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":20,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,21]]},"DOI":"10.1145\/3772429.3772431","type":"proceedings-article","created":{"date-parts":[[2025,12,23]],"date-time":"2025-12-23T13:59:08Z","timestamp":1766498348000},"page":"12-18","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Incentivizing Permissionless Distributed Learning of LLMs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-3801-2076","authenticated-orcid":false,"given":"Joel","family":"Lidin","sequence":"first","affiliation":[{"name":"Templar, Dubai, United Arab Emirates"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3453-0540","authenticated-orcid":false,"given":"Amir","family":"Sarfi","sequence":"additional","affiliation":[{"name":"Templar, Ottawa, Ontario, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-1270-4685","authenticated-orcid":false,"given":"Evangelos","family":"Pappas","sequence":"additional","affiliation":[{"name":"Templar, London, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1738-9963","authenticated-orcid":false,"given":"Samuel","family":"Dare","sequence":"additional","affiliation":[{"name":"Templar, Dubai, United Arab Emirates"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2986-596X","authenticated-orcid":false,"given":"Eugene","family":"Belilovsky","sequence":"additional","affiliation":[{"name":"MILA, Concordia University, Montreal, Quebec, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6107-8318","authenticated-orcid":false,"given":"Jacob","family":"Steeves","sequence":"additional","affiliation":[{"name":"Opentensor foundation, Lima, Peru"}]}],"member":"320","published-online":{"date-parts":[[2025,12,23]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"Kwangjun Ahn and Byron Xu. 2025. Dion: A Communication-Efficient Optimizer for Large Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2504.05295 (2025)."},{"key":"e_1_3_3_2_3_2","unstructured":"Arasu Arun Adam\u00a0St Arnaud Alexey Titov Brian Wilcox Viktor Kolobaric Marc Brinkmann Oguzhan Ersoy Ben Fielding and Joseph Bonneau. 2025. Verde: Verification via Refereed Delegation for Machine Learning Programs. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.19405 (2025)."},{"key":"e_1_3_3_2_4_2","first-page":"335","volume-title":"NeurIPS 2021 Competitions and Demonstrations Track","author":"Borzunov Alexander","year":"2022","unstructured":"Alexander Borzunov, Max Ryabinin, Tim Dettmers, Quentin Lhoest, Lucile Saulnier, Michael Diskin, Yacine Jernite, and Thomas Wolf. 2022. Training transformers together. In NeurIPS 2021 Competitions and Demonstrations Track. PMLR, 335\u2013342."},{"key":"e_1_3_3_2_5_2","unstructured":"Arthur Douillard Qixuan Feng Andrei\u00a0A Rusu Rachita Chhaparia Yani Donchev Adhiguna Kuncoro Marc\u2019Aurelio Ranzato Arthur Szlam and Jiajun Shen. 2023. Diloco: Distributed low-communication training of language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.08105 (2023)."},{"key":"e_1_3_3_2_6_2","unstructured":"Priya Goyal Piotr Doll\u00e1r Ross Girshick Pieter Noordhuis Lukasz Wesolowski Aapo Kyrola Andrew Tulloch Yangqing Jia and Kaiming He. 2017. Accurate large minibatch sgd: Training imagenet in 1 hour. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1706.02677 (2017)."},{"key":"e_1_3_3_2_7_2","unstructured":"Sami Jaghouar Jack\u00a0Min Ong Manveer Basra Fares Obeid Jannik Straube Michael Keiblinger Elie Bakouch Lucas Atkins Maziyar Panahi Charles Goddard et\u00a0al. 2024. INTELLECT-1 Technical Report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.01152 (2024)."},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1109\/SP40001.2021.00106"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"crossref","unstructured":"Vivek Joshy. 2024. OpenSkill: A faster asymmetric multi-team multiplayer rating system. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.05451 (2024).","DOI":"10.21105\/joss.05901"},{"key":"e_1_3_3_2_10_2","first-page":"3252","volume-title":"International Conference on Machine Learning","author":"Karimireddy Sai\u00a0Praneeth","year":"2019","unstructured":"Sai\u00a0Praneeth Karimireddy, Quentin Rebjock, Sebastian Stich, and Martin Jaggi. 2019. Error feedback fixes signsgd and other gradient compression schemes. In International Conference on Machine Learning. PMLR, 3252\u20133261."},{"key":"e_1_3_3_2_11_2","unstructured":"Grigory Malinovsky Peter Richt\u00e1rik Samuel Horv\u00e1th and Eduard Gorbunov. 2023. Byzantine Robustness and Partial Participation Can Be Achieved at Once: Just Clip Gradient Differences. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.14127 (2023)."},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"crossref","unstructured":"Guilherme Penedo Hynek Kydl\u00ed\u010dek Anton Lozhkov Margaret Mitchell Colin\u00a0A Raffel Leandro Von\u00a0Werra Thomas Wolf et\u00a0al. 2024. The fineweb datasets: Decanting the web for the finest text data at scale. Advances in Neural Information Processing Systems 37 (2024) 30811\u201330849.","DOI":"10.52202\/079017-0970"},{"key":"e_1_3_3_2_13_2","unstructured":"Bowen Peng Jeffrey Quesnelle and Diederik\u00a0P Kingma. 2024. Decoupled Momentum Optimization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.19870 (2024)."},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"crossref","unstructured":"Krishna Pillutla Sham\u00a0M Kakade and Zaid Harchaoui. 2022. Robust aggregation for federated learning. IEEE Transactions on Signal Processing 70 (2022) 1142\u20131154.","DOI":"10.1109\/TSP.2022.3153135"},{"key":"e_1_3_3_2_15_2","unstructured":"Xun Qian Peter Richt\u00e1rik and Tong Zhang. 2021. Error compensated distributed SGD can be accelerated. Advances in Neural Information Processing Systems 34 (2021) 30401\u201330413."},{"key":"e_1_3_3_2_16_2","unstructured":"Sashank Reddi Zachary Charles Manzil Zaheer Zachary Garrett Keith Rush Jakub Kone\u010dn\u1ef3 Sanjiv Kumar and H\u00a0Brendan McMahan. 2020. Adaptive federated optimization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2003.00295 (2020)."},{"key":"e_1_3_3_2_17_2","unstructured":"Lorenzo Sani Alex Iacob Zeyu Cao Bill Marino Yan Gao Tomas Paulik Wanru Zhao William\u00a0F Shen Preslav Aleksandrov Xinchi Qiu et\u00a0al. 2024. The future of large language model pre-training is federated. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.10853 (2024)."},{"key":"e_1_3_3_2_18_2","unstructured":"Shaohuai Shi Xiaowen Chu Ka\u00a0Chun Cheung and Simon See. 2019. Understanding top-k sparsification in distributed deep learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1911.08772 (2019)."},{"key":"e_1_3_3_2_19_2","unstructured":"Jacob Steeves Ala Shaabana Yuqian Hu Francois Luus Sin\u00a0Tai Liu and Jacqueline\u00a0Dawn Tasker-Steeves. 2022. Incentivizing Intelligence: The Bittensor Approach. https:\/\/ai-secure.github.io\/DMLW2022\/assets\/papers\/6.pdf"},{"key":"e_1_3_3_2_20_2","first-page":"36058","volume-title":"International Conference on Machine Learning","author":"Wang Jue","year":"2023","unstructured":"Jue Wang, Yucheng Lu, Binhang Yuan, Beidi Chen, Percy Liang, Christopher De\u00a0Sa, Christopher Re, and Ce Zhang. 2023. Cocktailsgd: Fine-tuning foundation models over 500mbps networks. In International Conference on Machine Learning. PMLR, 36058\u201336076."},{"key":"e_1_3_3_2_21_2","unstructured":"Cong Xie Oluwasanmi Koyejo and Indranil Gupta. 2018. Generalized byzantine-tolerant sgd. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1802.10116 (2018)."}],"event":{"name":"DAI '25: The Seventh International Conference on Distributed Artificial Intelligence","location":"London United Kingdom","acronym":"DAI '25"},"container-title":["Proceedings of the 2025 The Seventh International Conference on Distributed Artificial Intelligence"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3772429.3772431","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,7]],"date-time":"2026-01-07T19:42:59Z","timestamp":1767814979000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3772429.3772431"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,21]]},"references-count":20,"alternative-id":["10.1145\/3772429.3772431","10.1145\/3772429"],"URL":"https:\/\/doi.org\/10.1145\/3772429.3772431","relation":{},"subject":[],"published":{"date-parts":[[2025,11,21]]},"assertion":[{"value":"2025-12-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}