{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,25]],"date-time":"2026-02-25T15:45:04Z","timestamp":1772034304909,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T00:00:00Z","timestamp":1731542400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,11,14]]},"DOI":"10.1145\/3677052.3698691","type":"proceedings-article","created":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T06:38:06Z","timestamp":1731566286000},"page":"669-676","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Mixtures of Experts for Scaling up Neural Networks in Order Execution"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-7847-9292","authenticated-orcid":false,"given":"Kang","family":"Li","sequence":"first","affiliation":[{"name":"Department of Statistics, University of Oxford, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8464-2152","authenticated-orcid":false,"given":"Mihai","family":"Cucuringu","sequence":"additional","affiliation":[{"name":"Department of Statistics &amp; Mathematical Institute and Oxford-Man Institute of Quantitative Finance, University of Oxford, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6447-7105","authenticated-orcid":false,"given":"Leandro","family":"S\u00e1nchez-Betancourt","sequence":"additional","affiliation":[{"name":"Mathematical Institute, Oxford-Man Institute of Quantitative Finance, University of Oxford, United 
Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4405-5700","authenticated-orcid":false,"given":"Timon","family":"Willi","sequence":"additional","affiliation":[{"name":"Foerster Lab for AI Research, University of Oxford, United Kingdom"}]}],"member":"320","published-online":{"date-parts":[[2024,11,14]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.21314\/JOR.2001.041"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-022-06250-4"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1137\/21M1407756"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1904.12066"},{"key":"e_1_3_2_1_5_1","volume-title":"Clustering approaches for financial data analysis: a survey. arXiv preprint arXiv:1609.08520","author":"Cai Fan","year":"2016","unstructured":"Fan Cai, Nhien-An Le-Khac, and Tahar Kechadi. 2016. Clustering approaches for financial data analysis: a survey. arXiv preprint arXiv:1609.08520 (2016)."},{"key":"e_1_3_2_1_6_1","volume-title":"Algorithmic and high-frequency trading","author":"Cartea \u00c1lvaro","unstructured":"\u00c1lvaro Cartea, Sebastian Jaimungal, and Jos\u00e9 Penalva. 2015. Algorithmic and high-frequency trading. Cambridge University Press."},{"key":"e_1_3_2_1_7_1","volume-title":"Machine Learning and Data Sciences for Financial Markets: A Guide to Contemporary Practices, Charles-Albert Lehalle and Agostino Capponi (Eds.)","author":"Cartea \u00c1lvaro","unstructured":"\u00c1lvaro Cartea, Sebastian Jaimungal, and Leandro S\u00e1nchez-Betancourt. 2023. Reinforcement Learning for Algorithmic Trading. In Machine Learning and Data Sciences for Financial Markets: A Guide to Contemporary Practices, Charles-Albert Lehalle and Agostino Capponi (Eds.). 
Cambridge University Press."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1137\/21M1456467"},{"key":"e_1_3_2_1_9_1","volume-title":"Detecting Lead-Lag Relationships in Stock Returns and Portfolio Strategies. SSRN 4599565","author":"Cartea \u00c1lvaro","year":"2023","unstructured":"\u00c1lvaro Cartea, Mihai Cucuringu, and Qi Jin. 2023. Detecting Lead-Lag Relationships in Stock Returns and Portfolio Strategies. SSRN 4599565 (2023). https:\/\/ssrn.com\/abstract=4599565"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00780-022-00491-w"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","unstructured":"Junyoung Chung Caglar Gulcehre KyungHyun Cho and Yoshua Bengio. 2014. Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling. https:\/\/doi.org\/10.48550\/arXiv.1412.3555 arXiv:1412.3555.","DOI":"10.48550\/arXiv.1412.3555"},{"key":"e_1_3_2_1_12_1","unstructured":"Samuel Coward Michael Beukman and Jakob Foerster. 2024. JaxUED: A simple and useable UED library in Jax. http:\/\/arxiv.org\/abs\/2403.13091 arXiv:2403.13091 [cs]."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","unstructured":"Christian\u00a0Schroeder de Witt Tarun Gupta Denys Makoviichuk Viktor Makoviychuk Philip H.\u00a0S. Torr Mingfei Sun and Shimon Whiteson. 2020. Is Independent Learning All You Need in the StarCraft Multi-Agent Challenge?https:\/\/doi.org\/10.48550\/ARXIV.2011.09533 Version Number: 1.","DOI":"10.48550\/ARXIV.2011.09533"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1080\/1350486X.2022.2161588"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1080\/1350486X.2020.1847672"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","unstructured":"Utku Evci Trevor Gale Jacob Menick Pablo\u00a0Samuel Castro and Erich Elsen. 2021. Rigging the Lottery: Making All Tickets Winners. 
https:\/\/doi.org\/10.48550\/arXiv.1911.11134 arXiv:1911.11134 [cs stat].","DOI":"10.48550\/arXiv.1911.11134"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","unstructured":"Jin Fang Jiacheng Weng Yi Xiang and Xinwen Zhang. 2022. Imitate then Transcend: Multi-Agent Optimal Execution with Dual-Window Denoise PPO. https:\/\/doi.org\/10.48550\/arXiv.2206.10736 arXiv:2206.10736 [cs q-fin].","DOI":"10.48550\/arXiv.2206.10736"},{"key":"e_1_3_2_1_18_1","article-title":"Switch transformers: scaling to trillion parameter models with simple and efficient sparsity","volume":"23","author":"Fedus William","year":"2022","unstructured":"William Fedus, Barret Zoph, and Noam Shazeer. 2022. Switch transformers: scaling to trillion parameter models with simple and efficient sparsity. J. Mach. Learn. Res. 23, 1, Article 120 (jan 2022), 39\u00a0pages.","journal-title":"J. Mach. Learn. Res."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1080\/14697688.2021.1950919"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","unstructured":"Sascha Frey Kang Li Peer Nagy Silvia Sapora Chris Lu Stefan Zohren Jakob Foerster and Anisoara Calinescu. 2023. JAX-LOB: A GPU-Accelerated limit order book simulator to unlock large scale reinforcement learning for trading. https:\/\/doi.org\/10.48550\/arXiv.2308.13289 arXiv:2308.13289 [cs q-fin].","DOI":"10.48550\/arXiv.2308.13289"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3604237.3626880"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","unstructured":"Trevor Gale Erich Elsen and Sara Hooker. 2019. The State of Sparsity in Deep Neural Networks. https:\/\/doi.org\/10.48550\/arXiv.1902.09574 arXiv:1902.09574 [cs stat].","DOI":"10.48550\/arXiv.1902.09574"},{"key":"e_1_3_2_1_23_1","volume-title":"The Financial Mathematics of Market Liquidity: From optimal execution to market making. Vol.\u00a033","author":"Gu\u00e9ant Olivier","unstructured":"Olivier Gu\u00e9ant. 2016. 
The Financial Mathematics of Market Liquidity: From optimal execution to market making. Vol.\u00a033. CRC Press."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00780-019-00394-3"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.2139\/ssrn.1977207"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3604237.3626873"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3604237.3626894"},{"key":"e_1_3_2_1_28_1","volume-title":"GShard: Scaling Giant Models with Conditional Computation and Automatic Sharding. In 9th International Conference on Learning Representations, ICLR 2021","author":"Lepikhin Dmitry","year":"2021","unstructured":"Dmitry Lepikhin, HyoukJoong Lee, Yuanzhong Xu, Dehao Chen, Orhan Firat, Yanping Huang, Maxim Krikun, Noam Shazeer, and Zhifeng Chen. 2021. GShard: Scaling Giant Models with Conditional Computation and Automatic Sharding. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3-7, 2021. OpenReview.net. https:\/\/openreview.net\/forum?id=qrwe7XHTmYb"},{"key":"e_1_3_2_1_29_1","unstructured":"Mike Lewis Shruti Bhosale Tim Dettmers Naman Goyal and Luke Zettlemoyer. 2021. BASE Layers: Simplifying Training of Large Sparse Models. arxiv:2103.16716\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2103.16716"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2017.05.185"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","unstructured":"Chris Lu Jakub\u00a0Grudzien Kuba Alistair Letcher Luke Metz Christian\u00a0Schroeder de Witt and Jakob Foerster. 2022. Discovered Policy Optimisation. https:\/\/doi.org\/10.48550\/arXiv.2210.05639 arXiv:2210.05639 [cs].","DOI":"10.48550\/arXiv.2210.05639"},{"key":"e_1_3_2_1_32_1","unstructured":"Xiao Ma Shen-Yi Zhao and Wu-Jun Li. 2019. Clustered Reinforcement Learning. 
arxiv:1906.02457\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/1906.02457"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2402.16801"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.3389\/frai.2023.1151003"},{"key":"e_1_3_2_1_35_1","volume-title":"An offline learning approach to propagator models. arXiv preprint arXiv:2309.02994","author":"Neuman Eyal","year":"2023","unstructured":"Eyal Neuman, Wolfgang Stockinger, and Yufei Zhang. 2023. An offline learning approach to propagator models. arXiv preprint arXiv:2309.02994 (2023)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1137\/20M1375486"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1080\/1350486X.2022.2077783"},{"key":"e_1_3_2_1_38_1","unstructured":"Johan Obando-Ceron Ghada Sokar Timon Willi Clare Lyle Jesse Farebrother Jakob Foerster Gintare\u00a0Karolina Dziugaite Doina Precup and Pablo\u00a0Samuel Castro. 2024. Mixtures of Experts Unlock Parameter Scaling for Deep RL. http:\/\/arxiv.org\/abs\/2402.08609 arXiv:2402.08609 [cs]."},{"key":"e_1_3_2_1_39_1","unstructured":"Johan Obando-Ceron Ghada Sokar Timon Willi Clare Lyle Jesse Farebrother Jakob Foerster Gintare\u00a0Karolina Dziugaite Doina Precup and Pablo\u00a0Samuel Castro. 2024. Mixtures of Experts Unlock Parameter Scaling for Deep RL. http:\/\/arxiv.org\/abs\/2402.08609 arXiv:2402.08609 [cs]."},{"key":"e_1_3_2_1_40_1","unstructured":"Johan Obando-Ceron Ghada Sokar Timon Willi Clare Lyle Jesse Farebrother Jakob Foerster Gintare\u00a0Karolina Dziugaite Doina Precup and Pablo\u00a0Samuel Castro. 2024. Mixtures of Experts Unlock Parameter Scaling for Deep RL. http:\/\/arxiv.org\/abs\/2402.08609 arXiv:2402.08609 [cs]."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11408-015-0248-2"},{"key":"e_1_3_2_1_42_1","unstructured":"Joan Puigcerver Carlos Riquelme Basil Mustafa and Neil Houlsby. 2024. From Sparse to Soft Mixtures of Experts. 
arxiv:2308.00951\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2308.00951"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","unstructured":"Joan Puigcerver Carlos Riquelme Basil Mustafa and Neil Houlsby. 2024. From Sparse to Soft Mixtures of Experts. https:\/\/doi.org\/10.48550\/arXiv.2308.00951 arXiv:2308.00951 [cs].","DOI":"10.48550\/arXiv.2308.00951"},{"key":"e_1_3_2_1_44_1","unstructured":"John Schulman Filip Wolski Prafulla Dhariwal Alec Radford and Oleg Klimov. 2017. Proximal policy optimization algorithms. (2017). arXiv:1707.06347arXiv preprint available at arXiv:1707.06347."},{"key":"e_1_3_2_1_45_1","volume-title":"Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=B1ckMDqlg","author":"Shazeer Noam","year":"2017","unstructured":"Noam Shazeer, *Azalia Mirhoseini, *Krzysztof Maziarz, Andy Davis, Quoc Le, Geoffrey Hinton, and Jeff Dean. 2017. Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=B1ckMDqlg"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","unstructured":"Tom Silver Kelsey Allen Josh Tenenbaum and Leslie Kaelbling. 2019. Residual Policy Learning. https:\/\/doi.org\/10.48550\/arXiv.1812.06298 arXiv:1812.06298 [cs].","DOI":"10.48550\/arXiv.1812.06298"},{"key":"e_1_3_2_1_47_1","unstructured":"Sainbayar Sukhbaatar Olga Golovneva Vasu Sharma Hu Xu Xi\u00a0Victoria Lin Baptiste Rozi\u00e8re Jacob Kahn Daniel Li Wen-tau Yih Jason Weston and Xian Li. 2024. Branch-Train-MiX: Mixing Expert LLMs into a Mixture-of-Experts LLM. 
https:\/\/arxiv.org\/abs\/2403.07816v1"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jedc.2007.01.034"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11222-007-9033-z"},{"key":"e_1_3_2_1_50_1","volume-title":"Pax: Scalable Opponent Shaping in JAX. https:\/\/github.com\/ucl-dark\/pax","author":"Willi Timon","year":"2023","unstructured":"Timon Willi, Akbir Khan, Newton Kwan, Mikayel Samvelyan, Chris Lu, and Jakob Foerster. 2023. Pax: Scalable Opponent Shaping in JAX. https:\/\/github.com\/ucl-dark\/pax"},{"key":"e_1_3_2_1_51_1","volume-title":"Mixture of Experts in a Mixture of RL settings. arXiv preprint arXiv:2406.18420","author":"Willi Timon","year":"2024","unstructured":"Timon Willi, Johan Obando-Ceron, Jakob Foerster, Karolina Dziugaite, and Pablo\u00a0Samuel Castro. 2024. Mixture of Experts in a Mixture of RL settings. arXiv preprint arXiv:2406.18420 (2024)."},{"key":"e_1_3_2_1_52_1","first-page":"2640","volume-title":"Proceedings of the 40th International Conference on Machine Learning. PMLR, 38286\u201338300","author":"Xiong Zheng","year":"2023","unstructured":"Zheng Xiong, Jacob Beck, and Shimon Whiteson. 2023. Universal Morphology Control via Contextual Modulation. In Proceedings of the 40th International Conference on Machine Learning. PMLR, 38286\u201338300. https:\/\/proceedings.mlr.press\/v202\/xiong23a.html ISSN: 2640-3498."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","unstructured":"Chuheng Zhang Yitong Duan Xiaoyu Chen Jianyu Chen Jian Li and Li Zhao. 2023. Towards Generalizable Reinforcement Learning for Trade Execution. https:\/\/doi.org\/10.48550\/arXiv.2307.11685 arXiv:2307.11685 [cs q-fin stat].","DOI":"10.48550\/arXiv.2307.11685"},{"key":"e_1_3_2_1_54_1","unstructured":"Barret Zoph Irwan Bello Sameer Kumar Nan Du Yanping Huang Jeff Dean Noam Shazeer and William Fedus. 2022. ST-MoE: Designing Stable and Transferable Sparse Expert Models. 
arxiv:2202.08906\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2202.08906"}],"event":{"name":"ICAIF '24: 5th ACM International Conference on AI in Finance","location":"Brooklyn NY USA","acronym":"ICAIF '24"},"container-title":["Proceedings of the 5th ACM International Conference on AI in Finance"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3677052.3698691","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3677052.3698691","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T17:11:52Z","timestamp":1755882712000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3677052.3698691"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,14]]},"references-count":54,"alternative-id":["10.1145\/3677052.3698691","10.1145\/3677052"],"URL":"https:\/\/doi.org\/10.1145\/3677052.3698691","relation":{},"subject":[],"published":{"date-parts":[[2024,11,14]]},"assertion":[{"value":"2024-11-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}