{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T18:57:22Z","timestamp":1781031442356,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":66,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T00:00:00Z","timestamp":1780963200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"NSF","award":["HDR TRIPODS Phase II 2217058"],"award-info":[{"award-number":["HDR TRIPODS Phase II 2217058"]}]},{"name":"NSF","award":["CCF2008733"],"award-info":[{"award-number":["CCF2008733"]}]},{"name":"ONR","award":["N00014-22-1-2713"],"award-info":[{"award-number":["N00014-22-1-2713"]}]},{"name":"NSF","award":["CCF-2238221"],"award-info":[{"award-number":["CCF-2238221"]}]},{"name":"Columbia University","award":["Center of Artificial Intelligence Technology"],"award-info":[{"award-number":["Center of Artificial Intelligence Technology"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,9]]},"DOI":"10.1145\/3798129.3800819","type":"proceedings-article","created":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T17:53:56Z","timestamp":1781027636000},"page":"1073-1084","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["On the Computational Hardness of Transformers"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6494-3839","authenticated-orcid":false,"given":"Barna","family":"Saha","sequence":"first","affiliation":[{"name":"University of California at San Diego, La Jolla, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6809-2514","authenticated-orcid":false,"given":"Yinzhan","family":"Xu","sequence":"additional","affiliation":[{"name":"University of California at San Diego, La Jolla, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-0528-5639","authenticated-orcid":false,"given":"Christopher","family":"Ye","sequence":"additional","affiliation":[{"name":"University of California at San Diego, La Jolla, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8952-3243","authenticated-orcid":false,"given":"Hantao","family":"Yu","sequence":"additional","affiliation":[{"name":"Columbia University, New York, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,9]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611975482.3"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/FOCS.2017.12"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611978322.63"},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems. Curran Associates Inc., Article 2755","author":"Alman Josh","year":"2024","unstructured":"Josh Alman and Zhao Song. 2024. Fast attention requires bounded entries. In Proceedings of the 37th International Conference on Neural Information Processing Systems. Curran Associates Inc., Article 2755, 19 pages."},{"key":"e_1_3_2_1_5_1","first-page":"1868","volume-title":"Proceedings of the 15th Innovations in Theoretical Computer Science Conference (ITCS","author":"Alman Josh","year":"2024","unstructured":"Josh Alman, Ethan Turok, Hantao Yu, and Hengzhi Zhang. 2024. Tensor Ranks and the Fine-Grained Complexity of Dynamic Programming. In Proceedings of the 15th Innovations in Theoretical Computer Science Conference (ITCS 2024). isbn:978-3-95977-309-6 issn:1868-8969"},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the 13th International Conference on Learning Representations, ICLR.","author":"Alman Josh","year":"2025","unstructured":"Josh Alman and Hantao Yu. 2025. Fundamental Limitations on Subquadratic Alternatives to Transformers. In Proceedings of the 13th International Conference on Learning Representations, ICLR."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2746539.2746612"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3188745.3188950"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/0304-3975(83)90110-X"},{"key":"e_1_3_2_1_10_1","first-page":"1","article-title":"Optimal Separation and Strong Direct Sum for Randomized Query Complexity. In Proceedings of the 34th Computational Complexity Conference","volume":"29","author":"Blais Eric","year":"2019","unstructured":"Eric Blais and Joshua Brody. 2019. Optimal Separation and Strong Direct Sum for Randomized Query Complexity. In Proceedings of the 34th Computational Complexity Conference, CCC. 29:1\u201329:17.","journal-title":"CCC."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.4086\/TOC.GS.2013.005"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3494540"},{"key":"e_1_3_2_1_13_1","unstructured":"Tom B. Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell Sandhini Agarwal Ariel Herbert-Voss Gretchen Krueger Tom Henighan Rewon Child Aditya Ramesh Daniel M. Ziegler Jeffrey Wu Clemens Winter Christopher Hesse Mark Chen Eric Sigler Mateusz Litwin Scott Gray Benjamin Chess Jack Clark Christopher Berner Sam McCandlish Alec Radford Ilya Sutskever and Dario Amodei. 2020. Language Models are Few-Shot Learners. In Neural Information Processing Systems NeurIPS."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/73007.73024"},{"key":"e_1_3_2_1_15_1","volume-title":"Algebraic complexity theory. 315","author":"B\u00fcrgisser Peter","unstructured":"Peter B\u00fcrgisser, Michael Clausen, and Mohammad A Shokrollahi. 2013. Algebraic complexity theory. 315, Springer Science & Business Media."},{"key":"e_1_3_2_1_16_1","volume-title":"Scatterbrain: Unifying Sparse and Low-rank Attention. In Neural Information Processing Systems, NeurIPS.","author":"Chen Beidi","year":"2021","unstructured":"Beidi Chen, Tri Dao, Eric Winsor, Zhao Song, Atri Rudra, and Christopher R\u00e9. 2021. Scatterbrain: Unifying Sparse and Low-rank Attention. In Neural Information Processing Systems, NeurIPS."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/FOCS63196.2025.00136"},{"key":"e_1_3_2_1_18_1","volume-title":"Rethinking Attention with Performers. In International Conference on Learning Representations, ICLR.","author":"Choromanski Krzysztof Marcin","year":"2021","unstructured":"Krzysztof Marcin Choromanski, Valerii Likhosherstov, David Dohan, Xingyou Song, Andreea Gane, Tam\u00e1s Sarl\u00f3s, Peter Hawkins, Jared Quincy Davis, Afroz Mohiuddin, Lukasz Kaiser, David Benjamin Belanger, Lucy J. Colwell, and Adrian Weller. 2021. Rethinking Attention with Performers. In International Conference on Learning Representations, ICLR."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2736283"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3704631"},{"key":"e_1_3_2_1_21_1","volume-title":"Flashattention: Fast and memory-efficient exact attention with io-awareness. Advances in neural information processing systems, 35","author":"Dao Tri","year":"2022","unstructured":"Tri Dao, Dan Fu, Stefano Ermon, Atri Rudra, and Christopher R\u00e9. 2022. Flashattention: Fast and memory-efficient exact attention with io-awareness. Advances in neural information processing systems, 35 (2022), 16344\u201316359."},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of the 9th International Conference on Learning Representations, ICLR. https:\/\/openreview.net\/forum?id=YicbFdNTTy","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In Proceedings of the 9th International Conference on Learning Representations, ICLR. https:\/\/openreview.net\/forum?id=YicbFdNTTy"},{"key":"e_1_3_2_1_23_1","series-title":"SIAM Journal on computing, 24, 4","volume-title":"Amortized communication complexity","author":"Feder Tom\u00e1s","year":"1995","unstructured":"Tom\u00e1s Feder, Eyal Kushilevitz, Moni Naor, and Noam Nisan. 1995. Amortized communication complexity. SIAM Journal on computing, 24, 4 (1995), 736\u2013750."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/800152.804900"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611977912.119"},{"key":"e_1_3_2_1_26_1","volume-title":"Proceedings of the 2023 International Conference on Machine Learning, ICML. 10373\u201310391","author":"Fu Daniel Y.","year":"2023","unstructured":"Daniel Y. Fu, Elliot L. Epstein, Eric Nguyen, Armin W. Thomas, Michael Zhang, Tri Dao, Atri Rudra, and Christopher R\u00e9. 2023. Simple Hardware-Efficient Long Convolutions for Sequence Modeling. In Proceedings of the 2023 International Conference on Machine Learning, ICML. 10373\u201310391. https:\/\/proceedings.mlr.press\/v202\/fu23a.html"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/0304-3975(81)90074-8"},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the 14th International Conference on Learning Representations, ICLR.","author":"Gupta Shreya","year":"2026","unstructured":"Shreya Gupta, Boyang Huang, Barna Saha, Yinzhan Xu, and Christopher Ye. 2026. Subquadratic Algorithms and Hardness for Attention with Any Temperature. In Proceedings of the 14th International Conference on Learning Representations, ICLR."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00306"},{"key":"e_1_3_2_1_30_1","volume-title":"HyperAttention: Long-context Attention in Near-Linear Time. In The Twelfth International Conference on Learning Representations.","author":"Han Insu","year":"2024","unstructured":"Insu Han, Rajesh Jayaram, Amin Karbasi, Vahab Mirrokni, David Woodruff, and Amir Zandieh. 2024. HyperAttention: Long-context Attention in Near-Linear Time. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2020.EMNLP-MAIN.156"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1006\/jcss.2000.1727"},{"key":"e_1_3_2_1_33_1","volume-title":"Proceedings of the 41st International Conference on Machine Learning, ICML. https:\/\/openreview.net\/forum?id=duRRoGeoQT","author":"Jelassi Samy","year":"2024","unstructured":"Samy Jelassi, David Brandfonbrener, Sham M. Kakade, and Eran Malach. 2024. Repeat After Me: Transformers are Better than State Space Models at Copying. In Proceedings of the 41st International Conference on Machine Learning, ICML. https:\/\/openreview.net\/forum?id=duRRoGeoQT"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2025.ACL-SHORT.76"},{"key":"e_1_3_2_1_35_1","volume-title":"Forty-first International Conference on Machine Learning.","author":"Kacham Praneeth","year":"2024","unstructured":"Praneeth Kacham, Vahab Mirrokni, and Peilin Zhong. 2024. PolySketchFormer: Fast Transformers via Sketching Polynomial Kernels. In Forty-first International Conference on Machine Learning."},{"key":"e_1_3_2_1_36_1","volume-title":"International Conference on Machine Learning, ICML.","author":"Katharopoulos Angelos","year":"2020","unstructured":"Angelos Katharopoulos, Apoorv Vyas, Nikolaos Pappas, and Fran\u00e7ois Fleuret. 2020. Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention. In International Conference on Machine Learning, ICML."},{"key":"e_1_3_2_1_37_1","volume-title":"International Conference on Algorithmic Learning Theory. 597\u2013619","author":"Keles Feyza Duman","year":"2023","unstructured":"Feyza Duman Keles, Pruthuvi Mahesakya Wijewardena, and Chinmay Hegde. 2023. On the computational complexity of self-attention. In International Conference on Algorithmic Learning Theory. 597\u2013619."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"e_1_3_2_1_39_1","volume-title":"Reformer: The Efficient Transformer. In International Conference on Learning Representations, ICLR.","author":"Kitaev Nikita","year":"2020","unstructured":"Nikita Kitaev, Lukasz Kaiser, and Anselm Levskaya. 2020. Reformer: The Efficient Transformer. In International Conference on Learning Representations, ICLR."},{"key":"e_1_3_2_1_40_1","unstructured":"Alexander Kozachinskiy Felipe Urrutia Hector Jimenez Tomasz Steifer Germ\u00e1n Pizarro Mat\u00edas Fuentes Francisco Meza Cristian B. Calderon and Crist\u00f3bal Rojas. 2025. Strassen Attention Split VC Dimension and Compositionality in Transformers. arxiv:2501.19215. arxiv:2501.19215"},{"key":"e_1_3_2_1_41_1","volume-title":"International Conference on Machine Learning. 19274\u201319286","author":"Leviathan Yaniv","year":"2023","unstructured":"Yaniv Leviathan, Matan Kalman, and Yossi Matias. 2023. Fast inference from transformers via speculative decoding. In International Conference on Machine Learning. 19274\u201319286."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3406325.3451045"},{"key":"e_1_3_2_1_43_1","volume-title":"The 39th Annual Conference on Neural Information Processing Systems. https:\/\/openreview.net\/forum?id=o9iReV4FGm","author":"Liu Jingwen","year":"2025","unstructured":"Jingwen Liu, Hantao Yu, Clayton Sanford, Alexandr Andoni, and Daniel Hsu. 2025. Fast attention mechanisms: a tale of parallelism. In The 39th Annual Conference on Neural Information Processing Systems. https:\/\/openreview.net\/forum?id=o9iReV4FGm"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3717823.3718148"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00562"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00493"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611973105.125"},{"key":"e_1_3_2_1_48_1","volume-title":"Proceedings of the 1st Conference on Language Modeling.","author":"Peng Binghui","year":"2024","unstructured":"Binghui Peng, Srini Narayanan, and Christos Papadimitriou. 2024. On Limitations of the Transformer Architecture. In Proceedings of the 1st Conference on Language Modeling."},{"key":"e_1_3_2_1_49_1","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever et al. 2019. Language models are unsupervised multitask learners. OpenAI blog 1 8 (2019) 9."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/2488608.2488673"},{"key":"e_1_3_2_1_51_1","volume-title":"Williams","author":"Rumelhart David E.","year":"1986","unstructured":"David E. Rumelhart, Geoffrey E. Hinton, and Ronald J. Williams. 1986. Learning representations by back-propagating errors. nature, 323, 6088 (1986), 533\u2013536."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.5555\/3692070.3693823"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2408.14332"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.5555\/3692070.3693833"},{"key":"e_1_3_2_1_55_1","unstructured":"Clayton Sanford Daniel J. Hsu and Matus Telgarsky. 2023. Representational Strengths and Limitations of Transformers. In Advances in Neural Information Processing Systems 36 NeurIPS."},{"key":"e_1_3_2_1_56_1","first-page":"184","article-title":"Vermeidung von divisionen","volume":"264","author":"Strassen Volker","year":"1973","unstructured":"Volker Strassen. 1973. Vermeidung von divisionen.. Journal f\u00fcr die reine und angewandte Mathematik, 264 (1973), 184\u2013202.","journal-title":"Journal f\u00fcr die reine und angewandte Mathematik"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3186893"},{"key":"e_1_3_2_1_58_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N. Gomez Lukasz Kaiser and Illia Polosukhin. 2017. Attention is All you Need. In Neural Information Processing Systems NeurIPS."},{"key":"e_1_3_2_1_59_1","volume-title":"Proceedings of the 13th International Conference on Learning Representations, ICLR. https:\/\/openreview.net\/forum?id=h3wbI8Uk1Z","author":"Wen Kaiyue","year":"2025","unstructured":"Kaiyue Wen, Xingyu Dang, and Kaifeng Lyu. 2025. RNNs are not Transformers (Yet): The Key Bottleneck on In-Context Retrieval. In Proceedings of the 13th International Conference on Learning Representations, ICLR. https:\/\/openreview.net\/forum?id=h3wbI8Uk1Z"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-27836-8_101"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/2021.ACL-LONG.292"},{"key":"e_1_3_2_1_62_1","volume-title":"Depth-Width Tradeoffs for Transformers on Graph Tasks. In The 39th Annual Conference on Neural Information Processing Systems. https:\/\/openreview.net\/forum?id=A2pmNL7L1E","author":"Yehudai Gilad","year":"2025","unstructured":"Gilad Yehudai, Clayton Sanford, Maya Bechler-Speicher, Orr Fischer, Ran Gilad-Bachrach, and Amir Globerson. 2025. Depth-Width Tradeoffs for Transformers on Graph Tasks. In The 39th Annual Conference on Neural Information Processing Systems. https:\/\/openreview.net\/forum?id=A2pmNL7L1E"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2506.12220"},{"key":"e_1_3_2_1_64_1","first-page":"9781713829546","volume-title":"Proceedings of the 34th International Conference on Neural Information Processing Systems. Article 1450","author":"Zaheer Manzil","year":"2020","unstructured":"Manzil Zaheer, Guru Guruganesh, Avinava Dubey, Joshua Ainslie, Chris Alberti, Santiago Ontanon, Philip Pham, Anirudh Ravula, Qifan Wang, Li Yang, and Amr Ahmed. 2020. Big bird: transformers for longer sequences. In Proceedings of the 34th International Conference on Neural Information Processing Systems. Article 1450, 15 pages. isbn:9781713829546"},{"key":"e_1_3_2_1_65_1","volume-title":"Joshua Ainslie, Chris Alberti, Santiago Onta\u00f1\u00f3n","author":"Zaheer Manzil","year":"2020","unstructured":"Manzil Zaheer, Guru Guruganesh, Kumar Avinava Dubey, Joshua Ainslie, Chris Alberti, Santiago Onta\u00f1\u00f3n, Philip Pham, Anirudh Ravula, Qifan Wang, Li Yang, and Amr Ahmed. 2020. Big Bird: Transformers for Longer Sequences. In Neural Information Processing Systems, NeurIPS."},{"key":"e_1_3_2_1_66_1","volume-title":"International Conference on Machine Learning. 40605\u201340623","author":"Zandieh Amir","year":"2023","unstructured":"Amir Zandieh, Insu Han, Majid Daliri, and Amin Karbasi. 2023. Kdeformer: Accelerating transformers via kernel density estimation. In International Conference on Machine Learning. 40605\u201340623."}],"event":{"name":"STOC '26: 58th Annual ACM Symposium on Theory of Computing","location":"Salt Lake City UT USA","acronym":"STOC '26","sponsor":["SIGACT ACM Special Interest Group on Algorithms and Computation Theory"]},"container-title":["Proceedings of the 58th Annual ACM Symposium on Theory of Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3798129.3800819","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3798129.3800819","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T18:02:40Z","timestamp":1781028160000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3798129.3800819"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,9]]},"references-count":66,"alternative-id":["10.1145\/3798129.3800819","10.1145\/3798129"],"URL":"https:\/\/doi.org\/10.1145\/3798129.3800819","relation":{},"subject":[],"published":{"date-parts":[[2026,6,9]]},"assertion":[{"value":"2026-06-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}