{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T03:48:35Z","timestamp":1769658515442,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,1,28]]},"DOI":"10.1145\/3774934.3786418","type":"proceedings-article","created":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T15:25:57Z","timestamp":1769613957000},"page":"452-465","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["ParDiff: Efficiently Parallelizing Reverse-Mode Automatic Differentiation with Direct Indexing"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-9442-7375","authenticated-orcid":false,"given":"Shuhong","family":"Huang","sequence":"first","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6543-0859","authenticated-orcid":false,"given":"Shizhi","family":"Tang","sequence":"additional","affiliation":[{"name":"Qingcheng.AI, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6747-947X","authenticated-orcid":false,"given":"Yuan","family":"Wen","sequence":"additional","affiliation":[{"name":"University of Aberdeen, Aberdeen, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3092-1578","authenticated-orcid":false,"given":"Huanqi","family":"Cao","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5935-4217","authenticated-orcid":false,"given":"Ruibai","family":"Tang","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6207-5049","authenticated-orcid":false,"given":"Yidong","family":"Chen","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6643-4405","authenticated-orcid":false,"given":"Jiping","family":"Yu","sequence":"additional","affiliation":[{"name":"Tsinghua University, Beijing, China"}]},{"given":"Yang","family":"Li","sequence":"additional","affiliation":[{"name":"Lenovo Research, China"}]},{"given":"Chao","family":"Jiang","sequence":"additional","affiliation":[{"name":"Lenovo Research, China"}]},{"given":"Limin","family":"Xiao","sequence":"additional","affiliation":[{"name":"Lenovo Research, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7656-6428","authenticated-orcid":false,"given":"Jidong","family":"Zhai","sequence":"additional","affiliation":[{"name":"Tsinghua University, China"}]}],"member":"320","published-online":{"date-parts":[[2026,1,28]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Mart\u00edn Abadi Ashish Agarwal Paul Barham Eugene Brevdo Zhifeng Chen Craig Citro Greg S. Corrado Andy Davis Jeffrey Dean Matthieu Devin Sanjay Ghemawat Ian Goodfellow Andrew Harp Geoffrey Irving Michael Isard Yangqing Jia Rafal Jozefowicz Lukasz Kaiser Manjunath Kudlur Josh Levenberg Dandelion Man\u00e9 Rajat Monga Sherry Moore Derek Murray Chris Olah Mike Schuster Jonathon Shlens Benoit Steiner Ilya Sutskever Kunal Talwar Paul Tucker Vincent Vanhoucke Vijay Vasudevan Fernanda Vi\u00e9gas Oriol Vinyals Pete Warden Martin Wattenberg Martin Wicke Yuan Yu and Xiaoqiang Zheng. 2015. TensorFlow: Large-Scale Machine Learning on Heterogeneous Systems. https:\/\/www.tensorflow.org\/ Software available from tensorflow.org"},{"key":"e_1_3_2_1_2_1","unstructured":"Randy Allen and Ken Kennedy. 2001. Optimizing Compilers for Modern Architectures: A Dependence-based Approach. Morgan Kaufmann. isbn:1-55860-286-0"},{"key":"e_1_3_2_1_3_1","article-title":"Automatic Differentiation in Machine Learning: a Survey","volume":"18","author":"Baydin Atilim Gunes","year":"2017","unstructured":"Atilim Gunes Baydin, Barak A. Pearlmutter, Alexey Andreyevich Radul, and Jeffrey Mark Siskind. 2017. Automatic Differentiation in Machine Learning: a Survey. J. Mach. Learn. Res., 18 (2017), 153:1\u2013153:43. http:\/\/jmlr.org\/papers\/v18\/17-468.html","journal-title":"J. Mach. Learn. Res."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3078597.3078616"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2896389"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCSA.2002.1019144"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jocs.2020.101155"},{"key":"e_1_3_2_1_8_1","volume-title":"Training Deep Nets with Sublinear Memory Cost. CoRR, abs\/1604.06174","author":"Chen Tianqi","year":"2016","unstructured":"Tianqi Chen, Bing Xu, Chiyuan Zhang, and Carlos Guestrin. 2016. Training Deep Nets with Sublinear Memory Cost. CoRR, abs\/1604.06174 (2016), arXiv:1604.06174. arxiv:1604.06174"},{"key":"e_1_3_2_1_9_1","volume-title":"Matthew James Johnson, and Chris Leary","author":"Frostig Roy","year":"2018","unstructured":"Roy Frostig, Matthew James Johnson, and Chris Leary. 2018. Compiling machine learning programs via high-level tracing. Systems for Machine Learning."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Markus Grabner Thomas Pock Tobias Gross and Bernhard Kainz. 2008. Automatic differentiation for GPU-accelerated 2D\/3D registration. In Advances in automatic differentiation. 259\u2013269.","DOI":"10.1007\/978-3-540-68942-3_23"},{"key":"e_1_3_2_1_11_1","volume-title":"Achieving logarithmic growth of temporal and spatial complexity in reverse automatic differentiation. Optimization Methods and software, 1, 1","author":"Griewank Andreas","year":"1992","unstructured":"Andreas Griewank. 1992. Achieving logarithmic growth of temporal and spatial complexity in reverse automatic differentiation. Optimization Methods and software, 1, 1 (1992), 35\u201354."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/347837.347846"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2450153.2450158"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/2560359"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3472796"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1177\/1094342017712060"},{"key":"e_1_3_2_1_17_1","volume-title":"Towards Polyhedral Automatic Differentiation. In Program Transformations for ML Workshop at NeurIPS","author":"H\u00fcckelheim Jan","year":"2019","unstructured":"Jan H\u00fcckelheim and Navjot Kukreja. 2019. Towards Polyhedral Automatic Differentiation. In Program Transformations for ML Workshop at NeurIPS 2019."},{"key":"e_1_3_2_1_18_1","volume-title":"Don\u2019t Unroll Adjoint: Differentiating SSA-Form Programs. CoRR, abs\/1810.07951","author":"Innes Michael","year":"2018","unstructured":"Michael Innes. 2018. Don\u2019t Unroll Adjoint: Differentiating SSA-Form Programs. CoRR, abs\/1810.07951 (2018), arXiv:1810.07951. arxiv:1810.07951"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3528223.3530099"},{"key":"e_1_3_2_1_20_1","volume-title":"Dynamic Tensor Rematerialization. In 9th International Conference on Learning Representations, ICLR 2021","author":"Kirisame Marisa","year":"2021","unstructured":"Marisa Kirisame, Steven Lyubomirsky, Altan Haan, Jennifer Brennan, Mike He, Jared Roesch, Tianqi Chen, and Zachary Tatlock. 2021. Dynamic Tensor Rematerialization. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3-7, 2021. OpenReview.net. https:\/\/openreview.net\/forum?id=Vfs_2RnOD0H"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201383"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00780"},{"key":"e_1_3_2_1_23_1","volume-title":"ICML 2015 AutoML workshop. 238","author":"Maclaurin Dougal","year":"2015","unstructured":"Dougal Maclaurin, David Duvenaud, and Ryan P Adams. 2015. Autograd: Effortless gradients in numpy. In ICML 2015 AutoML workshop. 238."},{"key":"e_1_3_2_1_24_1","volume-title":"Automatically Synthesize Fast Gradients. In Advances in Neural Information Processing Systems","author":"Moses William","year":"2020","unstructured":"William Moses and Valentin Churavy. 2020. Instead of Rewriting Foreign Code for Machine Learning, Automatically Synthesize Fast Gradients. In Advances in Neural Information Processing Systems, H. Larochelle, M. Ranzato, R. Hadsell, M. F. Balcan, and H. Lin (Eds.). 33, Curran Associates, Inc., 12472\u201312485. https:\/\/proceedings.neurips.cc\/paper\/2020\/file\/9332c513ef44b682e9347822c2e457ac-Paper.pdf"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476165"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.5555\/3571885.3571964"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Emre \u00d6zkaya Anil Nemili and Nicolas R Gauger. 2012. Application of automatic differentiation to an incompressible URANS solver. In Recent Advances in Algorithmic Differentiation. 35\u201345.","DOI":"10.1007\/978-3-642-30023-3_4"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/7902.7904"},{"key":"e_1_3_2_1_29_1","unstructured":"Adam Paszke Sam Gross Soumith Chintala Gregory Chanan Edward Yang Zachary DeVito Zeming Lin Alban Desmaison Luca Antiga and Adam Lerer. 2017. Automatic differentiation in PyTorch."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3473593"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/1330017.1330018"},{"key":"e_1_3_2_1_32_1","unstructured":"Nikhila Ravi Jeremy Reizenstein David Novotny Taylor Gordon Wan-Yen Lo Justin Johnson and Georgia Gkioxari. 2020. Accelerating 3D Deep Learning with PyTorch3D. arXiv:2007.08501."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41404.2022.00063"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1080\/10556788.2018.1435651"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3519939.3523448"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1111\/cgf.14507"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1080\/10556788.2018.1435653"},{"key":"e_1_3_2_1_38_1","volume-title":"Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems 2018","author":"van Merrienboer Bart","year":"2018","unstructured":"Bart van Merrienboer, Olivier Breuleux, Arnaud Bergeron, and Pascal Lamblin. 2018. Automatic differentiation in ML: Where we are and where we should be going. In Advances in Neural Information Processing Systems 31: Annual Conference on Neural Information Processing Systems 2018, NeurIPS 2018, December 3-8, 2018, Montr\u00e9al, Canada, Samy Bengio, Hanna M. Wallach, Hugo Larochelle, Kristen Grauman, Nicol\u00f2 Cesa-Bianchi, and Roman Garnett (Eds.). 8771\u20138781. https:\/\/proceedings.neurips.cc\/paper\/2018\/hash\/770f8e448d07586afbf77bb59f698587-Abstract.html"},{"key":"e_1_3_2_1_39_1","volume-title":"Tangent: Automatic Differentiation Using Source Code Transformation in Python. CoRR, abs\/1711.02712","author":"van Merri\u00ebnboer Bart","year":"2017","unstructured":"Bart van Merri\u00ebnboer, Alexander B. Wiltschko, and Dan Moldovan. 2017. Tangent: Automatic Differentiation Using Source Code Transformation in Python. CoRR, abs\/1711.02712 (2017), arXiv:1711.02712. arxiv:1711.02712"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/608\/1\/012055"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","unstructured":"Sven Verdoolaege. 2016. Presburger formulas and polyhedral compilation. https:\/\/doi.org\/10.13140\/RG.2.1.1174.6323 10.13140\/RG.2.1.1174.6323","DOI":"10.13140\/RG.2.1.1174.6323"},{"key":"e_1_3_2_1_42_1","volume-title":"Automatic differentiation for solid mechanics. CoRR, abs\/2001.07366","author":"Vigliotti Andrea","year":"2020","unstructured":"Andrea Vigliotti and Ferdinando Auricchio. 2020. Automatic differentiation for solid mechanics. CoRR, abs\/2001.07366 (2020), arXiv:2001.07366. arxiv:2001.07366"},{"key":"e_1_3_2_1_43_1","volume-title":"6th International Conference on Learning Representations, ICLR","author":"Wang Fei","year":"2018","unstructured":"Fei Wang and Tiark Rompf. 2018. A Language and Compiler View on Differentiable Programming. In 6th International Conference on Learning Representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Workshop Track Proceedings. OpenReview.net. https:\/\/openreview.net\/forum?id=SJxJtYkPG"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2212.00964"}],"event":{"name":"PPoPP '26: 31st ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming","location":"Sydney NSW Australia","acronym":"PPoPP '26","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing","SIGPLAN ACM Special Interest Group on Programming Languages"]},"container-title":["Proceedings of the 31st ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3774934.3786418","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T15:29:34Z","timestamp":1769614174000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3774934.3786418"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,28]]},"references-count":44,"alternative-id":["10.1145\/3774934.3786418","10.1145\/3774934"],"URL":"https:\/\/doi.org\/10.1145\/3774934.3786418","relation":{},"subject":[],"published":{"date-parts":[[2026,1,28]]},"assertion":[{"value":"2026-01-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}