{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:00:19Z","timestamp":1750309219168,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,27]],"date-time":"2024-10-27T00:00:00Z","timestamp":1729987200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Key R&D Program of China","award":["2022YFB2901103"],"award-info":[{"award-number":["2022YFB2901103"]}]},{"name":"Research fund of CIE-SmartChip","award":["20220370-0060"],"award-info":[{"award-number":["20220370-0060"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,27]]},"DOI":"10.1145\/3676536.3676770","type":"proceedings-article","created":{"date-parts":[[2025,4,9]],"date-time":"2025-04-09T13:26:26Z","timestamp":1744205186000},"page":"1-9","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["OFT: An accelerator with eager gradient prediction for attention training"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3277-7204","authenticated-orcid":false,"given":"Miao","family":"Wang","sequence":"first","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2854-729X","authenticated-orcid":false,"given":"Shengbing","family":"Zhang","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-1645-1662","authenticated-orcid":false,"given":"Sijia","family":"Wang","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9525-2096","authenticated-orcid":false,"given":"Zhao","family":"Yang","sequence":"additional","affiliation":[{"name":"Chang'an University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0637-249X","authenticated-orcid":false,"given":"Meng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University, Xi'an, China"}]}],"member":"320","published-online":{"date-parts":[[2025,4,9]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00061"},{"key":"e_1_3_2_1_2_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877--1901."},{"key":"e_1_3_2_1_3_1","volume-title":"Semeval-2017 task 1: Semantic textual similarity-multilingual and cross-lingual focused evaluation. arXiv preprint arXiv:1708.00055","author":"Cer Daniel","year":"2017","unstructured":"Daniel Cer, Mona Diab, Eneko Agirre, Inigo Lopez-Gazpio, and Lucia Specia. 2017. Semeval-2017 task 1: Semantic textual similarity-multilingual and cross-lingual focused evaluation. arXiv preprint arXiv:1708.00055 (2017)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD51958.2021.9643522"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2020.3005786"},{"volume-title":"Machine learning challenges workshop","author":"Dagan Ido","key":"e_1_3_2_1_6_1","unstructured":"Ido Dagan, Oren Glickman, and Bernardo Magnini. 2005. The pascal recognising textual entailment challenge. In Machine learning challenges workshop. Springer, 177--190."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2020.2976475"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/V1\/N19-1423"},{"key":"e_1_3_2_1_9_1","volume-title":"Third international workshop on paraphrasing (IWP2005)","author":"Dolan Bill","year":"2005","unstructured":"Bill Dolan and Chris Brockett. 2005. Automatically constructing a corpus of sentential paraphrases. In Third international workshop on paraphrasing (IWP2005)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589040"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00035"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00060"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAC56929.2023.10247716"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3613424.3623779"},{"key":"e_1_3_2_1_15_1","volume-title":"Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942","author":"Lan Zhenzhong","year":"2019","unstructured":"Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, and Radu Soricut. 2019. Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942 (2019)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527404"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00066"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2022.3175582"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480125"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00069"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589057"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507738"},{"key":"e_1_3_2_1_23_1","volume-title":"100,000+ questions for machine comprehension of text. arXiv preprint arXiv:1606.05250","author":"Rajpurkar Pranav","year":"2016","unstructured":"Pranav Rajpurkar, Jian Zhang, Konstantin Lopyrev, and Percy Liang. 2016. Squad: 100,000+ questions for machine comprehension of text. arXiv preprint arXiv:1606.05250 (2016)."},{"key":"e_1_3_2_1_24_1","volume-title":"Natural language understanding with the quora question pairs dataset. arXiv preprint arXiv:1907.01041","author":"Sharma Lakshay","year":"2019","unstructured":"Lakshay Sharma, Laura Graesser, Nikita Nangia, and Utku Evci. 2019. Natural language understanding with the quora question pairs dataset. arXiv preprint arXiv:1907.01041 (2019)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18072.2020.9218650"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3370748.3406554"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D13-1170"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480095"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2023.3273992"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2303.13745"},{"key":"e_1_3_2_1_31_1","volume-title":"TransCODE: Co-design of transformers and accelerators for efficient training and inference","author":"Tuli Shikhar","year":"2023","unstructured":"Shikhar Tuli and Niraj K Jha. 2023. TransCODE: Co-design of transformers and accelerators for efficient training and inference. IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems (2023)."},{"key":"e_1_3_2_1_32_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS48785.2022.9937567"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","unstructured":"Alex Wang Amanpreet Singh Julian Michael Felix Hill Omer Levy and Samuel Bowman. 2018. GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding. In Proceedings of the 2018 EMNLP Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP Tal Linzen Grzegorz Chrupa\u0142a and Afra Alishahi (Eds.). Association for Computational Linguistics Brussels Belgium 353--355. 10.18653\/v1\/W18-5446","DOI":"10.18653\/v1\/W18-5446"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00018"},{"key":"e_1_3_2_1_36_1","volume-title":"SPACE: Sparsity Propagation Based DCNN Training Accelerator on Edge. In International Conference on Algorithms and Architectures for Parallel Processing. Springer, 391--405","author":"Wang Miao","year":"2021","unstructured":"Miao Wang, Zhen Chen, Chuxi Li, Zhao Yang, Lei Li, Meng Zhang, and Shengbing Zhang. 2021. SPACE: Sparsity Propagation Based DCNN Training Accelerator on Edge. In International Conference on Algorithms and Architectures for Parallel Processing. Springer, 391--405."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2022.3213521"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00290"},{"key":"e_1_3_2_1_39_1","volume-title":"A broad-coverage challenge corpus for sentence understanding through inference. arXiv preprint arXiv:1704.05426","author":"Williams Adina","year":"2017","unstructured":"Adina Williams, Nikita Nangia, and Samuel R Bowman. 2017. A broad-coverage challenge corpus for sentence understanding through inference. arXiv preprint arXiv:1704.05426 (2017)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00064"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378514"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00082"}],"event":{"name":"ICCAD '24: 43rd IEEE\/ACM International Conference on Computer-Aided Design","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE CAS","IEEE CEDA","IEEE EDS"],"location":"Newark Liberty International Airport Marriott New York NY USA","acronym":"ICCAD '24"},"container-title":["Proceedings of the 43rd IEEE\/ACM International Conference on Computer-Aided Design"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676536.3676770","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3676536.3676770","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T23:43:58Z","timestamp":1750290238000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3676536.3676770"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,27]]},"references-count":42,"alternative-id":["10.1145\/3676536.3676770","10.1145\/3676536"],"URL":"https:\/\/doi.org\/10.1145\/3676536.3676770","relation":{},"subject":[],"published":{"date-parts":[[2024,10,27]]},"assertion":[{"value":"2025-04-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}