{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,6]],"date-time":"2025-11-06T12:31:01Z","timestamp":1762432261333,"version":"3.37.3"},"reference-count":33,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2022]]},"DOI":"10.1109\/tpami.2022.3213654","type":"journal-article","created":{"date-parts":[[2022,10,11]],"date-time":"2022-10-11T19:32:44Z","timestamp":1665516764000},"page":"1-13","source":"Crossref","is-referenced-by-count":5,"title":["An Efficient Fisher Matrix Approximation Method for Large-Scale Neural Network Optimization"],"prefix":"10.1109","author":[{"given":"Minghan","family":"Yang","sequence":"first","affiliation":[{"name":"DAMO Academy, Alibaba Group, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dong","family":"Xu","sequence":"additional","affiliation":[{"name":"Beijing International Center for Mathematical Research, Peking University, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiwen","family":"Cui","sequence":"additional","affiliation":[{"name":"Paul G. Allen School of Computer Science &amp; Engineering, University of Washington, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1762-0671","authenticated-orcid":false,"given":"Zaiwen","family":"Wen","sequence":"additional","affiliation":[{"name":"Beijing International Center for Mathematical Research, College of Engineering and Center for Machine Learning Research, Peking University, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pengxiang","family":"Xu","sequence":"additional","affiliation":[{"name":"Peng Cheng Laboratory, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177729586"},{"key":"ref2","first-page":"2121","article-title":"Adaptive subgradient methods for online learning and stochastic optimization","volume":"12","author":"Duchi","year":"2011","journal-title":"J. Mach. Learn. Res."},{"article-title":"Adam: A method for stochastic optimization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kingma","key":"ref3"},{"key":"ref4","first-page":"8026","article-title":"PyTorch: An imperative style, high-performance deep learning library","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Paszke"},{"key":"ref5","first-page":"265","article-title":"Tensorflow: A system for large-scale machine learning","volume-title":"Proc. 12th USENIX Symp. Oper. Syst. Des. Implementation","author":"Abadi"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/s10107-018-1346-5"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1137\/15M1021106"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1137\/140954362"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01051"},{"article-title":"Practical quasi-Newton methods for training deep neural networks","year":"2020","author":"Goldfarb","key":"ref10"},{"key":"ref11","first-page":"127","article-title":"Neural learning in structured parameter spaces-natural Riemannian gradient","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Amari"},{"key":"ref12","first-page":"1","article-title":"New insights and perspectives on the natural gradient method","volume":"21","author":"Martens","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"ref13","first-page":"849","article-title":"Topmoumoute online natural gradient algorithm","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Roux"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-007-5016-8"},{"key":"ref15","first-page":"2408","article-title":"Optimizing neural networks with kronecker-factored approximate curvature","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Martens"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s10915-022-01911-x"},{"key":"ref17","first-page":"1842","article-title":"Shampoo: Preconditioned stochastic tensor optimization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Gupta"},{"article-title":"Scalable second order optimization for deep learning","year":"2020","author":"Anil","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/s40305-020-00309-6"},{"key":"ref20","first-page":"4156","article-title":"Limitations of the empirical fisher approximation for natural gradient descent","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kunstner"},{"issue":"18","key":"ref21","first-page":"1","article-title":"Information-geometric optimization algorithms: A unifying picture via invariance principles","volume":"18","author":"Ollivier","year":"2017","journal-title":"J. Mach. Learn. Res."},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/s10915-018-0777-8"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1137\/15M1053141"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"article-title":"MLPerf training benchmark","year":"2019","author":"Mattson","key":"ref26"},{"article-title":"Large batch training of convolutional networks","year":"2017","author":"You","key":"ref27"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356137"},{"article-title":"Accurate, large minibatch SGD: Training ImageNet in 1 hour","year":"2017","author":"Goyal","key":"ref29"},{"key":"ref30","first-page":"992","article-title":"SchNet: A continuous-filter convolutional neural network for modeling quantum interactions","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Sch\u00fctt"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1038\/sdata.2014.22"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1063\/1.4812323"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcp.2018.10.045"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/4359286\/09916144.pdf?arnumber=9916144","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,22]],"date-time":"2024-01-22T23:16:53Z","timestamp":1705965413000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9916144\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/tpami.2022.3213654","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"type":"print","value":"0162-8828"},{"type":"electronic","value":"2160-9292"},{"type":"electronic","value":"1939-3539"}],"subject":[],"published":{"date-parts":[[2022]]}}}