{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:28:38Z","timestamp":1740101318328,"version":"3.37.3"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,12,17]],"date-time":"2022-12-17T00:00:00Z","timestamp":1671235200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,17]],"date-time":"2022-12-17T00:00:00Z","timestamp":1671235200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,12,17]]},"DOI":"10.1109\/bigdata55660.2022.10020262","type":"proceedings-article","created":{"date-parts":[[2023,1,26]],"date-time":"2023-01-26T19:35:23Z","timestamp":1674761723000},"page":"1286-1291","source":"Crossref","is-referenced-by-count":0,"title":["MBAG: A Scalable Mini-Block Adaptive Gradient Method for Deep Neural Networks"],"prefix":"10.1109","author":[{"given":"Jaewoo","family":"Lee","sequence":"first","affiliation":[{"name":"University of Georgia,Department of Computer Science,Athens,USA"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177729586"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/0041-5553(64)90137-5"},{"key":"ref3","first-page":"543","article-title":"A method for solving the convex programming problem with convergence rate O(1\/k2)","volume":"269","author":"Nesterov","year":"1983","journal-title":"Doklady Akademii Nauk SSSR"},{"issue":"7","key":"ref4","article-title":"Adaptive subgradient methods for online learning and stochastic optimization","volume":"12","author":"Duchi","year":"2011","journal-title":"Journal of machine learning research"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1406.3269"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.4135\/9781412983907.n1717"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.5555\/2999611.2999647"},{"key":"ref8","first-page":"2408","article-title":"Optimizing neural networks with Kronecker-factored approximate curvature","volume-title":"Proceedings of the 32nd International Conference on International Conference on Machine Learning","volume":"37","author":"Martens"},{"key":"ref9","first-page":"1842","article-title":"Shampoo: Preconditioned Stochastic Tensor Optimization","volume-title":"Proceedings of the 35th International Conference on Machine Learning","author":"Gupta"},{"key":"ref10","first-page":"2386","article-title":"Practical quasi-newton methods for training deep neural networks","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Goldfarb"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1137\/15M1053141"},{"key":"ref12","first-page":"557","article-title":"Practical Gauss-Newton Optimi-sation for Deep Learning","volume-title":"Proceedings of the 34th International Conference on Machine Learning","author":"Botev"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177729893"},{"key":"ref14","article-title":"Neural learning in structured parameter spaces-natural Riemannian gradient","volume-title":"Advances in neural information processing systems","volume":"9","author":"Amari"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1137\/1.9780898717778"},{"article-title":"Block-diagonal hessian-free optimization for training neural networks","year":"2017","author":"Zhang","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013337"},{"article-title":"A Mini-Block Natural Gradient Method for Deep Neural Networks","year":"2022","author":"Bahamou","key":"ref18"},{"key":"ref19","first-page":"2574","article-title":"AdaBlock: SGD with Practical Block Diagonal Matrix Adaptation for Deep Learning","volume-title":"Proceedings of The 25th International Conference on Artificial Intelligence and Statistics","author":"Yun"},{"key":"ref20","first-page":"799","article-title":"Modular Block-diagonal Curvature Approximations for Feedforward Architectures","volume-title":"Proceedings of the Twenty Third International Conference on Artificial Intelligence and Statistics","author":"Dangel"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/BF01589116"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1162\/089976600300015420"},{"key":"ref23","first-page":"14873","article-title":"M-fac: Efficient matrix-free approximations of second-order information","volume-title":"Advances in Neural Information Processing Systems","volume":"34","author":"Frantar"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-007-5016-8"},{"key":"ref25","first-page":"18098","article-title":"WoodFisher: Efficient second-order approximation for neural network compression","volume-title":"Advances in Neural Information Processing Systems","author":"Singh"},{"key":"ref26","first-page":"1758","article-title":"Scalable adaptive stochastic optimization using random projections","volume-title":"Proceedings of the 30th International Conference on Neural Information Processing Systems","author":"Krummenacher"},{"key":"ref27","first-page":"102","article-title":"Efficient Full-Matrix Adaptive Regularization","volume-title":"Proceedings of the 36th International Conference on Machine Learning","author":"Agarwal"},{"article-title":"On the Convergence of A Class of Adam-Type Algorithms for Non-Convex Optimization","volume-title":"International Conference on Learning Representations","author":"Chen","key":"ref28"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref30","first-page":"735","article-title":"Deep learning via hessian-free optimization","volume-title":"Proceedings of the 27th International Conference on International Conference on Machine Learning","author":"Martens"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1126\/science.1127647"}],"event":{"name":"2022 IEEE International Conference on Big Data (Big Data)","start":{"date-parts":[[2022,12,17]]},"location":"Osaka, Japan","end":{"date-parts":[[2022,12,20]]}},"container-title":["2022 IEEE International Conference on Big Data (Big Data)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10020192\/10020156\/10020262.pdf?arnumber=10020262","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,13]],"date-time":"2024-02-13T07:40:35Z","timestamp":1707810035000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10020262\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,17]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/bigdata55660.2022.10020262","relation":{},"subject":[],"published":{"date-parts":[[2022,12,17]]}}}