{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T07:35:07Z","timestamp":1773732907609,"version":"3.50.1"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1109\/bigdata47090.2019.9006104","type":"proceedings-article","created":{"date-parts":[[2020,2,25]],"date-time":"2020-02-25T06:05:34Z","timestamp":1582610734000},"page":"1971-1980","source":"Crossref","is-referenced-by-count":104,"title":["Demystifying Learning Rate Policies for High Accuracy Training of Deep Neural Networks"],"prefix":"10.1109","author":[{"given":"Yanzhao","family":"Wu","sequence":"first","affiliation":[]},{"given":"Ling","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Juhyun","family":"Bae","sequence":"additional","affiliation":[]},{"given":"Ka-Ho","family":"Chow","sequence":"additional","affiliation":[]},{"given":"Arun","family":"Iyengar","sequence":"additional","affiliation":[]},{"given":"Calton","family":"Pu","sequence":"additional","affiliation":[]},{"given":"Wenqi","family":"Wei","sequence":"additional","affiliation":[]},{"given":"Lei","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Qi","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330701"},{"key":"ref30","doi-asserted-by":"crossref","first-page":"507","DOI":"10.1007\/978-3-642-25566-3_40","article-title":"Sequential modelbased optimization for general algorithm configuration","author":"hutter","year":"2011","journal-title":"Learning and Intelligent Optimization"},{"key":"ref10","first-page":"arxiv:1212.5701","article-title":"ADADELTA: An Adaptive Learning Rate Method","author":"zeiler","year":"2012","journal-title":"ArXiv e-prints"},{"key":"ref11","article-title":"Adam: A method for stochastic optimization","volume":"abs 1412 6980","author":"kingma","year":"2014","journal-title":"CoRR"},{"key":"ref12","first-page":"437","author":"bengio","year":"2012","journal-title":"Practical Recommendations for Gradient-based Training of Deep Architectures"},{"key":"ref13","first-page":"arxiv:1412.6599","article-title":"Hot Swapping for Online Adaptation of Optimization Hyperparameters","author":"bache","year":"2014","journal-title":"ArXiv e-prints"},{"key":"ref14","first-page":"arxiv:1508.02788","article-title":"The Effects of Hyperparameters on SGD Training of Neural Networks","author":"breuel","year":"2015","journal-title":"ArXiv e-prints"},{"key":"ref15","article-title":"Dawnbench: An end-to-end deep learning benchmark and competition","author":"coleman","year":"2017","journal-title":"NIPS ML Systems Workshop"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2018.00125"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2018.8621930"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TSC.2019.2928551"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"ref28","article-title":"understanding learning rates and how it improves performance in deep learning","author":"zulkifli","year":"2018"},{"key":"ref4","first-page":"arxiv:1609.04747","article-title":"An overview of gradient descent optimization algorithms","author":"ruder","year":"2016","journal-title":"ArXiv e-prints"},{"key":"ref27","article-title":"Accurate, large minibatch SGD: training imagenet in 1 hour","volume":"abs 1706 2677","author":"goyal","year":"2017","journal-title":"CoRR"},{"key":"ref3","first-page":"iii-343","article-title":"No more pesky learning rates","author":"schaul","year":"2013","journal-title":"Proceedings of the 30th International Conference on International Conference on Machine Learning - Volume 28"},{"key":"ref6","doi-asserted-by":"crossref","first-page":"421","DOI":"10.1007\/978-3-642-35289-8_25","article-title":"Stochastic gradient descent tricks","author":"bottou","year":"2012","journal-title":"Neural Networks Tricks of the Trade"},{"key":"ref29","article-title":"hyperopt - distributed asynchronous hyperparameter optimization in python","author":"developers","year":"2019"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref8","first-page":"1139","article-title":"On the importance of initialization and momentum in deep learning","volume":"28","author":"sutskever","year":"2013","journal-title":"Proceedings of the 30th International Conference on Machine Learning"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/S0893-6080(98)00116-6"},{"key":"ref2","first-page":"1097","article-title":"Imagenet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Advances in Neural Information Processing Systems 25"},{"key":"ref9","first-page":"2121","article-title":"Adaptive subgradient methods for online learning and stochastic optimization","volume":"12","author":"duchi","year":"2011","journal-title":"J Mach Learn Res"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.155"},{"key":"ref22","first-page":"arxiv:1506.01186","article-title":"Cyclical Learning Rates for Training Neural Networks","author":"smith","year":"2015","journal-title":"ArXiv e-prints"},{"key":"ref21","first-page":"arxiv:1708.07120","article-title":"Super-Convergence: Very Fast Training of Neural Networks Using Large Learning Rates","author":"smith","year":"2017","journal-title":"ArXiv e-prints"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2017.2732482"},{"key":"ref23","first-page":"arxiv:1608.03983","article-title":"SGDR: Stochastic Gradient Descent with Warm Restarts","author":"loshchilov","year":"2016","journal-title":"ArXiv e-prints"},{"key":"ref26","first-page":"249","article-title":"Understanding the difficulty of training deep feedforward neural networks","volume":"9","author":"glorot","year":"2010","journal-title":"Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics"},{"key":"ref25","year":"2018","journal-title":"Caffe a fast open framework for deep learning"}],"event":{"name":"2019 IEEE International Conference on Big Data (Big Data)","location":"Los Angeles, CA, USA","start":{"date-parts":[[2019,12,9]]},"end":{"date-parts":[[2019,12,12]]}},"container-title":["2019 IEEE International Conference on Big Data (Big Data)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8986695\/9005444\/09006104.pdf?arnumber=9006104","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,17]],"date-time":"2022-07-17T21:47:57Z","timestamp":1658094477000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9006104\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/bigdata47090.2019.9006104","relation":{},"subject":[],"published":{"date-parts":[[2019,12]]}}}