{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,20]],"date-time":"2025-07-20T03:41:26Z","timestamp":1752982886362},"reference-count":42,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7,18]]},"DOI":"10.1109\/ijcnn52387.2021.9534014","type":"proceedings-article","created":{"date-parts":[[2021,9,20]],"date-time":"2021-09-20T21:27:41Z","timestamp":1632173261000},"page":"1-8","source":"Crossref","is-referenced-by-count":5,"title":["ASLR: An Adaptive Scheduler for Learning Rate"],"prefix":"10.1109","author":[{"given":"Alireza","family":"Khodamoradi","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kristof","family":"Denolf","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kees","family":"Vissers","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ryan C.","family":"Kastner","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref38","article-title":"Adaptive gradient methods with dynamic bound of learning rate","author":"luo","year":"0","journal-title":"International Conference on Learning Representations (ICLR)"},{"journal-title":"Minimizing finite sums with the stochastic average gradient","year":"2016","author":"schmidt","key":"ref33"},{"journal-title":"ADADELTA An Adaptive Learning Rate Method","year":"2012","author":"zeiler","key":"ref32"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.2140\/pjm.1966.16.1"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1137\/1013035"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1137\/1011036"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177729586"},{"key":"ref35","article-title":"Saga: A fast incremental gradient method with support for non-strongly convex composite objectives","author":"defazio","year":"0","journal-title":"Proceedings of the 27th International Conference on Neural Information Processing Systems"},{"key":"ref34","first-page":"315","article-title":"Accelerating stochastic gradient descent using predictive variance reduction","author":"johnson","year":"0","journal-title":"Proceedings of the 26th International Conference on Neural Information Processing Systems"},{"key":"ref10","article-title":"Very deep convlutional networks for large-scale image classification","author":"simonyan","year":"0","journal-title":"Proceedings of the 3rd International Conference on Learning Representations (ICLR 2015)"},{"journal-title":"Learning multiple layers of features from tiny images","year":"2009","author":"krizhevsky","key":"ref40"},{"journal-title":"Wide residual networks","year":"2017","author":"zagoruyko","key":"ref11"},{"journal-title":"Densely Connected Convolutional Networks","year":"2018","author":"huang","key":"ref12"},{"journal-title":"Numerical Optimization","year":"2000","author":"nocedal","key":"ref13"},{"key":"ref14","article-title":"A simple weight decay can improve generalization","author":"krogh","year":"0","journal-title":"Proceedings of the 4th International Conference on Neural Information Processing Systems"},{"journal-title":"Cyclical learning rates for training neural networks","year":"2017","author":"smith","key":"ref15"},{"key":"ref16","article-title":"Don't decay the learning rate, increase the batch size","author":"smith","year":"0","journal-title":"Proceedings of the 6th International Conference on Learning Representations (ICLR 2018)"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/72.471360"},{"journal-title":"Large batch size training of neural networks with adversarial training and second-order information","year":"2020","author":"yao","key":"ref18"},{"journal-title":"Sharp minima can generalize for deep nets","year":"2017","author":"dinh","key":"ref19"},{"key":"ref28","article-title":"Painless stochastic gradient: Interpolation, line-search, and convergence rates","author":"vaswani","year":"0","journal-title":"Proceedings of the 32Nd International Conference on Neural Information Processing Systems"},{"key":"ref4","article-title":"Adam: a method for stochastic optimization","author":"kingma","year":"0","journal-title":"Proceedings of the 3rd International Conference on Learning Representations (ICLR 2015)"},{"key":"ref27","article-title":"L4: practical loss-based stepsize adaption for deep learning","author":"rolinek","year":"0","journal-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/s13218-015-0381-0"},{"journal-title":"Optimizing neural networks with kronecker-factored approximation curvature","year":"2016","author":"martens","key":"ref6"},{"key":"ref29","first-page":"181","article-title":"Probabilistic line searches for stochastic optimization","author":"mahsereci","year":"2015","journal-title":"Proceedings of the 28th International Conference on Neural Information Processing Systems"},{"key":"ref5","first-page":"735","article-title":"Deep learning via hessian-free optimization","author":"martens","year":"0","journal-title":"Proceedings of the 27th International Conference on Machine Learning (ICML 2010)"},{"journal-title":"Neural Smithing Supervised Learning in Feedforward Artificial Neural Networks","year":"1998","author":"reed","key":"ref8"},{"journal-title":"Deep Learning","year":"2016","author":"goodfellow","key":"ref7"},{"journal-title":"Bag of tricks for image classification with convolutional neural networks","year":"2018","author":"he","key":"ref2"},{"journal-title":"Deep residual learning for image recognition","year":"2015","author":"he","key":"ref9"},{"key":"ref1","first-page":"5813","article-title":"Training quantized nets: a deeper understanding","author":"li","year":"0","journal-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems"},{"journal-title":"On large-batch training for deep learning Generalization gap and sharp minima","year":"2017","author":"keskar","key":"ref20"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.1.1"},{"journal-title":"Understanding deep learning requires rethinking generalization","year":"2016","author":"zhang","key":"ref21"},{"journal-title":"FINN A framework for fast scalable binarized neural network inference","year":"2016","author":"umuroglu","key":"ref42"},{"key":"ref24","article-title":"Adaptive subgradient methods for online learning and stochastic optimization","author":"duchi","year":"2011","journal-title":"Journal of Machine Learning Research (JMLR)"},{"journal-title":"Knowledge distillation for optimization of quantized deep neural networks","year":"2019","author":"shin","key":"ref41"},{"key":"ref23","article-title":"Adaptive gradient methods with dynamic bound of learning rate","author":"luo","year":"0","journal-title":"Proceedings of the 7th International Conference on Learning Representations (ICLR 2019)"},{"journal-title":"Revisiting small batch training for deep neural networks","year":"2018","author":"masters","key":"ref26"},{"key":"ref25","article-title":"On the convergence of adam and beyond","author":"reddi","year":"0","journal-title":"Proceedings of the 6th International Conference on Learning Representations (ICLR 2018)"}],"event":{"name":"2021 International Joint Conference on Neural Networks (IJCNN)","start":{"date-parts":[[2021,7,18]]},"location":"Shenzhen, China","end":{"date-parts":[[2021,7,22]]}},"container-title":["2021 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9533266\/9533267\/09534014.pdf?arnumber=9534014","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T15:45:52Z","timestamp":1652197552000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9534014\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,18]]},"references-count":42,"URL":"https:\/\/doi.org\/10.1109\/ijcnn52387.2021.9534014","relation":{},"subject":[],"published":{"date-parts":[[2021,7,18]]}}}