{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,2]],"date-time":"2025-06-02T17:33:05Z","timestamp":1748885585395,"version":"3.40.5"},"publisher-location":"Cham","reference-count":60,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030461461"},{"type":"electronic","value":"9783030461478"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-46147-8_27","type":"book-chapter","created":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T02:03:39Z","timestamp":1588298619000},"page":"449-464","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Learning with Random Learning Rates"],"prefix":"10.1007","author":[{"given":"L\u00e9onard","family":"Blier","sequence":"first","affiliation":[]},{"given":"Pierre","family":"Wolinski","sequence":"additional","affiliation":[]},{"given":"Yann","family":"Ollivier","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,4,30]]},"reference":[{"key":"27_CR1","unstructured":"Zoph, B., Le, Q.V.: Neural architecture search with reinforcement learning. arXiv preprint arXiv:1611.01578 (2016)"},{"key":"27_CR2","unstructured":"Guyon, I., et al.: A brief review of the ChaLearn AutoML challenge: any-time any-dataset learning without human intervention. In: Workshop on Automatic Machine Learning, pp. 21\u201330 (2016)"},{"key":"27_CR3","volume-title":"Machine Learning: A Bayesian and Optimization Perspective","author":"S Theodoridis","year":"2015","unstructured":"Theodoridis, S.: Machine Learning: A Bayesian and Optimization Perspective. Academic Press, Cambridge (2015)"},{"key":"27_CR4","unstructured":"Jastrzebski, S., et al.: Three factors influencing minima in SGD. arXiv preprint arXiv:1711.04623 (2017)"},{"key":"27_CR5","unstructured":"Kurita, K.: Learning Rate Tuning in Deep Learning: A Practical Guide\u2014Machine Learning Explained (2018)"},{"key":"27_CR6","unstructured":"Mack, D.: How to pick the best learning rate for your machine learning project (2016)"},{"key":"27_CR7","unstructured":"Surmenok, P.: Estimating an optimal learning rate for a deep neural network (2017)"},{"issue":"2","key":"27_CR8","first-page":"26","volume":"4","author":"T Tieleman","year":"2012","unstructured":"Tieleman, T., Hinton, G.: Lecture 6.5-rmsprop: divide the gradient by a running average of its recent magnitude. COURSERA: Neural Netw. Mach. Learn. 4(2), 26\u201331 (2012)","journal-title":"COURSERA: Neural Netw. Mach. Learn."},{"key":"27_CR9","first-page":"2121","volume":"12","author":"J Duchi","year":"2011","unstructured":"Duchi, J., Hazan, E., Singer, Y.: Adaptive subgradient methods for online learning and stochastic optimization. JMLR 12, 2121\u20132159 (2011)","journal-title":"JMLR"},{"key":"27_CR10","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: International Conference on Learning Representations (2015)"},{"key":"27_CR11","unstructured":"Schaul, T., Zhang, S., LeCun, Y.: No more pesky learning rates. In: International Conference on Machine Learning, pp. 343\u2013351 (2013)"},{"key":"27_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1007\/978-3-642-35289-8_3","volume-title":"Neural Networks: Tricks of the Trade","author":"YA LeCun","year":"2012","unstructured":"LeCun, Y.A., Bottou, L., Orr, G.B., M\u00fcller, K.-R.: Efficient BackProp. In: Montavon, G., Orr, G.B., M\u00fcller, K.-R. (eds.) Neural Networks: Tricks of the Trade. LNCS, vol. 7700, pp. 9\u201348. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-35289-8_3"},{"key":"27_CR13","doi-asserted-by":"crossref","unstructured":"Denkowski, M., Neubig, G.: Stronger baselines for trustable results in neural machine translation. arXiv preprint arXiv:1706.09733 (2017)","DOI":"10.18653\/v1\/W17-3203"},{"key":"27_CR14","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava, N., Hinton, G., Krizhevsky, A., Sutskever, I., Salakhutdinov, R.: Dropout: a simple way to prevent neural networks from overfitting. J. Mach. Learn. Res. 15, 1929\u20131958 (2014)","journal-title":"J. Mach. Learn. Res."},{"key":"27_CR15","unstructured":"LeCun, Y., Denker, J.S., Solla, S.A.: Optimal brain damage. In: Touretzky, D.S. (ed.) NIPS, vol. 2, pp. 598\u2013605. Morgan-Kaufmann (1990)"},{"key":"27_CR16","unstructured":"Han, S., Mao, H., Dally, W.J.: Deep Compression: Compressing Deep Neural Networks with Pruning, Trained Quantization and Huffman Coding. arXiv preprint arXiv:1510.00149 (2015)"},{"key":"27_CR17","unstructured":"Han, S., Pool, J., Tran, J., Dally, W.J.: Learning both weights and connections for efficient neural networks. In: NIPS (2015)"},{"key":"27_CR18","doi-asserted-by":"crossref","unstructured":"See, A., Luong, M.T., Manning, C.D.: Compression of neural machine translation models via pruning. In: CoNLL 2016, p. 291 (2016)","DOI":"10.18653\/v1\/K16-1029"},{"key":"27_CR19","unstructured":"Bengio, Y., Roux, N.L., Vincent, P., Delalleau, O., Marcotte, P.: Convex neural networks. In: Weiss, Y., Sch\u00f6lkopf, B., Platt, J.C. (eds.) Advances in Neural Information Processing Systems, vol. 18, pp. 123\u2013130. MIT Press (2006)"},{"key":"27_CR20","unstructured":"Hinton, G., Vinyals, O., Dean, J.: Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)"},{"key":"27_CR21","unstructured":"Zhang, C., Bengio, S., Hardt, M., Recht, B., Vinyals, O.: Understanding deep learning requires rethinking generalization (2017)"},{"key":"27_CR22","unstructured":"Frankle, J., Carbin, M.: The Lottery Ticket Hypothesis: Finding Small, Trainable Neural Networks. arXiv preprint arXiv:1704.04861, March 2018"},{"key":"27_CR23","unstructured":"Frankle, J., Dziugaite, G.K., Roy, D.M., Carbin, M.: The lottery ticket hypothesis at scale (2019)"},{"key":"27_CR24","unstructured":"Wilson, A.C., Roelofs, R., Stern, M., Srebro, N., Recht, B.: The marginal value of adaptive gradient methods in machine learning. In: NIPS, pp. 4148\u20134158 (2017)"},{"key":"27_CR25","doi-asserted-by":"crossref","unstructured":"Liu, C., et al.: Progressive neural architecture search. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 19\u201334 (2018)","DOI":"10.1007\/978-3-030-01246-5_2"},{"key":"27_CR26","doi-asserted-by":"publisher","first-page":"251","DOI":"10.1162\/089976698300017746","volume":"10","author":"SI Amari","year":"1998","unstructured":"Amari, S.I.: Natural gradient works efficiently in learning. Neural Comput. 10, 251\u2013276 (1998)","journal-title":"Neural Comput."},{"issue":"4","key":"27_CR27","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1016\/0893-6080(88)90003-2","volume":"1","author":"RA Jacobs","year":"1988","unstructured":"Jacobs, R.A.: Increased rates of convergence through learning rate adaptation. Neural Netw. 1(4), 295\u2013307 (1988)","journal-title":"Neural Netw."},{"key":"27_CR28","doi-asserted-by":"crossref","unstructured":"Schraudolph, N.N.: Local gain adaptation in stochastic gradient descent (1999)","DOI":"10.1049\/cp:19991170"},{"key":"27_CR29","doi-asserted-by":"crossref","unstructured":"Mahmood, A.R., Sutton, R.S., Degris, T., Pilarski, P.M.: Tuning-free step-size adaptation. In: 2012 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 2121\u20132124, IEEE (2012)","DOI":"10.1109\/ICASSP.2012.6288330"},{"key":"27_CR30","unstructured":"Maclaurin, D., Duvenaud, D., Adams, R.: Gradient-based hyperparameter optimization through reversible learning. In: International Conference on Machine Learning, pp. 2113\u20132122 (2015)"},{"key":"27_CR31","unstructured":"Mass\u00e9, P.Y., Ollivier, Y.: Speed learning on the fly. arXiv preprint arXiv:1511.02540 (2015)"},{"key":"27_CR32","unstructured":"Baydin, A.G., Cornish, R., Rubio, D.M., Schmidt, M., Wood, F.: Online learning rate adaptation with hypergradient descent. In: International Conference on Learning Representations (2018)"},{"key":"27_CR33","unstructured":"Erraqabi, A., Le Roux, N.: Combining adaptive algorithms and hypergradient method: a performance and robustness study (2018)"},{"key":"27_CR34","unstructured":"Baker, B., Gupta, O., Naik, N., Raskar, R.: Designing neural network architectures using reinforcement learning. arXiv preprint arXiv:1611.02167 (2016)"},{"issue":"1","key":"27_CR35","first-page":"6765","volume":"18","author":"L Li","year":"2017","unstructured":"Li, L., Jamieson, K., DeSalvo, G., Rostamizadeh, A., Talwalkar, A.: Hyperband: a novel bandit-based approach to hyperparameter optimization. JMLR 18(1), 6765\u20136816 (2017)","journal-title":"JMLR"},{"issue":"2","key":"27_CR36","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1162\/106365602320169811","volume":"10","author":"KO Stanley","year":"2002","unstructured":"Stanley, K.O., Miikkulainen, R.: Evolving neural networks through augmenting topologies. Evol. Comput. 10(2), 99\u2013127 (2002)","journal-title":"Evol. Comput."},{"key":"27_CR37","unstructured":"Jozefowicz, R., Zaremba, W., Sutskever, I.: An empirical exploration of recurrent network architectures. In: International Conference on Machine Learning, pp. 2342\u20132350 (2015)"},{"key":"27_CR38","unstructured":"Real, E., et al.: Large-scale evolution of image classifiers. In: Proceedings of the 34th International Conference on Machine Learning, vol. 70, pp. 2902\u20132911, JMLR. org (2017)"},{"key":"27_CR39","unstructured":"Bergstra, J., Yamins, D., Cox, D.D.: Making a science of model search: hyperparameter optimization in hundreds of dimensions for vision architectures (2013)"},{"key":"27_CR40","unstructured":"Liu, H., Simonyan, K., Yang, Y.: DARTS: differentiable architecture search. arXiv preprint arXiv:1806.09055 (2018)"},{"key":"27_CR41","unstructured":"Paszke, A., et al.: Automatic differentiation in PyTorch. In: NIPS-W (2017)"},{"key":"27_CR42","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1006\/jmps.1999.1278","volume":"44","author":"L Wasserman","year":"2000","unstructured":"Wasserman, L.: Bayesian model selection and model averaging. J. Math. Psychol. 44, 92\u2013107 (2000)","journal-title":"J. Math. Psychol."},{"issue":"3","key":"27_CR43","doi-asserted-by":"publisher","first-page":"361","DOI":"10.1111\/j.1467-9868.2011.01025.x","volume":"74","author":"T Van Erven","year":"2012","unstructured":"Van Erven, T., Gr\u00fcnwald, P., De Rooij, S.: Catching up faster by switching sooner: a predictive approach to adaptive estimation with an application to the AIC-BIC dilemma. J. R. Stat. Soc.: Ser. B (Stat. Methodol.) 74(3), 361\u2013417 (2012)","journal-title":"J. R. Stat. Soc.: Ser. B (Stat. Methodol.)"},{"key":"27_CR44","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: CVPR 2009 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"27_CR45","unstructured":"Krizhevsky, A.: Learning multiple layers of features from tiny images (2009)"},{"key":"27_CR46","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: ICCV, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"27_CR47","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Van Der Maaten, L., Weinberger, K.Q.: Densely connected convolutional networks. In: CVPR, vol. 1, p. 3 (2017)","DOI":"10.1109\/CVPR.2017.243"},{"key":"27_CR48","unstructured":"Krizhevsky, A.: One weird trick for parallelizing convolutional neural networks. arXiv preprint arXiv:1404.5997 (2014)"},{"key":"27_CR49","doi-asserted-by":"crossref","unstructured":"Szegedy, C., et al.: Going deeper with convolutions. In: ICCV, pp. 1\u20139 (2015)","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"27_CR50","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. CoRR abs\/1409.1556 (2014)"},{"key":"27_CR51","unstructured":"Howard, A.G., et al.: MobileNets: efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017)"},{"key":"27_CR52","unstructured":"Kianglu: Pytorch-cifar (2018)"},{"issue":"2","key":"27_CR53","first-page":"313","volume":"19","author":"MP Marcus","year":"1993","unstructured":"Marcus, M.P., Marcinkiewicz, M.A., Santorini, B.: Building a large annotated corpus of English: the penn treebank. Comput. Linguist. 19(2), 313\u2013330 (1993)","journal-title":"Comput. Linguist."},{"issue":"8","key":"27_CR54","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"issue":"10","key":"27_CR55","doi-asserted-by":"publisher","first-page":"1550","DOI":"10.1109\/5.58337","volume":"78","author":"PJ Werbos","year":"1990","unstructured":"Werbos, P.J.: Backpropagation through time: what it does and how to do it. Proc. IEEE 78(10), 1550\u20131560 (1990)","journal-title":"Proc. IEEE"},{"key":"27_CR56","unstructured":"Brockman, G., et al.: OpenAI Gym (2016)"},{"issue":"7540","key":"27_CR57","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529 (2015)","journal-title":"Nature"},{"key":"27_CR58","unstructured":"Keskar, N.S., Socher, R.: Improving generalization performance by switching from Adam to SGD. arXiv preprint arXiv:1712.07628 (2017)"},{"key":"27_CR59","unstructured":"Jozefowicz, R., Vinyals, O., Schuster, M., Shazeer, N., Wu, Y.: Exploring the limits of language modeling. arXiv preprint arXiv:1602.02410 (2016)"},{"key":"27_CR60","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"437","DOI":"10.1007\/978-3-642-35289-8_26","volume-title":"Neural Networks: Tricks of the Trade","author":"Y Bengio","year":"2012","unstructured":"Bengio, Y.: Practical recommendations for gradient-based training of deep architectures. In: Montavon, G., Orr, G.B., M\u00fcller, K.-R. (eds.) Neural Networks: Tricks of the Trade. LNCS, vol. 7700, pp. 437\u2013478. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-35289-8_26"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-46147-8_27","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,6]],"date-time":"2025-05-06T09:28:46Z","timestamp":1746523726000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-46147-8_27"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030461461","9783030461478"],"references-count":60,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-46147-8_27","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"30 April 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"W\u00fcrzburg","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 September 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 September 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/ecmlpkdd2019.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Microsoft CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"733","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"130","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"18% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.04","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5.3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"ECML PKDD Workshops Information: single-blind review, submissions: 200, full papers accepted: 70, short papers accepted: 46","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}