{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T00:58:21Z","timestamp":1740099501689,"version":"3.37.3"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030304836"},{"type":"electronic","value":"9783030304843"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-30484-3_18","type":"book-chapter","created":{"date-parts":[[2019,9,8]],"date-time":"2019-09-08T23:02:47Z","timestamp":1567983767000},"page":"208-219","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Sign Based Derivative Filtering for Stochastic Gradient Descent"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4812-3731","authenticated-orcid":false,"given":"Konstantin","family":"Berestizshevsky","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5407-330X","authenticated-orcid":false,"given":"Guy","family":"Even","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2019,9,9]]},"reference":[{"key":"18_CR1","unstructured":"Abadi, M., et al.: TensorFlow: large-scale machine learning on heterogeneous systems (2015). Software http:\/\/tensorflow.org\/"},{"key":"18_CR2","unstructured":"Bernstein, J., Wang, Y.X., Azizzadenesheli, K., Anandkumar, A.: signSGD: compressed optimisation for non-convex problems. In: Proceedings of Machine Learning Research, PMLR, Stockholmsm\u00e4ssan, Stockholm Sweden, 10\u201315 July 2018, vol. 80, pp. 560\u2013569 (2018)"},{"key":"18_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"421","DOI":"10.1007\/978-3-642-35289-8_25","volume-title":"Neural Networks: Tricks of the Trade: Second Edition","author":"L Bottou","year":"2012","unstructured":"Bottou, L.: Stochastic gradient descent tricks. In: Montavon, G., Orr, G.B., M\u00fcller, K.-R. (eds.) Neural Networks: Tricks of the Trade. LNCS, vol. 7700, pp. 421\u2013436. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-35289-8_25"},{"key":"18_CR4","first-page":"2121","volume":"12","author":"J Duchi","year":"2011","unstructured":"Duchi, J., Hazan, E., Singer, Y.: Adaptive subgradient methods for online learning and stochastic optimization. J. Mach. Learn. Res. 12, 2121\u20132159 (2011)","journal-title":"J. Mach. Learn. Res."},{"key":"18_CR5","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. CoRR abs\/1512.03385 (2015). http:\/\/arxiv.org\/abs\/1512.03385"},{"key":"18_CR6","unstructured":"Hoffer, E., Banner, R., Golan, I., Soudry, D.: Norm matters: efficient and accurate normalization schemes in deep networks, pp. 2164\u20132174 (2018). http:\/\/dl.acm.org\/citation.cfm?id=3326943.3327143"},{"key":"18_CR7","unstructured":"Hoffer, E., Hubara, I., Soudry, D.: Train longer, generalize better: closing the generalization gap in large batch training of neural networks. In: Proceedings of the 31st International Conference on Neural Information Processing Systems, NIPS 2017, pp. 1729\u20131739. Curran Associates Inc., Red Hook (2017). http:\/\/dl.acm.org\/citation.cfm?id=3294771.3294936"},{"key":"18_CR8","unstructured":"Huang, G., Liu, Z., Weinberger, K.Q.: Densely connected convolutional networks. CoRR abs\/1608.06993 (2016). http:\/\/arxiv.org\/abs\/1608.06993"},{"key":"18_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"646","DOI":"10.1007\/978-3-319-46493-0_39","volume-title":"Computer Vision \u2013 ECCV 2016","author":"G Huang","year":"2016","unstructured":"Huang, G., Sun, Y., Liu, Z., Sedra, D., Weinberger, K.Q.: Deep networks with stochastic depth. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 646\u2013661. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_39"},{"key":"18_CR10","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift. In: Bach, F., Blei, D. (eds.) Proceedings of the 32nd International Conference on Machine Learning. Proceedings of Machine Learning Research, PMLR, Lille, France, 07\u201309 July 2015, vol. 37, pp. 448\u2013456 (2015). http:\/\/proceedings.mlr.press\/v37\/ioffe15.html"},{"key":"18_CR11","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: 3rd International Conference on Learning Representations, ICLR 2015, Conference Track Proceedings, San Diego, CA, USA, 7\u20139 May 2015. http:\/\/arxiv.org\/abs\/1412.6980"},{"key":"18_CR12","unstructured":"Levy, Y.K., Yurtsever, A., Cevher, V.: Online adaptive methods, universality and acceleration. In: Bengio, S., Wallach, H., Larochelle, H., Grauman, K., Cesa-Bianchi, N., Garnett, R. (eds.) Advances in Neural Information Processing Systems 31, pp. 6500\u20136509. Curran Associates, Inc. (2018). http:\/\/papers.nips.cc\/paper\/7885-online-adaptive-methods-universality-and-acceleration.pdf"},{"key":"18_CR13","unstructured":"Loshchilov, I., Hutter, F.: SGDR: stochastic gradient descent with restarts. CoRR abs\/1608.03983 (2016). http:\/\/arxiv.org\/abs\/1608.03983"},{"key":"18_CR14","series-title":"Communications in Computer and Information Science","doi-asserted-by":"publisher","first-page":"177","DOI":"10.1007\/978-3-642-17622-7_18","volume-title":"Database Theory and Application, Bio-Science and Bio-Technology","author":"NM Nawi","year":"2010","unstructured":"Nawi, N.M., Ransing, R.S., Salleh, M.N.M., Ghazali, R., Hamid, N.A.: An improved back propagation neural network algorithm on classification problems. In: Zhang, Y., Cuzzocrea, A., Ma, J., Chung, K., Arslan, T., Song, X. (eds.) FGIT 2010. CCIS, vol. 118, pp. 177\u2013188. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-17622-7_18"},{"key":"18_CR15","unstructured":"Neelakantan, A., et al.: Adding gradient noise improves learning for very deep networks. arXiv preprint arXiv:1511.06807 (2015)"},{"issue":"1","key":"18_CR16","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1016\/S0893-6080(98)00116-6","volume":"12","author":"N Qian","year":"1999","unstructured":"Qian, N.: On the momentum term in gradient descent learning algorithms. Neural Netw. 12(1), 145\u2013151 (1999). https:\/\/doi.org\/10.1016\/S0893-6080(98)00116-6","journal-title":"Neural Netw."},{"key":"18_CR17","doi-asserted-by":"crossref","unstructured":"Seide, F., Fu, H., Droppo, J., Li, G., Yu, D.: 1-bit stochastic gradient descent and application to data-parallel distributed training of speech DNNs. In: Interspeech 2014, September 2014","DOI":"10.21437\/Interspeech.2014-274"},{"key":"18_CR18","unstructured":"Smith, S.L., Le, Q.V.: A Bayesian perspective on generalization and stochastic gradient descent. CoRR abs\/1710.06451 (2017). http:\/\/arxiv.org\/abs\/1710.06451"},{"key":"18_CR19","unstructured":"Tieleman, T., Hinton, G.: Lecture 6.5 - RMSProp. Technical report (2012). https:\/\/www.cs.toronto.edu\/~tijmen\/csc321\/slides\/lecture_slides_lec6.pdf"},{"key":"18_CR20","unstructured":"Zeiler, M.D.: ADADELTA: an adaptive learning rate method. CoRR abs\/1212.5701 (2012). http:\/\/arxiv.org\/abs\/1212.5701"}],"container-title":["Lecture Notes in Computer Science","Artificial Neural Networks and Machine Learning \u2013 ICANN 2019: Deep Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-30484-3_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,27]],"date-time":"2022-09-27T21:21:12Z","timestamp":1664313672000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-30484-3_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030304836","9783030304843"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-30484-3_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"9 September 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICANN","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Neural Networks","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 September 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icann2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/e-nns.org\/icann2019\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}