{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T20:16:00Z","timestamp":1776111360428,"version":"3.50.1"},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"19-20","license":[{"start":{"date-parts":[[2020,1,22]],"date-time":"2020-01-22T00:00:00Z","timestamp":1579651200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,22]],"date-time":"2020-01-22T00:00:00Z","timestamp":1579651200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"National Science Foundation","award":["IIS-1763452"],"award-info":[{"award-number":["IIS-1763452"]}]},{"name":"National Science Foundation","award":["CNS-1828181"],"award-info":[{"award-number":["CNS-1828181"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2020,5]]},"DOI":"10.1007\/s11042-019-08453-9","type":"journal-article","created":{"date-parts":[[2020,1,22]],"date-time":"2020-01-22T03:02:30Z","timestamp":1579662150000},"page":"12777-12815","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":458,"title":["Dropout vs. batch normalization: an empirical study of their impact to deep learning"],"prefix":"10.1007","volume":"79","author":[{"given":"Christian","family":"Garbin","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4129-9611","authenticated-orcid":false,"given":"Xingquan","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Oge","family":"Marques","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,1,22]]},"reference":[{"key":"8453_CR1","first-page":"437","volume-title":"Lecture Notes in Computer Science","author":"Yoshua Bengio","year":"2012","unstructured":"Bengio Y (2012) Practical recommendations for gradient-based training of deep architectures"},{"key":"8453_CR2","unstructured":"Brock A, Lim T, Ritchie JM, Weston N (2017) Freezeout: accelerate training by progressively freezing layers. arXiv:1706.04983"},{"key":"8453_CR3","doi-asserted-by":"crossref","unstructured":"Cortes C, Vapnik V (1995) Mach Learning, 273\u2013297","DOI":"10.1007\/BF00994018"},{"key":"8453_CR4","doi-asserted-by":"publisher","unstructured":"Deng L, Hinton G, Kingsbury B (2013) New types of deep neural network learning for speech recognition and related applications: an overview. https:\/\/doi.org\/10.1109\/icassp.2013.6639344","DOI":"10.1109\/icassp.2013.6639344"},{"key":"8453_CR5","volume-title":"Deep learning. Adaptive Computation and Machine Learning","author":"IJ Goodfellow","year":"2016","unstructured":"Goodfellow IJ, Bengio Y, Courville AC (2016) Deep learning. Adaptive Computation and Machine Learning. MIT Press, Cambridge. http:\/\/www.deeplearningbook.org\/"},{"issue":"2","key":"8453_CR6","doi-asserted-by":"publisher","first-page":"1850008","DOI":"10.1142\/S1469026818500086","volume":"17","author":"T Hinz","year":"2018","unstructured":"Hinz T, Navarro-Guerrero N, Magg S, Wermter S (2018) Speeding up the Hyperparameter Optimization of Deep Convolutional Neural Networks. International Journal of Computation Intelligence and Applications 17(2):1850008. https:\/\/doi.org\/10.1142\/s1469026818500086","journal-title":"International Journal of Computation Intelligence and Applications"},{"key":"8453_CR7","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: accelerating deep network training by reducing internal covariate shift"},{"key":"8453_CR8","unstructured":"KerasTeam (2016) Using test data as validation data during training. https:\/\/github.com\/keras-team\/keras\/issues\/1753"},{"key":"8453_CR9","unstructured":"Kohler JM, Daneshmand H, Lucchi A, Zhou M, Neymeyr K, Hofmann T (2018) Towards a theoretical understanding of batch normalization, arXiv:1805.10694"},{"key":"8453_CR10","unstructured":"Krizhevsky A (2009) Learning multiple layers of features from tiny images. https:\/\/www.cs.toronto.edu\/kriz\/learning-features-2009-TR.pdf"},{"key":"8453_CR11","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) . In: Bartlett PL, Pereira FCN, Burges CJC, Bottou L, Weinberger KQ (eds) Advances in neural information processing systems 25: 26th annual conference on neural information processing systems 2012. Proceedings of a meeting held December 3-6, 2012, Lake Tahoe, Nevada, USA, pp 1106\u20131114. http:\/\/papers.nips.cc\/paper\/4824-imagenet-classification-with-deep-convolutional-neural-networks"},{"key":"8453_CR12","unstructured":"Krizhevsky A, Nair V, Hinton G (2019) The cifar-10 dataset. https:\/\/www.cs.toronto.edu\/kriz\/cifar.html"},{"issue":"6","key":"8453_CR13","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"Alex Krizhevsky","year":"2017","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2017) Imagenet classification with deep convolutional neural networks 60: 84. https:\/\/doi.org\/10.1145\/3065386","journal-title":"Communications of the ACM"},{"key":"8453_CR14","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1016\/j.patrec.2014.01.008","volume":"42","author":"Martin L\u00e4ngkvist","year":"2014","unstructured":"L\u00e4ngkvist M, Karlsson L, Loutfi A (2014) A review of unsupervised feature learning and deep learning for time-series modeling 42: 11. https:\/\/doi.org\/10.1016\/j.patrec.2014.01.008","journal-title":"Pattern Recognition Letters"},{"key":"8453_CR15","unstructured":"LeCun Y (1999) The mnist database of handwritten digits. http:\/\/yann.lecun.com\/exdb\/mnist\/"},{"key":"8453_CR16","doi-asserted-by":"publisher","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun Y, Bengio Y, Hinton G (2015) . Deep Learning 521:436. https:\/\/doi.org\/10.1038\/nature14539","journal-title":"Deep Learning"},{"key":"8453_CR17","doi-asserted-by":"crossref","unstructured":"Li X, Chen S, Hu X, Yang J (2018) Understanding the disharmony between dropout and batch normalization by variance shift","DOI":"10.1109\/CVPR.2019.00279"},{"key":"8453_CR18","unstructured":"Lipton ZC, Berkowitz J, Elkan C (2015) A critical review of recurrent neural networks for sequence learning"},{"issue":"3","key":"8453_CR19","doi-asserted-by":"publisher","first-page":"329","DOI":"10.1111\/insr.12016","volume":"82","author":"Wei-Yin Loh","year":"2014","unstructured":"Loh WY (2014) Fifty years of classification and regression trees 82: 329. https:\/\/doi.org\/10.1111\/insr.12016","journal-title":"International Statistical Review"},{"key":"8453_CR20","unstructured":"Luo P, Wang X, Shao W, Peng Z (2018) Towards understanding regularization in batch normalization"},{"key":"8453_CR21","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1016\/j.cviu.2017.05.007","volume":"161","author":"Dmytro Mishkin","year":"2017","unstructured":"Mishkin D, Sergievskiy N, Matas J (2017) Systematic evaluation of convolution neural network advances on the imagenet. Comput. Vision Image Understanding. https:\/\/doi.org\/10.1016\/j.cviu.2017.05.007. http:\/\/www.sciencedirect.com\/science\/article\/pii\/S1077314217300814","journal-title":"Computer Vision and Image Understanding"},{"key":"#cr-split#-8453_CR22.1","unstructured":"Morgan N, Bourlard H (1989) . In: Touretzky DS"},{"key":"#cr-split#-8453_CR22.2","unstructured":"(ed) Advances in neural information processing systems 2, [NIPS Conference, Denver, Colorado, USA, November 27-30, 1989]. http:\/\/papers.nips.cc\/paper\/275-generalization-and-parameter-estimation-in-feedforward-nets-some-experiments. Morgan Kaufmann, pp 630-637"},{"key":"8453_CR23","unstructured":"Nair V, Hinton GE (2010) . In: F\u00fcrnkranz J, Joachims T (eds) Proceedings of the 27th international conference on machine learning (ICML-10), June 21-24, 2010, Haifa, Israel. Omnipress, pp 807\u2013814. http:\/\/www.icml2010.org\/papers\/432.pdf"},{"key":"8453_CR24","unstructured":"Perez L, Wang J (2017) The effectiveness of data augmentation in image classification using deep learning"},{"key":"8453_CR25","unstructured":"Ruder S (2016) An overview of gradient descent optimization algorithms"},{"key":"8453_CR26","doi-asserted-by":"publisher","unstructured":"Rumelhart DE, Hinton G, Williams RJ (1985) Learning internal representations by error propagation. https:\/\/doi.org\/10.21236\/ada164453","DOI":"10.21236\/ada164453"},{"key":"8453_CR27","unstructured":"Smith LN, A disciplined approach to neural network hyper-parameters: part 1 \u2013 learning rate batch size momentum and weight decay (2018)"},{"issue":"1","key":"8453_CR28","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Hinton G, Krizhevsky A, Sutskever I, Salakhutdinov R (2014) Dropout: a simple way to prevent neural networks from overfitting. J Mach Learn Res 15(1):1929. http:\/\/dl.acm.org\/citation.cfm?id=?2627435.2670313","journal-title":"J Mach Learn Res"},{"key":"8453_CR29","unstructured":"Team K (2019) https:\/\/github.com\/keras-team\/keras\/blob\/master\/examples\/mnist_cnn.py"},{"key":"8453_CR30","unstructured":"Tieleman T, Hinton G (2012) Lecture 6.5\u2014Rmsprop: divide the gradient by a running average of its recent magnitude. COURSERA: Neural Networks for Machine Learning"},{"key":"8453_CR31","unstructured":"University S (2018) Stanford university cs231n: convolutional neural networks for visual recognition. http:\/\/cs231n.github.io\/classification\/#nn"},{"issue":"4","key":"8453_CR32","doi-asserted-by":"publisher","first-page":"510","DOI":"10.1109\/LSP.2016.2611485","volume":"24","author":"X Wang","year":"2017","unstructured":"Wang X, Gao L, Song J, Shen H, Beyond frame-level CNN (2017) Saliency-aware 3-D CNN with lstm for video action recognition. IEEE Signal Process Lett 24(4):510. https:\/\/doi.org\/10.1109\/LSP.2016.2611485","journal-title":"IEEE Signal Process Lett"},{"issue":"3","key":"8453_CR33","doi-asserted-by":"publisher","first-page":"634","DOI":"10.1109\/TMM.2017.2749159","volume":"20","author":"X Wang","year":"2018","unstructured":"Wang X, Gao L, Wang P, Sun X, Liu X (2018) Two-stream 3-D convnet fusion for action recognition in videos with arbitrary size and length. IEEE Trans Multimed 20(3):634. https:\/\/doi.org\/10.1109\/TMM.2017.2749159","journal-title":"IEEE Trans Multimed"},{"key":"8453_CR34","unstructured":"Zhu X (2005) Semi-supervised learning literature survey. Tech. Rep. 1530, Computer Sciences, University of Wisconsin-Madison"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-019-08453-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11042-019-08453-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-019-08453-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,1,21]],"date-time":"2021-01-21T00:11:39Z","timestamp":1611187899000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11042-019-08453-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,1,22]]},"references-count":35,"journal-issue":{"issue":"19-20","published-print":{"date-parts":[[2020,5]]}},"alternative-id":["8453"],"URL":"https:\/\/doi.org\/10.1007\/s11042-019-08453-9","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,1,22]]},"assertion":[{"value":"26 April 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 September 2019","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 November 2019","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 January 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}