{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T07:43:19Z","timestamp":1775547799304,"version":"3.50.1"},"reference-count":35,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2020,10,20]],"date-time":"2020-10-20T00:00:00Z","timestamp":1603152000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,10,20]],"date-time":"2020-10-20T00:00:00Z","timestamp":1603152000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100009483","name":"Universidad Tecnol\u00f3gica Nacional","doi-asserted-by":"publisher","award":["PID UTN 4103"],"award-info":[{"award-number":["PID UTN 4103"]}],"id":[{"id":"10.13039\/100009483","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Prog Artif Intell"],"published-print":{"date-parts":[[2020,12]]},"DOI":"10.1007\/s13748-020-00218-y","type":"journal-article","created":{"date-parts":[[2020,10,20]],"date-time":"2020-10-20T15:02:36Z","timestamp":1603206156000},"page":"351-360","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":109,"title":["A new approach for the vanishing gradient problem on sigmoid activation"],"prefix":"10.1007","volume":"9","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8344-1383","authenticated-orcid":false,"given":"Mat\u00edas","family":"Roodschild","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6995-9584","authenticated-orcid":false,"given":"Jorge","family":"Gotay\u00a0Sardi\u00f1as","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4935-8842","authenticated-orcid":false,"given":"Adri\u00e1n","family":"Will","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,10,20]]},"reference":[{"key":"218_CR1","first-page":"1991","volume":"1","author":"S Hochreiter","year":"1991","unstructured":"Hochreiter, S.: Untersuchungen zu dynamischen neuronalen Netzen. Diploma, Tech. Univ. M\u00fcnchen 1, 1991 (1991)","journal-title":"Diploma, Tech. Univ. M\u00fcnchen"},{"key":"218_CR2","volume-title":"Gradient Flow in Recurrent Nets: The Difficulty of Learning Long-term Dependencies, A Field Guide to Dynamical Recurrent Neural Networks","author":"S Hochreiter","year":"2001","unstructured":"Hochreiter, S., Bengio, Y., Frasconi, P., Schmidhuber, J.: Gradient Flow in Recurrent Nets: The Difficulty of Learning Long-term Dependencies, A Field Guide to Dynamical Recurrent Neural Networks. IEEE Press, New Jersey (2001)"},{"key":"218_CR3","unstructured":"Pascanu, R., Mikolov, T., Bengio, Y.: On the difficulty of training recurrent neural networks. JMLR: W & CP, In: Proceedings of the 30th International Conference on Machine Learning, 28, Atlanta, Georgia, USA (2013)"},{"key":"218_CR4","doi-asserted-by":"publisher","first-page":"691","DOI":"10.1016\/S0893-6080(98)00012-4","volume":"11","author":"RLT Hahnloser","year":"1998","unstructured":"Hahnloser, R.L.T.: On the piecewise analysis of networks of linear threshold neurons. Neural Netw. 11, 691\u2013697 (1998). https:\/\/doi.org\/10.1016\/S0893-6080(98)00012-4","journal-title":"Neural Netw."},{"issue":"5","key":"218_CR5","doi-asserted-by":"publisher","first-page":"3814","DOI":"10.1109\/TIE.2018.2856205","volume":"66","author":"Y Qin","year":"2018","unstructured":"Qin, Y., Wang, X., Zou, J.: The optimized deep belief networks with improved logistic Sigmoid units and their application in fault diagnosis for planetary gearboxes of wind turbines. IEEE Trans. Ind. Electron. 66(5), 3814\u20133824 (2018). https:\/\/doi.org\/10.1109\/TIE.2018.2856205","journal-title":"IEEE Trans. Ind. Electron."},{"issue":"8","key":"218_CR6","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"218_CR7","unstructured":"Chung, J., Gulcehre, C., Cho, K., Bengio, Y.: Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv:1412.3555 (2014)"},{"key":"218_CR8","unstructured":"Ng, A. et al.: Sparse autoencoder. CS294A Lecture notes (2011)"},{"key":"218_CR9","doi-asserted-by":"publisher","first-page":"527","DOI":"10.1162\/neco.2006.18.7.1527","volume":"18","author":"GE Hinton","year":"2006","unstructured":"Hinton, G.E., Osindero, S., Teh, Y.: A fast learning algorithm for deep belief nets. Neural Comput. 18, 527\u20131554 (2006). https:\/\/doi.org\/10.1162\/neco.2006.18.7.1527","journal-title":"Neural Comput."},{"key":"218_CR10","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: Accelerating deep network training by reducing internal covariate shift. arXiv:1502.03167 (2015)"},{"key":"218_CR11","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun, Y., Bottou, L., Bengio, Y., Haffner, P.: Gradient-based learning applied to document recognition. Proc. IEEE 86, 2278\u20132324 (1998). https:\/\/doi.org\/10.1109\/5.726791","journal-title":"Proc. IEEE"},{"key":"218_CR12","unstructured":"Xiao, H., Rasul, K., Vollgraf, R.: Fashion-MNIST: a Novel Image Dataset for Benchmarking Machine Learning Algorithms. arXiv:abs\/1708.07747 (2017)"},{"key":"218_CR13","doi-asserted-by":"crossref","unstructured":"Cohen, G., Afshar, S., Tapson, J., van Schaik, A.: EMNIST: an extension of MNIST to handwritten letters. arXiv:abs\/1702.05373 (2017)","DOI":"10.1109\/IJCNN.2017.7966217"},{"key":"218_CR14","doi-asserted-by":"publisher","unstructured":"Freund, Y., Haussler, D.: Unsupervised learning of distributions on binary vectors using two layer networks. University of California, Santa Cruz (1994). https:\/\/doi.org\/10.5555\/2986916.2987028","DOI":"10.5555\/2986916.2987028"},{"key":"218_CR15","first-page":"153","volume-title":"Advances in Neural Information Processing Systems","author":"Y Bengio","year":"2007","unstructured":"Bengio, Y., Pascal, L., Popovici, D., Larochelle, H.: Greedy layer-wise training of deep networks. In: Sch\u00f6 lkopf, B., Platt, J.C., Hoffman, T. (eds.) Advances in Neural Information Processing Systems, vol. 19, pp. 153\u2013160. MIT Press, Cambridge (2007)"},{"key":"218_CR16","first-page":"1137","volume-title":"Advances in Neural Information Processing Systems","author":"MA Ranzato","year":"2007","unstructured":"Ranzato, M.A., Poultney, C., Chopra, S., Cun, Y.L.: Efficient learning of sparse representations with an energy-based model. In: Sch\u00f6lkopf, B., Platt, J.C., Hoffman, T. (eds.) Advances in Neural Information Processing Systems, vol. 19, pp. 1137\u20131144. MIT Press, Cambridge (2007)"},{"key":"218_CR17","first-page":"3371","volume":"11","author":"P Vincent","year":"2010","unstructured":"Vincent, P., Larochelle, H., Lajoie, I., Bengio, Y., Manzagol, P.A.: Stacked denoising autoencoders: learning useful representations in a deep network with a local denoising criterion. Mach. Learn. Res. 11, 3371\u20133408 (2010)","journal-title":"Mach. Learn. Res."},{"key":"218_CR18","doi-asserted-by":"publisher","unstructured":"Jarrett, K., Kavukcuoglu, K., Ranzato, Marc\u2019 Aurelio, LeCun, Y.: What is the best multi-stage architecture for object recognition? In: 2009 IEEE 12th International Conference on Computer Vision, pp. 2146\u20132153, IEEE (2009). https:\/\/doi.org\/10.1109\/ICCV.2009.5459469","DOI":"10.1109\/ICCV.2009.5459469"},{"key":"218_CR19","doi-asserted-by":"publisher","unstructured":"Nair, V., Hinton, G.E.: Rectified linear units improve restricted boltzmann machines. In: Proceedings of the 27th International Conference on Machine Learning (ICML-10), pp. 807\u2013814 (2010). https:\/\/doi.org\/10.5555\/3104322.3104425","DOI":"10.5555\/3104322.3104425"},{"key":"218_CR20","unstructured":"Glorot, X., Bengio, Y.: Understanding the difficulty of training deep feedforward neural networks. In: Proceedings of the 13th International Conference on Artificial Intelligence and Statistics (AISTATS) 2010, pp. 249-256, Chia Laguna Resort, Sardinia, Italy (2010)"},{"key":"218_CR21","unstructured":"Glorot, X., Bordes, A., Bengio, Y.: Deep sparse rectifier neural networks. In: Proceedings of the 14th International Conference on Artificial Intelligence and Statistics (AISTATS) 2011, pp. 315-323, Fort Lauderdale, FL, USA (2011). http:\/\/proceedings.mlr.press\/v15\/glorot11a.html"},{"key":"218_CR22","unstructured":"Pascanu, R., Mikolov, T., Bengio, Y.: Understanding the exploding gradient problem. arXiv:abs\/1211.5063 (2012)"},{"key":"218_CR23","unstructured":"Gulcehre, C., Moczulski, M., Denil, M., Bengio, Y.: Noisy activation functions. In: International Conference on Machine Learning, pp. 3059\u20133068 (2016)"},{"key":"218_CR24","doi-asserted-by":"publisher","unstructured":"Kong, S., Takatsuka, M.: Hexpo: A vanishing-proof activation function. In: 2017 International Joint Conference on Neural Networks (IJCNN), pp. 2562\u20132567 (2017). https:\/\/doi.org\/10.1109\/IJCNN.2017.7966168","DOI":"10.1109\/IJCNN.2017.7966168"},{"key":"218_CR25","unstructured":"MacDonald, G., Godbout, A., Gillcash, B., Cairns, S.: Volume-preserving Neural Networks: A Solution to the Vanishing Gradient Problem (2019). arXiv:1911.09576"},{"key":"218_CR26","doi-asserted-by":"publisher","first-page":"38287","DOI":"10.1109\/ACCESS.2019.2907000","volume":"7","author":"S Dai","year":"2019","unstructured":"Dai, S., Li, L., Li, Z.: Modeling vehicle interactions via modified LSTM models for trajectory prediction. IEEE Access 7, 38287\u201338296 (2019). https:\/\/doi.org\/10.1109\/ACCESS.2019.2907000","journal-title":"IEEE Access"},{"key":"218_CR27","unstructured":"Hu, Y., Huber, A., Anumula, J., Liu, S.: Overcoming the vanishing gradient problem in plain recurrent networks (2018). arXiv:1801.06105"},{"key":"218_CR28","unstructured":"Kerg, G., Goyette, K., Touzel, M.P., Gidel, G., Vorontsov, E., Bengio, Y., Lajoie, G.: Non-normal Recurrent Neural Network (nnRNN): learning long time dependencies while improving expressivity with transient dynamics. In: Advances in Neural Information Processing Systems, pp. 13613\u201313623, Curran Associates, Inc. (2019)"},{"key":"218_CR29","unstructured":"Neelakantan, A., Vilnis, L., Le, Q.V., Sutskever, I., Kaiser, L., Kurach, K., Martens, J.: Adding gradient noise improves learning for very deep networks (2015). arXiv:1511.06807"},{"key":"218_CR30","doi-asserted-by":"publisher","unstructured":"Larochelle, H., Bengio, Y.: Classification using discriminative restricted Boltzmann machines. In: Proceedings of the 25th International Conference on Machine Learning, pp. 536\u2013543, ACM (2008). https:\/\/doi.org\/10.1145\/1390156.1390224","DOI":"10.1145\/1390156.1390224"},{"key":"218_CR31","unstructured":"Frosst, N., Papernot, N., Hinton, G.: Analyzing and improving representations with the soft nearest neighbor loss (2019). arXiv:1902.01889"},{"key":"218_CR32","unstructured":"Qin, Y., Frosst, N., Sabour, S., Raffel, C., Cottrell, G., Hinton, G.: Detecting and diagnosing adversarial images with class-conditional capsule reconstructions (2019). arXiv:1907.02957"},{"key":"218_CR33","doi-asserted-by":"publisher","unstructured":"Garg, A., Gupta, D., Saxena, S., Sahadev, P.P.: Validation of random dataset using an efficient CNN model trained on MNIST handwritten dataset. In: 2019 6th International Conference on Signal Processing and Integrated Networks (SPIN), pp. 602\u2013606, IEEE (2019). https:\/\/doi.org\/10.1109\/SPIN.2019.8711703","DOI":"10.1109\/SPIN.2019.8711703"},{"key":"218_CR34","unstructured":"Cutkosky, A., Orabona, F.: Momentum-based variance reduction in non-convex SGD (2019). arXiv:1905.10018"},{"key":"218_CR35","unstructured":"Tran-Dinh, Q., Pham, N.H., Phan, D.T., Nguyen, L.M.: A hybrid stochastic optimization framework for stochastic composite nonconvex optimization (2019). arXiv:1907.03793"}],"container-title":["Progress in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13748-020-00218-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13748-020-00218-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13748-020-00218-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,10,20]],"date-time":"2021-10-20T13:00:38Z","timestamp":1634734838000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13748-020-00218-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,10,20]]},"references-count":35,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2020,12]]}},"alternative-id":["218"],"URL":"https:\/\/doi.org\/10.1007\/s13748-020-00218-y","relation":{},"ISSN":["2192-6352","2192-6360"],"issn-type":[{"value":"2192-6352","type":"print"},{"value":"2192-6360","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,10,20]]},"assertion":[{"value":"6 March 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 September 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 October 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with ethical standards"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}