{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:24:19Z","timestamp":1772119459284,"version":"3.50.1"},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2024,2,27]],"date-time":"2024-02-27T00:00:00Z","timestamp":1708992000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,2,27]],"date-time":"2024-02-27T00:00:00Z","timestamp":1708992000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Knowl Inf Syst"],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1007\/s10115-023-02052-9","type":"journal-article","created":{"date-parts":[[2024,2,27]],"date-time":"2024-02-27T02:02:41Z","timestamp":1708999361000},"page":"3427-3458","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["AMAdam: adaptive modifier of Adam method"],"prefix":"10.1007","volume":"66","author":[{"given":"Hichame","family":"Kabiri","sequence":"first","affiliation":[]},{"given":"Youssef","family":"Ghanou","sequence":"additional","affiliation":[]},{"given":"Hamid","family":"Khalifi","sequence":"additional","affiliation":[]},{"given":"Gabriella","family":"Casalino","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,2,27]]},"reference":[{"key":"2052_CR1","doi-asserted-by":"publisher","unstructured":"Miikkulainen R, Liang J, Meyerson E, Rawal A, Fink D, Francon O, Raju B, Shahrzad H, Navruzyan A, Duffy N, Hodjat B (2019) Chapter 15\u2014evolving deep neural networks. In: Kozma R, Alippi C, Choe Y, Morabito FC (eds) Artificial intelligence in the age of neural networks and brain computing. Academic Press, New York, pp 293\u2013312. https:\/\/doi.org\/10.1016\/B978-0-12-815480-9.00015-3","DOI":"10.1016\/B978-0-12-815480-9.00015-3"},{"issue":"3","key":"2052_CR2","doi-asserted-by":"publisher","first-page":"2455","DOI":"10.1007\/s00521-021-06544-z","volume":"34","author":"A Abbaszadeh Shahri","year":"2022","unstructured":"Abbaszadeh Shahri A, Khorsand Zak M, Abbaszadeh Shahri H (2022) A modified firefly algorithm applying on multi-objective radial-based function for blasting. Neural Comput Appl 34(3):2455\u20132471. https:\/\/doi.org\/10.1007\/s00521-021-06544-z","journal-title":"Neural Comput Appl"},{"key":"2052_CR3","doi-asserted-by":"publisher","unstructured":"Kabiri H, Ghanou Y (2022) Predicting the mode of transport from GPS trajectories, pp 194\u2013207. https:\/\/doi.org\/10.1007\/978-3-031-07969-6_15","DOI":"10.1007\/978-3-031-07969-6_15"},{"key":"2052_CR4","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177729586","author":"H Robbins","year":"1951","unstructured":"Robbins H, Monro S (1951) A stochastic approximation method. Ann Math Stat. https:\/\/doi.org\/10.1214\/aoms\/1177729586","journal-title":"Ann Math Stat"},{"issue":"7","key":"2052_CR5","doi-asserted-by":"publisher","first-page":"307","DOI":"10.1038\/s42256-019-0067-7","volume":"1","author":"A Button","year":"2019","unstructured":"Button A, Merk D, Hiss JA, Schneider G (2019) Automated de novo molecular design by hybrid machine intelligence and rule-driven chemical synthesis. Nat Mach Intell 1(7):307\u2013315. https:\/\/doi.org\/10.1038\/s42256-019-0067-7","journal-title":"Nat Mach Intell"},{"key":"2052_CR6","unstructured":"Abadi M, Barham P, Chen J, Chen Z, Davis A, Dean J, Devin M, Ghemawat S, Irving G, Isard M, Kudlur M, Levenberg J, Monga R, Moore S, Murray DG, Steiner B, Tucker P, Vasudevan V, Warden P, Wicke M, Yu Y, Zheng X (2016) TensorFlow: a system for large-scale machine learning. In: Proceedings of the 12th USENIX symposium on operating systems design and implementation, OSDI 2016"},{"key":"2052_CR7","unstructured":"Chollet F (2015) Keras: the Python deep learning library. Keras.Io"},{"key":"2052_CR8","unstructured":"Paszke A, Gross S, Massa F, Lerer A, Bradbury J, Chanan G, Killeen T, Lin Z, Gimelshein N, Antiga L, Desmaison A, K\u00f6pf A, Yang E, DeVito Z, Raison M, Tejani A, Chilamkurthy S, Steiner B, Fang L, Bai J, Chintala S (2019) Pytorch: an imperative style, high-performance deep learning library. arXiv:1912.01703 [cs.LG]"},{"key":"2052_CR9","unstructured":"Singh A, Plumbley, MD (2023) Efficient CNNs via passive filter pruning. arXiv:2304.02319 [cs.LG]"},{"key":"2052_CR10","unstructured":"Hosseini S, Akilan T (2023) Advanced deep regression models for forecasting time series oil production. arXiv:2308.16105 [cs.LG]"},{"key":"2052_CR11","doi-asserted-by":"publisher","DOI":"10.1007\/s00366-023-01852-5","author":"A Abbaszadeh Shahri","year":"2023","unstructured":"Abbaszadeh Shahri A, Chunling S, Larsson S (2023) A hybrid ensemble-based automated deep learning approach to generate 3d geo-models and uncertainty analysis. Eng Comput. https:\/\/doi.org\/10.1007\/s00366-023-01852-5","journal-title":"Eng Comput"},{"key":"2052_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2018.01.108","author":"H Khalifi","year":"2018","unstructured":"Khalifi H, Elqadi A, Ghanou Y (2018) Support vector machines for a new hybrid information retrieval system. Procedia Comput Sci. https:\/\/doi.org\/10.1016\/j.procs.2018.01.108","journal-title":"Procedia Comput Sci"},{"key":"2052_CR13","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2023.106668","volume":"156","author":"S Nazir","year":"2023","unstructured":"Nazir S, Dickson DM, Akram MU (2023) Survey of explainable artificial intelligence techniques for biomedical imaging with deep neural networks. Comput Biol Med 156:106668. https:\/\/doi.org\/10.1016\/j.compbiomed.2023.106668","journal-title":"Comput Biol Med"},{"key":"2052_CR14","unstructured":"Hu J, Doshi V, Eun DY (2022) Efficiency ordering of stochastic gradient descent. arXiv:2209.07446 [cs.LG]"},{"key":"2052_CR15","unstructured":"An J, Lu J (2023) Convergence of stochastic gradient descent under a local Lajasiewicz condition for deep neural networks. arXiv:2304.09221 [cs.LG]"},{"key":"2052_CR16","unstructured":"Koloskova A, Doikov N, Stich SU, Jaggi M (2023) Shuffle SGD is always better than SGD: improved analysis of SGD with arbitrary data orders. arXiv:2305.19259 [cs.LG]"},{"key":"2052_CR17","doi-asserted-by":"publisher","unstructured":"Huang H, Wang C, Dong B (2019) Nostalgic ADAM: weighting more of the past gradients when designing the adaptive learning rate. In: IJCAI international joint conference on artificial intelligence 2019-August, pp 2556\u20132562. https:\/\/doi.org\/10.24963\/ijcai.2019\/355. arXiv:1805.07557","DOI":"10.24963\/ijcai.2019\/355"},{"key":"2052_CR18","doi-asserted-by":"publisher","unstructured":"Gridin I (2022) Hyperparameter optimization under shell, pp 111\u2013184. https:\/\/doi.org\/10.1007\/978-1-4842-8149-9_3","DOI":"10.1007\/978-1-4842-8149-9_3"},{"key":"2052_CR19","unstructured":"Abbe E, Boix-Adsera E, Misiakiewicz T (2023) SGD learning on neural networks: leap complexity and saddle-to-saddle dynamics. arXiv:2302.11055 [cs.LG]"},{"key":"2052_CR20","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2916341","author":"PT Tran","year":"2019","unstructured":"Tran PT, Phong LT (2019) On the convergence proof of AMSGrad and a new version. IEEE Access. https:\/\/doi.org\/10.1109\/ACCESS.2019.2916341. arXiv:1904.03590","journal-title":"IEEE Access"},{"key":"2052_CR21","unstructured":"Defossez A, Bottou L, Bach F, Usunier N (2020) On the convergence of adam and adagrad. arXiv arXiv:2003.02395"},{"key":"2052_CR22","unstructured":"Frangella Z, Rathore P, Zhao S, Udell M (2023) Sketchysgd: reliable stochastic optimization via randomized curvature estimates. arXiv:2211.08597 [math.OC]"},{"key":"2052_CR23","first-page":"2121","volume":"12","author":"J Duchi","year":"2011","unstructured":"Duchi J, Hazan E, Singer Y (2011) Adaptive subgradient methods for online learning and stochastic optimization. J Mach Learn Res 12:2121\u20132159","journal-title":"J Mach Learn Res"},{"issue":"2","key":"2052_CR24","doi-asserted-by":"publisher","first-page":"527","DOI":"10.1007\/s10208-015-9296-2","volume":"17","author":"Y Nesterov","year":"2017","unstructured":"Nesterov Y, Spokoiny V (2017) Random gradient-free minimization of convex functions. Found Comput Math 17(2):527\u2013566. https:\/\/doi.org\/10.1007\/s10208-015-9296-2","journal-title":"Found Comput Math"},{"key":"2052_CR25","unstructured":"Luo L, Xiong Y, Liu Y, Sun X (2019) Adaptive gradient methods with dynamic bound of learning rate. CoRR arXiv:1902.09843"},{"key":"2052_CR26","unstructured":"Tieleman T, Hinton G (2012) Lecture 6.5-RMSProp, COURSERA: neural networks for machine learning. Technical report"},{"key":"2052_CR27","unstructured":"Zeiler MD (2012) ADADELTA: an adaptive learning rate method. arXiv:1212.5701"},{"key":"2052_CR28","unstructured":"Kingma DP, Ba JL (2015) Adam: a method for stochastic optimization. In: 3rd International conference on learning representations, ICLR 2015\u2014conference track proceedings, pp 1\u201315. arXiv:1412.6980"},{"key":"2052_CR29","unstructured":"Loshchilov I, Hutter F (2017) Fixing weight decay regularization in Adam. CoRR arXiv:1711.05101"},{"key":"2052_CR30","unstructured":"Liu L, Jiang H, He P, Chen W, Liu X, Gao J, Han J (2020) On the variance of the adaptive learning rate and beyond. In: Proceedings of the eighth international conference on learning representations (ICLR 2020)"},{"issue":"2","key":"2052_CR31","doi-asserted-by":"publisher","first-page":"527","DOI":"10.1007\/s10208-015-9296-2","volume":"17","author":"Y Nesterov","year":"2017","unstructured":"Nesterov Y, Spokoiny V (2017) Random gradient-free minimization of convex functions. Found Comput Math 17(2):527\u2013566. https:\/\/doi.org\/10.1007\/s10208-015-9296-2","journal-title":"Found Comput Math"},{"key":"2052_CR32","unstructured":"Zhuang J, Tang T, Ding Y, Tatikonda S, Dvornek N, Papademetris X, Duncan JS (2020) AdaBelief optimizer: adapting stepsizes by the belief in observed gradients. cite arxiv:2010.07468"},{"key":"2052_CR33","unstructured":"Reddi SJ, Kale S, Kumar S (2018) On the convergence of Adam and beyond. arXiv:1904.09237"},{"key":"2052_CR34","doi-asserted-by":"crossref","unstructured":"Dubey SR, Chakraborty S, Roy SK, Mukherjee S, Singh SK, Chaudhuri BB (2019) Diffgrad: an optimization method for convolutional neural networks","DOI":"10.1109\/TNNLS.2019.2955777"},{"key":"2052_CR35","unstructured":"Darken C, Moody JE (1989) Note on learning rate schedules for stochastic optimization. In: Advances in neural information processing systems"},{"key":"2052_CR36","doi-asserted-by":"publisher","unstructured":"Gowgi P, Garani SS (2020) Hessian-based bounds on learning rate for gradient descent algorithms. https:\/\/doi.org\/10.1109\/IJCNN48605.2020.9207074","DOI":"10.1109\/IJCNN48605.2020.9207074"},{"issue":"10","key":"2052_CR37","doi-asserted-by":"publisher","first-page":"6685","DOI":"10.1007\/s00521-018-3495-0","volume":"31","author":"J Zhang","year":"2019","unstructured":"Zhang J, Hu F, Li L, Xu X, Yang Z, Chen Y (2019) An adaptive mechanism to achieve learning rate dynamically. Neural Comput Appl 31(10):6685\u20136698. https:\/\/doi.org\/10.1007\/s00521-018-3495-0","journal-title":"Neural Comput Appl"},{"key":"2052_CR38","doi-asserted-by":"publisher","first-page":"1068","DOI":"10.1016\/j.asoc.2018.09.038","volume":"73","author":"A Sharma","year":"2018","unstructured":"Sharma A (2018) Guided stochastic gradient descent algorithm for inconsistent datasets. Appl Soft Comput 73:1068\u20131080. https:\/\/doi.org\/10.1016\/j.asoc.2018.09.038","journal-title":"Appl Soft Comput"},{"key":"2052_CR39","unstructured":"Wan Y, Yao C, Song M, Zhang L (2023) Non-stationary online convex optimization with arbitrary delays. arXiv:2305.12131 [cs.LG]"},{"key":"2052_CR40","unstructured":"Ruder S (2016) An overview of gradient descent optimization algorithms. CoRR arXiv:1609.04747"},{"key":"2052_CR41","doi-asserted-by":"publisher","unstructured":"Liu DC (1989) On the limited memory BFGS method for large scale optimization. CoRR. https:\/\/doi.org\/10.1007\/BF015891","DOI":"10.1007\/BF015891"},{"key":"2052_CR42","doi-asserted-by":"crossref","unstructured":"Bottou L (2010) Large-scale machine learning with stochastic gradient descent, pp 177\u2013187. http:\/\/leon.bottou.org\/papers\/bottou-2010","DOI":"10.1007\/978-3-7908-2604-3_16"},{"key":"2052_CR43","doi-asserted-by":"crossref","unstructured":"Mor\u00e9 JJ (1977) Levenberg\u2013Marquardt algorithm: implementation and theory. https:\/\/api.semanticscholar.org\/CorpusID:203694768","DOI":"10.1007\/BFb0067700"},{"key":"2052_CR44","doi-asserted-by":"publisher","first-page":"647","DOI":"10.1090\/S0025-5718-1970-0274029-X","volume":"24","author":"DF Shanno","year":"1970","unstructured":"Shanno DF (1970) Conditioning of quasi-Newton methods for function minimization. Math Comput 24:647\u2013656","journal-title":"Math Comput"},{"key":"2052_CR45","unstructured":"LeCun Y, Cortes C (2010) MNIST handwritten digit database. AT &T Labs [Online]. http:\/\/yann.lecun.com\/exdb\/mnist"},{"key":"2052_CR46","unstructured":"Lakshmipathi N (2019) IMDB dataset of 50K movie reviews. http:\/\/ai.stanford.edu\/~amaas\/data\/sentiment\/"},{"key":"2052_CR47","unstructured":"Krizhevsky A (2009) Learning multiple layers of features from tiny images"},{"key":"2052_CR48","unstructured":"Krizhevsky A, Nair V, Hinton G Cifar-100 (Canadian Institute for Advanced Research). http:\/\/www.cs.toronto.edu\/~kriz\/cifar.html"},{"key":"2052_CR49","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks, vol 25. https:\/\/proceedings.neurips.cc\/paper\/2012\/file"},{"key":"2052_CR50","doi-asserted-by":"publisher","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. https:\/\/doi.org\/10.48550\/ARXIV.1409.1556","DOI":"10.48550\/ARXIV.1409.1556"},{"key":"2052_CR51","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, Sun J (2015) Deep residual learning for image recognition. https:\/\/doi.org\/10.48550\/arXiv.1512.03385","DOI":"10.48550\/arXiv.1512.03385"}],"container-title":["Knowledge and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-023-02052-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10115-023-02052-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-023-02052-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,29]],"date-time":"2024-05-29T00:13:05Z","timestamp":1716941585000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10115-023-02052-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,2,27]]},"references-count":51,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2024,6]]}},"alternative-id":["2052"],"URL":"https:\/\/doi.org\/10.1007\/s10115-023-02052-9","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-2772139\/v1","asserted-by":"object"}]},"ISSN":["0219-1377","0219-3116"],"issn-type":[{"value":"0219-1377","type":"print"},{"value":"0219-3116","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,2,27]]},"assertion":[{"value":"3 April 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 December 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 December 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 February 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}