{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T12:25:10Z","timestamp":1773231910648,"version":"3.50.1"},"reference-count":45,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:00:00Z","timestamp":1764547200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/100004358","name":"Samsung Group","doi-asserted-by":"publisher","award":["IO201210-08019-01"],"award-info":[{"award-number":["IO201210-08019-01"]}],"id":[{"id":"10.13039\/100004358","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Applied Soft Computing"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1016\/j.asoc.2025.114051","type":"journal-article","created":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T03:37:24Z","timestamp":1760067444000},"page":"114051","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PB","title":["Analysis of NaN divergence in training monocular depth estimation model"],"prefix":"10.1016","volume":"185","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4155-9225","authenticated-orcid":false,"given":"Bum Jun","family":"Kim","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5344-8114","authenticated-orcid":false,"given":"Hyeonah","family":"Jang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6023-1837","authenticated-orcid":false,"given":"Sang Woo","family":"Kim","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.asoc.2025.114051_bib0005","doi-asserted-by":"crossref","DOI":"10.1016\/j.asoc.2020.106804","article-title":"Monocular image depth prediction without depth sensors: an unsupervised learning method","volume":"97","author":"Chen","year":"2020","journal-title":"Appl. Soft Comput."},{"key":"10.1016\/j.asoc.2025.114051_bib0010","doi-asserted-by":"crossref","DOI":"10.1016\/j.asoc.2024.111873","article-title":"Enforcing high frequency enhancement in deep networks for simultaneous depth estimation and dehazing","volume":"163","author":"Wei","year":"2024","journal-title":"Appl. Soft Comput."},{"key":"10.1016\/j.asoc.2025.114051_bib0015","doi-asserted-by":"crossref","DOI":"10.1016\/j.asoc.2020.106533","article-title":"A multi-task faster R-CNN method for 3D vehicle detection based on a single image","volume":"95","author":"Yang","year":"2020","journal-title":"Appl. Soft Comput."},{"key":"10.1016\/j.asoc.2025.114051_bib0020","doi-asserted-by":"crossref","DOI":"10.1016\/j.asoc.2024.111253","article-title":"Deep learning-based fusion networks with high-order attention mechanism for 3D object detection in autonomous driving scenarios","volume":"152","author":"Jiang","year":"2024","journal-title":"Appl. Soft Comput."},{"key":"10.1016\/j.asoc.2025.114051_bib0025","series-title":"ICSE","first-page":"1159","article-title":"An empirical study on program failures of deep learning jobs","author":"Zhang","year":"2020"},{"key":"10.1016\/j.asoc.2025.114051_bib0030","series-title":"NIPS","first-page":"2366","article-title":"Depth map prediction from a single image using a multi-scale deep network","author":"Eigen","year":"2014"},{"key":"10.1016\/j.asoc.2025.114051_bib0035","article-title":"From big to small: multi-scale local planar guidance for monocular depth estimation","author":"Lee","year":"2019","journal-title":"CoRr"},{"key":"10.1016\/j.asoc.2025.114051_bib0040","series-title":"2022 IEEE 7th International Conference for Convergence in Technology (I2CT)","first-page":"1","article-title":"Focal-WNet: an architecture unifying convolution and attention for depth estimation","author":"Manimaran","year":"2022"},{"key":"10.1016\/j.asoc.2025.114051_bib0045","doi-asserted-by":"crossref","first-page":"3341","DOI":"10.1109\/TMM.2023.3310259","article-title":"URCDC-depth: Uncertainty rectified cross-distillation with CutFlip for monocular depth estimation","volume":"26","author":"Shao","year":"2024","journal-title":"IEEE Trans. Multim."},{"key":"10.1016\/j.asoc.2025.114051_bib0050","series-title":"ICML (3)","first-page":"1310","article-title":"On the difficulty of training recurrent neural networks","volume":"vol. 28","author":"Pascanu","year":"2013"},{"key":"10.1016\/j.asoc.2025.114051_bib0055","series-title":"NEURIPS","first-page":"6391","article-title":"Visualizing the loss landscape of neural nets","author":"Li","year":"2018"},{"issue":"11","key":"10.1016\/j.asoc.2025.114051_bib0060","doi-asserted-by":"crossref","first-page":"1231","DOI":"10.1177\/0278364913491297","article-title":"Vision meets robotics: the KITTI dataset","volume":"32","author":"Geiger","year":"2013","journal-title":"Int. J. Robot. Res."},{"key":"10.1016\/j.asoc.2025.114051_bib0065","series-title":"ICLR","article-title":"Adam: a method for stochastic optimization","author":"Kingma","year":"2015"},{"key":"10.1016\/j.asoc.2025.114051_bib0070","series-title":"NEURIPS","first-page":"8024","article-title":"PyTorch: an imperative style, high-performance deep learning library","author":"Paszke","year":"2019"},{"key":"10.1016\/j.asoc.2025.114051_bib0075","series-title":"ICML","first-page":"448","article-title":"Batch normalization: accelerating deep network training by reducing internal covariate shift","volume":"vol. 37","author":"Ioffe","year":"2015"},{"key":"10.1016\/j.asoc.2025.114051_bib0080","series-title":"NEURIPS","article-title":"On the training dynamics of deep networks with L2 regularization","author":"Lewkowycz","year":"2020"},{"key":"10.1016\/j.asoc.2025.114051_bib0085","series-title":"ECCV (5)","first-page":"746","article-title":"Indoor segmentation and support inference from RGBD images","volume":"vol. 7576","author":"Silberman","year":"2012"},{"key":"10.1016\/j.asoc.2025.114051_bib0090","series-title":"CVPR","first-page":"899","article-title":"DrivingStereo: a large-scale dataset for stereo matching in autonomous driving scenarios","author":"Yang","year":"2019"},{"key":"10.1016\/j.asoc.2025.114051_bib0095","series-title":"CVPR","first-page":"8748","article-title":"Argoverse: 3D tracking and forecasting with rich maps","author":"Chang","year":"2019"},{"key":"10.1016\/j.asoc.2025.114051_bib0100","series-title":"CVPR","first-page":"2482","article-title":"3d packing for self-supervised monocular depth estimation","author":"Guizilini","year":"2020"},{"key":"10.1016\/j.asoc.2025.114051_bib0105","series-title":"AISTATS","first-page":"249","article-title":"Understanding the difficulty of training deep feedforward neural networks","volume":"vol. 9","author":"Glorot","year":"2010"},{"key":"10.1016\/j.asoc.2025.114051_bib0110","series-title":"CVPR","first-page":"21477","article-title":"iDisc: internal discretization for monocular depth estimation","author":"Piccinelli","year":"2023"},{"key":"10.1016\/j.asoc.2025.114051_bib0115","unstructured":"W.J. Reichmann, Use and abuse of statistics, (1961)."},{"issue":"4","key":"10.1016\/j.asoc.2025.114051_bib0120","doi-asserted-by":"crossref","first-page":"582","DOI":"10.1214\/aoms\/1177730349","article-title":"On small-sample estimation","volume":"18","author":"Brown","year":"1947","journal-title":"Ann. Math. Stat."},{"key":"10.1016\/j.asoc.2025.114051_bib0125","series-title":"CVPR","first-page":"14475","article-title":"Revealing the dark secrets of masked image modeling","author":"Xie","year":"2023"},{"issue":"6","key":"10.1016\/j.asoc.2025.114051_bib0130","doi-asserted-by":"crossref","first-page":"837","DOI":"10.1007\/s11633-023-1458-0","article-title":"Depthformer: exploiting long-range correlation and local information for accurate monocular depth estimation","volume":"20","author":"Li","year":"2023","journal-title":"Mach. Intell. Res."},{"issue":"11","key":"10.1016\/j.asoc.2025.114051_bib0135","doi-asserted-by":"crossref","first-page":"4381","DOI":"10.1109\/TCSVT.2021.3049869","article-title":"Monocular depth estimation using Laplacian pyramid-based depth residuals","volume":"31","author":"Song","year":"2021","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.asoc.2025.114051_bib0140","article-title":"Global-local path networks for monocular depth estimation with vertical CutDepth","author":"Kim","year":"2022","journal-title":"CoRr"},{"key":"10.1016\/j.asoc.2025.114051_bib0145","series-title":"ICCV","first-page":"21684","article-title":"DDP: Diffusion model for dense visual prediction","author":"Ji","year":"2023"},{"key":"10.1016\/j.asoc.2025.114051_bib0150","doi-asserted-by":"crossref","first-page":"134543","DOI":"10.1109\/ACCESS.2021.3116380","article-title":"D-net: a generalised and optimised deep network for monocular depth estimation","volume":"9","author":"Thompson","year":"2021","journal-title":"IEEE Access"},{"key":"10.1016\/j.asoc.2025.114051_bib0155","series-title":"ICIP","first-page":"3873","article-title":"Depthformer: Multiscale vision transformer for monocular depth estimation with global local information fusion","author":"Agarwal","year":"2022"},{"key":"10.1016\/j.asoc.2025.114051_bib0160","series-title":"CVPR","first-page":"3906","article-title":"Neural window fully-connected CRFs for monocular depth estimation","author":"Yuan","year":"2022"},{"key":"10.1016\/j.asoc.2025.114051_bib0165","series-title":"CVPR","first-page":"4009","article-title":"AdaBins: Depth estimation using adaptive bins","author":"Bhat","year":"2021"},{"key":"10.1016\/j.asoc.2025.114051_bib0170","series-title":"WACV","first-page":"5850","article-title":"Attention attention everywhere: monocular depth prediction with skip attention","author":"Agarwal","year":"2023"},{"key":"10.1016\/j.asoc.2025.114051_bib0175","article-title":"Dinov2: learning robust visual features without supervision","volume":"2024","author":"Oquab","year":"2024","journal-title":"Trans. Mach. Learn. Res."},{"key":"10.1016\/j.asoc.2025.114051_bib0180","series-title":"ICCV","first-page":"5706","article-title":"Unleashing text-to-image diffusion models for visual perception","author":"Zhao","year":"2023"},{"key":"10.1016\/j.asoc.2025.114051_bib0185","article-title":"Zoedepth: zero-shot transfer by combining relative and metric depth","author":"Bhat","year":"2023","journal-title":"CoRr"},{"key":"10.1016\/j.asoc.2025.114051_bib0190","series-title":"ICCV","first-page":"19843","article-title":"All in tokens: unifying output space of visual tasks via soft token","author":"Ning","year":"2023"},{"key":"10.1016\/j.asoc.2025.114051_bib0195","series-title":"ECCV (1)","first-page":"480","article-title":"LocalBins: improving depth estimation by learning local distributions","volume":"vol. 13661","author":"Bhat","year":"2022"},{"key":"10.1016\/j.asoc.2025.114051_bib0200","series-title":"3DV","first-page":"11","article-title":"Sparsity invariant CNNs","author":"Uhrig","year":"2017"},{"key":"10.1016\/j.asoc.2025.114051_bib0205","series-title":"ECCV (8)","first-page":"740","article-title":"Unsupervised CNN for single view depth estimation: geometry to the rescue","volume":"vol. 9912","author":"Garg","year":"2016"},{"key":"10.1016\/j.asoc.2025.114051_bib0210","series-title":"CVPR","first-page":"11999","article-title":"Swin transformer v2: scaling up capacity and resolution","author":"Liu","year":"2022"},{"key":"10.1016\/j.asoc.2025.114051_bib0215","series-title":"ICCV","first-page":"1026","article-title":"Delving deep into rectifiers: surpassing human-level performance on ImageNet classification","author":"He","year":"2015"},{"key":"10.1016\/j.asoc.2025.114051_bib0220","series-title":"ICLR","article-title":"Decoupled weight decay regularization","author":"Loshchilov","year":"2019"},{"key":"10.1016\/j.asoc.2025.114051_bib0225","series-title":"CVPR","first-page":"2002","article-title":"Deep ordinal regression network for monocular depth estimation","author":"Fu","year":"2018"}],"container-title":["Applied Soft Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S156849462501364X?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S156849462501364X?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T15:43:08Z","timestamp":1773157388000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S156849462501364X"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12]]},"references-count":45,"alternative-id":["S156849462501364X"],"URL":"https:\/\/doi.org\/10.1016\/j.asoc.2025.114051","relation":{},"ISSN":["1568-4946"],"issn-type":[{"value":"1568-4946","type":"print"}],"subject":[],"published":{"date-parts":[[2025,12]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Analysis of NaN divergence in training monocular depth estimation model","name":"articletitle","label":"Article Title"},{"value":"Applied Soft Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.asoc.2025.114051","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"114051"}}