{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T06:43:14Z","timestamp":1774939394482,"version":"3.50.1"},"reference-count":53,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T00:00:00Z","timestamp":1769904000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["52175210"],"award-info":[{"award-number":["52175210"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Jianghuai Frontier Technology Center Dream Fund","award":["NO.2023-ZM01Z009"],"award-info":[{"award-number":["NO.2023-ZM01Z009"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1007\/s10489-026-07143-y","type":"journal-article","created":{"date-parts":[[2026,2,23]],"date-time":"2026-02-23T13:06:47Z","timestamp":1771852007000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["TRTP: a three-stage robust task planning framework for open worlds via visual-language models and digital twin simulation"],"prefix":"10.1007","volume":"56","author":[{"given":"Yuanjin","family":"Qu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiangtao","family":"Hu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fei","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhihong","family":"Wei","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,2,23]]},"reference":[{"key":"7143_CR1","doi-asserted-by":"crossref","unstructured":"Chen, Y., Arkin, J., Zhang, Y., Roy, N., Fan, C.: Scalable multi-robot collaboration with large language models: Centralized or decentralized systems? In: 2024 IEEE International Conference on Robotics and Automation (ICRA), pp. 4311\u20134317. IEEE, Yokohama (2024). doi: 10.1109\/ICRA57147.2024.10610676","DOI":"10.1109\/ICRA57147.2024.10610676"},{"key":"7143_CR2","doi-asserted-by":"publisher","first-page":"55682","DOI":"10.1109\/ACCESS.2024.3387941","volume":"12","author":"SH Vemprala","year":"2024","unstructured":"Vemprala SH, Bonatti R, Bucker A, Kapoor A (2024) Chatgpt for robotics: Design principles and model abilities. IEEE Access 12:55682\u201355696. https:\/\/doi.org\/10.1109\/ACCESS.2024.3387941","journal-title":"IEEE Access"},{"key":"7143_CR3","doi-asserted-by":"crossref","unstructured":"Sun, H., Zhuang, Y., Kong, L., Dai, B., Zhang, C.: Adaplanner: Adaptive planning from feedback with language models. Advances in Neural Information Processing Systems 36, 58202\u201358245 (2024). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/hash\/b5c8c1c117618267944b2617add0a766-Abstract-Conference.html","DOI":"10.52202\/075280-2537"},{"key":"7143_CR4","unstructured":"Hu, M., Mu, Y., Yu, X.C., Ding, M., Wu, S., Shao, W., Chen, Q., Wang, B., Qiao, Y., Luo, P.: Tree-planner: Efficient close-loop task planning with large language models. In: The Twelfth International Conference on Learning Representations, Vienna (2024). https:\/\/doi.org\/10.48550\/arXiv.2310.08582"},{"issue":"1","key":"7143_CR5","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1146\/annurev-control-101119-071628","volume":"3","author":"S Tellex","year":"2020","unstructured":"Tellex S, Gopalan N, Kress-Gazit H, Matuszek C (2020) Robots that use language. Annual Review of Control Robotics and Autonomous Systems 3(1):25\u201355. https:\/\/doi.org\/10.1146\/annurev-control-101119-071628","journal-title":"Annual Review of Control Robotics and Autonomous Systems"},{"key":"7143_CR6","doi-asserted-by":"crossref","unstructured":"Valmeekam, K., Marquez, M., Sreedharan, S., Kambhampati, S.: On the planning abilities of large language models-a critical investigation. Advances in Neural Information Processing Systems 36, 75993\u201376005 (2023). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/hash\/efb2072a358cefb75886a315a6fcf880-Abstract-Conference.html","DOI":"10.52202\/075280-3320"},{"key":"7143_CR7","doi-asserted-by":"publisher","first-page":"23716","DOI":"10.5555\/3524938.3525056","volume":"35","author":"J-B Alayrac","year":"2022","unstructured":"Alayrac J-B, Donahue J, Luc P, Miech A, Barr I, Hasson Y, Lenc K, Mensch A, Millican K, Reynolds M (2022) Flamingo: a visual language model for few-shot learning. Adv Neural Inf Process Syst 35:23716\u201323736. https:\/\/doi.org\/10.5555\/3524938.3525056","journal-title":"Adv Neural Inf Process Syst"},{"key":"7143_CR8","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.: Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In: International Conference on Machine Learning, pp. 19730\u201319742. PMLR, Honolulu (2023). https:\/\/proceedings.mlr.press\/v202\/li23q"},{"key":"7143_CR9","doi-asserted-by":"crossref","unstructured":"Chen, Z., Wu, J., Wang, W., Su, W., Chen, G., Xing, S., Zhong, M., Zhang, Q., Zhu, X., Lu, L.: Internvl: Scaling up vision foundation models and aligning for generic visual-linguistic tasks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, Seattle, pp. 24185\u201324198 (2024). doi: 10.1109\/CVPR52733.2024.02283","DOI":"10.1109\/CVPR52733.2024.02283"},{"key":"7143_CR10","unstructured":"Yao, Y., Yu, T., Zhang, A., Wang, C., Cui, J., Zhu, H., Cai, T., Li, H., Zhao, W., He, Z.: Minicpm-v: A gpt-4v level mllm on your phone. Preprint at https:\/\/arxiv.org\/abs\/2408.01800 (2024)"},{"key":"7143_CR11","unstructured":"Hurst, A., Lerer, A., Goucher, A.P., Perelman, A., Ramesh, A., Clark, A., Ostrow, A., Welihinda, A., Hayes, A., Radford, A.: Gpt-4o system card. arXiv preprint arXiv:2410.21276 (2024)"},{"key":"7143_CR12","unstructured":"Huang, W., Abbeel, P., Pathak, D., Mordatch, I.: Language models as zero-shot planners: Extracting actionable knowledge for embodied agents. In: International Conference on Machine Learning, pp. 9118\u20139147. PMLR, Baltimore (2022). https:\/\/doi.org\/10.48550\/arXiv.2201.07207"},{"key":"7143_CR13","unstructured":"Brohan, A., Chebotar, Y., Finn, C., Hausman, K., Herzog, A., Ho, D., Ibarz, J., Irpan, A., Jang, E., Julian, R.: Do as i can, not as i say: Grounding language in robotic affordances. In: Conference on Robot Learning, pp. 287\u2013318. PMLR, Auckland (2023). https:\/\/proceedings.mlr.press\/v205\/ichter23a.html"},{"key":"7143_CR14","unstructured":"Huang, W., Xia, F., Xiao, T., Chan, H., Liang, J., Florence, P., Zeng, A., Tompson, J., Mordatch, I., Chebotar, Y.: Inner monologue: Embodied reasoning through planning with language models. In: Conference on Robot Learning, pp. 1769\u20131782. PMLR, Auckland (2023). https:\/\/proceedings.mlr.press\/v205\/huang23c.html"},{"key":"7143_CR15","doi-asserted-by":"crossref","unstructured":"Chen, B., Xia, F., Ichter, B., Rao, K., Gopalakrishnan, K., Ryoo, M.S., Stone, A., Kappler, D.: Open-vocabulary queryable scene representations for real world planning. In: 2023 IEEE International Conference on Robotics and Automation (ICRA), pp. 11509\u201311522. IEEE, London (2023). doi: 10.1109\/ICRA48891.2023.10161534","DOI":"10.1109\/ICRA48891.2023.10161534"},{"key":"7143_CR16","doi-asserted-by":"crossref","unstructured":"Huang, C., Mees, O., Zeng, A., Burgard, W.: Visual language maps for robot navigation. In: 2023 IEEE International Conference on Robotics and Automation (ICRA), pp. 10608\u201310615. IEEE, London (2023). doi: 10.1109\/ICRA48891.2023.10160969","DOI":"10.1109\/ICRA48891.2023.10160969"},{"key":"7143_CR17","unstructured":"Ma, Y.J., Kumar, V., Zhang, A., Bastani, O., Jayaraman, D.: Liv: Language-image representations and rewards for robotic control. In: International Conference on Machine Learning, pp. 23301\u201323320. PMLR, Honolulu (2023). https:\/\/proceedings.mlr.press\/v202\/ma23b.html"},{"key":"7143_CR18","doi-asserted-by":"crossref","unstructured":"Kannan, S.S., Venkatesh, V.L., Min, B.-C.: Smart-llm: Smart multi-agent robot task planning using large language models. In: 2024 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 12140\u201312147. IEEE, Abu Dhabi (2024). doi: 10.1109\/IROS58592.2024.10802322","DOI":"10.1109\/IROS58592.2024.10802322"},{"key":"7143_CR19","unstructured":"Liu, B., Jiang, Y., Zhang, X., Liu, Q., Zhang, S., Biswas, J., Stone, P.: Llm+ p: Empowering large language models with optimal planning proficiency. Preprint at https:\/\/arxiv.org\/abs\/2304.11477 (2023)"},{"key":"7143_CR20","unstructured":"Thomason, J., Zhang, S., Mooney, R.J., Stone, P.: Learning to interpret natural language commands through human-robot dialog. In: Twenty-Fourth International Joint Conference on Artificial Intelligence, vol. 15, pp. 1923\u20131929. AAAI Press, Buenos Aires (2015). https:\/\/dl.acm.org\/doi\/abs\/10.5555\/2832415.2832516"},{"key":"7143_CR21","unstructured":"Jang, E., Irpan, A., Khansari, M., Kappler, D., Ebert, F., Lynch, C., Levine, S., Finn, C.: Bc-z: Zero-shot task generalization with robotic imitation learning. In: Faust, A., Hsu, D., Neumann, G. (eds.) Conference on Robot Learning, pp. 991\u20131002. PMLR, London (2022). https:\/\/proceedings.mlr.press\/v164\/jang22a"},{"key":"7143_CR22","unstructured":"Shah, D., Osi\u0144ski, B., Levine, S.: Lm-nav: Robotic navigation with large pre-trained models of language, vision, and action. In: Conference on Robot Learning, pp. 492\u2013504. PMLR, Auckland (2023). https:\/\/proceedings.mlr.press\/v205\/shah23b"},{"key":"7143_CR23","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR, Virtual (2021). http:\/\/proceedings.mlr.press\/v139\/radford21a"},{"key":"7143_CR24","unstructured":"Nair, S., Rajeswaran, A., Kumar, V., Finn, C., Gupta, A.: R3m: A universal visual representation for robot manipulation. In: 6th Annual Conference on Robot Learning, Auckland, pp. 892\u2013909 (2022). https:\/\/proceedings.mlr.press\/v205\/nair23a.html"},{"issue":"5","key":"7143_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10489-025-06249-z","volume":"55","author":"X Gao","year":"2025","unstructured":"Gao X, Peng D, Yang Y, Huang F, Yuan Y, Tan C, Li F (2025) Two-stage graph attention networks and q-learning based maintenance tasks scheduling. Appl Intell 55(5):1\u201320. https:\/\/doi.org\/10.1007\/s10489-025-06249-z","journal-title":"Appl Intell"},{"key":"7143_CR26","unstructured":"Rana, K., Haviland, J., Garg, S., Abou-Chakra, J., Reid, I., Suenderhauf, N.: Sayplan: Grounding large language models using 3d scene graphs for scalable robot task planning. In: 7th Annual Conference on Robot Learning, Atlanta, pp. 23\u201372 (2023). https:\/\/proceedings.mlr.press\/v229\/rana23a.html"},{"key":"7143_CR27","volume-title":"Pddl| the planning domain definition language","author":"C Aeronautiques","year":"1998","unstructured":"Aeronautiques C, Howe A, Knoblock C, McDermott ID, Ram A, Veloso M, Weld D, Sri DW, Barrett A, Christianson D (1998) Pddl| the planning domain definition language. Tech. Rep, Technical Report"},{"key":"7143_CR28","doi-asserted-by":"publisher","first-page":"191","DOI":"10.1613\/jair.1705","volume":"26","author":"M Helmert","year":"2006","unstructured":"Helmert M (2006) The fast downward planning system. Journal of Artificial Intelligence Research 26:191\u2013246. https:\/\/doi.org\/10.1613\/jair.1705","journal-title":"Journal of Artificial Intelligence Research"},{"key":"7143_CR29","unstructured":"Silver, T., Hariprasad, V., Shuttleworth, R.S., Kumar, N., Lozano-P\u00e9rez, T., Kaelbling, L.P.: Pddl planning with pretrained large language models. In: NeurIPS 2022 Foundation Models for Decision Making Workshop, New Orleans (2022)"},{"key":"7143_CR30","doi-asserted-by":"crossref","unstructured":"Singh, I., Blukis, V., Mousavian, A., Goyal, A., Xu, D., Tremblay, J., Fox, D., Thomason, J., Garg, A.: Progprompt: Generating situated robot task plans using large language models. In: 2023 IEEE International Conference on Robotics and Automation (ICRA), pp. 11523\u201311530. IEEE, London (2023). doi: 10.1109\/ICRA48891.2023.10161317","DOI":"10.1109\/ICRA48891.2023.10161317"},{"key":"7143_CR31","doi-asserted-by":"crossref","unstructured":"Liang, J., Huang, W., Xia, F., Xu, P., Hausman, K., Ichter, B., Florence, P., Zeng, A.: Code as policies: Language model programs for embodied control. In: 2023 IEEE International Conference on Robotics and Automation (ICRA), pp. 9493\u20139500. IEEE, London (2023). doi: 10.1109\/ICRA48891.2023.10160591","DOI":"10.1109\/ICRA48891.2023.10160591"},{"key":"7143_CR32","doi-asserted-by":"crossref","unstructured":"Jannuzzi, M., Perezhohin, Y., Peres, F., Castelli, M., Popovi\u010d, A.: Zero-shot prompting strategies for table question answering with a low-resource language. Emerging Science Journal 8(5), 2003\u20132022 (2024) https:\/\/doi.org\/10.28991\/ESJ-2024-08-05-020","DOI":"10.28991\/ESJ-2024-08-05-020"},{"issue":"8","key":"7143_CR33","doi-asserted-by":"publisher","first-page":"1087","DOI":"10.1007\/s10514-023-10139-z","volume":"47","author":"J Wu","year":"2023","unstructured":"Wu J, Antonova R, Kan A, Lepert M, Zeng A, Song S, Bohg J, Rusinkiewicz S, Funkhouser T (2023) Tidybot: Personalized robot assistance with large language models. Auton Robot 47(8):1087\u20131102. https:\/\/doi.org\/10.1007\/s10514-023-10139-z","journal-title":"Auton Robot"},{"key":"7143_CR34","doi-asserted-by":"crossref","unstructured":"Zhao, X., Li, M., Weber, C., Hafez, M.B., Wermter, S.: Chat with the environment: Interactive multimodal perception using large language models. In: 2023 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 3590\u20133596. IEEE, Detroit (2023). doi: 10.1109\/IROS55552.2023.10342363","DOI":"10.1109\/IROS55552.2023.10342363"},{"key":"7143_CR35","doi-asserted-by":"crossref","unstructured":"Zhao, Z., Lee, W.S., Hsu, D.: Large language models as commonsense knowledge for large-scale task planning. Advances in Neural Information Processing Systems 36, 31967\u201331987 (2024) https:\/\/doi.org\/10.48550\/arXiv.2305.14078","DOI":"10.52202\/075280-1387"},{"key":"7143_CR36","unstructured":"Driess, D., Xia, F., Sajjadi, M.S., Lynch, C., Chowdhery, A., Ichter, B., Wahid, A., Tompson, J., Vuong, Q., Yu, T.: Palm-e: an embodied multimodal language model. In: Proceedings of the 40th International Conference on Machine Learning, Honolulu, pp. 8469\u20138488 (2023). https:\/\/proceedings.mlr.press\/v202\/driess23a.html"},{"key":"7143_CR37","doi-asserted-by":"crossref","unstructured":"Editya, A.S., Ahmad, T., Studiawan, H.: Visual instruction tuning for drone accident forensics. HighTech and Innovation Journal 5(4), 870\u2013884 (2024) https:\/\/doi.org\/10.28991\/HIJ-2024-05-04-01","DOI":"10.28991\/HIJ-2024-05-04-01"},{"key":"7143_CR38","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIM.2025.3628434","volume":"74","author":"Z Xiao","year":"2025","unstructured":"Xiao Z, Wan Q, Tong H, Xing H (2025) Attentional knowledge-based state-space model for electrocardiogram signal classification. IEEE Trans Instrum Meas 74:1\u201315. https:\/\/doi.org\/10.1109\/TIM.2025.3628434","journal-title":"IEEE Trans Instrum Meas"},{"key":"7143_CR39","doi-asserted-by":"crossref","unstructured":"Cornejo, J., Cornejo, J., Vargas, M., Carvajal, M., Perales, P., Rodr\u00edguez, G., Macias, C., Canizares, S., Silva, P., Cubas, R.F.: Sy-mis project: Biomedical design of endo-robotic and laparoscopic training system for surgery on the earth and space. Emerging Science Journal 8(2), 372\u2013393 (2024) https:\/\/doi.org\/10.28991\/ESJ-2024-08-02-01","DOI":"10.28991\/ESJ-2024-08-02-01"},{"key":"7143_CR40","doi-asserted-by":"crossref","unstructured":"Vargas, M., Vasquez, Y., Barra, D., Charapaqui, S., Tapia-Yanayaco, P., Maldonado-G\u00f3mez, R., Mendoza-Arias, L., Altatorre, A., Ccellccaro, C., Bedoya-Castillo, M.: Elbow-hand robotic exoskeletons for active and passive rehabilitation on post-stroke patients: A bioengineering review. HighTech and Innovation Journal 5(4), 1170\u20131190 (2024) https:\/\/doi.org\/10.28991\/HIJ-2024-05-04-020","DOI":"10.28991\/HIJ-2024-05-04-020"},{"issue":"6","key":"7143_CR41","doi-asserted-by":"publisher","first-page":"3413","DOI":"10.1109\/TBDATA.2025.3594294","volume":"11","author":"Z Xiao","year":"2025","unstructured":"Xiao Z, Xing H, Qu R, Li H, Tong H, Luo S, Song J, Feng L, Wan Q (2025) Knowledge aggregation transformer network for multivariate time series classification. IEEE Transactions on Big Data 11(6):3413\u20133429. https:\/\/doi.org\/10.1109\/TBDATA.2025.3594294","journal-title":"IEEE Transactions on Big Data"},{"key":"7143_CR42","unstructured":"Wang, P., Bai, S., Tan, S., Wang, S., Fan, Z., Bai, J., Chen, K., Liu, X., Wang, J., Ge, W.: Qwen2-vl: Enhancing vision-language model\u2019s perception of the world at any resolution. Preprint at https:\/\/arxiv.org\/abs\/2409.12191 (2024)"},{"key":"7143_CR43","doi-asserted-by":"crossref","unstructured":"Khazatsky, A., Pertsch, K., Nair, S., Balakrishna, A., Dasari, S., Karamcheti, S., Nasiriany, S., Srirama, M.K., Chen, L.Y., Ellis, K.: Droid: A large-scale in-the-wild robot manipulation dataset. In: Robotics: Science and Systems, Delft (2024). https:\/\/www.research.ed.ac.uk\/en\/publications\/droid-a-large-scale-in-the-wild-robot-manipulation-dataset","DOI":"10.15607\/RSS.2024.XX.120"},{"issue":"11","key":"7143_CR44","doi-asserted-by":"publisher","first-page":"4125","DOI":"10.1109\/TPAMI.2020.2991965","volume":"43","author":"D Damen","year":"2020","unstructured":"Damen D, Doughty H, Farinella GM, Fidler S, Furnari A, Kazakos E, Moltisanti D, Munro J, Perrett T, Price W (2020) The epic-kitchens dataset: Collection, challenges and baselines. IEEE Trans Pattern Anal Mach Intell 43(11):4125\u20134141. https:\/\/doi.org\/10.1109\/TPAMI.2020.2991965","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"7143_CR45","unstructured":"Wang, W., Chen, Z., Wang, W., Cao, Y., Liu, Y., Gao, Z., Zhu, J., Zhu, X., Lu, L., Qiao, Y.: Enhancing the reasoning ability of multimodal large language models via mixed preference optimization. Preprint at https:\/\/arxiv.org\/abs\/2411.10442 (2024)"},{"key":"7143_CR46","unstructured":"Chen, Z., Wang, W., Cao, Y., Liu, Y., Gao, Z., Cui, E., Zhu, J., Ye, S., Tian, H., Liu, Z.: Expanding performance boundaries of open-source multimodal models with model, data, and test-time scaling. Preprint at https:\/\/arxiv.org\/abs\/2412.05271 (2024)"},{"key":"7143_CR47","unstructured":"Grattafiori, A., Dubey, A., Jauhri, A., Pandey, A., Kadian, A., Al-Dahle, A., Letman, A., Mathur, A., Schelten, A., Vaughan, A.: The llama 3 herd of models. Preprint at https:\/\/arxiv.org\/abs\/2407.21783 (2024)"},{"key":"7143_CR48","unstructured":"Deitke, M., Clark, C., Lee, S., Tripathi, R., Yang, Y., Park, J.S., Salehi, M., Muennighoff, N., Lo, K., Soldaini, L.: Molmo and pixmo: Open weights and open data for state-of-the-art multimodal models. Preprint at https:\/\/arxiv.org\/abs\/2409.17146 (2024)"},{"key":"7143_CR49","unstructured":"Lu, S., Li, Y., Chen, Q.-G., Xu, Z., Luo, W., Zhang, K., Ye, H.-J.: Ovis: Structural embedding alignment for multimodal large language model. Preprint at https:\/\/arxiv.org\/abs\/2405.20797 (2024)"},{"key":"7143_CR50","doi-asserted-by":"crossref","unstructured":"Liu, H., Li, C., Wu, Q., Lee, Y.J.: Visual instruction tuning. Advances in neural information processing systems 36, 34892\u201334916 (2023). https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/hash\/6dcf277ea32ce3288914faf369fe6de0-Abstract-Conference.html","DOI":"10.52202\/075280-1516"},{"key":"7143_CR51","doi-asserted-by":"crossref","unstructured":"Kirillov, A., Mintun, E., Ravi, N., Mao, H., Rolland, C., Gustafson, L., Xiao, T., Whitehead, S., Berg, A.C., Lo, W.-Y.: Segment anything. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, Paris, pp. 4015\u20134026 (2023). doi: 10.1109\/ICCV51070.2023.00371","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"7143_CR52","doi-asserted-by":"crossref","unstructured":"Ranftl, R., Bochkovskiy, A., Koltun, V.: Vision transformers for dense prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12179\u201312188. IEEE, Montreal (2021). doi: 10.1109\/ICCV48922.2021.01196","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"7143_CR53","doi-asserted-by":"crossref","unstructured":"Yang, L., Kang, B., Huang, Z., Xu, X., Feng, J., Zhao, H.: Depth anything: Unleashing the power of large-scale unlabeled data. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10371\u201310381. IEEE, Vancouver (2023). doi: 10.1109\/CVPR52733.2024.00987","DOI":"10.1109\/CVPR52733.2024.00987"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-026-07143-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-026-07143-y","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-026-07143-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,31]],"date-time":"2026-03-31T05:14:12Z","timestamp":1774934052000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-026-07143-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2]]},"references-count":53,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2026,2]]}},"alternative-id":["7143"],"URL":"https:\/\/doi.org\/10.1007\/s10489-026-07143-y","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2]]},"assertion":[{"value":"17 July 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 February 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for Publication"}}],"article-number":"107"}}