{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T05:08:39Z","timestamp":1773378519395,"version":"3.50.1"},"reference-count":80,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"3","license":[{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:00:00Z","timestamp":1772323200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62233002"],"award-info":[{"award-number":["62233002"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["92370203"],"award-info":[{"award-number":["92370203"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022YFC2603600, and in part by the Fundamental Research Funds for the Central Universities"],"award-info":[{"award-number":["2022YFC2603600, and in part by the Fundamental Research Funds for the Central Universities"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Circuits Syst. Video Technol."],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1109\/tcsvt.2025.3622736","type":"journal-article","created":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T17:42:57Z","timestamp":1760722977000},"page":"3027-3040","source":"Crossref","is-referenced-by-count":0,"title":["ChatStitch: Visualizing Through Structures via Surround-View Unsupervised Deep Image Stitching With Collaborative LLM-Agents"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4183-3423","authenticated-orcid":false,"given":"Hao","family":"Liang","sequence":"first","affiliation":[{"name":"School of Automation, Beijing Institute of Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1706-9253","authenticated-orcid":false,"given":"Zhipeng","family":"Dong","sequence":"additional","affiliation":[{"name":"School of Automation, Beijing Institute of Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-0733-9813","authenticated-orcid":false,"given":"Kaixin","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Automation, Beijing Institute of Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3565-501X","authenticated-orcid":false,"given":"Hao","family":"Li","sequence":"additional","affiliation":[{"name":"School of Automation, Beijing Institute of Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiyuan","family":"Guo","sequence":"additional","affiliation":[{"name":"School of Automation, Beijing Institute of Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6628-7946","authenticated-orcid":false,"given":"Yufeng","family":"Yue","sequence":"additional","affiliation":[{"name":"School of Automation, Beijing Institute of Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5520-7127","authenticated-orcid":false,"given":"Mengyin","family":"Fu","sequence":"additional","affiliation":[{"name":"School of Automation, Beijing Institute of Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3964-2433","authenticated-orcid":false,"given":"Yi","family":"Yang","sequence":"additional","affiliation":[{"name":"School of Automation, Beijing Institute of Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3135041"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MITS.2024.3381793"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3485907"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3462100"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/tcsvt.2025.3545795"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/tcsvt.2025.3548845"},{"key":"ref7","first-page":"6998","article-title":"Visual agents as fast and slow thinkers","volume-title":"Proc. 13th Int. Conf. Learn. Represent.","author":"Sun"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73030-6_6"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3092828"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00680"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-73235-5_22"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2025.3568829"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01732"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TMECH.2023.3348986"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/JSEN.2023.3251661"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s11063-023-11226-z"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-006-0002-3"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.247"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01201"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00367"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126544"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00499"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2008.300"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.3390\/s24165352"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/JSEN.2024.3436051"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvcir.2020.102950"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19787-1_4"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2021.3070525"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.12.032"},{"key":"ref30","first-page":"123812","article-title":"Reconstructing the image stitching pipeline: Integrating fusion and rectangling into a unified inpainting model","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"37","author":"Xie"},{"key":"ref31","article-title":"Modification takes courage: Seamless image stitching via reference-driven inpainting","author":"Xie","year":"2024","journal-title":"arXiv:2411.10309"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00260"},{"key":"ref33","article-title":"Visual recognition with deep nearest centroids","author":"Wang","year":"2022","journal-title":"arXiv:2209.07383"},{"key":"ref34","article-title":"CLUSTSEG: Clustering for universal segmentation","author":"Liang","year":"2023","journal-title":"arXiv:2305.02187"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00252"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ITSC48978.2021.9565009"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref39","volume-title":"Gpt-4o","year":"2025"},{"key":"ref40","first-page":"4171","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","volume-title":"Proc. Conf. North Amer. Chapter Assoc. Comput. Linguistics, Hum. Lang. Technol.","volume":"1","author":"Devlin"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01428"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/MITS.2023.3298534"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2019.00058"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2020.3028424"},{"key":"ref45","first-page":"29541","article-title":"Learning distilled collaboration graph for multi-agent perception","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Li"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00416"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58536-5_36"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19842-7_7"},{"key":"ref49","article-title":"CoBEVT: Cooperative bird\u2019s eye view semantic segmentation with sparse transformers","author":"Xu","year":"2022","journal-title":"arXiv:2207.02202"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00892"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3143299"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02067"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-67361-5_40"},{"key":"ref54","volume-title":"Openscanerio Editor","year":"2025"},{"key":"ref55","volume-title":"51Sim-One","year":"2025"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197364"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3141661"},{"key":"ref58","article-title":"VIMI: Vehicle-infrastructure multi-view intermediate fusion for camera-based 3D object detection","author":"Wang","year":"2023","journal-title":"arXiv:2303.10975"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00033"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2021.3093573"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3368234"},{"key":"ref62","article-title":"BEVControl: Accurately controlling street-view elements with multi-perspective consistency via BEV sketch layout","author":"Yang","year":"2023","journal-title":"arXiv:2308.01661"},{"key":"ref63","article-title":"DriveDreamer: Towards real-world-driven world models for autonomous driving","author":"Wang","year":"2023","journal-title":"arXiv:2309.09777"},{"key":"ref64","article-title":"DrivingDiffusion: Layout-guided multi-view driving scene video generation with latent diffusion model","author":"Li","year":"2023","journal-title":"arXiv:2310.07771"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/JIOT.2022.3212464"},{"key":"ref66","first-page":"22841","article-title":"MagicDrive: Street view generation with diverse 3D geometry control","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Gao"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00140"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-99-8850-1_1"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1145\/3528223.3530127"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA55743.2025.11127831"},{"key":"ref71","article-title":"Robust image stitching with optimal plane","author":"Nie","year":"2025","journal-title":"arXiv:2508.05903"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298719"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3125736"},{"key":"ref75","article-title":"Deep image homography estimation","author":"DeTone","year":"2016","journal-title":"arXiv:1606.03798"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/34.24792"},{"key":"ref77","first-page":"2017","article-title":"Spatial transformer networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"28","author":"Jaderberg"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2934344"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2007.364084"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01436"}],"container-title":["IEEE Transactions on Circuits and Systems for Video Technology"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/76\/11424237\/11206342.pdf?arnumber=11206342","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T20:39:50Z","timestamp":1773347990000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11206342\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3]]},"references-count":80,"journal-issue":{"issue":"3"},"URL":"https:\/\/doi.org\/10.1109\/tcsvt.2025.3622736","relation":{},"ISSN":["1051-8215","1558-2205"],"issn-type":[{"value":"1051-8215","type":"print"},{"value":"1558-2205","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,3]]}}}