{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,15]],"date-time":"2026-02-15T03:12:29Z","timestamp":1771125149802,"version":"3.50.1"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"27","license":[{"start":{"date-parts":[[2025,1,3]],"date-time":"2025-01-03T00:00:00Z","timestamp":1735862400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,3]],"date-time":"2025-01-03T00:00:00Z","timestamp":1735862400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-024-20559-3","type":"journal-article","created":{"date-parts":[[2025,1,3]],"date-time":"2025-01-03T03:47:10Z","timestamp":1735876030000},"page":"33125-33144","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":21,"title":["Multimodal AI model for zero-shot vehicle brand identification"],"prefix":"10.1007","volume":"84","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5393-526X","authenticated-orcid":false,"given":"Chutisant","family":"Kerdvibulvech","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,3]]},"reference":[{"issue":"1","key":"20559_CR1","doi-asserted-by":"publisher","first-page":"541","DOI":"10.1109\/TITS.2022.3212921","volume":"24","author":"A Amirkhani","year":"2023","unstructured":"Amirkhani A, Barshooi AH (2023) Deepcar 5.0: vehicle make and model recognition under challenging conditions. IEEE Trans Intell Transp Syst 24(1):541\u2013553","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"20559_CR2","doi-asserted-by":"crossref","unstructured":"Azarafza M, Nayyeri M, Steinmetz C, Staab S, Rettberg A (2024) Hybrid reasoning based on large language models for autonomous car driving. arXiv:2402.13602v3","DOI":"10.1109\/ICCMA63715.2024.10843921"},{"key":"20559_CR3","doi-asserted-by":"crossref","unstructured":"Bularz M, Przystalski K, Ogorza\u0142ek M (2023) Car make and model recognition system using rear-lamp features and convolutional neural networks. Multimed Tools Appl","DOI":"10.1007\/s11042-023-15081-x"},{"key":"20559_CR4","doi-asserted-by":"crossref","unstructured":"Cui C, Ma Y, Cao X, Ye W, Zhou Y, Liang K, Chen J, Lu J, Yang Z, Liao K, Gao T, Li E, Tang K, Cao Z, Zhou T, Liu A, Yan X, Mei S, Cao J, Wang Z, Zheng C (2024) A survey on multimodal large language models for autonomous driving. In: IEEE\/CVF winter conference on applications of computer vision workshops (WACVW). Los Alamitos, CA, USA: IEEE Computer Society, pp 958\u2013979","DOI":"10.1109\/WACVW60836.2024.00106"},{"key":"20559_CR5","doi-asserted-by":"crossref","unstructured":"Cui C, Yang Z, Zhou Y, Ma Y, Lu J, Li L, Chen Y, Panchal J, Wang Z (2024) Personalized autonomous driving with large language models: field experiments. arXiv:2312.09397v3","DOI":"10.1109\/ITSC58415.2024.10919978"},{"key":"20559_CR6","doi-asserted-by":"crossref","unstructured":"Cui Y, Yan L, Cao Z, Liu D (2021) Tf-blender: temporal feature blender for video object detection. In: IEEE\/CVF international conference on computer vision (ICCV) pp 8118\u20138127","DOI":"10.1109\/ICCV48922.2021.00803"},{"key":"20559_CR7","unstructured":"Gallagher J (2023) DINO-GPT4-V: Use GPT-4V in a two-stage detection model. Roboflow, 7. Accessed on 24-May-2024. https:\/\/blog.roboflow.com\/dino-gpt-4v\/"},{"key":"20559_CR8","doi-asserted-by":"crossref","unstructured":"Gayen S, Maity S, Kumar Singh P, Woo Geem Z, Sarkar R (2024) Two decades of vehicle make and model recognition \u2013 survey, challenges and future directions. J King Saud Univ Comput Inf Sci 36(1)","DOI":"10.1016\/j.jksuci.2023.101885"},{"key":"20559_CR9","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1016\/j.image.2018.12.009","volume":"72","author":"S Ghassemi","year":"2019","unstructured":"Ghassemi S, Fiandrotti A, Caimotti E, Francini G, Magli E (2019) Vehicle joint make and model recognition with multiscale attention windows. Signal Process Image Commun 72:69\u201379","journal-title":"Signal Process Image Commun"},{"key":"20559_CR10","doi-asserted-by":"crossref","unstructured":"Han C, Wang Q, Cui Y, Cao Z, Wang W, Qi S, Liu D (2023) E2VPT: an effective and efficient approach for visual prompt tuning. In: IEEE\/CVF international conference on computer vision (ICCV). Los Alamitos, CA, USA: IEEE Computer Society, pp 17 445\u201317 456","DOI":"10.1109\/ICCV51070.2023.01604"},{"key":"20559_CR11","unstructured":"Han C, Wang Q, Cui Y, Wang W, Huang L, Qi S, Liu D (2024) Facing the elephant in the room: visual prompt tuning or full finetuning?. In: International conference on learning representations (ICLR)"},{"key":"20559_CR12","doi-asserted-by":"publisher","first-page":"91 487","DOI":"10.1109\/ACCESS.2021.3090766","volume":"9","author":"A Hassan","year":"2021","unstructured":"Hassan A, Ali M, Durrani NM, Tahir MA (2021) An empirical analysis of deep learning architectures for vehicle make and model recognition. IEEE Access 9:91 487-91 499","journal-title":"IEEE Access"},{"key":"20559_CR13","unstructured":"Igersheim M (2023) Mercedes-benz is set to integrate gpt-4 into its voice assistant,\" Blog Cyberjustice. https:\/\/cyberjustice.blog\/2023\/06\/26\/mercedes-benz-is-set-to-integrate-gpt-4-into-its-voice-assistant\/ Accessed on 24-May-2024"},{"key":"20559_CR14","doi-asserted-by":"crossref","unstructured":"Krause J, Stark M, Deng J, Fei-Fei L (2013) 3d object representations for fine-grained categorization. In: IEEE international conference on computer vision workshops pp 554\u2013561","DOI":"10.1109\/ICCVW.2013.77"},{"key":"20559_CR15","unstructured":"Liang JC, Cui Y, Wang Q, Geng T, Wang W, Liu D (2023) Clusterformer: clustering as a universal visual learner. In: International conference on neural information processing systems (NeurIPS2023) pp 64 029\u201364 042"},{"key":"20559_CR16","doi-asserted-by":"crossref","unstructured":"Liu D, Cui Y, Yan L, Mousas C, Yang B, Chen Y (2021) Densernet: weakly supervised visual localization using multi-scale feature aggregation. In: AAAI conference on artificial intelligence (AAAI) pp 6101\u20136109","DOI":"10.1609\/aaai.v35i7.16760"},{"key":"20559_CR17","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.neucom.2020.05.027","volume":"409","author":"D Liu","year":"2020","unstructured":"Liu D, Cui Y, Chen Y, Zhang J, Fan B (2020) Video object detection for autonomous driving: motion-aid feature calibration. Neurocomputing 409:1\u201311","journal-title":"Neurocomputing"},{"key":"20559_CR18","doi-asserted-by":"crossref","unstructured":"Lyu Y, Schiopu I, Cornelis B, Munteanu A (2022) Framework for vehicle make and model recognition-a new large-scale dataset and an efficient two-branch-two-stage deep learning architecture. Sensors 22(21)","DOI":"10.3390\/s22218439"},{"key":"20559_CR19","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1007\/s11265-020-01567-6","volume":"93","author":"X Ni","year":"2020","unstructured":"Ni X, Huttunen H (2020) Vehicle attribute recognition by appearance: Computer vision methods for vehicle type, make and model classification. J Signal Process Syst 93:357\u2013368","journal-title":"J Signal Process Syst"},{"key":"20559_CR20","unstructured":"OpenAI (2023) GPT-4 Technical Report. arXiv:2303.08774v3"},{"key":"20559_CR21","unstructured":"Poultney L (2023) Citroen\u2019s luxury cars get built-in chatgpt for the ultimate ai copilot experience. TechRadar the technology experts.https:\/\/www.techradar.com\/vehicle-tech\/hybrid-electric-vehicles\/citroens-luxury-cars-get-built-in-chatgpt-for-the-ultimate-ai-copilot-experience Accessed on 24-May-2024"},{"key":"20559_CR22","unstructured":"Qi Z, Fang Y, Zhang M, Sun Z, Wu T, Liu Z, Lin D, Wang J, Zhao H (2023) Gemini vs GPT-4V: a preliminary comparison and combination of vision-language models through qualitative cases. arXiv:2312.15011v1"},{"key":"20559_CR23","doi-asserted-by":"crossref","unstructured":"Rao AS, Sapna S, Akshay T, Shenoy AS, Adithya BV, Dias A (2022) Identification of car make and model using deep learning and computer vision techniques. In: International conference on artificial intelligence and data engineering (AIDE) pp 202\u2013207","DOI":"10.1109\/AIDE57180.2022.10060631"},{"key":"20559_CR24","doi-asserted-by":"crossref","unstructured":"Sultan Z, Farooq MU, Raza RH (2023) Improved vehicle logo detection and recognition for complex traffic environments using deep learning based unwarping of extracted logo regions in varying angles. In: Digital interaction and machine intelligence. Springer Nature Switzerland, Cham, pp 12\u201325","DOI":"10.1007\/978-3-031-37649-8_2"},{"key":"20559_CR25","doi-asserted-by":"publisher","first-page":"126 733","DOI":"10.1109\/ACCESS.2023.3330114","volume":"11","author":"SH Tan","year":"2023","unstructured":"Tan SH, Chuah JH, Chow C-O, Kanesan J (2023) Coarse-to-fine context aggregation network for vehicle make and model recognition. IEEE Access 11:126 733-126 747","journal-title":"IEEE Access"},{"key":"20559_CR26","doi-asserted-by":"crossref","unstructured":"Wang D, Al-Rubaie A, Alsarkal YI, Stincic S, Davies J (2021) Cost effective and accurate vehicle make\/model recognition method using yolov5. In: 2021 International conference on smart applications, communications and networking (SmartNets) pp 1\u20134","DOI":"10.1109\/SmartNets50376.2021.9555409"},{"key":"20559_CR27","doi-asserted-by":"crossref","unstructured":"Wang J, Wang P, Sun G, Liu D, Dianat S, Rao R, Rabbani M, Tao Z (2024) Text is MASS: modeling as stochastic embedding for text-video retrieval. In: IEEE\/CVF conference on computer vision and pattern recognition (CVPR). Los Alamitos, CA, USA: IEEE Computer Society pp 16 551\u201316 560","DOI":"10.1109\/CVPR52733.2024.01566"},{"key":"20559_CR28","doi-asserted-by":"crossref","unstructured":"Wang Q, Mao Y, Wang J, Yu H, Nie S, Wang S, Feng F, Huang L, Quan X, Xu Z, Liu D (2023) APrompt: attention prompt tuning for efficient adaptation of pre-trained language models. In: Conference on empirical methods in natural language processing (EMNLP) pp 9147\u20139160","DOI":"10.18653\/v1\/2023.emnlp-main.567"},{"issue":"12","key":"20559_CR29","doi-asserted-by":"publisher","first-page":"4706","DOI":"10.1109\/TIV.2023.3325300","volume":"8","author":"S Wang","year":"2023","unstructured":"Wang S, Zhu Y, Li Z, Wang Y, Li L, He Z (2023) Chatgpt as your vehicle co-pilot: an initial attempt. IEEE Trans Intell Veh 8(12):4706\u20134721","journal-title":"IEEE Trans Intell Veh"},{"issue":"12","key":"20559_CR30","doi-asserted-by":"publisher","first-page":"672","DOI":"10.4236\/jsea.2023.1612034","volume":"16","author":"ZM Wase","year":"2023","unstructured":"Wase ZM, Madisetti VK, Bahga A (2023) Object detection meets LLMS: model fusion for safety and security. J Softw Eng Appl 16(12):672\u2013684","journal-title":"J Softw Eng Appl"},{"key":"20559_CR31","unstructured":"Wen L, Yang X, Fu D, Wang X, Cai P, Li X, Ma T, Li Y, Xu L, Shang D, Zhu Z, Sun S, Bai Y, Cai X, Dou M, Hu S, Shi B, Qiao Y (2023) On the road with GPT-4V(ision): early explorations of visual-language model on autonomous driving. arXiv:2311.05332"},{"key":"20559_CR32","unstructured":"Wu Y, Wang S, Yang H, Zheng T, Zhang H, Zhao Y, Qin B (2023) An early evaluation of GPT-4V(vision). arXiv:2310.16534v1"},{"issue":"10","key":"20559_CR33","doi-asserted-by":"publisher","first-page":"6642","DOI":"10.1109\/TCSVT.2022.3177320","volume":"32","author":"L Yan","year":"2022","unstructured":"Yan L, Ma S, Wang Q, Chen Y, Zhang X, Savakis A, Liu D (2022) Video captioning using global-local representation. IEEE Trans Circ Syst Vid Technol 32(10):6642\u20136656","journal-title":"IEEE Trans Circ Syst Vid Technol"},{"key":"20559_CR34","doi-asserted-by":"crossref","unstructured":"Yang L, Luo P, Loy CC, Tang X (2015) A large-scale car dataset for fine-grained categorization and verification. In: IEEE conference on computer vision and pattern recognition (CVPR) pp 3973\u20133981","DOI":"10.1109\/CVPR.2015.7299023"},{"key":"20559_CR35","doi-asserted-by":"crossref","unstructured":"Zhang H, Li X, Yuan H, Liang H, Wang Y, Song S (2023) A multi-angle appearance-based approach for vehicle type and brand recognition utilizing faster regional convolution neural networks. Sensors 23(23)","DOI":"10.3390\/s23239569"},{"key":"20559_CR36","unstructured":"Zhou X, Knoll AC (2024) GPT-4V as traffic assistant: an in-depth look at vision language model on complex traffic events. arXiv:2402.02205"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-20559-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-024-20559-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-024-20559-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T04:14:41Z","timestamp":1757132081000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-024-20559-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,3]]},"references-count":36,"journal-issue":{"issue":"27","published-online":{"date-parts":[[2025,8]]}},"alternative-id":["20559"],"URL":"https:\/\/doi.org\/10.1007\/s11042-024-20559-3","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1,3]]},"assertion":[{"value":"27 August 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 November 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 December 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 January 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"No conflicts of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of Interest"}}]}}