{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T06:16:21Z","timestamp":1774419381949,"version":"3.50.1"},"reference-count":25,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,4,6]]},"DOI":"10.1109\/icassp49660.2025.10889104","type":"proceedings-article","created":{"date-parts":[[2025,3,12]],"date-time":"2025-03-12T13:52:43Z","timestamp":1741787563000},"page":"1-5","source":"Crossref","is-referenced-by-count":0,"title":["FruitMMBench: A Multi-modal Benchmark for Fruit Quality Assessment"],"prefix":"10.1109","author":[{"given":"Jiawei","family":"Chen","sequence":"first","affiliation":[{"name":"Beihang University,Beijing,China"}]},{"given":"Gong","family":"Huang","sequence":"additional","affiliation":[{"name":"Beihang University,Beijing,China"}]},{"given":"Liu","family":"Liu","sequence":"additional","affiliation":[{"name":"Beijing Univ. of Posts and Telecomm.,Beijing,China"}]},{"given":"Zhenbo","family":"Xu","sequence":"additional","affiliation":[{"name":"Beijing Univ. of Posts and Telecomm.,Beijing,China"}]},{"given":"Qinghong","family":"Yang","sequence":"additional","affiliation":[{"name":"Beihang University,Hangzhou International Innovation Institute,Hangzhou,China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Qwen-vl: A frontier large vision-language model with versatile abilities","author":"Bai","year":"2023"},{"key":"ref2","article-title":"Multimodal-gpt: A vision and language model for dialogue with humans","author":"Gong","year":"2023"},{"key":"ref3","article-title":"Cogvlm: Visual expert for pretrained language models","author":"Wang","year":"2023"},{"key":"ref4","article-title":"Language models are few-shot learners","author":"Brown","year":"2020"},{"key":"ref5","article-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA57147.2024.10610779"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1038\/s41591-024-03139-8"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.303"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58536-5_44"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.3390\/electronics11244100"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/GCITC60406.2023.10426610"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1155\/2022\/9210947"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3369699"},{"key":"ref14","article-title":"Visionllm: Large language model is also an open-ended decoder for vision-centric tasks","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Wang"},{"key":"ref15","article-title":"Touchstone: Evaluating vision-language models by language models","author":"Bai","year":"2023"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72658-3_13"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1093\/nsr\/nwae403"},{"key":"ref18","article-title":"Are we on the right way for evaluating large vision-language models?","author":"Chen","year":"2024"},{"key":"ref19","article-title":"Seed-bench: Benchmarking multimodal llms with generative comprehension","author":"Li","year":"2023"},{"key":"ref20","article-title":"Gemini: A family of highly capable multimodal models","volume":"1","author":"Anil","year":"2023"},{"key":"ref21","article-title":"The claude 3 model family: Opus, sonnet, haiku","volume":"1","author":"Anthropic","year":"2024","journal-title":"Claude-3 Model Card"},{"key":"ref22","article-title":"Deepseek-vl: Towards real-world vision-language understanding","author":"Lu","year":"2024"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"key":"ref24","article-title":"Yi: Open foundation models by 01. ai","author":"Young","year":"2024"},{"key":"ref25","article-title":"Minicpm-v: A gpt-4v level mllm on your phone","author":"Yao","year":"2024"}],"event":{"name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Hyderabad, India","start":{"date-parts":[[2025,4,6]]},"end":{"date-parts":[[2025,4,11]]}},"container-title":["ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10887540\/10887541\/10889104.pdf?arnumber=10889104","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T05:22:34Z","timestamp":1774416154000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10889104\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,6]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/icassp49660.2025.10889104","relation":{},"subject":[],"published":{"date-parts":[[2025,4,6]]}}}