{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,2]],"date-time":"2025-08-02T04:00:38Z","timestamp":1754107238642,"version":"3.33.0"},"reference-count":41,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,8]],"date-time":"2024-12-08T00:00:00Z","timestamp":1733616000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,8]],"date-time":"2024-12-08T00:00:00Z","timestamp":1733616000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,8]]},"DOI":"10.1109\/vcip63160.2024.10849859","type":"proceedings-article","created":{"date-parts":[[2025,1,27]],"date-time":"2025-01-27T18:36:57Z","timestamp":1738003017000},"page":"1-5","source":"Crossref","is-referenced-by-count":1,"title":["LLaVA-Tour: A Large Multimodal Model for Japanese Tourist Spot Prediction and Review Generation"],"prefix":"10.1109","author":[{"given":"Hiromasa","family":"Yamanishi","sequence":"first","affiliation":[{"name":"The University of Tokyo"}]},{"given":"Ling","family":"Xiao","sequence":"additional","affiliation":[{"name":"The University of Tokyo"}]},{"given":"Toshihiko","family":"Yamasaki","sequence":"additional","affiliation":[{"name":"The University of Tokyo"}]}],"member":"263","reference":[{"year":"2023","key":"ref1","article-title":"Gpt-4 technical report"},{"article-title":"Gemini: A family of highly capable multimodal models","year":"2023","author":"Anil","key":"ref2"},{"key":"ref3","article-title":"Visual instruction tuning","volume-title":"Proc. Advances in Neural Information Processing Systems","volume":"36","author":"Liu"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02484"},{"journal-title":"Global Infographic","article-title":"Economic impact report","year":"2023","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1080\/19368623.2023.2211993"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1108\/IJCHM-05-2023-0686"},{"key":"ref8","article-title":"Qwen-vl: A versatile vision-language model for understanding, localization, text reading, and beyond","author":"Bai","year":"2023","journal-title":"arxiv preprint"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr42600.2020.00265"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1873973"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/E17-1059"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313463"},{"key":"ref13","first-page":"4947","article-title":"Personalized transformer for explainable recommendation","volume-title":"Proc. 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing","author":"Zong"},{"key":"ref14","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Advances in Neural Information Processing Systems","volume":"33","author":"Brown"},{"article-title":"Qwen technical report","year":"2023","author":"Bai","key":"ref15"},{"article-title":"Llama: Open and efficient foundation language models","year":"2023","author":"Touvron","key":"ref16"},{"key":"ref17","article-title":"Instructblip: Towards general-purpose vision-language models with instruction tuning","volume-title":"Proc. Advances in Neural Information Processing Systems","volume":"36","author":"Dai"},{"article-title":"Minigpt-4: Enhancing vision-language understanding with advanced large language models","volume-title":"Proc. International Conference on Learning Representations","author":"Zhu","key":"ref18"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3696409.3700273"},{"key":"ref20","article-title":"Llava-med: Training a large language-and-vision assistant for biomedicine in one day","volume-title":"Proc. Advances in Neural Information Processing Systems","volume":"36","author":"Li"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/cvpr52733.2024.02629"},{"article-title":"Finetuned language models are zero-shot learners","volume-title":"Proc. International Conference on Learning Representations","author":"Wei","key":"ref22"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1016\/j.annals.2019.01.014"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3655755.3655770"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.3390\/digital2040030"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1108\/IJCHM-06-2021-0767"},{"article-title":"Towards urban general intelligence: A review and outlook of urban foundation models","year":"2024","author":"Zhang","key":"ref27"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2024.103808"},{"article-title":"Gpt4geo: How a language model sees the world\u2019s geography","year":"2023","author":"Roberts","key":"ref29"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3580488"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/IMCOM60618.2024.10418409"},{"article-title":"A multimodal dataset and benchmark for tourism review generation","volume-title":"The ACM RecSys Workshop on Recommenders in Tourism (Rectour 2024)","author":"Yamanishi","key":"ref32"},{"key":"ref33","first-page":"158","article-title":"The\u02dc effect of user-generated content on tourist behavior: The mediating role of destination image","volume":"10","author":"del Carmen Hidalgo Alcazar","year":"2014","journal-title":"Tourism & Management Studies"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1177\/13567667211030675"},{"key":"ref35","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"International Conference on Machine Learning","author":"Radford"},{"article-title":"Vicuna: An opensource chatbot impressing gpt-4 with 90% chatgpt quality","year":"2023","author":"Chiang","key":"ref36"},{"article-title":"Lora: Low-rank adaptation of large language models","volume-title":"Proc. International Conference on Learning Representations","author":"Hu","key":"ref37"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref39","first-page":"311","article-title":"Bleu: A method for automatic evaluation of machine translation","volume-title":"Proc. 40th Annual Meeting on Association for Computational Linguistics","author":"Papineni"},{"key":"ref40","first-page":"74","article-title":"Rouge: A package for automatic evaluation of summaries","author":"LIN","year":"2004","journal-title":"Text Summarization Branches Out"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-44593-5_25"}],"event":{"name":"2024 IEEE International Conference on Visual Communications and Image Processing (VCIP)","start":{"date-parts":[[2024,12,8]]},"location":"Tokyo, Japan","end":{"date-parts":[[2024,12,11]]}},"container-title":["2024 IEEE International Conference on Visual Communications and Image Processing (VCIP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10849624\/10849781\/10849859.pdf?arnumber=10849859","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,28]],"date-time":"2025-01-28T06:08:46Z","timestamp":1738044526000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10849859\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,8]]},"references-count":41,"URL":"https:\/\/doi.org\/10.1109\/vcip63160.2024.10849859","relation":{},"subject":[],"published":{"date-parts":[[2024,12,8]]}}}