{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,2]],"date-time":"2025-08-02T18:58:17Z","timestamp":1754161097936,"version":"3.41.2"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031995675"},{"type":"electronic","value":"9783031995682"}],"license":[{"start":{"date-parts":[[2025,7,29]],"date-time":"2025-07-29T00:00:00Z","timestamp":1753747200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,7,29]],"date-time":"2025-07-29T00:00:00Z","timestamp":1753747200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-031-99568-2_4","type":"book-chapter","created":{"date-parts":[[2025,7,28]],"date-time":"2025-07-28T15:15:00Z","timestamp":1753715700000},"page":"43-55","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Towards Event-Driven Evaluation of\u00a0Surveillance Video Understanding Using Natural Language"],"prefix":"10.1007","author":[{"given":"Jo\u00e3o","family":"Pereira","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vasco","family":"Lopes","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jo\u00e3o","family":"Neves","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"David","family":"Semedo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,7,29]]},"reference":[{"key":"4_CR1","unstructured":"Achiam, J., et\u00a0al.: Gpt-4 technical report. arXiv:2303.08774 (2023)"},{"key":"4_CR2","doi-asserted-by":"crossref","unstructured":"Anderson, P., et\u00a0al.: Spice: semantic propositional image caption evaluation. In: ECCV (2016)","DOI":"10.1007\/978-3-319-46454-1_24"},{"key":"4_CR3","unstructured":"Bai, S., et\u00a0al.: Qwen2.5-vl technical report. arXiv:2502.13923 (2025)"},{"key":"4_CR4","unstructured":"Brown, T., et\u00a0al.: Language models are few-shot learners. In: NeurIPS (2020)"},{"key":"4_CR5","doi-asserted-by":"crossref","unstructured":"Chen, J., et\u00a0al.: Videollm-online: online video large language model for streaming video. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.01742"},{"key":"4_CR6","doi-asserted-by":"crossref","unstructured":"Deng, A., Yang, T., Chen, C.: A large-scale study of spatiotemporal representation learning with a new benchmark on action recognition. In: ICCV (2023)","DOI":"10.1109\/ICCV51070.2023.01876"},{"key":"4_CR7","unstructured":"Dong, H., et\u00a0al.: Benchmarking and improving detail image caption. arXiv:2405.19092 (2024)"},{"key":"4_CR8","unstructured":"Fu, C., et\u00a0al.: Video-mme: the first-ever comprehensive evaluation benchmark of multi-modal llms in video analysis. arXiv:2405.21075 (2024)"},{"key":"4_CR9","unstructured":"Kim, K., Park, G., Lee, Y.: Videoicl: confidence-based iterative in-context learning for out-of-distribution video understanding. arXiv:2412.02186 (2024)"},{"key":"4_CR10","unstructured":"Li, B., et\u00a0al.: Llava-onevision: easy visual task transfer. arXiv:2408.03326 (2024)"},{"key":"4_CR11","doi-asserted-by":"crossref","unstructured":"Li, C., et\u00a0al.: Llava-med: training a large language-and-vision assistant for biomedicine in one day. In: NeurIPS (2023)","DOI":"10.32388\/VLXB6M"},{"key":"4_CR12","unstructured":"Liu, H., et\u00a0al.: Visual instruction tuning. In: NeurIPS (2023)"},{"key":"4_CR13","doi-asserted-by":"crossref","unstructured":"Maaz, M., et\u00a0al.: Video-chatgpt: towards detailed video understanding via large vision and language models. arXiv:2306.05424 (2023)","DOI":"10.18653\/v1\/2024.acl-long.679"},{"key":"4_CR14","doi-asserted-by":"crossref","unstructured":"Papineni, K., et\u00a0al.: Bleu: a method for automatic evaluation of machine translation. In: ACL (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"4_CR15","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: ICML (2021)"},{"key":"4_CR16","doi-asserted-by":"crossref","unstructured":"Sultani, W., Chen, C., Shah, M.: Real-world anomaly detection in surveillance videos. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00678"},{"key":"4_CR17","unstructured":"Team, G., et\u00a0al.: Gemini: a family of highly capable multimodal models (2024). arXiv:2312.11805 (2024)"},{"key":"4_CR18","doi-asserted-by":"crossref","unstructured":"Vedantam, R., Zitnick, C.L., Parikh, D.: Cider: consensus-based image description evaluation. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"4_CR19","unstructured":"Wang, J., et\u00a0al.: Tarsier: recipes for training and evaluating large video description models. arXiv:2407.00634 (2024)"},{"key":"4_CR20","unstructured":"Yang, A., et\u00a0al.: Qwen2.5 technical report. arXiv:2412.15115 (2024)"},{"key":"4_CR21","doi-asserted-by":"crossref","unstructured":"Yuan, T., Zhang, X., Liu, K.: Towards surveillance video-and-language understanding: new dataset baselines and challenges. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.02082"},{"key":"4_CR22","unstructured":"Zhang, B., et\u00a0al.: Videollama 3: frontier multimodal foundation models for image and video understanding. arXiv:2501.13106 (2025)"},{"key":"4_CR23","unstructured":"Zhang, Y., et\u00a0al.: Video instruction tuning with synthetic data. arXiv:2410.02713 (2024)"},{"key":"4_CR24","unstructured":"Zheng, L., et\u00a0al.: Judging llm-as-a-judge with mt-bench and chatbot arena. In: NeurIPS (2023)"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Image Analysis"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-99568-2_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,28]],"date-time":"2025-07-28T15:15:11Z","timestamp":1753715711000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-99568-2_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,29]]},"ISBN":["9783031995675","9783031995682"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-99568-2_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,7,29]]},"assertion":[{"value":"29 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IbPRIA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Iberian Conference on Pattern Recognition and Image Analysis","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Coimbra","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ibpria2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ibpria.org\/2025\/?page=home","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}