{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,7]],"date-time":"2026-02-07T12:26:04Z","timestamp":1770467164455,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":24,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819620739","type":"print"},{"value":"9789819620746","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-2074-6_43","type":"book-chapter","created":{"date-parts":[[2024,12,31]],"date-time":"2024-12-31T16:06:16Z","timestamp":1735661176000},"page":"355-362","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["VERGE in VBS 2025"],"prefix":"10.1007","author":[{"given":"Nick","family":"Pantelidis","sequence":"first","affiliation":[]},{"given":"Dimitris","family":"Georgalis","sequence":"additional","affiliation":[]},{"given":"Maria","family":"Pegia","sequence":"additional","affiliation":[]},{"given":"Damianos","family":"Galanopoulos","sequence":"additional","affiliation":[]},{"given":"Konstantinos","family":"Apostolidis","sequence":"additional","affiliation":[]},{"given":"Klearchos","family":"Stavrothanasopoulos","sequence":"additional","affiliation":[]},{"given":"Anastasia","family":"Moumtzidou","sequence":"additional","affiliation":[]},{"given":"Konstantinos","family":"Gkountakos","sequence":"additional","affiliation":[]},{"given":"Ilias","family":"Gialampoukidis","sequence":"additional","affiliation":[]},{"given":"Stefanos","family":"Vrochidis","sequence":"additional","affiliation":[]},{"given":"Vasileios","family":"Mezaris","sequence":"additional","affiliation":[]},{"given":"Ioannis","family":"Kompatsiaris","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,1]]},"reference":[{"key":"43_CR1","unstructured":"CBMI \u201923: Proceedings of the 20th International Conference on Content-based Multimedia Indexing. Association for Computing Machinery, New York, NY, USA (2023)"},{"key":"43_CR2","unstructured":"Allan, M., et al.: 2017 robotic instrument segmentation challenge (2019). https:\/\/arxiv.org\/abs\/1902.06426"},{"key":"43_CR3","doi-asserted-by":"crossref","unstructured":"Caba\u00a0Heilbron, F., Escorcia, V., Ghanem, B., Carlos\u00a0Niebles, J.: Activitynet: a large-scale video benchmark for human activity understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 961\u2013970 (2015)","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"43_CR4","doi-asserted-by":"publisher","first-page":"627","DOI":"10.1007\/978-3-031-25069-9_40","volume-title":"Computer Vision - ECCV 2022 Workshops","author":"D Galanopoulos","year":"2023","unstructured":"Galanopoulos, D., Mezaris, V.: Are All Combinations Equal? combining textual and visual features with multiple space learning for text-based video retrieval. In: Karlinsky, L., Michaeli, T., Nishino, K. (eds.) Computer Vision - ECCV 2022 Workshops, pp. 627\u2013643. Springer Nature Switzerland, Cham (2023)"},{"key":"43_CR5","doi-asserted-by":"crossref","unstructured":"Galanopoulos, D., Mezaris, V.: Cross-modal learning for free-text video search. In: Khosrow-Pour, M. (ed.) Encyclopedia of Information Science and Technology. IGI Global (2025)","DOI":"10.4018\/978-1-6684-7366-5.ch088"},{"key":"43_CR6","doi-asserted-by":"publisher","first-page":"118","DOI":"10.1016\/j.neucom.2021.02.014","volume":"441","author":"W Gan","year":"2021","unstructured":"Gan, W., Wong, P.K., Yu, G., Zhao, R., Vong, C.M.: Light-weight network for real-time adaptive stereo depth estimation. Neurocomputing 441, 118\u2013127 (2021). https:\/\/doi.org\/10.1016\/j.neucom.2021.02.014","journal-title":"Neurocomputing"},{"issue":"1","key":"43_CR7","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1109\/TPAMI.2010.57","volume":"33","author":"H Jegou","year":"2010","unstructured":"Jegou, H., Douze, M., Schmid, C.: Product quantization for nearest neighbor search. IEEE Trans. Pattern Anal. Mach. Intell. 33(1), 117\u2013128 (2010)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"43_CR8","unstructured":"Jocher, G., Chaurasia, A., Qiu, J.: Ultralytics yolov8 (2023). https:\/\/github.com\/ultralytics\/ultralytics"},{"key":"43_CR9","unstructured":"Kolesnikov, A., et al.: An Image is Worth 16x16 Words: transformers for image recognition at scale. In: International Conference on Learning Representations (ICLR) (2021)"},{"issue":"7","key":"43_CR10","doi-asserted-by":"publisher","first-page":"1956","DOI":"10.1007\/s11263-020-01316-z","volume":"128","author":"A Kuznetsova","year":"2020","unstructured":"Kuznetsova, A., et al.: The Open Images Dataset v4: unified image classification, object detection, and visual relationship detection at scale. Int. J. Comput. Vision 128(7), 1956\u20131981 (2020)","journal-title":"Int. J. Comput. Vision"},{"key":"43_CR11","doi-asserted-by":"crossref","unstructured":"Li, Y., Song, Y., Cao, L., Tetreault, J., Goldberg, L., Jaimes, A., Luo, J.: TGIF: a new dataset and benchmark on animated GIF description. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4641\u20134650 (2016)","DOI":"10.1109\/CVPR.2016.502"},{"key":"43_CR12","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., et al.: Microsoft COCO: common objects in context. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13, pp. 740\u2013755. Springer (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"43_CR13","doi-asserted-by":"crossref","unstructured":"Loko\u010d, J., et\u00a0al.: Interactive video retrieval in the age of effective joint embedding deep models: lessons from the 11th VBS. Multimedia Syst. 1\u201324 (2023)","DOI":"10.1007\/s00530-023-01143-5"},{"key":"43_CR14","unstructured":"Markatopoulou, F., et\u00a0al.: ITI-CERTH participation in TRECVID 2017. In: Proceedings of TRECVID 2017 Workshop. USA (2017)"},{"key":"43_CR15","doi-asserted-by":"crossref","unstructured":"Pantelidis, N., et\u00a0al.: Verge in vbs 2024. In: International Conference on Multimedia Modeling, pp. 356\u2013363. Springer (2024)","DOI":"10.1007\/978-3-031-53302-0_32"},{"key":"43_CR16","doi-asserted-by":"crossref","unstructured":"Pittaras, N., Markatopoulou, F., Mezaris, V., Patras, I.: Comparison of fine-tuning and extension strategies for deep convolutional neural networks. In: MultiMedia Modeling: 23rd International Conference, MMM 2017, Reykjavik, Iceland, January 4-6, 2017, Proceedings, Part I 23, pp. 102\u2013114. Springer (2017)","DOI":"10.1007\/978-3-319-51811-4_9"},{"key":"43_CR17","doi-asserted-by":"crossref","unstructured":"Rossetto, L., Schuldt, H., Awad, G., Butt, A.A.: V3C\u2013A research video collection. In: MultiMedia Modeling: 25th International Conference, MMM 2019, Thessaloniki, Greece, January 8\u201311, 2019, Proceedings, Part I 25, pp. 349\u2013360. Springer (2019)","DOI":"10.1007\/978-3-030-05710-7_29"},{"key":"43_CR18","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: ImageNet large scale visual recognition challenge. Int. J. Comput. Vision 115, 211\u2013252 (2015)","journal-title":"Int. J. Comput. Vision"},{"key":"43_CR19","unstructured":"Shao, S., et\u00a0al.: CrowdHuman: A Benchmark for Detecting Human in a Crowd. arXiv Preprint arXiv:1805.00123 (2018)"},{"key":"43_CR20","unstructured":"Tan, M., Le, Q.: Efficientnetv2: smaller models and faster training. In: International Conference on Machine Learning, pp. 10096\u201310106. PMLR (2021)"},{"key":"43_CR21","doi-asserted-by":"crossref","unstructured":"Truong, Q.T., et al.: Marine Video Kit: a new marine video dataset for content-based analysis and retrieval. In: International Conference on Multimedia Modeling, pp. 539\u2013550. Springer (2023)","DOI":"10.1007\/978-3-031-27077-2_42"},{"key":"43_CR22","doi-asserted-by":"crossref","unstructured":"Wang, X., Wu, J., Chen, J., Li, L., Wang, Y.F., Wang, W.Y.: Vatex: a large-scale, high-quality multilingual dataset for video-and-language research. In: Proceedings of the 2019 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 4581\u20134591 (2019)","DOI":"10.1109\/ICCV.2019.00468"},{"key":"43_CR23","doi-asserted-by":"crossref","unstructured":"Xu, J., Mei, T., Yao, T., Rui, Y.: Msr-vtt: a large video description dataset for bridging video and language. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5288\u20135296 (2016)","DOI":"10.1109\/CVPR.2016.571"},{"issue":"6","key":"43_CR24","doi-asserted-by":"publisher","first-page":"1452","DOI":"10.1109\/TPAMI.2017.2723009","volume":"40","author":"B Zhou","year":"2017","unstructured":"Zhou, B., Lapedriza, A., Khosla, A., Oliva, A., Torralba, A.: Places: a 10 Million image database for scene recognition. IEEE Trans. Pattern Anal. Mach. Intell. 40(6), 1452\u20131464 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."}],"container-title":["Lecture Notes in Computer Science","MultiMedia Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-2074-6_43","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,31]],"date-time":"2024-12-31T17:10:12Z","timestamp":1735665012000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-2074-6_43"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819620739","9789819620746"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-2074-6_43","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"1 January 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MMM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Multimedia Modeling","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Nara","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 January 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 January 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mmm2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/mmm2025.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}