{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,7]],"date-time":"2026-02-07T10:51:09Z","timestamp":1770461469129,"version":"3.49.0"},"reference-count":79,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2022,1,26]],"date-time":"2022-01-26T00:00:00Z","timestamp":1643155200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,1,26]],"date-time":"2022-01-26T00:00:00Z","timestamp":1643155200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001824","name":"Czech Science Foundation","doi-asserted-by":"crossref","award":["19-22071Y"],"award-info":[{"award-number":["19-22071Y"]}],"id":[{"id":"10.13039\/501100001824","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100001824","name":"Czech Science Foundation","doi-asserted-by":"crossref","award":["19-22071Y"],"award-info":[{"award-number":["19-22071Y"]}],"id":[{"id":"10.13039\/501100001824","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/100010669","name":"H2020 LEIT Information and Communication Technologies","doi-asserted-by":"publisher","award":["825079"],"award-info":[{"award-number":["825079"]}],"id":[{"id":"10.13039\/100010669","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010669","name":"H2020 LEIT Information and Communication Technologies","doi-asserted-by":"publisher","award":["951911"],"award-info":[{"award-number":["951911"]}],"id":[{"id":"10.13039\/100010669","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010669","name":"H2020 LEIT Information and Communication Technologies","doi-asserted-by":"publisher","award":["951911"],"award-info":[{"award-number":["951911"]}],"id":[{"id":"10.13039\/100010669","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002428","name":"Austrian Science Fund","doi-asserted-by":"publisher","award":["P 32010-N38"],"award-info":[{"award-number":["P 32010-N38"]}],"id":[{"id":"10.13039\/501100002428","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002428","name":"Austrian Science Fund","doi-asserted-by":"publisher","award":["P 32010-N38"],"award-info":[{"award-number":["P 32010-N38"]}],"id":[{"id":"10.13039\/501100002428","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001711","name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung","doi-asserted-by":"publisher","award":["CRSII5_193788"],"award-info":[{"award-number":["CRSII5_193788"]}],"id":[{"id":"10.13039\/501100001711","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001602","name":"Science Foundation Ireland","doi-asserted-by":"publisher","award":["18\/CRT\/6223"],"award-info":[{"award-number":["18\/CRT\/6223"]}],"id":[{"id":"10.13039\/501100001602","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001602","name":"Science Foundation Ireland","doi-asserted-by":"publisher","award":["18\/CRT\/6224"],"award-info":[{"award-number":["18\/CRT\/6224"]}],"id":[{"id":"10.13039\/501100001602","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001602","name":"Science Foundation Ireland","doi-asserted-by":"publisher","award":["SFI\/12\/RC\/2289_2"],"award-info":[{"award-number":["SFI\/12\/RC\/2289_2"]}],"id":[{"id":"10.13039\/501100001602","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001602","name":"Science Foundation Ireland","doi-asserted-by":"publisher","award":["SFI\/13\/RC\/2106"],"award-info":[{"award-number":["SFI\/13\/RC\/2106"]}],"id":[{"id":"10.13039\/501100001602","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001711","name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung","doi-asserted-by":"publisher","award":["CRSII5_193788"],"award-info":[{"award-number":["CRSII5_193788"]}],"id":[{"id":"10.13039\/501100001711","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Multimed Info Retr"],"published-print":{"date-parts":[[2022,3]]},"DOI":"10.1007\/s13735-021-00225-2","type":"journal-article","created":{"date-parts":[[2022,1,26]],"date-time":"2022-01-26T17:02:26Z","timestamp":1643216546000},"page":"1-18","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":51,"title":["Interactive video retrieval evaluation at a distance: comparing sixteen interactive video search systems in a remote setting at the 10th Video Browser Showdown"],"prefix":"10.1007","volume":"11","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5386-330X","authenticated-orcid":false,"given":"Silvan","family":"Heller","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6750-5500","authenticated-orcid":false,"given":"Viktor","family":"Gsteiger","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2442-4900","authenticated-orcid":false,"given":"Werner","family":"Bailer","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2903-3968","authenticated-orcid":false,"given":"Cathal","family":"Gurrin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0889-3491","authenticated-orcid":false,"given":"Bj\u00f6rn \u00de\u00f3r","family":"J\u00f3nsson","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3558-4144","authenticated-orcid":false,"given":"Jakub","family":"Loko\u010d","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9535-966X","authenticated-orcid":false,"given":"Andreas","family":"Leibetseder","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2636-0915","authenticated-orcid":false,"given":"Franti\u0161ek","family":"Mejzl\u00edk","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8082-4509","authenticated-orcid":false,"given":"Ladislav","family":"Pe\u0161ka","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5389-9465","authenticated-orcid":false,"given":"Luca","family":"Rossetto","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3548-0537","authenticated-orcid":false,"given":"Konstantin","family":"Schall","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9218-1704","authenticated-orcid":false,"given":"Klaus","family":"Schoeffmann","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9865-6371","authenticated-orcid":false,"given":"Heiko","family":"Schuldt","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3396-1516","authenticated-orcid":false,"given":"Florian","family":"Spiess","sequence":"additional","affiliation":[]},{"given":"Ly-Duyen","family":"Tran","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7182-7038","authenticated-orcid":false,"given":"Lucia","family":"Vadicamo","sequence":"additional","affiliation":[]},{"given":"Patrik","family":"Vesel\u00fd","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2505-9178","authenticated-orcid":false,"given":"Stefanos","family":"Vrochidis","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4074-3442","authenticated-orcid":false,"given":"Jiaxin","family":"Wu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,1,26]]},"reference":[{"key":"225_CR1","doi-asserted-by":"publisher","unstructured":"Amato G, Bolettieri P, Carrara F, Debole F, Falchi F, Gennaro C, Vadicamo L, Vairo, C (2021) The VISIONE video search system: exploiting off-the-shelf text search engines for large-scale video retrieval. J Imag 7(5). https:\/\/doi.org\/10.3390\/jimaging7050076","DOI":"10.3390\/jimaging7050076"},{"key":"225_CR2","doi-asserted-by":"publisher","unstructured":"Amato G, Bolettieri P, Falchi F, Gennaro C, Messina N, Vadicamo L, Vairo C (2021) VISIONE at video browser showdown 2021. In: International conference on multimedia modeling. Springer, pp 473\u2013478. https:\/\/doi.org\/10.1007\/978-3-030-67835-7_47","DOI":"10.1007\/978-3-030-67835-7_47"},{"key":"225_CR3","doi-asserted-by":"publisher","unstructured":"Amato G, Falchi F, Gennaro C, Rabitti F (2017) Searching and annotating 100M images with yfcc100m-hnfc6 and mi-file. In: Workshop on content-based multimedia indexing. ACM, pp 26:1\u201326:4. https:\/\/doi.org\/10.1145\/3095713.3095740","DOI":"10.1145\/3095713.3095740"},{"key":"225_CR4","doi-asserted-by":"publisher","unstructured":"Andreadis S, Moumtzidou A, Gkountakos K, Pantelidis N, Apostolidis K, Galanopoulos D, Gialampoukidis I, Vrochidis, S, Mezaris V, Kompatsiaris I (2021) VERGE in vbs 2021. In: International conference on multimedia modeling. Springer, pp. 398\u2013404. https:\/\/doi.org\/10.1007\/978-3-030-67835-7_35","DOI":"10.1007\/978-3-030-67835-7_35"},{"issue":"10","key":"225_CR5","doi-asserted-by":"publisher","first-page":"2582","DOI":"10.1364\/JOSAA.25.002582","volume":"25","author":"R Benavente","year":"2008","unstructured":"Benavente R, Vanrell M, Baldrich R (2008) Parametric fuzzy sets for automatic color naming. JOSA A 25(10):2582\u20132593. https:\/\/doi.org\/10.1364\/JOSAA.25.002582","journal-title":"JOSA A"},{"key":"225_CR6","doi-asserted-by":"publisher","unstructured":"Berns F, Rossetto L, Schoeffmann K, Beecks C, Awad G (2019) V3C1 dataset: an evaluation of content characteristics. In: International conference on multimedia retrieval. ACM, pp 334\u2013338. https:\/\/doi.org\/10.1145\/3323873.3325051","DOI":"10.1145\/3323873.3325051"},{"key":"225_CR7","unstructured":"Bochkovskiy A, Wang CY, Liao HYM (2020) Yolov4: optimal speed and accuracy of object detection. CoRR. arXiv:2004.10934"},{"key":"225_CR8","doi-asserted-by":"publisher","unstructured":"Chen K, Pang J, Wang J, Xiong Y, Li X, Sun S, Feng W, Liu Z, Shi J, Ouyang W, Loy CC, Lin D (2019) Hybrid task cascade for instance segmentation. In: Conference on computer vision and pattern recognition, pp. 4969\u20134978. https:\/\/doi.org\/10.1109\/CVPR.2019.00511","DOI":"10.1109\/CVPR.2019.00511"},{"key":"225_CR9","doi-asserted-by":"publisher","unstructured":"Cox I, Miller M, Omohundro S, Yianilos P (1996) Pichunter: Bayesian relevance feedback for image retrieval. In: International conference on pattern recognition, vol\u00a03. IEEE, pp 361\u2013369. https:\/\/doi.org\/10.1109\/ICPR.1996.546971","DOI":"10.1109\/ICPR.1996.546971"},{"key":"225_CR10","doi-asserted-by":"crossref","unstructured":"Deng D, Liu H, Li X, Cai D (2018) Pixellink: detecting scene text via instance segmentation. In: Proceedings of the thirty-second AAAI conference on artificial intelligence, (AAAI-18), the 30th innovative applications of artificial intelligence (IAAI-18), and the 8th AAAI symposium on educational advances in artificial intelligence (EAAI-18). AAAI, pp 6773\u20136780","DOI":"10.1609\/aaai.v32i1.12269"},{"key":"225_CR11","doi-asserted-by":"publisher","unstructured":"Deng J, Dong W, Socher R, Li LJ, Li K, Fei-Fei L (2009) Imagenet: a large-scale hierarchical image database. In: Conference on computer vision and pattern recognition. IEEE, pp 248\u2013255. https:\/\/doi.org\/10.1109\/CVPR.2009.5206848","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"225_CR12","doi-asserted-by":"publisher","unstructured":"Galanopoulos D, Mezaris V (2020) Attention mechanisms, signal encodings and fusion strategies for improved ad-hoc video search with dual encoding networks. In: International conference on multimedia retrieval. ACM, pp 336\u2013340. https:\/\/doi.org\/10.1145\/3372278.3390737","DOI":"10.1145\/3372278.3390737"},{"key":"225_CR13","doi-asserted-by":"publisher","unstructured":"Gurrin C, J\u00f3nsson B\u00de, Sch\u00f6ffmann K, Dang-Nguyen D, Lokoc J, Tran M, H\u00fcrst W, Rossetto L, Healy G (2021) Introduction to the fourth annual lifelog search challenge, lsc\u201921. In: International conference on multimedia retrieval. ACM, pp 690\u2013691. https:\/\/doi.org\/10.1145\/3460426.3470945","DOI":"10.1145\/3460426.3470945"},{"key":"225_CR14","doi-asserted-by":"publisher","unstructured":"Hara K, Kataoka H, Satoh Y (2018) Can spatiotemporal 3d cnns retrace the history of 2d cnns and imagenet? https:\/\/doi.org\/10.1109\/CVPR.2018.00685","DOI":"10.1109\/CVPR.2018.00685"},{"key":"225_CR15","doi-asserted-by":"publisher","unstructured":"Gasser R, Rossetto L, Heller S, Schuldt H (2020) Cottontail DB: an open source database system for multimedia retrieval and analysis. In: The 28th ACM international conference on multimedia, virtual event. Seattle, WA, pp. 4465\u20134468. https:\/\/doi.org\/10.1145\/3394171.3414538","DOI":"10.1145\/3394171.3414538"},{"key":"225_CR16","doi-asserted-by":"publisher","unstructured":"Heller S, Gasser R, Illi C, Pasquinelli M, Sauter L, Spiess F, Schuldt H (2021) Towards explainable interactive multi-modal video retrieval with vitrivr. In: International conference on multimedia modeling. Springer, pp. 435\u2013440. https:\/\/doi.org\/10.1007\/978-3-030-67835-7_41","DOI":"10.1007\/978-3-030-67835-7_41"},{"key":"225_CR17","doi-asserted-by":"publisher","unstructured":"Heller S, Sauter L, Schuldt H, Rossetto L (2020) Multi-stage queries and temporal scoring in vitrivr. In: International conference on multimedia & expo workshops (ICMEW). IEEE, pp 1\u20135. https:\/\/doi.org\/10.1109\/ICMEW46912.2020.9105954","DOI":"10.1109\/ICMEW46912.2020.9105954"},{"key":"225_CR18","doi-asserted-by":"publisher","unstructured":"Hezel N, Schall K, Jung K, Barthel KU (2021) Video search with sub-image keyword transfer using existing image archives. In: International conference on multimedia modeling. Springer, pp 484\u2013489. https:\/\/doi.org\/10.1007\/978-3-030-67835-7_49","DOI":"10.1007\/978-3-030-67835-7_49"},{"key":"225_CR19","unstructured":"Howard AG, Zhu M, Chen B, Kalenichenko D, Wang W, Weyand T, Andreetto M, Adam H (2017) Mobilenets: Efficient convolutional neural networks for mobile vision applications. CoRR. arXiv:1704.04861"},{"issue":"1","key":"225_CR20","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1109\/TPAMI.2010.57","volume":"33","author":"H J\u00e9gou","year":"2010","unstructured":"J\u00e9gou H, Douze M, Schmid C (2010) Product quantization for nearest neighbor search. IEEE Trans Pattern Anal Mach Intell 33(1):117\u2013128. https:\/\/doi.org\/10.1109\/TPAMI.2010.57","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"225_CR21","doi-asserted-by":"publisher","first-page":"2340","DOI":"10.1109\/TIP.2021.3051462","volume":"30","author":"Y Jiang","year":"2021","unstructured":"Jiang Y, Gong X, Liu D, Cheng Y, Fang C, Shen X, Yang J, Zhou P, Wang Z (2021) Enlightengan: deep light enhancement without paired supervision. IEEE Trans Image Process 30:2340\u20132349. https:\/\/doi.org\/10.1109\/TIP.2021.3051462","journal-title":"IEEE Trans Image Process"},{"key":"225_CR22","doi-asserted-by":"publisher","unstructured":"Karisch C, Leibetseder A, Schoeffmann K (2021) Noshot video browser at vbs2021. In: International conference on multimedia modeling. Springer, pp. 405\u2013409. https:\/\/doi.org\/10.1007\/978-3-030-67835-7_36","DOI":"10.1007\/978-3-030-67835-7_36"},{"key":"225_CR23","unstructured":"Kay W, Carreira J, Simonyan K, Zhang B, Hillier C, Vijayanarasimhan S, Viola F, Green T, Back T, Natsev P, Suleyman M, Zisserman A (2017) The kinetics human action video dataset. CoRR. arXiv:1705.06950"},{"key":"225_CR24","doi-asserted-by":"publisher","unstructured":"Khan OS, J\u00f3nsson B\u00de, Larsen M, Poulsen L, Koelma DC, Rudinac S, Worring M, Zah\u00e1lka J (2021) Exquisitor at the video browser showdown 2021: relationships between semantic classifiers. In: International conference on multimedia modeling. Springer, pp 410\u2013416. https:\/\/doi.org\/10.1007\/978-3-030-67835-7_37","DOI":"10.1007\/978-3-030-67835-7_37"},{"key":"225_CR25","doi-asserted-by":"publisher","unstructured":"Khan OS, J\u00f3nsson B\u00de, Rudinac S, Zah\u00e1lka J, Ragnarsd\u00f3ttir H, \u00deorleiksd\u00f3ttir \u00de, Gu\u00f0mundsson G\u00de, Amsaleg L, Worring M (2020) Interactive learning for multimedia at large. In: Proceedings of the European conference on information retrieval. Springer, pp 410\u2013416. https:\/\/doi.org\/10.1007\/978-3-030-45439-5_33","DOI":"10.1007\/978-3-030-45439-5_33"},{"key":"225_CR26","doi-asserted-by":"publisher","unstructured":"Kratochv\u00edl M, Vesel\u00fd P, Mejzl\u00edk F, Loko\u010d J (2020) Som-hunter: video browsing with relevance-to-som feedback loop. In: International conference on multimedia modeling. Springer, pp. 790\u2013795. https:\/\/doi.org\/10.1007\/978-3-030-37734-2_71","DOI":"10.1007\/978-3-030-37734-2_71"},{"issue":"7","key":"225_CR27","doi-asserted-by":"publisher","first-page":"1956","DOI":"10.1007\/s11263-020-01316-z","volume":"128","author":"A Kuznetsova","year":"2018","unstructured":"Kuznetsova A, Rom H, Alldrin N, Uijlings JRR, Krasin I, Pont-Tuset J, Kamali S, Popov S, Malloci M, Duerig T, Ferrari V (2018) The open images dataset V4. Int J Comput Vis 128(7):1956\u20131981. https:\/\/doi.org\/10.1007\/s11263-020-01316-z","journal-title":"Int J Comput Vis"},{"key":"225_CR28","doi-asserted-by":"publisher","unstructured":"Lee Y, Choi H, Park S, Ro YM (2021) IVIST: interactive video search tool in VBS 2021. In: International conference on multimedia modeling. Springer, pp 423\u2013428. https:\/\/doi.org\/10.1007\/978-3-030-67835-7_39","DOI":"10.1007\/978-3-030-67835-7_39"},{"key":"225_CR29","doi-asserted-by":"publisher","unstructured":"Leibetseder A, Schoeffmann K (2021) Less is more\u2014divexplore 5.0 at VBS 2021. In: International conference on multimedia modeling. Springer, pp 455\u2013460. https:\/\/doi.org\/10.1007\/978-3-030-67835-7_44","DOI":"10.1007\/978-3-030-67835-7_44"},{"key":"225_CR30","doi-asserted-by":"publisher","unstructured":"Leibetseder A, Schoeffmann K (2021) lifexplore at the lifelog search challenge 2021. In: Proceedings of the fourth annual workshop on lifelog search challenge. ACM, pp 23\u201328. https:\/\/doi.org\/10.1145\/3463948.3469060","DOI":"10.1145\/3463948.3469060"},{"key":"225_CR31","doi-asserted-by":"publisher","unstructured":"Li X, Xu C, Yang G, Chen Z, Dong J (2019) W2VV++: fully deep learning for ad-hoc video search. In: International conference on multimedia. ACM, pp. 1786\u20131794. https:\/\/doi.org\/10.1145\/3343031.3350906","DOI":"10.1145\/3343031.3350906"},{"key":"225_CR32","doi-asserted-by":"publisher","unstructured":"Lin TY, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft COCO: common objects in context. In: Computer vision\u2014ECCV. Springer, pp 740\u2013755. https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"225_CR33","doi-asserted-by":"publisher","unstructured":"Loko\u010d J, B\u00e1toryov\u00e1 J, Smr\u017e D, Dobransk\u00fd M (2021) Video search with collage queries. In: International conference on multimedia modeling. Springer, pp 429\u2013434. https:\/\/doi.org\/10.1007\/978-3-030-67835-7_40","DOI":"10.1007\/978-3-030-67835-7_40"},{"key":"225_CR34","doi-asserted-by":"publisher","unstructured":"Loko\u010d J, Sou\u010dek T, Vesel\u00fd P, Mejzl\u00edk F, Ji J, Xu C, Li X (2020) A W2VV++ case study with automated and interactive text-to-video retrieval. In: International conference on multimedia. ACM. https:\/\/doi.org\/10.1145\/3394171.3414002","DOI":"10.1145\/3394171.3414002"},{"key":"225_CR35","doi-asserted-by":"crossref","unstructured":"Loko\u010d J, Bailer W, Barthel KU, Gurrin C, Heller S, J\u00f3nsson B\u00de, Pe\u0161ka L, Rossetto L, Schoeffmann K, Vadicamo L, Vrochidis S, Wu J (2022) A task category space for user-centric comparative multimedia search evaluations. In: International conference on multimedia modeling","DOI":"10.1007\/978-3-030-98358-1_16"},{"key":"225_CR36","doi-asserted-by":"publisher","unstructured":"Loko\u010d J, Koval\u010d\u00edk G, Sou\u010dek T, Moravec J, \u010cech P (2019) A framework for effective known-item search in video. In: International conference on multimedia. ACM, pp. 1777\u20131785. https:\/\/doi.org\/10.1145\/3343031.3351046","DOI":"10.1145\/3343031.3351046"},{"key":"225_CR37","doi-asserted-by":"publisher","unstructured":"Loko\u010d J, Vesel\u00fd P, Mejzl\u00edk F, Koval\u010d\u00edk G, Sou\u010dek T, Rossetto L, Schoeffmann K, Bailer W, Gurrin C, Sauter L, Song J, Vrochidis S, Wu J, J\u00f3nsson B\u00de (2021) Is the reign of interactive search eternal? findings from the video browser showdown 2020. ACM Trans Multim Comput Commun Appl 17(3). https:\/\/doi.org\/10.1145\/3445031","DOI":"10.1145\/3445031"},{"key":"225_CR38","doi-asserted-by":"publisher","unstructured":"Markatopoulou F, Moumtzidou A, Galanopoulos D, Avgerinakis K, Andreadis S, Gialampoukidis I, Tachos S, Vrochidis S, Mezaris V, Kompatsiaris I, Patras I (2017) ITI-CERTH participation in TRECVID 2017. In: TREC video retrieval evaluation. NIST. https:\/\/doi.org\/10.5281\/zenodo.1183440","DOI":"10.5281\/zenodo.1183440"},{"key":"225_CR39","doi-asserted-by":"publisher","unstructured":"Messina N, Falchi F, Esuli A, Amato G (2020) Transformer reasoning network for image-text matching and retrieval. In: International conference on pattern recognition. IEEE. https:\/\/doi.org\/10.1109\/ICPR48806.2021.9413172","DOI":"10.1109\/ICPR48806.2021.9413172"},{"key":"225_CR40","doi-asserted-by":"publisher","unstructured":"Mettes P, Koelma DC, Snoek CGM (2020) Shuffled imagenet banks for video event detection and search. ACM Trans Multim Comput Commun Appl 16(2):44:1\u201344:21. https:\/\/doi.org\/10.1145\/3377875","DOI":"10.1145\/3377875"},{"issue":"2","key":"225_CR41","doi-asserted-by":"publisher","first-page":"502","DOI":"10.1109\/TPAMI.2019.2901464","volume":"42","author":"M Monfort","year":"2020","unstructured":"Monfort M, Vondrick C, Oliva A, Andonian A, Zhou B, Ramakrishnan K, Bargal SA, Yan T, Brown LM, Fan Q, Gutfreund D (2020) Moments in time dataset: one million videos for event understanding. IEEE Trans Pattern Anal Mach Intell 42(2):502\u2013508. https:\/\/doi.org\/10.1109\/TPAMI.2019.2901464","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"225_CR42","doi-asserted-by":"publisher","unstructured":"Nguyen PA, Lu YJ, Zhang H, Ngo CW (2018) Enhanced VIREO KIS at VBS 2018. In: International conference on multimedia modeling. Springer, pp 407\u2013412. https:\/\/doi.org\/10.1007\/978-3-319-73600-6_42","DOI":"10.1007\/978-3-319-73600-6_42"},{"key":"225_CR43","doi-asserted-by":"publisher","unstructured":"Nguyen PA, Wu J, Ngo CW, Francis D, Huet B (2020) VIREO @ video browser showdown 2020. In: International conference on multimedia modeling. Springer, pp 772\u2013777. https:\/\/doi.org\/10.1007\/978-3-030-37734-2_68","DOI":"10.1007\/978-3-030-37734-2_68"},{"key":"225_CR44","doi-asserted-by":"publisher","unstructured":"Pe\u0161ka L, Koval\u010d\u00edk G, Sou\u010dek T, \u0160krh\u00e1k V, Loko\u010d J (2021) W2VV++ BERT model at VBS 2021. In: International conference on multimedia modeling. Springer, pp 467\u2013472. https:\/\/doi.org\/10.1007\/978-3-030-67835-7_46","DOI":"10.1007\/978-3-030-67835-7_46"},{"key":"225_CR45","doi-asserted-by":"crossref","unstructured":"Pittaras N, Markatopoulou F, Mezaris V, Patras I (2017): Comparison of fine-tuning and extension strategies for deep convolutional neural networks. In: International conference on multimedia modeling. Springer, pp 102\u2013114. http:\/\/doi.org\/10.1007\/978-3-319-51811-4_9","DOI":"10.1007\/978-3-319-51811-4_9"},{"key":"225_CR46","unstructured":"Radford A, Kim JW, Hallacy C, Ramesh A, Goh G, Agarwal S, Sastry G, Askell A, Mishkin P, Clark J, Krueger G, Sutskever I (2021) Learning transferable visual models from natural language supervision. CoRR. arXiv:2103.00020"},{"key":"225_CR47","doi-asserted-by":"publisher","unstructured":"Redmon J, Farhadi A (2017) YOLO9000: better, faster, stronger. In: Conference on computer vision and pattern recognition. IEEE, pp 7263\u20137271. https:\/\/doi.org\/10.1109\/CVPR.2017.690","DOI":"10.1109\/CVPR.2017.690"},{"key":"225_CR48","unstructured":"Redmon J, Farhadi A (2018) YOLOv3 on the open images dataset. https:\/\/pjreddie.com\/darknet\/yolo\/ (Online; Accessed 22 April 2021)"},{"key":"225_CR49","doi-asserted-by":"crossref","unstructured":"Ressmann A, Schoeffmann K (2021) Ivos-the itec interactive video object search system at vbs2021. In: International conference on multimedia modeling. Springer, pp 479\u2013483","DOI":"10.1007\/978-3-030-67835-7_48"},{"key":"225_CR50","doi-asserted-by":"publisher","unstructured":"Revaud J, Almazan J, Rezende R, de Souza C (2019) Learning with average precision: training image retrieval with a listwise loss. In: International conference on computer vision. IEEE, pp 5106\u20135115. https:\/\/doi.org\/10.1109\/ICCV.2019.00521","DOI":"10.1109\/ICCV.2019.00521"},{"key":"225_CR51","doi-asserted-by":"publisher","unstructured":"Rossetto L (2018) Multi-modal video retrieval. Ph.D. thesis, University of Basel. https:\/\/doi.org\/10.5451\/unibas-006859522","DOI":"10.5451\/unibas-006859522"},{"key":"225_CR52","doi-asserted-by":"publisher","unstructured":"Rossetto L, Baumgartner M, Ashena N, Ruosch F, Pernisch R, Heitz L, Bernstein A (2021) Videograph\u2014towards using knowledge graphs for interactive video retrieval. In: International conference on multimedia modeling. Springer, pp 417\u2013422. https:\/\/doi.org\/10.1007\/978-3-030-67835-7_38","DOI":"10.1007\/978-3-030-67835-7_38"},{"key":"225_CR53","doi-asserted-by":"publisher","DOI":"10.1109\/MMUL.2021.3066779","author":"L Rossetto","year":"2021","unstructured":"Rossetto L, Gasser R, Heller S, Parian-Scherb M, Sauter L, Spiess F, Schuldt H, Peska L, Soucek T, Kratochvil M et al (2021) On the user-centric comparative remote evaluation of interactive video search systems. IEEE Multim. https:\/\/doi.org\/10.1109\/MMUL.2021.3066779","journal-title":"IEEE Multim"},{"key":"225_CR54","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1109\/TMM.2020.2980944","volume":"23","author":"L Rossetto","year":"2020","unstructured":"Rossetto L, Gasser R, Loko\u010d J, Bailer W, Schoeffmann K, Muenzer B, Sou\u010dek T, Nguyen PA, Bolettieri P, Leibetseder A et al (2020) Interactive video retrieval in the age of deep learning-detailed evaluation of VBS 2019. IEEE Trans Multim 23:243\u2013256. https:\/\/doi.org\/10.1109\/TMM.2020.2980944","journal-title":"IEEE Trans Multim"},{"key":"225_CR55","doi-asserted-by":"publisher","unstructured":"Rossetto L, Gasser R, Sauter L, Bernstein A, Schuldt H (2021) A system for interactive multimedia retrieval evaluations. In: International conference on multimedia modeling. Springer. https:\/\/doi.org\/10.1007\/978-3-030-67835-7_33","DOI":"10.1007\/978-3-030-67835-7_33"},{"key":"225_CR56","unstructured":"Rossetto L, Gasser R, Schuldt H (2019) Query by semantic sketch. CoRR. arXiv:1909.12526"},{"key":"225_CR57","doi-asserted-by":"publisher","unstructured":"Rossetto L, Giangreco I, Schuldt H (2014) Cineast: A multi-feature sketch-based video retrieval engine. In: International symposium on multimedia. IEEE, pp 18\u201323. https:\/\/doi.org\/10.1109\/ISM.2014.38","DOI":"10.1109\/ISM.2014.38"},{"key":"225_CR58","doi-asserted-by":"publisher","unstructured":"Rossetto L, Parian MA, Gasser R, Giangreco I, Heller S, Schuldt H (2019) Deep learning-based concept detection in vitrivr. In: International conference on multimedia modeling. Springer, pp. 616\u2013621. https:\/\/doi.org\/10.1007\/978-3-030-05716-9_55","DOI":"10.1007\/978-3-030-05716-9_55"},{"key":"225_CR59","unstructured":"Rossetto L, Schoeffmann K, Bernstein A (2021) Insights on the V3C2 dataset. CoRR arXiv:2105.01475"},{"key":"225_CR60","doi-asserted-by":"publisher","unstructured":"Rossetto L, Schuldt H, Awad G, Butt AA (2019) V3C\u2014a research video collection. In: International conference on multimedia modeling. Springer, pp 349\u2013360. https:\/\/doi.org\/10.1007\/978-3-030-05710-7_29","DOI":"10.1007\/978-3-030-05710-7_29"},{"key":"225_CR61","doi-asserted-by":"publisher","unstructured":"Schall K, Barthel KU, Hezel N, Jung K (2019) Deep aggregation of regional convolutional activations for content based image retrieval. In: International workshop on multimedia signal processing. IEEE, pp. 1\u20136. https:\/\/doi.org\/10.1109\/MMSP.2019.8901787","DOI":"10.1109\/MMSP.2019.8901787"},{"key":"225_CR62","unstructured":"Schoeffmann K (2021) Vbs 2021 overview. https:\/\/www.youtube.com\/watch?v=8Kg_5BQon9I&t=587s"},{"key":"225_CR63","doi-asserted-by":"publisher","unstructured":"Schoeffmann K (2019) Video browser showdown 2012-2019: a review. In: Conference on content-based multimedia indexing. IEEE, pp 1\u20134. https:\/\/doi.org\/10.1109\/CBMI.2019.8877397","DOI":"10.1109\/CBMI.2019.8877397"},{"issue":"9","key":"225_CR64","doi-asserted-by":"publisher","first-page":"2035","DOI":"10.1109\/TPAMI.2018.2848939","volume":"41","author":"B Shi","year":"2019","unstructured":"Shi B, Yang M, Wang X, Lyu P, Yao C, Bai X (2019) ASTER: an attentional scene text recognizer with flexible rectification. IEEE Trans Pattern Anal Mach Intell 41(9):2035\u20132048. https:\/\/doi.org\/10.1109\/TPAMI.2018.2848939","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"225_CR65","unstructured":"Simonyan K, Zisserman A (2015) Very deep convolutional networks for large-scale image recognition. arXiv:1409.1556"},{"key":"225_CR66","doi-asserted-by":"publisher","unstructured":"Smith R (2007) An overview of the tesseract OCR engine. In: International conference on document analysis and recognition. IEEE, pp 629\u2013633. https:\/\/doi.org\/10.1109\/ICDAR.2007.4376991","DOI":"10.1109\/ICDAR.2007.4376991"},{"key":"225_CR67","unstructured":"Soucek T, Lokoc J (2020) Transnet V2: an effective deep network architecture for fast shot transition detection. CoRR arXiv:2008.04838"},{"key":"225_CR68","doi-asserted-by":"publisher","unstructured":"Spiess F, Gasser R, Heller S, Rossetto L, Sauter L, Schuldt H (2021) Competitive interactive video retrieval in virtual reality with vitrivr-vr. In: International conference on multimedia modeling. Springer, pp 441\u2013447. https:\/\/doi.org\/10.1007\/978-3-030-67835-7_42","DOI":"10.1007\/978-3-030-67835-7_42"},{"key":"225_CR69","doi-asserted-by":"publisher","unstructured":"Szegedy C, Vanhoucke V, Ioffe S, Shlens J, Wojna Z (2016) Rethinking the inception architecture for computer vision. In: Conference on computer vision and pattern recognition. IEEE, pp 2818\u20132826. https:\/\/doi.org\/10.1109\/CVPR.2016.308","DOI":"10.1109\/CVPR.2016.308"},{"key":"225_CR70","doi-asserted-by":"publisher","unstructured":"Tan WR, Chan CS, Aguirre HE, Tanaka K (2016) Ceci n\u2019est pas une pipe: a deep convolutional network for fine-art paintings classification. In: International conference on image processing, pp 3703\u20133707. https:\/\/doi.org\/10.1109\/ICIP.2016.7533051","DOI":"10.1109\/ICIP.2016.7533051"},{"key":"225_CR71","doi-asserted-by":"publisher","unstructured":"Tran L, Nguyen M, Nguyen T, Healy G, Caputo A, Nguyen BT, Gurrin C (2021) A VR interface for browsing visual spaces at VBS2021. In: International conference on multimedia modeling. Springer, pp 490\u2013495. https:\/\/doi.org\/10.1007\/978-3-030-67835-7_50","DOI":"10.1007\/978-3-030-67835-7_50"},{"issue":"7","key":"225_CR72","doi-asserted-by":"publisher","first-page":"1512","DOI":"10.1109\/TIP.2009.2019809","volume":"18","author":"J Van De Weijer","year":"2009","unstructured":"Van De Weijer J, Schmid C, Verbeek J, Larlus D (2009) Learning color names for real-world applications. IEEE Trans Image Process 18(7):1512\u20131523. https:\/\/doi.org\/10.1109\/TIP.2009.2019809","journal-title":"IEEE Trans Image Process"},{"key":"225_CR73","doi-asserted-by":"publisher","unstructured":"Vesel\u00fd P, Mejzl\u00edk F, Loko\u010d J (2021) Somhunter V2 at video browser showdown 2021. In: International conference on multimedia modeling. Springer, pp 461\u2013466. https:\/\/doi.org\/10.1007\/978-3-030-67835-7_45","DOI":"10.1007\/978-3-030-67835-7_45"},{"key":"225_CR74","doi-asserted-by":"publisher","unstructured":"Wu J, Ngo CW (2020) Interpretable embedding for ad-hoc video search. In: International conference on multimedia. ACM, pp 3357\u20143366. https:\/\/doi.org\/10.1145\/3394171.3413916","DOI":"10.1145\/3394171.3413916"},{"key":"225_CR75","doi-asserted-by":"publisher","unstructured":"Wu J, Nguyen PA, Ma Z, Ngo CW (2021) Sql-like interpretable interactive video search. In: International conference on multimedia modeling. Springer, pp 391\u2013397. https:\/\/doi.org\/10.1007\/978-3-030-67835-7_34","DOI":"10.1007\/978-3-030-67835-7_34"},{"key":"225_CR76","doi-asserted-by":"publisher","unstructured":"Xie S, Girshick RB, Doll\u00e1r P, Tu Z, He K (2017) Aggregated residual transformations for deep neural networks. In: Conference on computer vision and pattern recognition. IEEE, pp 5987\u20135995. https:\/\/doi.org\/10.1109\/CVPR.2017.634","DOI":"10.1109\/CVPR.2017.634"},{"key":"225_CR77","doi-asserted-by":"publisher","unstructured":"Ye G, Li Y, Xu H, Liu D, Chang SF (2015) Eventnet: A large scale structured concept library for complex event detection in video. In: International conference on multimedia. ACM, pp 471\u2014480. https:\/\/doi.org\/10.1145\/2733373.2806221","DOI":"10.1145\/2733373.2806221"},{"key":"225_CR78","doi-asserted-by":"crossref","unstructured":"Zhang H, Wang Y, Dayoub F, Sunderhauf N (2021) Varifocalnet: an iou-aware dense object detector. In: Conference on computer vision and pattern recognition. IEEE, pp 8514\u20138523","DOI":"10.1109\/CVPR46437.2021.00841"},{"issue":"6","key":"225_CR79","doi-asserted-by":"publisher","first-page":"1452","DOI":"10.1109\/TPAMI.2017.2723009","volume":"40","author":"B Zhou","year":"2018","unstructured":"Zhou B, Lapedriza A, Khosla A, Oliva A, Torralba A (2018) Places: a 10 million image database for scene recognition. IEEE Trans Pattern Anal Mach Intell 40(6):1452\u20131464. https:\/\/doi.org\/10.1109\/TPAMI.2017.2723009","journal-title":"IEEE Trans Pattern Anal Mach Intell"}],"container-title":["International Journal of Multimedia Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-021-00225-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13735-021-00225-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-021-00225-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,8]],"date-time":"2025-04-08T23:24:35Z","timestamp":1744154675000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13735-021-00225-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,26]]},"references-count":79,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2022,3]]}},"alternative-id":["225"],"URL":"https:\/\/doi.org\/10.1007\/s13735-021-00225-2","relation":{},"ISSN":["2192-6611","2192-662X"],"issn-type":[{"value":"2192-6611","type":"print"},{"value":"2192-662X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,1,26]]},"assertion":[{"value":"15 October 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 November 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 December 2021","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 January 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}