{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,22]],"date-time":"2025-11-22T11:32:23Z","timestamp":1763811143158,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,12]],"date-time":"2023-06-12T00:00:00Z","timestamp":1686528000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,12]]},"DOI":"10.1145\/3591106.3592239","type":"proceedings-article","created":{"date-parts":[[2023,6,8]],"date-time":"2023-06-08T22:33:38Z","timestamp":1686263618000},"page":"565-570","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Video Retrieval for Everyday Scenes With Common Objects"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5608-9089","authenticated-orcid":false,"given":"Arun","family":"Zachariah","sequence":"first","affiliation":[{"name":"University of Missouri, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1859-0438","authenticated-orcid":false,"given":"Praveen","family":"Rao","sequence":"additional","affiliation":[{"name":"University of Missouri, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,6,12]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Lightweight and High-Performance Data Processing. Retrieved","author":"Framework X","year":"2022","unstructured":"2007. BaseX | The XML Framework: Lightweight and High-Performance Data Processing. Retrieved July 1, 2022 from https:\/\/basex.org"},{"key":"e_1_3_2_1_2_1","unstructured":"2014. PySceneDetect. Retrieved July 1 2022 from http:\/\/scenedetect.com\/en\/latest\/"},{"key":"e_1_3_2_1_3_1","first-page":"13","article-title":"Content Based Video Retrieval Systems-Methods, Techniques, Trends and Challenges","volume":"112","author":"Ansari Aasif","year":"2015","unstructured":"Aasif Ansari and Muzammil\u00a0H Mohammed. 2015. Content Based Video Retrieval Systems-Methods, Techniques, Trends and Challenges. International Journal of Computer Applications 112, 7 (2015), 13\u201322.","journal-title":"International Journal of Computer Applications"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00814"},{"key":"e_1_3_2_1_6_1","volume-title":"TARN: Temporal Attentive Relation Network for Few-Shot and Zero-Shot Action Recognition. In British Machine Vision Conference. 1\u201314","author":"Bishay Mina","year":"2019","unstructured":"Mina Bishay, Georgios Zoumpourlis, and Ioannis Patras. 2019. TARN: Temporal Attentive Relation Network for Few-Shot and Zero-Shot Action Recognition. In British Machine Vision Conference. 1\u201314."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2072298.2072484"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2393347.2393393"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2015.2391674"},{"key":"e_1_3_2_1_10_1","volume-title":"The Design and Operation of CloudLab. In 2019 USENIX Annual Technical Conference. 1\u201314","author":"Duplyakin Dmitry","year":"2019","unstructured":"Dmitry Duplyakin, Robert Ricci, Aleksander Maricq, Gary Wong, Jonathon Duerig, Eric Eide, Leigh Stoller, Mike Hibler, David Johnson, Kirk Webb, Aditya Akella, Kuangching Wang, Glenn Ricart, Larry Landweber, Chip Elliott, Michael Zink, Emmanuel Cecchet, Snigdhaswin Kar, and Prabodh Mishra. 2019. The Design and Operation of CloudLab. In 2019 USENIX Annual Technical Conference. 1\u201314."},{"key":"e_1_3_2_1_11_1","volume-title":"Video Re-localization. In Proceedings of the European Conference on Computer Vision (ECCV). 1\u201316","author":"Feng Yang","year":"2018","unstructured":"Yang Feng, Lin Ma, Wei Liu, Tong Zhang, and Jiebo Luo. 2018. Video Re-localization. In Proceedings of the European Conference on Computer Vision (ECCV). 1\u201316."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.227"},{"key":"e_1_3_2_1_13_1","first-page":"241","article-title":"Deep Image Retrieval","volume":"2016","author":"Gordo Albert","year":"2016","unstructured":"Albert Gordo, Jon Almaz\u00e1n, Jerome Revaud, and Diane Larlus. 2016. Deep Image Retrieval: Learning Global Representations for Image Search. In Computer Vision - ECCV 2016. 241\u2013257.","journal-title":"Learning Global Representations for Image Search. In Computer Vision - ECCV"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-03555-5_10"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00186"},{"key":"e_1_3_2_1_16_1","first-page":"312","article-title":"Memory-Augmented Dense Predictive Coding for Video Representation Learning","volume":"2020","author":"Han Tengda","year":"2020","unstructured":"Tengda Han, Weidi Xie, and Andrew Zisserman. 2020. Memory-Augmented Dense Predictive Coding for Video Representation Learning. In Computer Vision - ECCV 2020. 312\u2013329.","journal-title":"Computer Vision - ECCV"},{"key":"e_1_3_2_1_17_1","volume-title":"Distilling the Knowledge in a Neural Network. arXiv preprint arXiv:1503.02531","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeff Dean. 2015. Distilling the Knowledge in a Neural Network. arXiv preprint arXiv:1503.02531 (2015)."},{"key":"e_1_3_2_1_18_1","volume-title":"VCDB: A Large-Scale Database for Partial Copy Detection in Videos. In Computer Vision \u2013 ECCV","author":"Jiang Yu-Gang","year":"2014","unstructured":"Yu-Gang Jiang, Yudong Jiang, and Jiajun Wang. 2014. VCDB: A Large-Scale Database for Partial Copy Detection in Videos. In Computer Vision \u2013 ECCV 2014, David Fleet, Tomas Pajdla, Bernt Schiele, and Tinne Tuytelaars (Eds.). Springer International Publishing, Cham, 357\u2013371."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2016.2530714"},{"key":"e_1_3_2_1_20_1","volume-title":"Computational Linguistics, and Speech Recognition","author":"Jurafsky Daniel","unstructured":"Daniel Jurafsky and James\u00a0H. Martin. 2009. Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics, and Speech Recognition (2nd ed.). Prentice Hall, USA.","edition":"2"},{"key":"e_1_3_2_1_21_1","volume-title":"Cross-Dimensional Weighting for Aggregated Deep Convolutional Features. In Computer Vision - ECCV 2016 Workshops. 685\u2013701","author":"Kalantidis Yannis","year":"2016","unstructured":"Yannis Kalantidis, Clayton Mellina, and Simon Osindero. 2016. Cross-Dimensional Weighting for Aggregated Deep Convolutional Features. In Computer Vision - ECCV 2016 Workshops. 685\u2013701."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00645"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2017.49"},{"key":"e_1_3_2_1_24_1","volume-title":"DnS: Distill-and-Select for Efficient and Accurate Video Indexing and Retrieval. arXiv preprint arXiv:2106.13266","author":"Kordopatis-Zilos Giorgos","year":"2021","unstructured":"Giorgos Kordopatis-Zilos, Christos Tzelepis, Symeon Papadopoulos, Ioannis Kompatsiaris, and Ioannis Patras. 2021. DnS: Distill-and-Select for Efficient and Accurate Video Indexing and Retrieval. arXiv preprint arXiv:2106.13266 (2021)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00358"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-37731-1_61"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2016.2645404"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-016-4176-6"},{"key":"e_1_3_2_1_29_1","volume-title":"Clipcap: Clip Prefix for Image Captioning. arXiv preprint arXiv:2111.09734","author":"Mokady Ron","year":"2021","unstructured":"Ron Mokady, Amir Hertz, and Amit\u00a0H Bermano. 2021. Clipcap: Clip Prefix for Image Captioning. arXiv preprint arXiv:2111.09734 (2021)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.374"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2806228"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.318"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2016.56"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00331"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00331"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1238"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2072298.2072354"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/2072298.2072354"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/1631272.1631295"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2587640"},{"key":"e_1_3_2_1_41_1","volume-title":"Proc. of International Conference on Pattern Recognition (ICPR). 5360\u20135367","author":"Wang Kuan-Hsun","year":"2020","unstructured":"Kuan-Hsun Wang, Chia\u00a0Chun Cheng, Yi-Ling Chen, Yale Song, and Shang-Hong Lai. 2020. Attention-Based Deep Metric Learning for Near-Duplicate Video Retrieval. In Proc. of International Conference on Pattern Recognition (ICPR). 5360\u20135367."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/1291233.1291280"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/1291233.1291280"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.571"},{"key":"e_1_3_2_1_45_1","volume-title":"Proc. of the 32nd International Conference on Machine Learning. 2048\u20132057","author":"Xu Kelvin","year":"2015","unstructured":"Kelvin Xu, Jimmy Ba, Ryan Kiros, Kyunghyun Cho, Aaron Courville, Ruslan Salakhudinov, Rich Zemel, and Yoshua Bengio. 2015. Show, Attend and Tell: Neural Image Caption Generation with Visual Attention. In Proc. of the 32nd International Conference on Machine Learning. 2048\u20132057."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-018-5862-3"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.282"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00315"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3372278.3390682"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3444685.3446253"}],"event":{"name":"ICMR '23: International Conference on Multimedia Retrieval","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Thessaloniki Greece","acronym":"ICMR '23"},"container-title":["Proceedings of the 2023 ACM International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592239","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3591106.3592239","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:51:22Z","timestamp":1750182682000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592239"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,12]]},"references-count":49,"alternative-id":["10.1145\/3591106.3592239","10.1145\/3591106"],"URL":"https:\/\/doi.org\/10.1145\/3591106.3592239","relation":{},"subject":[],"published":{"date-parts":[[2023,6,12]]},"assertion":[{"value":"2023-06-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}