{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T21:17:40Z","timestamp":1780607860024,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62372339, 62476021, 72434005, 62371350, 62372336"],"award-info":[{"award-number":["62372339, 62476021, 72434005, 62371350, 62372336"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Key Science and Technology Research Project of Xinjiang Production and Construction Corps in 2025"},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["2042023kf1033"],"award-info":[{"award-number":["2042023kf1033"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Ministry of Education Industry-University Cooperative Education Project","award":["240700006245501"],"award-info":[{"award-number":["240700006245501"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755325","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:54:15Z","timestamp":1761375255000},"page":"4166-4174","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Quantum Interference-Inspired Who-What-Where Composite-Semantics Instance Search for Story Videos"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-9848-7376","authenticated-orcid":false,"given":"Zijun","family":"Xu","sequence":"first","affiliation":[{"name":"School of Cyber Science and Engineering, Wuhan University, Wuhan, 
China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6682-7867","authenticated-orcid":false,"given":"Jiahao","family":"Guo","sequence":"additional","affiliation":[{"name":"School of Computer Science, Wuhan University, Wuhan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1161-8995","authenticated-orcid":false,"given":"Chunjie","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Beijing Jiaotong University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9796-488X","authenticated-orcid":false,"given":"Zhongyuan","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Computer Science, Wuhan University, Wuhan, China, National Engineering Research Center for Multimedia Software, Wuhan, China, and Hubei Key Laboratory of Multimedia and Network Communication Engineering, Wuhan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4526-6297","authenticated-orcid":false,"given":"Chunxia","family":"Xiao","sequence":"additional","affiliation":[{"name":"School of Computer Science, Wuhan University, Wuhan, China, National Engineering Research Center for Multimedia Software, Wuhan, China, and Hubei Key Laboratory of Multimedia and Network Communication Engineering, Wuhan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8287-8655","authenticated-orcid":false,"given":"Chao","family":"Liang","sequence":"additional","affiliation":[{"name":"School of Computer Science, Wuhan University, Wuhan, China, National Engineering Research Center for Multimedia Software, Wuhan, China, and Hubei Key Laboratory of Multimedia and Network Communication Engineering, Wuhan, 
China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Probability and measure theory","author":"Ash Robert B","unstructured":"Robert B Ash and Catherine A Dol\u00e9ans-Dade. 2000. Probability and measure theory. Academic press."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-59569-6_49"},{"key":"e_1_3_2_1_3_1","unstructured":"JC de Borda. 1781. M\u00e9moire sur les \u00e9lections au scrutin. Histoire de l'Acad\u00e9mie Royale des Sciences (1781)."},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of the learning to rank Challenge. PMLR, 25-35","author":"Burges Christopher","year":"2011","unstructured":"Christopher Burges, Krysta Svore, Paul Bennett, Andrzej Pastusiak, and Qiang Wu. 2011. Learning to rank using an ensemble of lambda-gradient models. In Proceedings of the learning to rank Challenge. PMLR, 25-35."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.122"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01231"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/1571941.1572114"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00482"},{"key":"e_1_3_2_1_9_1","volume-title":"Retinaface: Single-stage dense face localisation in the wild. arXiv preprint arXiv:1905.00641","author":"Deng Jiankang","year":"2019","unstructured":"Jiankang Deng, Jia Guo, Yuxiang Zhou, Jinke Yu, Irene Kotsia, and Stefanos Zafeiriou. 2019b. Retinaface: Single-stage dense face localisation in the wild. 
arXiv preprint arXiv:1905.00641 (2019)."},{"key":"e_1_3_2_1_10_1","first-page":"4171","volume-title":"Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. Bert: Pre-training of deep bidirectional transformers for language understanding. In Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers). 4171-4186."},{"key":"e_1_3_2_1_11_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/872757.872795"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1103\/RevModPhys.20.367"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-45442-5_17"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/239"},{"key":"e_1_3_2_1_16_1","volume-title":"What and Where: Composite-semantic Instance Search for Story Videos. In 2023 IEEE International Conference on Multimedia and Expo (ICME). IEEE Computer Society, 858-863","author":"Guo Jiahao","year":"2023","unstructured":"Jiahao Guo, Chao Liang, and Zhongyuan Wang. 2023. Who, What and Where: Composite-semantic Instance Search for Story Videos. In 2023 IEEE International Conference on Multimedia and Expo (ICME). 
IEEE Computer Society, 858-863."},{"key":"e_1_3_2_1_17_1","volume-title":"What and Where: Composite-Semantics Instance Search for Story Videos","author":"Guo Jiahao","year":"2025","unstructured":"Jiahao Guo, Ankang Lu, Zhengqian Wu, Zhongyuan Wang, and Chao Liang. 2025. Who, What and Where: Composite-Semantics Instance Search for Story Videos. IEEE Transactions on Image Processing (2025)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46487-9_6"},{"key":"e_1_3_2_1_19_1","unstructured":"Martin H\u00f6ffernig and Werner Bailer. 2016. JOANNEUM RESEARCH at TRECVID 2016 Instance Search Task.. In TRECVID."},{"key":"e_1_3_2_1_20_1","unstructured":"Herv\u00e9 Le Borgne. 2017. IRIM at TRECVID 2017: Instance Search. In TRECVID."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00490"},{"key":"e_1_3_2_1_22_1","volume-title":"International conference on machine learning. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023b. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning. PMLR, 19730-19742."},{"key":"e_1_3_2_1_23_1","volume-title":"International conference on machine learning. PMLR, 12888-12900","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022b. Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In International conference on machine learning. PMLR, 12888-12900."},{"key":"e_1_3_2_1_24_1","volume-title":"Align before fuse: Vision and language representation learning with momentum distillation. Advances in neural information processing systems","author":"Li Junnan","year":"2021","unstructured":"Junnan Li, Ramprasaath Selvaraju, Akhilesh Gotmare, Shafiq Joty, Caiming Xiong, and Steven Chu Hong Hoi. 2021b. 
Align before fuse: Vision and language representation learning with momentum distillation. Advances in neural information processing systems, Vol. 34 (2021), 9694-9705."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2020.08.006"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612838"},{"key":"e_1_3_2_1_27_1","unstructured":"Ya Li Guanyu Chen Xiangqian Cheng Chong Chen Shaoqiang Xu Xinyu Li Xuanlu Xiang Yanyun Zhao Zhicheng Zhao and Fei Su. 2019. BUPT-MCPRL at TRECVID 2019: ActEV and INS.. In TRECVID."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00056"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2023.3279145"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107256"},{"key":"e_1_3_2_1_31_1","volume-title":"Noel E O'Connor, Jiang Zhou, Lucas Azevedo, Tobias Daudert, Brian Davis, et al.","author":"Marsden Mark","year":"2016","unstructured":"Mark Marsden, Eva Mohedano, Kevin McGuinness, Andrea Calafell, Xavier Gir\u00f3-i Nieto, Noel E O'Connor, Jiang Zhou, Lucas Azevedo, Tobias Daudert, Brian Davis, et al., 2016. Dublin City University and partners' participation in the INS and VTT tracks at TRECVid 2016. In TRECVID."},{"key":"e_1_3_2_1_32_1","volume-title":"Story: style, structure, substance, and the principles of screenwriting","author":"McKee Robert","unstructured":"Robert McKee. 1997. Story: style, structure, substance, and the principles of screenwriting. Harper Collins."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.omega.2020.102254"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3617892"},{"key":"e_1_3_2_1_35_1","first-page":"815","volume-title":"Proceedings of the 20th International Conference on Natural Language Processing (ICON), Jyoti D. Pawar and Sobha Lalitha Devi (Eds.). 
NLP Association of India (NLPAI)","author":"Phukan Arpan","year":"2023","unstructured":"Arpan Phukan and Asif Ekbal. 2023. QeMMA: Quantum-Enhanced Multi-Modal Sentiment Analysis. In Proceedings of the 20th International Conference on Natural Language Processing (ICON), Jyoti D. Pawar and Sobha Lalitha Devi (Eds.). NLP Association of India (NLPAI), Goa University, Goa, India, 815-821. https:\/\/aclanthology.org\/2023.icon-1.84\/"},{"key":"e_1_3_2_1_36_1","volume-title":"Anas Anwarul Haq Khan, and Asif Ekbal","author":"Phukan Arpan","year":"2024","unstructured":"Arpan Phukan, Anas Anwarul Haq Khan, and Asif Ekbal. 2024a. QuMIN: quantum multi-modal data fusion for humor detection. Multimedia Tools and Applications (2024), 1-18."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSS.2024.3388016"},{"key":"e_1_3_2_1_38_1","volume-title":"International conference on machine learning. PMLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748-8763."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1177\/0192512102023004002"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.102085"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i8.32920"},{"key":"e_1_3_2_1_42_1","volume-title":"Text Proxy: Decomposing Retrieval from a 1-to-N Relationship into N 1-to-1 Relationships for Text-Video Retrieval. arXiv preprint arXiv:2410.06618","author":"Xiao Jian","year":"2024","unstructured":"Jian Xiao, Zhenzhen Hu, Jia Li, and Richang Hong. 2024. Text Proxy: Decomposing Retrieval from a 1-to-N Relationship into N 1-to-1 Relationships for Text-Video Retrieval. 
arXiv preprint arXiv:2410.06618 (2024)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1080\/01605682.2019.1657365"},{"key":"e_1_3_2_1_44_1","volume-title":"Quantum algorithms for compositional natural language processing. arXiv preprint arXiv:1608.01406","author":"Zeng William","year":"2016","unstructured":"William Zeng and Bob Coecke. 2016. Quantum algorithms for compositional natural language processing. arXiv preprint arXiv:1608.01406 (2016)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3050058"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.tcs.2018.04.029"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755325","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:04:46Z","timestamp":1765339486000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755325"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":46,"alternative-id":["10.1145\/3746027.3755325","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755325","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}