{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,9]],"date-time":"2025-10-09T00:20:03Z","timestamp":1759969203285,"version":"build-2065373602"},"publisher-location":"New York, NY, USA","reference-count":18,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,5,8]],"date-time":"2025-05-08T00:00:00Z","timestamp":1746662400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,5,8]]},"DOI":"10.1145\/3701716.3717653","type":"proceedings-article","created":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T16:20:01Z","timestamp":1748017201000},"page":"1576-1580","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Hybrid, Unified and Iterative: A Novel Framework for Text-based Person Anomaly Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-0196-6083","authenticated-orcid":false,"given":"Tien-Huy","family":"Nguyen","sequence":"first","affiliation":[{"name":"University of Information Technology Vietnam National University, Ho Chi Minh, Vietnam"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6842-6250","authenticated-orcid":false,"given":"Huu-Loc","family":"Tran","sequence":"additional","affiliation":[{"name":"University of Information Technology Vietnam National University, Ho Chi Minh, Vietnam"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-2243-7428","authenticated-orcid":false,"given":"Huu-Phong","family":"Phan-Nguyen","sequence":"additional","affiliation":[{"name":"University of Information Technology Vietnam National University, Ho Chi Minh, Vietnam"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8025-2501","authenticated-orcid":false,"given":"Quang-Vinh","family":"Dinh","sequence":"additional","affiliation":[{"name":"AI VIETNAM Lab, Ninh Thuan, Vietnam"}]}],"member":"320","published-online":{"date-parts":[[2025,5,23]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"[n. d.]. AI VIETNAM - aivietnam.edu.vn. https:\/\/aivietnam.edu.vn"},{"key":"e_1_3_2_2_2_1","unstructured":"Quang-Khai Bui-Tran Duc-Huy Ha Minh-Hung Nguyen Phuc-Hung Dang Thien-An Trieu-Hoang and Tien-Huy Nguyen. [n. d.]. Enhanced Video Retrieval System: Leveraging GPT-4 for Multimodal Query Expansion and Open Image Search."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3628797.3629011"},{"key":"e_1_3_2_2_4_1","unstructured":"Junnan Li Dongxu Li Silvio Savarese and Steven Hoi. 2023. BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models. arXiv:2301.12597 [cs.CV] https:\/\/arxiv.org\/abs\/2301.12597"},{"key":"e_1_3_2_2_5_1","unstructured":"Zheng Li Lijia Si Caili Guo Yang Yang and Qiushi Cao. 2024. Data Augmentation for Text-based Person Retrieval Using Large Language Models. arXiv:2405.11971 [cs.CV] https:\/\/arxiv.org\/abs\/2405.11971"},{"key":"e_1_3_2_2_6_1","unstructured":"Tien-Huy Nguyen Hoang-Long Nguyen-Huu Thien-Doanh Le Huu-Loc Tran Quoc-Khanh Le-Tran Hoang-Bach Ngo Minh-Hung An and Quang-Vinh Dinh. 2023. Multimodal Fusion in NewsImages 2023: Evaluating Translators Keyphrase Extraction and CLIP Pre-Training.. In MediaEval."},{"key":"e_1_3_2_2_7_1","unstructured":"Tien-Huy Nguyen Quang-Khai Tran and Anh-Tuan Quang-Hoang. 2024. Improving Generalization in Visual Reasoning via Self-Ensemble. arXiv:2410.20883 [cs.CV] https:\/\/arxiv.org\/abs\/2410.20883"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","unstructured":"Tho-Quang Nguyen Huu-Loc Tran Tuan-Khoa Tran Huu-Phong Phan-Nguyen and Tien-Huy Nguyen. 2024. FA-YOLOv9: Improved YOLOv9 Based on Feature Attention Block. 1--6. doi:10.1109\/MAPR63514.2024.10661057","DOI":"10.1109\/MAPR63514.2024.10661057"},{"key":"e_1_3_2_2_9_1","unstructured":"Hoang-Long Nguyen-Huu Tran Thi Cam Giang Cam-Nguyen Tran-Nhu Phuoc Phan Hoang and Tien-Huy Nguyen Phat Huu and. [n. d.]. AViSearch: A Multimodal Video Event Retrieval System via Query Enhancement and Optimized Keyframes."},{"key":"e_1_3_2_2_10_1","volume-title":"Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever.","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. arXiv:2103.00020 [cs.CV] https:\/\/arxiv.org\/ abs\/2103.00020"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"crossref","unstructured":"Koustuv Sinha Robin Jia Dieuwke Hupkes Joelle Pineau Adina Williams and Douwe Kiela. 2021. Masked Language Modeling and the Distributional Hypothesis: Order Word Matters Pre-training for Little. arXiv:2104.06644 [cs.CL] https:\/\/arxiv.org\/abs\/2104.06644","DOI":"10.18653\/v1\/2021.emnlp-main.230"},{"key":"e_1_3_2_2_12_1","volume-title":"Saksham Singhal, Subhojit Som, and Furu Wei.","author":"Wang Wenhui","year":"2022","unstructured":"Wenhui Wang, Hangbo Bao, Li Dong, Johan Bjorck, Zhiliang Peng, Qiang Liu, Kriti Aggarwal, Owais Khan Mohammed, Saksham Singhal, Subhojit Som, and Furu Wei. 2022. Image as a Foreign Language: BEiT Pretraining for All Vision and Vision-Language Tasks. arXiv:2208.10442 [cs.CV] https:\/\/arxiv.org\/abs\/2208. 10442"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00943"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01857"},{"key":"e_1_3_2_2_15_1","volume-title":"Beyond Walking: A Large-Scale Image-Text Benchmark for Text-based Person Anomaly Search. arXiv:2411.17776 [cs.CV] https:\/\/arxiv.org\/abs\/2411.17776","author":"Yang Shuyu","year":"2024","unstructured":"Shuyu Yang, Yaxiong Wang, Li Zhu, and Zhedong Zheng. 2024. Beyond Walking: A Large-Scale Image-Text Benchmark for Text-based Person Anomaly Search. arXiv:2411.17776 [cs.CV] https:\/\/arxiv.org\/abs\/2411.17776"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"crossref","unstructured":"Shuyu Yang Yinan Zhou Yaxiong Wang Yujiao Wu Li Zhu and Zhedong Zheng. 2023. Towards Unified Text-based Person Retrieval: A Large-scale Multi- Attribute and Language Search Benchmark. arXiv:2306.02898 [cs.CV] https: \/\/arxiv.org\/abs\/2306.02898","DOI":"10.1145\/3581783.3611709"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_42"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383184"}],"event":{"name":"WWW '25: The ACM Web Conference 2025","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Sydney NSW Australia","acronym":"WWW '25"},"container-title":["Companion Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701716.3717653","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3701716.3717653","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T03:09:19Z","timestamp":1759892959000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701716.3717653"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,8]]},"references-count":18,"alternative-id":["10.1145\/3701716.3717653","10.1145\/3701716"],"URL":"https:\/\/doi.org\/10.1145\/3701716.3717653","relation":{},"subject":[],"published":{"date-parts":[[2025,5,8]]},"assertion":[{"value":"2025-05-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}