{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T09:24:24Z","timestamp":1769937864847,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":59,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,5,8]],"date-time":"2025-05-08T00:00:00Z","timestamp":1746662400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,5,8]]},"DOI":"10.1145\/3701716.3715220","type":"proceedings-article","created":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T16:20:01Z","timestamp":1748017201000},"page":"601-610","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Efficient Integration of ASR with Large Language Models to Enhance Video Search at Scale"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-7590-3567","authenticated-orcid":false,"given":"Qiang","family":"Zhang","sequence":"first","affiliation":[{"name":"Xi'an Jiaotong Univerisity, Xi'an, Shaanxi, China and Bilibili, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2175-2801","authenticated-orcid":false,"given":"Fengshun","family":"Xiao","sequence":"additional","affiliation":[{"name":"Bilibili, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9884-9226","authenticated-orcid":false,"given":"Tianjiao","family":"Li","sequence":"additional","affiliation":[{"name":"Bilibili, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7291-8202","authenticated-orcid":false,"given":"Li","family":"Lin","sequence":"additional","affiliation":[{"name":"Bilibili, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-8617-1477","authenticated-orcid":false,"given":"Hanyin","family":"Fang","sequence":"additional","affiliation":[{"name":"Bilibili, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-6679-1859","authenticated-orcid":false,"given":"Huyang","family":"Sun","sequence":"additional","affiliation":[{"name":"Bilibili, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-8712-174X","authenticated-orcid":false,"given":"Ruoyu","family":"Liu","sequence":"additional","affiliation":[{"name":"Department of Respiratory and Critical Care Medicine, The Second Affiliated Hospital of Xi'an Jiaotong University, Xi'an, Shaanxi, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7173-7793","authenticated-orcid":false,"given":"Xiaoyan","family":"Zhu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Xi'an Jiaotong University, Xi'an, Shaanxi, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3862-6557","authenticated-orcid":false,"given":"Jiayin","family":"Wang","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Xi'an Jiaotong University, Xi'an, Shaanxi, China"}]}],"member":"320","published-online":{"date-parts":[[2025,5,23]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0306-4573(02)00021-3"},{"key":"e_1_3_2_2_2_1","volume-title":"International conference on machine learning. PMLR, 173--182","author":"Amodei Dario","year":"2016","unstructured":"Dario Amodei, Sundaram Ananthanarayanan, Rishita Anubhai, Jingliang Bai, Eric Battenberg, Carl Case, Jared Casper, Bryan Catanzaro, Qiang Cheng, Guoliang Chen, et al. 2016. Deep speech 2: End-to-end speech recognition in english and mandarin. In International conference on machine learning. PMLR, 173--182."},{"key":"e_1_3_2_2_3_1","unstructured":"BAAI. 2023. bge-base-zh-v1.5. https:\/\/huggingface.co\/BAAI\/bge-base-zh-v1.5"},{"key":"e_1_3_2_2_4_1","volume-title":"SparTerm: Learning term-based sparse representation for fast text retrieval. arXiv preprint arXiv:2010.00768","author":"Bai Yang","year":"2020","unstructured":"Yang Bai, Xiaoguang Li, Gang Wang, Chaoliang Zhang, Lifeng Shang, Jun Xu, Zhaowei Wang, Fangshan Wang, and Qun Liu. 2020. SparTerm: Learning term-based sparse representation for fast text retrieval. arXiv preprint arXiv:2010.00768 (2020)."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00175"},{"key":"e_1_3_2_2_6_1","unstructured":"Bilibili. 2023. Bilibili 2023 Environmental Social and Governance Report. https:\/\/ir.bilibili.com\/media\/0ivc54ou\/bilibili-2023-environmental-social-and-governance-report.pdf"},{"key":"e_1_3_2_2_7_1","volume-title":"Sabato Marco Siniscalchi, Pin-Yu Chen, and Eng-Siong Chng.","author":"Chen Chen","year":"2024","unstructured":"Chen Chen, Yuchen Hu, Chao-Han Huck Yang, Sabato Marco Siniscalchi, Pin-Yu Chen, and Eng-Siong Chng. 2024. Hyporadise: An open baseline for generative speech recognition with large language models. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.599"},{"key":"e_1_3_2_2_9_1","volume-title":"The 54th Statistical Report on the Development of Internet in China. https:\/\/www.cnnic.net.cn\/n4\/2024\/0829\/c88--11065.html","author":"CNNIC.","unstructured":"CNNIC. 2024. The 54th Statistical Report on the Development of Internet in China. https:\/\/www.cnnic.net.cn\/n4\/2024\/0829\/c88--11065.html"},{"key":"e_1_3_2_2_10_1","unstructured":"Chengyu Cui. 2020. A Python Lib for Chinese NLP Preprocessing & Parsing. https:\/\/github.com\/dongrixinyu\/JioNLP"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-56063-7_24"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463098"},{"key":"e_1_3_2_2_13_1","volume-title":"SimCSE: Simple Contrastive Learning of Sentence Embeddings. In 2021 Conference on Empirical Methods in Natural Language Processing, EMNLP","author":"Gao Tianyu","year":"2021","unstructured":"Tianyu Gao, Xingcheng Yao, and Danqi Chen. 2021. SimCSE: Simple Contrastive Learning of Sentence Embeddings. In 2021 Conference on Empirical Methods in Natural Language Processing, EMNLP 2021. Association for Computational Linguistics (ACL), 6894--6910."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00495"},{"key":"e_1_3_2_2_15_1","volume-title":"Deep Speech: Scaling up end-to-end speech recognition. arXiv preprint arXiv:1412.5567","author":"Hannun A","year":"2014","unstructured":"A Hannun. 2014. Deep Speech: Scaling up end-to-end speech recognition. arXiv preprint arXiv:1412.5567 (2014)."},{"key":"e_1_3_2_2_16_1","volume-title":"Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers). 690--696","author":"Heafield Kenneth","year":"2013","unstructured":"Kenneth Heafield, Ivan Pouzyrevsky, Jonathan H Clark, and Philipp Koehn. 2013. Scalable modified Kneser-Ney language model estimation. In Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers). 690--696."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053606"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612161"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/104"},{"key":"e_1_3_2_2_20_1","volume-title":"Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-tau Yih.","author":"Karpukhin Vladimir","year":"2020","unstructured":"Vladimir Karpukhin, Barlas Oguz, Sewon Min, Patrick SH Lewis, Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-tau Yih. 2020. Dense Passage Retrieval for Open-Domain Question Answering.. In EMNLP (1). 6769--6781."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401075"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.161"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-368"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3350906"},{"key":"e_1_3_2_2_25_1","volume-title":"Distilling dense representations for ranking using tightly-coupled teachers. arXiv preprint arXiv:2010.11386","author":"Lin Sheng-Chieh","year":"2020","unstructured":"Sheng-Chieh Lin, Jheng-Hong Yang, and Jimmy Lin. 2020. Distilling dense representations for ranking using tightly-coupled teachers. arXiv preprint arXiv:2010.11386 (2020)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19830-4_24"},{"key":"e_1_3_2_2_27_1","volume-title":"Bilibili: Tapping Into China's AI Boom With Chinese YouTube. https:\/\/seekingalpha.com\/article\/4616306-bilibili-tapping-into-chinas-ai-boom-with-chinese-youtube","author":"LEL","year":"2023","unstructured":"LEL Investment LLC. 2023. Bilibili: Tapping Into China's AI Boom With Chinese YouTube. https:\/\/seekingalpha.com\/article\/4616306-bilibili-tapping-into-chinas-ai-boom-with-chinese-youtube"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.395"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.07.028"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475703"},{"key":"e_1_3_2_2_31_1","volume-title":"Leveraging Semantic Search and LLMs for Domain-Adaptive Information Retrieval. In International Conference on Information and Software Technologies. Springer, 148--159","author":"Maoro Falk","year":"2023","unstructured":"Falk Maoro, Benjamin Vehmeyer, and Michaela Geierhos. 2023. Leveraging Semantic Search and LLMs for Domain-Adaptive Information Retrieval. In International Conference on Information and Software Technologies. Springer, 148--159."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData59044.2023.10386476"},{"key":"e_1_3_2_2_33_1","unstructured":"Navid Mehrdad Hrushikesh Mohapatra Mossaab Bagdouri Prijith Chandran Alessandro Magnani Xunfan Cai Ajit Puthenputhussery Sachin Yadav Tony Lee ChengXiang Zhai and Ciya Liao. 2024. Large Language Models for Relevance Judgment in Product Search. arxiv: 2406.00247 [cs.IR] https:\/\/arxiv.org\/abs\/2406.00247"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2010-343"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-49127-9_28"},{"key":"e_1_3_2_2_36_1","volume-title":"Document expansion by query prediction. arXiv preprint arXiv:1904.08375","author":"Nogueira Rodrigo","year":"2019","unstructured":"Rodrigo Nogueira, Wei Yang, Jimmy Lin, and Kyunghyun Cho. 2019. Document expansion by query prediction. arXiv preprint arXiv:1904.08375 (2019)."},{"key":"e_1_3_2_2_37_1","volume-title":"Online Video Consumption Statistics","year":"2024","unstructured":"oberlo. 2024. Online Video Consumption Statistics (2024). https:\/\/www.oberlo.com\/statistics\/online-video-consumption-statistics"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589335.3648298"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"crossref","unstructured":"Yifan Peng Jinchuan Tian William Chen Siddhant Arora Brian Yan Yui Sudo Muhammad Shakeel Kwanghee Choi Jiatong Shi Xuankai Chang et al. 2024b. OWSM v3.1: Better and faster open whisper-style speech models based on e-branchformer. arXiv preprint arXiv:2401.16658 (2024).","DOI":"10.21437\/Interspeech.2024-1194"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU57964.2023.10389676"},{"key":"e_1_3_2_2_41_1","first-page":"1","article-title":"Scaling speech technology to 1,000 languages","volume":"25","author":"Pratap Vineel","year":"2024","unstructured":"Vineel Pratap, Andros Tjandra, Bowen Shi, Paden Tomasello, Arun Babu, Sayani Kundu, Ali Elkahky, Zhaoheng Ni, Apoorv Vyas, Maryam Fazel-Zarandi, et al. 2024. Scaling speech technology to 1,000 languages. Journal of Machine Learning Research, Vol. 25, 97 (2024), 1--52.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_42_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_2_43_1","volume-title":"International conference on machine learning. PMLR, 28492--28518","author":"Radford Alec","year":"2023","unstructured":"Alec Radford, Jong Wook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. 2023. Robust speech recognition via large-scale weak supervision. In International conference on machine learning. PMLR, 28492--28518."},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1410"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1108\/00220410410560582"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000019"},{"key":"e_1_3_2_2_47_1","volume-title":"Term-weighting approaches in automatic text retrieval. Information processing & management","author":"Salton Gerard","year":"1988","unstructured":"Gerard Salton and Christopher Buckley. 1988. Term-weighting approaches in automatic text retrieval. Information processing & management, Vol. 24, 5 (1988), 513--523."},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00756"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.923"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.392"},{"key":"e_1_3_2_2_51_1","unstructured":"Jiaxin Wu Chong-Wah Ngo Wing-Kwong Chan and Sheng-Hua Zhong. 2024. LLM-based query paraphrasing for video search. arxiv: 2407.12341 [cs.MM] https:\/\/arxiv.org\/abs\/2407.12341"},{"key":"e_1_3_2_2_52_1","volume-title":"When search engine services meet large language models: visions and challenges","author":"Xiong Haoyi","year":"2024","unstructured":"Haoyi Xiong, Jiang Bian, Yuchen Li, Xuhong Li, Mengnan Du, Shuaiqiang Wang, Dawei Yin, and Sumi Helal. 2024. When search engine services meet large language models: visions and challenges. IEEE Transactions on Services Computing (2024)."},{"key":"e_1_3_2_2_53_1","volume-title":"MiniRBT: A Two-stage Distilled Small Chinese Pre-trained Model. arXiv preprint arXiv:2304.00717","author":"Yao Xin","year":"2023","unstructured":"Xin Yao, Ziqing Yang, Yiming Cui, and Shijin Wang. 2023. MiniRBT: A Two-stage Distilled Small Chinese Pre-trained Model. arXiv preprint arXiv:2304.00717 (2023)."},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.820"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3271800"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.714"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"crossref","unstructured":"Ding Zhao Tara N Sainath David Rybach Pat Rondon Deepti Bhatia Bo Li and Ruoming Pang. 2019. Shallow-Fusion End-to-End Contextual Biasing.. In Interspeech. 1418--1422.","DOI":"10.21437\/Interspeech.2019-1209"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-industry.58"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00877"}],"event":{"name":"WWW '25: The ACM Web Conference 2025","location":"Sydney NSW Australia","acronym":"WWW '25","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Companion Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701716.3715220","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3701716.3715220","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T03:03:17Z","timestamp":1759892597000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3701716.3715220"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,8]]},"references-count":59,"alternative-id":["10.1145\/3701716.3715220","10.1145\/3701716"],"URL":"https:\/\/doi.org\/10.1145\/3701716.3715220","relation":{},"subject":[],"published":{"date-parts":[[2025,5,8]]},"assertion":[{"value":"2025-05-23","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}