{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T11:48:35Z","timestamp":1774352915462,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":57,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,11,26]],"date-time":"2023-11-26T00:00:00Z","timestamp":1700956800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,11,26]]},"DOI":"10.1145\/3624918.3625324","type":"proceedings-article","created":{"date-parts":[[2023,11,23]],"date-time":"2023-11-23T08:49:17Z","timestamp":1700729357000},"page":"212-222","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Typos-aware Bottlenecked Pre-Training for Robust Dense Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6711-0955","authenticated-orcid":false,"given":"Shengyao","family":"Zhuang","sequence":"first","affiliation":[{"name":"The University of Queensland, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1050-7708","authenticated-orcid":false,"given":"Linjun","family":"Shou","sequence":"additional","affiliation":[{"name":"Microsoft, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2200-8711","authenticated-orcid":false,"given":"Jian","family":"Pei","sequence":"additional","affiliation":[{"name":"Duke University, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6140-7187","authenticated-orcid":false,"given":"Ming","family":"Gong","sequence":"additional","affiliation":[{"name":"Microsoft, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9750-1626","authenticated-orcid":false,"given":"Houxing","family":"Ren","sequence":"additional","affiliation":[{"name":"Beihang University, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0271-5563","authenticated-orcid":false,"given":"Guido","family":"Zuccon","sequence":"additional","affiliation":[{"name":"The University of Queensland, Australia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6657-5806","authenticated-orcid":false,"given":"Daxin","family":"Jiang","sequence":"additional","affiliation":[{"name":"Microsoft, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,11,26]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Ms marco: A human generated machine reading comprehension dataset. arXiv preprint arXiv:1611.09268","author":"Bajaj Payal","year":"2016","unstructured":"Payal Bajaj, Daniel Campos, Nick Craswell, Li Deng, Jianfeng Gao, Xiaodong Liu, Rangan Majumder, Andrew McNamara, Bhaskar Mitra, Tri Nguyen, 2016. Ms marco: A human generated machine reading comprehension dataset. arXiv preprint arXiv:1611.09268 (2016)."},{"key":"e_1_3_2_1_2_1","volume-title":"ECIR (2)(Lecture Notes in Computer Science), Vol.\u00a013186","author":"Bassani Elias","unstructured":"Elias Bassani. 2022. ranx: A Blazing-Fast Python Library for Ranking Evaluation and Comparison. In ECIR (2)(Lecture Notes in Computer Science), Vol.\u00a013186. Springer, 259\u2013264."},{"key":"e_1_3_2_1_3_1","volume-title":"DENSE RETRIEVAL WITH APACHE SOLR NEURAL SEARCH. In ECIR 2022 Industry Day.","author":"Benedetti Alessandro","year":"2022","unstructured":"Alessandro Benedetti and Elia Porciani. 2022. DENSE RETRIEVAL WITH APACHE SOLR NEURAL SEARCH. In ECIR 2022 Industry Day."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-99736-6_7"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/275"},{"key":"e_1_3_2_1_6_1","volume-title":"DiffCSE: Difference-based Contrastive Learning for Sentence Embeddings. In Annual Conference of the North American Chapter of the Association for Computational Linguistics.","author":"Chuang Yung-Sung","year":"2022","unstructured":"Yung-Sung Chuang, Rumen Dangovski, Hongyin Luo, Yang Zhang, Shiyu Chang, Marin Solja\u010di\u0107, Shang-Wen Li, Wen-Tau Yih, Yoon Kim, and James Glass. 2022. DiffCSE: Difference-based Contrastive Learning for Sentence Embeddings. In Annual Conference of the North American Chapter of the Association for Computational Linguistics."},{"key":"e_1_3_2_1_7_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=r1xMH1BtvB","author":"Clark Kevin","year":"2020","unstructured":"Kevin Clark, Minh-Thang Luong, Quoc\u00a0V. Le, and Christopher\u00a0D. Manning. 2020. ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=r1xMH1BtvB"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/n19-1423"},{"key":"e_1_3_2_1_9_1","volume-title":"Open Challenges in the Application of Dense Retrieval for Case Law Search. In ECIR 2022 Industry Day.","author":"Du Pan","year":"2022","unstructured":"Pan Du, Hawre Hosseini, George Sanchez, and Filippo Pompili. 2022. Open Challenges in the Application of Dense Retrieval for Case Law Search. In ECIR 2022 Industry Day."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.coling-main.609"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/32206.32212"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.75"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.203"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-72240-1_26"},{"key":"e_1_3_2_1_15_1","volume-title":"Tevatron: An Efficient and Flexible Toolkit for Dense Retrieval. ArXiv abs\/2203.05765","author":"Gao Luyu","year":"2022","unstructured":"Luyu Gao, Xueguang Ma, Jimmy\u00a0J. Lin, and Jamie Callan. 2022. Tevatron: An Efficient and Flexible Toolkit for Dense Retrieval. ArXiv abs\/2203.05765 (2022)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080749"},{"key":"e_1_3_2_1_17_1","volume-title":"Improving efficient neural ranking models with cross-architecture knowledge distillation. arXiv preprint arXiv:2010.02666","author":"Hofst\u00e4tter Sebastian","year":"2020","unstructured":"Sebastian Hofst\u00e4tter, Sophia Althammer, Michael Schr\u00f6der, Mete Sertkan, and Allan Hanbury. 2020. Improving efficient neural ranking models with cross-architecture knowledge distillation. arXiv preprint arXiv:2010.02666 (2020)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462891"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403305"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3536324"},{"key":"e_1_3_2_1_22_1","volume-title":"The tale of two MS MARCO\u2013and their unfair comparisons. arXiv preprint arXiv:2304.12904","author":"Lassance Carlos","year":"2023","unstructured":"Carlos Lassance and St\u00e9phane Clinchant. 2023. The tale of two MS MARCO\u2013and their unfair comparisons. arXiv preprint arXiv:2304.12904 (2023)."},{"key":"e_1_3_2_1_23_1","volume-title":"Distilling dense representations for ranking using tightly-coupled teachers. arXiv preprint arXiv:2010.11386","author":"Lin Sheng-Chieh","year":"2020","unstructured":"Sheng-Chieh Lin, Jheng-Hong Yang, and Jimmy Lin. 2020. Distilling dense representations for ranking using tightly-coupled teachers. arXiv preprint arXiv:2010.11386 (2020)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.repl4nlp-1.17"},{"key":"e_1_3_2_1_25_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_1_26_1","volume-title":"RetroMAE: Pre-training Retrieval-oriented Transformers via Masked Auto-Encoder. arXiv preprint arXiv:2205.12035","author":"Liu Zheng","year":"2022","unstructured":"Zheng Liu and Yingxia Shao. 2022. RetroMAE: Pre-training Retrieval-oriented Transformers via Masked Auto-Encoder. arXiv preprint arXiv:2205.12035 (2022)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.220"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463262"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/1416950.1416952"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/312624.312618"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-99736-6_27"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.466"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.191"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.224"},{"key":"e_1_3_2_1_35_1","volume-title":"A thorough examination on zero-shot dense retrieval. arXiv preprint arXiv:2204.12755","author":"Ren Ruiyang","year":"2022","unstructured":"Ruiyang Ren, Yingqi Qu, Jing Liu, Wayne\u00a0Xin Zhao, Qifei Wu, Yuchen Ding, Hua Wu, Haifeng Wang, and Ji-Rong Wen. 2022. A thorough examination on zero-shot dense retrieval. arXiv preprint arXiv:2204.12755 (2022)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.496"},{"key":"e_1_3_2_1_37_1","volume-title":"LexMAE: Lexicon-Bottlenecked Pretraining for Large-Scale Retrieval. arXiv preprint arXiv:2208.14754","author":"Shen Tao","year":"2022","unstructured":"Tao Shen, Xiubo Geng, Chongyang Tao, Can Xu, Xiaolong Huang, Binxing Jiao, Linjun Yang, and Daxin Jiang. 2022. LexMAE: Lexicon-Bottlenecked Pretraining for Large-Scale Retrieval. arXiv preprint arXiv:2208.14754 (2022)."},{"key":"e_1_3_2_1_38_1","volume-title":"Analysing the Robustness of Dual Encoders for Dense Retrieval Against Misspellings(SIGIR \u201922)","author":"Sidiropoulos Georgios","unstructured":"Georgios Sidiropoulos and Evangelos Kanoulas. 2022. Analysing the Robustness of Dual Encoders for Dense Retrieval Against Misspellings(SIGIR \u201922). Association for Computing Machinery, New York, NY, USA, 2132\u20132136."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1002\/1097-4571(2000)9999:9999<::AID-ASI1591>3.3.CO;2-I"},{"key":"e_1_3_2_1_40_1","unstructured":"Nandan Thakur Nils Reimers Andreas R\u00fcckl\u00e9 Abhishek Srivastava and Iryna Gurevych. 2021. BEIR: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2). https:\/\/openreview.net\/forum?id=wCu6T5xFjeJ"},{"key":"e_1_3_2_1_41_1","volume-title":"Lecture Notes on Neural Information Retrieval. arXiv preprint arXiv:2207.13443","author":"Tonellotto Nicola","year":"2022","unstructured":"Nicola Tonellotto. 2022. Lecture Notes on Neural Information Retrieval. arXiv preprint arXiv:2207.13443 (2022)."},{"key":"e_1_3_2_1_42_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_1_43_1","volume-title":"SimLM: Pre-training with Representation Bottleneck for Dense Passage Retrieval. arXiv preprint arXiv:2207.02578","author":"Wang Liang","year":"2022","unstructured":"Liang Wang, Nan Yang, Xiaolong Huang, Binxing Jiao, Linjun Yang, Daxin Jiang, Rangan Majumder, and Furu Wei. 2022. SimLM: Pre-training with Representation Bottleneck for Dense Passage Retrieval. arXiv preprint arXiv:2207.02578 (2022)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1002\/asi.10262"},{"key":"e_1_3_2_1_45_1","volume-title":"Spelling correction in the PubMed search engine. Information retrieval 9, 5","author":"Wilbur W\u00a0John","year":"2006","unstructured":"W\u00a0John Wilbur, Won Kim, and Natalie Xie. 2006. Spelling correction in the PubMed search engine. Information retrieval 9, 5 (2006), 543\u2013564."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"e_1_3_2_1_47_1","volume-title":"Are neural ranking models robust?ACM Transactions on Information Systems 41, 2","author":"Wu Chen","year":"2022","unstructured":"Chen Wu, Ruqing Zhang, Jiafeng Guo, Yixing Fan, and Xueqi Cheng. 2022. Are neural ranking models robust?ACM Transactions on Information Systems 41, 2 (2022), 1\u201336."},{"key":"e_1_3_2_1_48_1","volume-title":"Contextual mask auto-encoder for dense passage retrieval. arXiv preprint arXiv:2208.07670","author":"Wu Xing","year":"2022","unstructured":"Xing Wu, Guangyuan Ma, Meng Lin, Zijia Lin, Zhongyuan Wang, and Songlin Hu. 2022. Contextual mask auto-encoder for dense passage retrieval. arXiv preprint arXiv:2208.07670 (2022)."},{"key":"e_1_3_2_1_49_1","volume-title":"Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval. In International Conference on Learning Representations.","author":"Xiong Lee","year":"2020","unstructured":"Lee Xiong, Chenyan Xiong, Ye Li, Kwok-Fung Tang, Jialin Liu, Paul\u00a0N Bennett, Junaid Ahmed, and Arnold Overwijk. 2020. Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462880"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3462880"},{"key":"e_1_3_2_1_52_1","volume-title":"RepBERT: Contextualized text embeddings for first-stage retrieval. arXiv preprint arXiv:2006.15498","author":"Zhan Jingtao","year":"2020","unstructured":"Jingtao Zhan, Jiaxin Mao, Yiqun Liu, Min Zhang, and Shaoping Ma. 2020. RepBERT: Contextualized text embeddings for first-stage retrieval. arXiv preprint arXiv:2006.15498 (2020)."},{"key":"e_1_3_2_1_53_1","volume-title":"Dense text retrieval based on pretrained language models: A survey. arXiv preprint arXiv:2211.14876","author":"Zhao Wayne\u00a0Xin","year":"2022","unstructured":"Wayne\u00a0Xin Zhao, Jing Liu, Ruiyang Ren, and Ji-Rong Wen. 2022. Dense text retrieval based on pretrained language models: A survey. arXiv preprint arXiv:2211.14876 (2022)."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3572960.3572981"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.225"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531658"},{"key":"e_1_3_2_1_57_1","volume-title":"CharacterBERT and Self-Teaching for Improving the Robustness of Dense Retrievers on Queries with Typos(SIGIR \u201922)","author":"Zhuang Shengyao","unstructured":"Shengyao Zhuang and Guido Zuccon. 2022. CharacterBERT and Self-Teaching for Improving the Robustness of Dense Retrievers on Queries with Typos(SIGIR \u201922). Association for Computing Machinery, New York, NY, USA, 1444\u20131454."}],"event":{"name":"SIGIR-AP '23: Annual International ACM SIGIR Conference on Research and Development in Information Retrieval in the Asia Pacific Region","location":"Beijing China","acronym":"SIGIR-AP '23","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the Annual International ACM SIGIR Conference on Research and Development in Information Retrieval in the Asia Pacific Region"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624918.3625324","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3624918.3625324","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T21:32:23Z","timestamp":1755898343000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3624918.3625324"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,26]]},"references-count":57,"alternative-id":["10.1145\/3624918.3625324","10.1145\/3624918"],"URL":"https:\/\/doi.org\/10.1145\/3624918.3625324","relation":{},"subject":[],"published":{"date-parts":[[2023,11,26]]},"assertion":[{"value":"2023-11-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}