{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:10:49Z","timestamp":1750219849438,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,8,9]],"date-time":"2023-08-09T00:00:00Z","timestamp":1691539200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Faculdade de Ci\u00eancias e Tecnologia, Universidade Nova de Lisboa","award":["SFRH\/BD\/150497\/2019"],"award-info":[{"award-number":["SFRH\/BD\/150497\/2019"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,8,9]]},"DOI":"10.1145\/3578337.3605131","type":"proceedings-article","created":{"date-parts":[[2023,8,9]],"date-time":"2023-08-09T22:12:46Z","timestamp":1691619166000},"page":"13-22","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["KALE: Using a K-Sparse Projector for Lexical Expansion"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3766-8077","authenticated-orcid":false,"given":"Lu\u00eds","family":"Borges","sequence":"first","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3856-2936","authenticated-orcid":false,"given":"Bruno","family":"Martins","sequence":"additional","affiliation":[{"name":"University of Lisbon, Lisbon, Portugal"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1211-7754","authenticated-orcid":false,"given":"Jamie","family":"Callan","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, PA, USA"}]}],"member":"320","published-online":{"date-parts":[[2023,8,9]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of the Workshop on 
Cognitive Computation at the Annual Conference on Neural Information Processing Systems.","author":"Bajaj Payal","year":"2016","unstructured":"Payal Bajaj, Daniel Campos, Nick Craswell, Li Deng, Jianfeng Gao, Xiaodong Liu, Rangan Majumder, Andrew McNamara, Bhaskar Mitra, Tri Nguyen, et al. 2016. MS MARCO: A human generated machine reading comprehension dataset. In Proceedings of the Workshop on Cognitive Computation at the Annual Conference on Neural Information Processing Systems."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3103963"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557456"},{"key":"e_1_3_2_1_4_1","volume-title":"Overview of the TREC 2019 deep learning track. arXiv preprint arXiv:2003","author":"Craswell Nick","year":"2020","unstructured":"Nick Craswell, Bhaskar Mitra, Emine Yilmaz, Daniel Campos, and Ellen M Voorhees. 2020. Overview of the TREC 2019 deep learning track. arXiv preprint arXiv:2003.07820 (2020)."},{"key":"e_1_3_2_1_5_1","volume-title":"Overview of the TREC 2020 deep learning track. arXiv preprint arXiv:2102","author":"Craswell Nick","year":"2021","unstructured":"Nick Craswell, Bhaskar Mitra, Emine Yilmaz, Daniel Campos, and Ellen M Voorhees. 2021. Overview of the TREC 2020 deep learning track. arXiv preprint arXiv:2102.07662 (2021)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401204"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463098"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.75"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.acl-long.203"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-28238-6_31"},{"key":"e_1_3_2_1_11_1","volume-title":"Improving efficient neural ranking models with cross-architecture knowledge distillation. 
arXiv preprint arXiv:2010.02666","author":"Sebastian","year":"2020","unstructured":"Sebastian Hofst\u00e4tter, Sophia Althammer, Michael Schr\u00f6der, Mete Sertkan, and Allan Hanbury. 2020. Improving efficient neural ranking models with cross-architecture knowledge distillation. arXiv preprint arXiv:2010.02666 (2020)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.78"},{"key":"e_1_3_2_1_13_1","article-title":"Billion-scale similarity search with gpus","volume":"7","author":"Johnson Jeff","year":"2019","unstructured":"Jeff Johnson, Matthijs Douze, and Herv\u00e9 J\u00e9gou. 2019. Billion-scale similarity search with gpus. IEEE Transactions on Big Data, Vol. 7, 3 (2019).","journal-title":"IEEE Transactions on Big Data"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401075"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.7152\/nasko.v3i1.12787"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531833"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463066"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463238"},{"key":"e_1_3_2_1_20_1","volume-title":"Distilling dense representations for ranking using tightly-coupled teachers. arXiv preprint arXiv:2010.11386","author":"Lin Sheng-Chieh","year":"2020","unstructured":"Sheng-Chieh Lin, Jheng-Hong Yang, and Jimmy Lin. 2020. Distilling dense representations for ranking using tightly-coupled teachers. arXiv preprint arXiv:2010.11386 (2020)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.repl4nlp-1.17"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583421"},{"key":"e_1_3_2_1_23_1","volume-title":"RetroMAE: Pre-training retrieval-oriented transformers via masked auto-encoder. 
arXiv preprint arXiv:2205.12035","author":"Liu Zheng","year":"2022","unstructured":"Zheng Liu and Yingxia Shao. 2022. RetroMAE: Pre-training retrieval-oriented transformers via masked auto-encoder. arXiv preprint arXiv:2205.12035 (2022)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463030"},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of the International Conference on Machine Learning.","author":"Menon Aditya","year":"2022","unstructured":"Aditya Menon, Sadeep Jayasumana, Ankit Singh Rawat, Seungyeon Kim, Sashank Reddi, and Sanjiv Kumar. 2022. In defense of dual-encoders for neural ranking. In Proceedings of the International Conference on Machine Learning."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-28241-6_7"},{"key":"e_1_3_2_1_27_1","unstructured":"Jianmo Ni Chen Qu Jing Lu Zhuyun Dai Gustavo Hern\u00e1ndez \u00c1brego Ji Ma Vincent Y Zhao Yi Luan Keith B Hall Ming-Wei Chang et al. 2021. Large dual encoders are generalizable retrievers. arXiv preprint arXiv:2112.07899 (2021)."},{"key":"e_1_3_2_1_28_1","volume-title":"From doc2query to docTTTTTquery. Online preprint","author":"Nogueira Rodrigo","year":"2019","unstructured":"Rodrigo Nogueira, Jimmy Lin, and AI Epistemic. 2019a. From doc2query to docTTTTTquery. Online preprint, Vol. 6 (2019)."},{"key":"e_1_3_2_1_29_1","volume-title":"Document expansion by query prediction. arXiv preprint arXiv:1904.08375","author":"Nogueira Rodrigo","year":"2019","unstructured":"Rodrigo Nogueira, Wei Yang, Jimmy Lin, and Kyunghyun Cho. 2019b. Document expansion by query prediction. 
arXiv preprint arXiv:1904.08375 (2019)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.466"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1410"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.224"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1561\/1500000019"},{"key":"e_1_3_2_1_34_1","volume-title":"LexMAE: Lexicon-Bottlenecked Pretraining for Large-Scale Retrieval. arXiv preprint arXiv:2208.14754","author":"Shen Tao","year":"2022","unstructured":"Tao Shen, Xiubo Geng, Chongyang Tao, Can Xu, Xiaolong Huang, Binxing Jiao, Linjun Yang, and Daxin Jiang. 2022. LexMAE: Lexicon-Bottlenecked Pretraining for Large-Scale Retrieval. arXiv preprint arXiv:2208.14754 (2022)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1016\/S1741-8364(04)02424-2"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the Conference on Neural Information Processing Systems: Datasets and Benchmarks Track.","author":"Thakur Nandan","year":"2021","unstructured":"Nandan Thakur, Nils Reimers, Andreas R\u00fcckl\u00e9, Abhishek Srivastava, and Iryna Gurevych. 2021. BEIR: A Heterogeneous Benchmark for Zero-shot Evaluation of Information Retrieval Models. In Proceedings of the Conference on Neural Information Processing Systems: Datasets and Benchmarks Track."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1561\/9781680835434"},{"key":"e_1_3_2_1_38_1","volume-title":"RetroMAE v2: Duplex Masked Auto-Encoder For Pre-Training Retrieval-Oriented Language Models. arXiv preprint arXiv:2211.08769","author":"Xiao Shitao","year":"2022","unstructured":"Shitao Xiao and Zheng Liu. 2022. RetroMAE v2: Duplex Masked Auto-Encoder For Pre-Training Retrieval-Oriented Language Models. 
arXiv preprint arXiv:2211.08769 (2022)."},{"key":"e_1_3_2_1_39_1","volume-title":"Proceedings of the International Conference on Learning Representations.","author":"Xiong Lee","year":"2020","unstructured":"Lee Xiong, Chenyan Xiong, Ye Li, Kwok-Fung Tang, Jialin Liu, Paul N Bennett, Junaid Ahmed, and Arnold Overwijk. 2020. Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval. In Proceedings of the International Conference on Learning Representations."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3269206.3271800"},{"key":"e_1_3_2_1_41_1","volume-title":"Dense text retrieval based on pretrained language models: A survey. arXiv preprint arXiv:2211.14876","author":"Zhao Wayne Xin","year":"2022","unstructured":"Wayne Xin Zhao, Jing Liu, Ruiyang Ren, and Ji-Rong Wen. 2022. Dense text retrieval based on pretrained language models: A survey. arXiv preprint arXiv:2211.14876 (2022)."},{"key":"e_1_3_2_1_42_1","volume-title":"Daxin Jiang, Nan Duan, and Ji-Rong Wen.","author":"Zhou Kun","year":"2022","unstructured":"Kun Zhou, Xiao Liu, Yeyun Gong, Wayne Xin Zhao, Daxin Jiang, Nan Duan, and Ji-Rong Wen. 2022. MASTER: Multi-task Pre-trained Bottlenecked Masked Autoencoders are Better Dense Retrievers. arXiv preprint arXiv:2212.07841 (2022)."},{"key":"e_1_3_2_1_43_1","volume-title":"Inverted files for text search engines. ACM computing surveys (CSUR)","author":"Zobel Justin","year":"2006","unstructured":"Justin Zobel and Alistair Moffat. 2006. Inverted files for text search engines. ACM computing surveys (CSUR), Vol. 
38, 2 (2006), 6--es."}],"event":{"name":"ICTIR '23: The 2023 ACM SIGIR International Conference on the Theory of Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Taipei Taiwan","acronym":"ICTIR '23"},"container-title":["Proceedings of the 2023 ACM SIGIR International Conference on Theory of Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3578337.3605131","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3578337.3605131","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:47:04Z","timestamp":1750178824000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3578337.3605131"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,9]]},"references-count":43,"alternative-id":["10.1145\/3578337.3605131","10.1145\/3578337"],"URL":"https:\/\/doi.org\/10.1145\/3578337.3605131","relation":{},"subject":[],"published":{"date-parts":[[2023,8,9]]},"assertion":[{"value":"2023-08-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}