{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T05:55:36Z","timestamp":1777614936128,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,2,27]],"date-time":"2023-02-27T00:00:00Z","timestamp":1677456000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,2,27]]},"DOI":"10.1145\/3539597.3570383","type":"proceedings-article","created":{"date-parts":[[2023,2,22]],"date-time":"2023-02-22T23:27:00Z","timestamp":1677108420000},"page":"994-1002","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":12,"title":["Revisiting Code Search in a Two-Stage Paradigm"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5371-7780","authenticated-orcid":false,"given":"Fan","family":"Hu","sequence":"first","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7761-7269","authenticated-orcid":false,"given":"Yanlin","family":"Wang","sequence":"additional","affiliation":[{"name":"Sun Yat-sen University, Zhuhai, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7625-0650","authenticated-orcid":false,"given":"Lun","family":"Du","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0220-8310","authenticated-orcid":false,"given":"Xirong","family":"Li","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3063-9425","authenticated-orcid":false,"given":"Hongyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"The University of Newcastle, Sydney, NSW, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0360-6089","authenticated-orcid":false,"given":"Shi","family":"Han","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9230-2799","authenticated-orcid":false,"given":"Dongmei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Microsoft Research, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,2,27]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"crossref","unstructured":"Javed A Aslam and Mark Montague. 2001. Models for metasearch. In SIGIR.","DOI":"10.1145\/383952.384007"},{"key":"e_1_3_2_2_2_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL-HLT.","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In NAACL-HLT."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"crossref","unstructured":"Lun Du Xiaozhou Shi Yanlin Wang Ensheng Shi Shi Han and Dongmei Zhang. 2021. Is a single model enough? mucos: A multi-model ensemble learning approach for semantic code search. In CIKM.","DOI":"10.1145\/3459637.3482127"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"crossref","unstructured":"Zhangyin Feng Daya Guo Duyu Tang Nan Duan Xiaocheng Feng Ming Gong Linjun Shou Bing Qin Ting Liu Daxin Jiang and Ming Zhou. 2020. CodeBERT: A Pre-Trained Model for Programming and Natural Languages. In EMNLP.","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"key":"e_1_3_2_2_5_1","unstructured":"Edward A Fox M Prabhakar Koushik Joseph Shaw Russell Modlin Durgesh Rao et al. 1993. Combining evidence from multiple searches. In The first text retrieval conference (TREC-1)."},{"key":"e_1_3_2_2_6_1","volume-title":"Cascaded Fast and Slow Models for Efficient Semantic Code Search. arXiv","author":"Gotmare Akhilesh Deepak","year":"2021","unstructured":"Akhilesh Deepak Gotmare, Junnan Li, Shafiq Joty, and Steven CH Hoi. 2021. Cascaded Fast and Slow Models for Efficient Semantic Code Search. arXiv (2021)."},{"key":"e_1_3_2_2_7_1","unstructured":"Wenchao Gu Yanlin Wang Lun Du Hongyu Zhang Shi Han Dongmei Zhang and Michael R Lyu. 2022. Accelerating Code Search with Deep Hashing and Code Classification. In ACL."},{"key":"e_1_3_2_2_8_1","unstructured":"Xiaodong Gu Hongyu Zhang and Sunghun Kim. 2018. Deep code search. In ICSE."},{"key":"e_1_3_2_2_9_1","volume-title":"Colin B. Clement, Dawn Drain, Neel Sundaresan, Jian Yin, Daxin Jiang, and Ming Zhou.","author":"Guo Daya","year":"2021","unstructured":"Daya Guo, Shuo Ren, Shuai Lu, Zhangyin Feng, Duyu Tang, Shujie Liu, Long Zhou, Nan Duan, Alexey Svyatkovskiy, Shengyu Fu, Michele Tufano, Shao Kun Deng, Colin B. Clement, Dawn Drain, Neel Sundaresan, Jian Yin, Daxin Jiang, and Ming Zhou. 2021. GraphCodeBERT: Pre-training Code Representations with Data Flow. In ICLR."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"crossref","unstructured":"Emily Hill Lori Pollock and K Vijay-Shanker. 2011. Improving source code search with natural language phrasal representations of method signatures. In ASE.","DOI":"10.1109\/ASE.2011.6100115"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"crossref","unstructured":"Junjie Huang Duyu Tang Linjun Shou Ming Gong Ke Xu Daxin Jiang Ming Zhou and Nan Duan. 2021. CoSQA: 20 000 Web Queries for Code Search and Question Answering. In ACL.","DOI":"10.18653\/v1\/2021.acl-long.442"},{"key":"e_1_3_2_2_12_1","volume-title":"A code-description representation learning model based on attention","author":"Huang Qing","unstructured":"Qing Huang, An Qiu, Maosheng Zhong, and Yuan Wang. 2020. A code-description representation learning model based on attention. In SANER. IEEE."},{"key":"e_1_3_2_2_13_1","volume-title":"Codesearchnet challenge: Evaluating the state of semantic code search. arXiv","author":"Husain Hamel","year":"2019","unstructured":"Hamel Husain, Ho-Hsiang Wu, Tiferet Gazit, Miltiadis Allamanis, and Marc Brockschmidt. 2019. Codesearchnet challenge: Evaluating the state of semantic code search. arXiv (2019)."},{"key":"e_1_3_2_2_14_1","volume-title":"\u00c9tude comparative de la distribution florale dans une portion des Alpes et des Jura. Bull Soc Vaudoise Sci Nat","author":"Jaccard Paul","year":"1901","unstructured":"Paul Jaccard. 1901. \u00c9tude comparative de la distribution florale dans une portion des Alpes et des Jura. Bull Soc Vaudoise Sci Nat (1901), 547--579."},{"key":"e_1_3_2_2_15_1","volume-title":"Codehow: Effective code search based on api understanding and extended boolean model (e). In ASE.","author":"Lv Fei","year":"2015","unstructured":"Fei Lv, Hongyu Zhang, Jian-guang Lou, Shaowei Wang, Dongmei Zhang, and Jianjun Zhao. 2015. Codehow: Effective code search based on api understanding and extended boolean model (e). In ASE."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSC.2016.2560165"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSC.2016.2560165"},{"key":"e_1_3_2_2_18_1","volume-title":"The probabilistic relevance framework: BM25 and beyond","author":"Robertson Stephen","unstructured":"Stephen Robertson and Hugo Zaragoza. 2009. The probabilistic relevance framework: BM25 and beyond. Now Publishers Inc."},{"key":"e_1_3_2_2_19_1","volume-title":"Relevance weighting of search terms. Journal of the American Society for Information science","author":"Robertson Stephen E","year":"1976","unstructured":"Stephen E Robertson and K Sparck Jones. 1976. Relevance weighting of search terms. Journal of the American Society for Information science (1976), 129--146."},{"key":"e_1_3_2_2_20_1","volume-title":"Latent semantic indexing: An overview. Techn. rep. INFOSYS","author":"Rosario Barbara","year":"2000","unstructured":"Barbara Rosario. 2000. Latent semantic indexing: An overview. Techn. rep. INFOSYS (2000), 1--16."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"crossref","unstructured":"Saksham Sachdev Hongyu Li Sifei Luan Seohyun Kim Koushik Sen and Satish Chandra. 2018. Retrieval on source code: a neural code search. In MAPL.","DOI":"10.1145\/3211346.3211353"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"crossref","unstructured":"Abdus Satter and Kazi Sakib. 2016. A search log mining based query expansion technique to improve effectiveness in code search. In ICCIT.","DOI":"10.1109\/ICCITECHN.2016.7860264"},{"key":"e_1_3_2_2_23_1","volume-title":"Introduction to information retrieval","author":"Sch\u00fctze Hinrich","unstructured":"Hinrich Sch\u00fctze, Christopher D Manning, and Prabhakar Raghavan. 2008. Introduction to information retrieval. Vol. 39. Cambridge University Press Cambridge."},{"key":"e_1_3_2_2_24_1","volume-title":"Combination of multiple searches. NIST SPECIAL PUBLICATION SP","author":"Shaw Joseph A","year":"1995","unstructured":"Joseph A Shaw and Edward A Fox. 1995. Combination of multiple searches. NIST SPECIAL PUBLICATION SP (1995), 105--105."},{"key":"e_1_3_2_2_25_1","volume-title":"Enhancing Semantic Code Search with Multimodal Contrastive Learning and Soft Data Augmentation. arXiv preprint arXiv:2204.03293","author":"Shi Ensheng","year":"2022","unstructured":"Ensheng Shi, Wenchao Gub, Yanlin Wang, Lun Du, Hongyu Zhang, Shi Han, Dongmei Zhang, and Hongbin Sun. 2022. Enhancing Semantic Code Search with Multimodal Contrastive Learning and Soft Data Augmentation. arXiv preprint arXiv:2204.03293 (2022)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"crossref","unstructured":"Jianhang Shuai Ling Xu Chao Liu Meng Yan Xin Xia and Yan Lei. 2020. Improving code search with co-attentive representation learning. In ICPC.","DOI":"10.1145\/3387904.3389269"},{"key":"e_1_3_2_2_27_1","unstructured":"Janice Singer Timothy C. Lethbridge Norman G. Vinson and Nicolas Anquetil. 1997. An examination of software engineering work practices. In CASCON."},{"key":"e_1_3_2_2_28_1","volume-title":"On the Importance of Building High-quality Training Datasets for Neural Code Search. arXiv","author":"Sun Zhensu","year":"2022","unstructured":"Zhensu Sun, Li Li, Yan Liu, and Xiaoning Du. 2022. On the Importance of Building High-quality Training Datasets for Neural Code Search. arXiv (2022)."},{"key":"e_1_3_2_2_29_1","volume-title":"Hung Dang Phan, Trong Duc Nguyen, and Tien N Nguyen.","author":"Nguyen Thanh Van","year":"2017","unstructured":"Thanh Van Nguyen, Anh Tuan Nguyen, Hung Dang Phan, Trong Duc Nguyen, and Tien N Nguyen. 2017. Combining word2vec with revised vector space model for better code retrieval. In ICSE-C."},{"key":"e_1_3_2_2_30_1","unstructured":"S Vijayarani Ms J Ilamathi Ms Nithya et al. 2015. Preprocessing techniques for text mining-an overview. International Journal of Computer Science & Communication Networks (2015) 7--16."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.3390\/app10217519"},{"key":"e_1_3_2_2_32_1","volume-title":"IECS: Intent-enforced code search via extended Boolean model. Journal of Intelligent & Fuzzy Systems","author":"Yang Yangrui","year":"2017","unstructured":"Yangrui Yang and Qing Huang. 2017. IECS: Intent-enforced code search via extended Boolean model. Journal of Intelligent & Fuzzy Systems (2017), 2565--2576."}],"event":{"name":"WSDM '23: The Sixteenth ACM International Conference on Web Search and Data Mining","location":"Singapore Singapore","acronym":"WSDM '23","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the Sixteenth ACM International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539597.3570383","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3539597.3570383","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:51:29Z","timestamp":1750182689000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539597.3570383"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,27]]},"references-count":32,"alternative-id":["10.1145\/3539597.3570383","10.1145\/3539597"],"URL":"https:\/\/doi.org\/10.1145\/3539597.3570383","relation":{},"subject":[],"published":{"date-parts":[[2023,2,27]]},"assertion":[{"value":"2023-02-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}