{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T05:51:16Z","timestamp":1777614676372,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":106,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,7,20]],"date-time":"2025-07-20T00:00:00Z","timestamp":1752969600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,20]]},"DOI":"10.1145\/3690624.3709290","type":"proceedings-article","created":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T18:44:43Z","timestamp":1743792283000},"page":"1115-1126","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["On the Necessity of World Knowledge for Mitigating Missing Labels in Extreme Classification"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-5474-6529","authenticated-orcid":false,"given":"Jatin","family":"Prakash","sequence":"first","affiliation":[{"name":"New York University, New York City, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1910-2253","authenticated-orcid":false,"given":"Anirudh","family":"Buvanesh","sequence":"additional","affiliation":[{"name":"Mila - Quebec Artificial Intelligence Institute, Montreal, QC, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0380-689X","authenticated-orcid":false,"given":"Bishal","family":"Santra","sequence":"additional","affiliation":[{"name":"Microsoft Research, Bengaluru, Karnataka, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6057-4351","authenticated-orcid":false,"given":"Deepak","family":"Saini","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Mountain View, CA, 
USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0048-1118","authenticated-orcid":false,"given":"Sachin","family":"Yadav","sequence":"additional","affiliation":[{"name":"Google DeepMind, Bengaluru, Karnataka, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4779-9588","authenticated-orcid":false,"given":"Jian","family":"Jiao","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Bellevue, WA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8595-8166","authenticated-orcid":false,"given":"Yashoteja","family":"Prabhu","sequence":"additional","affiliation":[{"name":"Microsoft Research, Bengaluru, Karnataka, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2086-3191","authenticated-orcid":false,"given":"Amit","family":"Sharma","sequence":"additional","affiliation":[{"name":"Microsoft Research, Bengaluru, Karnataka, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4516-6613","authenticated-orcid":false,"given":"Manik","family":"Varma","sequence":"additional","affiliation":[{"name":"Microsoft Research, Bengaluru, Karnataka, India"}]}],"member":"320","published-online":{"date-parts":[[2025,7,20]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Semdedup: Data-efficient learning at web-scale through semantic deduplication. arXiv preprint arXiv:2303.09540","author":"Abbas Amro","year":"2023","unstructured":"Amro Abbas, Kushal Tirumala, D\u00e1niel Simig, Surya Ganguli, and Ari S Morcos. 2023. Semdedup: Data-efficient learning at web-scale through semantic deduplication. arXiv preprint arXiv:2303.09540 (2023)."},{"key":"e_1_3_2_2_2_1","volume-title":"Ammar Ahmad Awan, Jyoti Aneja, Ahmed Awadallah, Hany Awadalla, Nguyen Bach, Amit Bahree, Arash Bakhtiari, Harkirat Behl, et al.","author":"Abdin Marah","year":"2024","unstructured":"Marah Abdin, Sam Ade Jacobs, Ammar Ahmad Awan, Jyoti Aneja, Ahmed Awadallah, Hany Awadalla, Nguyen Bach, Amit Bahree, Arash Bakhtiari, Harkirat Behl, et al. 2024. 
Phi-3 technical report: A highly capable language model locally on your phone. arXiv preprint arXiv:2404.14219 (2024)."},{"key":"e_1_3_2_2_3_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al. 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_2_4_1","unstructured":"Bo Adler Niket Agarwal Ashwath Aithal Dong H Anh Pallab Bhattacharya Annika Brundyn Jared Casper Bryan Catanzaro Sharon Clay Jonathan Cohen et al. 2024. Nemotron-4 340B Technical Report. arXiv preprint arXiv:2406.11704 (2024)."},{"key":"e_1_3_2_2_5_1","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"Aggarwal Pranjal","year":"2023","unstructured":"Pranjal Aggarwal, Ameet Deshpande, and Karthik Narasimhan. 2023. SemSup-XC: semantic supervision for zero and few-shot extreme classification. In Proceedings of the 40th International Conference on Machine Learning (Honolulu, Hawaii, USA) (ICML'23). JMLR.org, Article 11, 20 pages."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2488388.2488391"},{"key":"e_1_3_2_2_7_1","unstructured":"Alon Albalak Liangming Pan Colin Raffel and William Yang Wang. 2023. Efficient online data mixing for language model pre-training. In R0-FoMo: Robustness of Few-shot and Zero-shot Learning in Large Foundation Models."},{"key":"e_1_3_2_2_8_1","volume-title":"Luke Zettlemoyer, Hannaneh Hajishirzi, and Wen-tau Yih.","author":"Asai Akari","year":"2024","unstructured":"Akari Asai, Zexuan Zhong, Danqi Chen, Pang Wei Koh, Luke Zettlemoyer, Hannaneh Hajishirzi, and Wen-tau Yih. 2024. Reliable, adaptable, and attributable language models with retrieval. 
arXiv preprint arXiv:2403.03187 (2024)."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3018661.3018741"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-019-05791-5"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1106"},{"key":"e_1_3_2_2_12_1","volume-title":"Second International Workshop on Learning with Imbalanced Domains: Theory and Applications. PMLR, 8--22","author":"Bekker Jessa","year":"2018","unstructured":"Jessa Bekker and Jesse Davis. 2018. Learning from positive and unlabeled data under the selected at random assumption. In Second International Workshop on Learning with Imbalanced Domains: Theory and Applications. PMLR, 8--22."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-020-05877-5"},{"key":"e_1_3_2_2_14_1","volume-title":"The extreme classification repository: Multi-label datasets and code. url: http:\/\/manikvarma. org\/downloads\/XC. XMLRepository. html","author":"Bhatia K","year":"2016","unstructured":"K Bhatia, K Dahiya, H Jain, A Mittal, Y Prabhu, and M Varma. 2016. The extreme classification repository: Multi-label datasets and code. url: http:\/\/manikvarma. org\/downloads\/XC. XMLRepository. html, Vol. 132 (2016)."},{"key":"e_1_3_2_2_15_1","volume-title":"Sparse local embeddings for extreme multi-label classification. Advances in neural information processing systems","author":"Bhatia Kush","year":"2015","unstructured":"Kush Bhatia, Himanshu Jain, Purushottam Kar, Manik Varma, and Prateek Jain. 2015. Sparse local embeddings for extreme multi-label classification. Advances in neural information processing systems, Vol. 28 (2015)."},{"key":"e_1_3_2_2_16_1","unstructured":"Luiz Bonifacio Hugo Abonizio Marzieh Fadaee and Rodrigo Nogueira. 2022. InPars: Data Augmentation for Information Retrieval using Large Language Models. 
arxiv: 2202.05144 [cs.CL] https:\/\/arxiv.org\/abs\/2202.05144"},{"key":"e_1_3_2_2_17_1","volume-title":"The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=6ARlSgun7J","author":"Buvanesh Anirudh","year":"2024","unstructured":"Anirudh Buvanesh, Rahul Chand, Jatin Prakash, Bhawna Paliwal, Mudit Dhawan, Neelabh Madan, Deepesh Hada, Vidit Jain, Sonu Mehta, Yashoteja Prabhu, Manish Gupta, Ramachandran Ramjee, and Manik Varma. 2024. Enhancing Tail Performance in Extreme Classifiers by Label Variance Reduction. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=6ARlSgun7J"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3522672"},{"key":"e_1_3_2_2_19_1","volume-title":"A study of position bias in digital library recommender systems. arXiv preprint arXiv:1802.06565","author":"Collins Andrew","year":"2018","unstructured":"Andrew Collins, Dominika Tkaczyk, Akiko Aizawa, and Joeran Beel. 2018. A study of position bias in digital library recommender systems. arXiv preprint arXiv:1802.06565 (2018)."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412779"},{"key":"e_1_3_2_2_21_1","volume-title":"Overview of the TREC 2019 deep learning track. arXiv preprint arXiv:2003.07820","author":"Craswell Nick","year":"2020","unstructured":"Nick Craswell, Bhaskar Mitra, Emine Yilmaz, Daniel Campos, and Ellen M Voorhees. 2020b. Overview of the TREC 2019 deep learning track. arXiv preprint arXiv:2003.07820 (2020)."},{"key":"e_1_3_2_2_22_1","volume-title":"SiameseXML: Siamese Networks meet Extreme Classifiers with 100M Labels. In Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"139","author":"Dahiya Kunal","year":"2021","unstructured":"Kunal Dahiya, Ananye Agarwal, Deepak Saini, Gururaj K, Jian Jiao, Amit Singh, Sumeet Agarwal, Purushottam Kar, and Manik Varma. 2021a. 
SiameseXML: Siamese Networks meet Extreme Classifiers with 100M Labels. In Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, 2330--2340. https:\/\/proceedings.mlr.press\/v139\/dahiya21a.html"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539597.3570392"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441810"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599301"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"crossref","unstructured":"K. Dahiya S. Yadav S. Sondhi D. Saini S. Mehta J. Jiao S. Agarwal P. Kar and M. Varma. 2023c. Deep encoders with auxiliary parameters for extreme classification. In KDD.","DOI":"10.1145\/3580305.3599301"},{"key":"e_1_3_2_2_27_1","volume-title":"Promptagator: Few-shot Dense Retrieval From 8 Examples. arxiv: 2209.11755 [cs.CL] https:\/\/arxiv.org\/abs\/2209.11755","author":"Dai Zhuyun","year":"2022","unstructured":"Zhuyun Dai, Vincent Y. Zhao, Ji Ma, Yi Luan, Jianmo Ni, Jing Lu, Anton Bakalov, Kelvin Guu, Keith B. Hall, and Ming-Wei Chang. 2022. Promptagator: Few-shot Dense Retrieval From 8 Examples. arxiv: 2209.11755 [cs.CL] https:\/\/arxiv.org\/abs\/2209.11755"},{"key":"e_1_3_2_2_28_1","volume-title":"International Conference on Machine Learning. PMLR, 5547--5569","author":"Du Nan","year":"2022","unstructured":"Nan Du, Yanping Huang, Andrew M Dai, Simon Tong, Dmitry Lepikhin, Yuanzhong Xu, Maxim Krikun, Yanqi Zhou, Adams Wei Yu, Orhan Firat, et al. 2022. Glam: Efficient scaling of language models with mixture-of-experts. In International Conference on Machine Learning. PMLR, 5547--5569."},{"key":"e_1_3_2_2_29_1","volume-title":"Data Filtering Networks. In NeurIPS 2023 Workshop on Distribution Shifts: New Frontiers with Foundation Models. 
https:\/\/openreview.net\/forum?id=ZKtZ7KQ6G5","author":"Fang Alex","year":"2024","unstructured":"Alex Fang, Albin Madappally Jose, Amit Jain, Ludwig Schmidt, Alexander T Toshev, and Vaishaal Shankar. 2024. Data Filtering Networks. In NeurIPS 2023 Workshop on Distribution Shifts: New Frontiers with Foundation Models. https:\/\/openreview.net\/forum?id=ZKtZ7KQ6G5"},{"key":"e_1_3_2_2_30_1","volume-title":"Better Synthetic Data by Retrieving and Transforming Existing Datasets. arXiv preprint arXiv:2404.14361","author":"Gandhi Saumya","year":"2024","unstructured":"Saumya Gandhi, Ritu Gala, Vijay Viswanathan, Tongshuang Wu, and Graham Neubig. 2024. Better Synthetic Data by Retrieving and Transforming Existing Datasets. arXiv preprint arXiv:2404.14361 (2024)."},{"key":"e_1_3_2_2_31_1","unstructured":"Suriya Gunasekar Yi Zhang Jyoti Aneja Caio C\u00e9sar Teodoro Mendes Allie Del Giorno Sivakanth Gopi Mojan Javaheripi Piero Kauffmann Gustavo de Rosa Olli Saarikivi Adil Salim Shital Shah Harkirat Singh Behl Xin Wang S\u00e9bastien Bubeck Ronen Eldan Adam Tauman Kalai Yin Tat Lee and Yuanzhi Li. 2023. Textbooks Are All You Need. arxiv: 2306.11644 [cs.CL] https:\/\/arxiv.org\/abs\/2306.11644"},{"key":"e_1_3_2_2_32_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Guo Chuan","year":"2019","unstructured":"Chuan Guo, Ali Mousavi, Xiang Wu, Daniel N Holtmann-Rice, Satyen Kale, Sashank Reddi, and Sanjiv Kumar. 2019. Breaking the glass ceiling for embedding-based classifiers for large output spaces. Advances in Neural Information Processing Systems, Vol. 32 (2019)."},{"key":"e_1_3_2_2_33_1","first-page":"19798","article-title":"ELIAS: End-to-End Learning to Index and Search in Large Output Spaces","volume":"35","author":"Gupta Nilesh","year":"2022","unstructured":"Nilesh Gupta, Patrick Chen, Hsiang-Fu Yu, Cho-Jui Hsieh, and Inderjit Dhillon. 2022. ELIAS: End-to-End Learning to Index and Search in Large Output Spaces. 
Advances in Neural Information Processing Systems, Vol. 35 (2022), 19798--19809.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_34_1","volume-title":"Dual-Encoders for Extreme Multi-label Classification. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=dNe1T0Ahby","author":"Gupta Nilesh","year":"2024","unstructured":"Nilesh Gupta, Fnu Devvrit, Ankit Singh Rawat, Srinadh Bhojanapalli, Prateek Jain, and Inderjit S Dhillon. 2024a. Dual-Encoders for Extreme Multi-label Classification. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=dNe1T0Ahby"},{"key":"e_1_3_2_2_35_1","volume-title":"Efficacy of Dual-Encoders for Extreme Multi-label Classification. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=dNe1T0Ahby","author":"Gupta Nilesh","year":"2024","unstructured":"Nilesh Gupta, Fnu Devvrit, Ankit Singh Rawat, Srinadh Bhojanapalli, Prateek Jain, and Inderjit S Dhillon. 2024b. Efficacy of Dual-Encoders for Extreme Multi-label Classification. In The Twelfth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=dNe1T0Ahby"},{"key":"e_1_3_2_2_36_1","volume-title":"Toxigen: A large-scale machine-generated dataset for adversarial and implicit hate speech detection. arXiv preprint arXiv:2203.09509","author":"Hartvigsen Thomas","year":"2022","unstructured":"Thomas Hartvigsen, Saadia Gabriel, Hamid Palangi, Maarten Sap, Dipankar Ray, and Ece Kamar. 2022. Toxigen: A large-scale machine-generated dataset for adversarial and implicit hate speech detection. arXiv preprint arXiv:2203.09509 (2022)."},{"key":"e_1_3_2_2_37_1","volume-title":"Can Language Models Act as Knowledge Bases at Scale? arXiv preprint arXiv:2402.14273","author":"He Qiyuan","year":"2024","unstructured":"Qiyuan He, Yizhong Wang, and Wenya Wang. 2024. 
Can Language Models Act as Knowledge Bases at Scale? arXiv preprint arXiv:2402.14273 (2024)."},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290979"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939756"},{"key":"e_1_3_2_2_40_1","volume-title":"Proceedings of Machine Learning and Systems. To appear.","author":"Jain Vidit","year":"2023","unstructured":"Vidit Jain, Jatin Prakash, Deepak Saini, Jian Jiao, Ramachandran Ramjee, and Manik Varma. 2023. Renee: End-to-end training of extreme classification models. In Proceedings of Machine Learning and Systems. To appear."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/IISA.2019.8900698"},{"key":"e_1_3_2_2_42_1","volume-title":"Weizhu Chen, Allie Del Giorno, Ronen Eldan, Sivakanth Gopi, et al.","author":"Javaheripi Mojan","year":"2023","unstructured":"Mojan Javaheripi, S\u00e9bastien Bubeck, Marah Abdin, Jyoti Aneja, Sebastien Bubeck, Caio C\u00e9sar Teodoro Mendes, Weizhu Chen, Allie Del Giorno, Ronen Eldan, Sivakanth Gopi, et al. 2023. Phi-2: The surprising power of small language models. Microsoft Research Blog (2023)."},{"key":"e_1_3_2_2_43_1","unstructured":"Vitor Jeronymo Luiz Bonifacio Hugo Abonizio Marzieh Fadaee Roberto Lotufo Jakub Zavrel and Rodrigo Nogueira. 2023. InPars-v2: Large Language Models as Efficient Dataset Generators for Information Retrieval. arxiv: 2301.01820 [cs.IR] https:\/\/arxiv.org\/abs\/2301.01820"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i9.16974"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3018661.3018699"},{"key":"e_1_3_2_2_46_1","volume-title":"International Conference on Machine Learning. PMLR, 15696--15707","author":"Kandpal Nikhil","year":"2023","unstructured":"Nikhil Kandpal, Haikang Deng, Adam Roberts, Eric Wallace, and Colin Raffel. 2023. Large language models struggle to learn long-tail knowledge. 
In International Conference on Machine Learning. PMLR, 15696--15707."},{"key":"e_1_3_2_2_47_1","volume-title":"International conference on learning representations.","author":"Kato Masahiro","year":"2019","unstructured":"Masahiro Kato, Takeshi Teshima, and Junya Honda. 2019. Learning from positive and unlabeled data with a selection bias. In International conference on learning representations."},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-020-05888-2"},{"key":"e_1_3_2_2_49_1","volume-title":"Advances in Neural Information Processing Systems","author":"Kharbanda Siddhant","year":"2022","unstructured":"Siddhant Kharbanda, Atmadeep Banerjee, Erik Schultheis, and Rohit Babbar. 2022. CascadeXML: Rethinking Transformers for End-to-end Multi-resolution Training in Extreme Multi-label Classification. In Advances in Neural Information Processing Systems, S. Koyejo, S. Mohamed, A. Agarwal, D. Belgrave, K. Cho, and A. Oh (Eds.), Vol. 35. Curran Associates, Inc., 2074--2087. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/0e0157ce5ea15831072be4744cbd5334-Paper-Conference.pdf"},{"key":"e_1_3_2_2_50_1","volume-title":"Gandalf: Learning label correlations in Extreme Multi-label Classification via Label Features. https:\/\/openreview.net\/forum?id=JuyFppXzh2","author":"Kharbanda Siddhant","year":"2024","unstructured":"Siddhant Kharbanda, Devaansh Gupta, Erik Schultheis, Atmadeep Banerjee, Vikas Verma, and Rohit Babbar. 2024. Gandalf: Learning label correlations in Extreme Multi-label Classification via Label Features. 
https:\/\/openreview.net\/forum?id=JuyFppXzh2"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645617"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.948"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3592076"},{"key":"e_1_3_2_2_54_1","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"Li Haoxuan","year":"2023","unstructured":"Haoxuan Li, Yanghao Xiao, Chunyuan Zheng, Peng Wu, and Peng Cui. 2023a. Propensity matters: measuring and enhancing balancing for recommendation. In Proceedings of the 40th International Conference on Machine Learning (Honolulu, Hawaii, USA) (ICML'23). JMLR.org, Article 831, 13 pages."},{"key":"e_1_3_2_2_55_1","volume-title":"The Eleventh International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=3VO1y5N7K1H","author":"Li Haoxuan","year":"2023","unstructured":"Haoxuan Li, Chunyuan Zheng, and Peng Wu. 2023b. StableDR: Stabilized Doubly Robust Learning for Recommendation on Data Missing Not at Random. In The Eleventh International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=3VO1y5N7K1H"},{"key":"e_1_3_2_2_56_1","unstructured":"Jeffrey Li Alex Fang Georgios Smyrnis Maor Ivgi Matt Jordan Samir Gadre Hritik Bansal Etash Guha Sedrick Keh Kushal Arora et al. 2024. DataComp-LM: In search of the next generation of training sets for language models. 
arXiv preprint arXiv:2406.11794 (2024)."},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.647"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.220"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412747"},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657951"},{"key":"e_1_3_2_2_61_1","volume-title":"Rephrasing the web: A recipe for compute and data-efficient language modeling. arXiv preprint arXiv:2401.16380","author":"Maini Pratyush","year":"2024","unstructured":"Pratyush Maini, Skyler Seto, He Bai, David Grangier, Yizhe Zhang, and Navdeep Jaitly. 2024. Rephrasing the web: A recipe for compute and data-efficient language modeling. arXiv preprint arXiv:2401.16380 (2024)."},{"key":"e_1_3_2_2_62_1","volume-title":"Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs","author":"Malkov Yu A","year":"2018","unstructured":"Yu A Malkov and Dmitry A Yashunin. 2018. Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs. IEEE transactions on pattern analysis and machine intelligence, Vol. 42, 4 (2018), 824--836."},{"key":"e_1_3_2_2_63_1","volume-title":"Collaborative filtering and the missing at random assumption. arXiv preprint arXiv:1206.5267","author":"Marlin Benjamin","year":"2012","unstructured":"Benjamin Marlin, Richard S Zemel, Sam Roweis, and Malcolm Slaney. 2012. Collaborative filtering and the missing at random assumption. arXiv preprint arXiv:1206.5267 (2012)."},{"key":"e_1_3_2_2_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/2783258.2783381"},{"key":"e_1_3_2_2_65_1","unstructured":"Tharun Kumar Reddy Medini Qixuan Huang Yiqiu Wang Vijai Mohan and Anshumali Shrivastava. 2019. Extreme Classification in Log Memory using Count-Min Sketch: A Case Study of Amazon Search with 50M Products. 
In Advances in Neural Information Processing Systems. 13244--13254."},{"key":"e_1_3_2_2_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441807"},{"key":"e_1_3_2_2_67_1","volume-title":"Sumeet Agarwal, Soumen Chakrabarti, Purushottam Kar, and Manik Varma.","author":"Mittal Anshul","year":"2024","unstructured":"Anshul Mittal, Shikhar Mohan, Deepak Saini, Suchith C. Prabhu, Jian Jiao, Sumeet Agarwal, Soumen Chakrabarti, Purushottam Kar, and Manik Varma. 2024. Graph Regularized Encoder Training for Extreme Classification. arxiv: 2402.18434 [cs.LG] https:\/\/arxiv.org\/abs\/2402.18434"},{"key":"e_1_3_2_2_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449815"},{"key":"e_1_3_2_2_69_1","volume-title":"Forty-first International Conference on Machine Learning. https:\/\/openreview.net\/forum?id=Cbacx90Wkt","author":"Mohan Shikhar","year":"2024","unstructured":"Shikhar Mohan, Deepak Saini, Anshul Mittal, Sayak Ray Chowdhury, Bhawna Paliwal, Jian Jiao, Manish Gupta, and Manik Varma. 2024. OAK: Enriching Document Representations using Auxiliary Knowledge for Extreme Classification. In Forty-first International Conference on Machine Learning. https:\/\/openreview.net\/forum?id=Cbacx90Wkt"},{"key":"e_1_3_2_2_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3615459"},{"key":"e_1_3_2_2_71_1","unstructured":"M. Mohri A. Rostamizadeh and A. Talwalkar. 2012. Foundations of Machine Learning. MIT Press."},{"key":"e_1_3_2_2_72_1","volume-title":"Generative Representational Instruction Tuning. In ICLR 2024 Workshop: How Far Are We From AGI.","author":"Muennighoff Niklas","unstructured":"Niklas Muennighoff, SU Hongjin, Liang Wang, Nan Yang, Furu Wei, Tao Yu, Amanpreet Singh, and Douwe Kiela. [n. d.]. Generative Representational Instruction Tuning. 
In ICLR 2024 Workshop: How Far Are We From AGI."},{"key":"e_1_3_2_2_73_1","volume-title":"Nouamane Tazi, Aleksandra Piktus, Sampo Pyysalo, Thomas Wolf, and Colin A Raffel.","author":"Muennighoff Niklas","year":"2024","unstructured":"Niklas Muennighoff, Alexander Rush, Boaz Barak, Teven Le Scao, Nouamane Tazi, Aleksandra Piktus, Sampo Pyysalo, Thomas Wolf, and Colin A Raffel. 2024. Scaling data-constrained language models. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_2_74_1","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159660"},{"key":"e_1_3_2_2_75_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3185998"},{"key":"e_1_3_2_2_76_1","doi-asserted-by":"publisher","DOI":"10.1145\/2623330.2623651"},{"key":"e_1_3_2_2_77_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450139"},{"key":"e_1_3_2_2_78_1","volume-title":"Daxiang Dong, Hua Wu, and Haifeng Wang.","author":"Qu Yingqi","year":"2020","unstructured":"Yingqi Qu, Yuchen Ding, Jing Liu, Kai Liu, Ruiyang Ren, Wayne Xin Zhao, Daxiang Dong, Hua Wu, and Haifeng Wang. 2020. RocketQA: An optimized training approach to dense passage retrieval for open-domain question answering. arXiv preprint arXiv:2010.08191 (2020)."},{"key":"e_1_3_2_2_79_1","unstructured":"Jack W Rae Sebastian Borgeaud Trevor Cai Katie Millican Jordan Hoffmann Francis Song John Aslanides Sarah Henderson Roman Ring Susannah Young et al. 2021. Scaling language models: Methods analysis & insights from training gopher. arXiv preprint arXiv:2112.11446 (2021)."},{"key":"e_1_3_2_2_80_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657942"},{"key":"e_1_3_2_2_81_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657992"},{"key":"e_1_3_2_2_82_1","volume-title":"International conference on machine learning. 
PMLR, 8890--8901","author":"Rawat Ankit Singh","year":"2021","unstructured":"Ankit Singh Rawat, Aditya K Menon, Wittawat Jitkrittum, Sadeep Jayasumana, Felix Yu, Sashank Reddi, and Sanjiv Kumar. 2021. Disentangling sampling and labeling bias for learning in large-output spaces. In International conference on machine learning. PMLR, 8890--8901."},{"key":"e_1_3_2_2_83_1","volume-title":"The 22nd International Conference on Artificial Intelligence and Statistics. PMLR","author":"Reddi Sashank J","year":"2019","unstructured":"Sashank J Reddi, Satyen Kale, Felix Yu, Daniel Holtmann-Rice, Jiecao Chen, and Sanjiv Kumar. 2019. Stochastic negative mining for learning with large output spaces. In The 22nd International Conference on Artificial Intelligence and Statistics. PMLR, 1940--1949."},{"key":"e_1_3_2_2_84_1","volume-title":"CLASP: Few-Shot Cross-Lingual Data Augmentation for Semantic Parsing. In Proceedings of the 2nd Conference of the Asia-Pacific","author":"Rosenbaum Andy","year":"2022","unstructured":"Andy Rosenbaum, Saleh Soltan, Wael Hamza, Marco Damonte, Isabel Groves, and Amir Saffari. 2022a. CLASP: Few-Shot Cross-Lingual Data Augmentation for Semantic Parsing. In Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers). 444--462."},{"key":"e_1_3_2_2_85_1","volume-title":"Proceedings of the 29th International Conference on Computational Linguistics. 218--241","author":"Rosenbaum Andy","year":"2022","unstructured":"Andy Rosenbaum, Saleh Soltan, Wael Hamza, Yannick Versley, and Markus Boese. 2022b. LINGUIST: Language Model Instruction Tuning to Generate Annotated Utterances for Intent Classification and Slot Tagging. In Proceedings of the 29th International Conference on Computational Linguistics. 
218--241."},{"key":"e_1_3_2_2_86_1","volume-title":"Md Arafat Sultan, and Christopher Potts","author":"Saad-Falcon Jon","year":"2023","unstructured":"Jon Saad-Falcon, Omar Khattab, Keshav Santhanam, Radu Florian, Martin Franz, Salim Roukos, Avirup Sil, Md Arafat Sultan, and Christopher Potts. 2023. UDAPDR: Unsupervised Domain Adaptation via LLM Prompting and Distillation of Rerankers. arxiv: 2303.00807 [cs.IR]"},{"key":"e_1_3_2_2_87_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00564"},{"key":"e_1_3_2_2_88_1","doi-asserted-by":"publisher","DOI":"10.1145\/3336191.3371783"},{"key":"e_1_3_2_2_89_1","volume-title":"Proceedings of the 33rd International Conference on International Conference on Machine Learning-Volume 48","author":"Schnabel Tobias","year":"2016","unstructured":"Tobias Schnabel, Adith Swaminathan, Ashudeep Singh, Navin Chandak, and Thorsten Joachims. 2016a. Recommendations as treatments: debiasing learning and evaluation. In Proceedings of the 33rd International Conference on International Conference on Machine Learning-Volume 48. 1670--1679."},{"key":"e_1_3_2_2_90_1","unstructured":"Tobias Schnabel Adith Swaminathan Ashudeep Singh Navin Chandak and Thorsten Joachims. 2016b. Recommendations as treatments: debiasing learning and evaluation (ICML'16). JMLR.org 1670--1679."},{"key":"e_1_3_2_2_91_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539466"},{"key":"e_1_3_2_2_92_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657707"},{"key":"e_1_3_2_2_93_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_2_94_1","volume-title":"Query2doc: Query Expansion with Large Language Models. arXiv preprint arXiv:2303.07678","author":"Wang Liang","year":"2023","unstructured":"Liang Wang, Nan Yang, and Furu Wei. 
2023. Query2doc: Query Expansion with Large Language Models. arXiv preprint arXiv:2303.07678 (2023)."},{"key":"e_1_3_2_2_95_1","volume-title":"International Conference on Machine Learning. PMLR, 6638--6647","author":"Wang Xiaojie","year":"2019","unstructured":"Xiaojie Wang, Rui Zhang, Yu Sun, and Jianzhong Qi. 2019. Doubly robust joint learning for recommendation on data missing not at random. In International Conference on Machine Learning. PMLR, 6638--6647."},{"key":"e_1_3_2_2_96_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441799"},{"key":"e_1_3_2_2_97_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467223"},{"key":"e_1_3_2_2_98_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.44"},{"key":"e_1_3_2_2_99_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463084"},{"key":"e_1_3_2_2_100_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Xie Sang Michael","year":"2024","unstructured":"Sang Michael Xie, Hieu Pham, Xuanyi Dong, Nan Du, Hanxiao Liu, Yifeng Lu, Percy S Liang, Quoc V Le, Tengyu Ma, and Adams Wei Yu. 2024. Doremi: Optimizing data mixtures speeds up language model pretraining. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_2_101_1","volume-title":"Approximate nearest neighbor negative contrastive learning for dense text retrieval. arXiv preprint arXiv:2007.00808","author":"Xiong Lee","year":"2020","unstructured":"Lee Xiong, Chenyan Xiong, Ye Li, Kwok-Fung Tang, Jialin Liu, Paul Bennett, Junaid Ahmed, and Arnold Overwijk. 2020. Approximate nearest neighbor negative contrastive learning for dense text retrieval. arXiv preprint arXiv:2007.00808 (2020)."},{"key":"e_1_3_2_2_102_1","volume-title":"International Conference on Machine Learning. PMLR, 10809--10819","author":"Ye Hui","year":"2020","unstructured":"Hui Ye, Zhiyu Chen, Da-Han Wang, and Brian Davison. 2020. 
Pretrained generalized autoregressive model with adaptive probabilistic label clusters for extreme multi-label text classification. In International Conference on Machine Learning. PMLR, 10809--10819."},{"key":"e_1_3_2_2_103_1","volume-title":"In Proceedings of NAACL.","author":"Kai Liu Ruiyang Ren Jing Liu","year":"2021","unstructured":"Jing Liu Kai Liu Ruiyang Ren Wayne Xin Zhao Daxiang Dong Hua Wu Yingqi Qu, Yuchen Ding and Haifeng Wang. 2021. RocketQA: An Optimized Training Approach to Dense Passage Retrieval for Open-Domain Question Answering. In In Proceedings of NAACL."},{"key":"e_1_3_2_2_104_1","volume-title":"Attentionxml: Label tree-based attention-aware deep model for high-performance extreme multi-label text classification. Advances in neural information processing systems","author":"You Ronghui","year":"2019","unstructured":"Ronghui You, Zihan Zhang, Ziye Wang, Suyang Dai, Hiroshi Mamitsuka, and Shanfeng Zhu. 2019. Attentionxml: Label tree-based attention-aware deep model for high-performance extreme multi-label text classification. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_2_105_1","first-page":"7267","article-title":"Fast multi-resolution transformer fine-tuning for extreme multi-label text classification","volume":"34","author":"Zhang Jiong","year":"2021","unstructured":"Jiong Zhang, Wei-Cheng Chang, Hsiang-Fu Yu, and Inderjit Dhillon. 2021. Fast multi-resolution transformer fine-tuning for extreme multi-label text classification. Advances in Neural Information Processing Systems, Vol. 
34 (2021), 7267--7280.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_106_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380037"}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Toronto ON Canada","acronym":"KDD '25","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709290","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3690624.3709290","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,16]],"date-time":"2025-08-16T15:46:41Z","timestamp":1755359201000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709290"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,20]]},"references-count":106,"alternative-id":["10.1145\/3690624.3709290","10.1145\/3690624"],"URL":"https:\/\/doi.org\/10.1145\/3690624.3709290","relation":{},"subject":[],"published":{"date-parts":[[2025,7,20]]},"assertion":[{"value":"2025-07-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}