{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T16:42:24Z","timestamp":1774456944872,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T00:00:00Z","timestamp":1745280000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Academy of Finland - Research Council of Finland","award":["348215"],"award-info":[{"award-number":["348215"]}]},{"name":"Academy of Finland - Research Council of Finland","award":["347707"],"award-info":[{"award-number":["347707"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,28]]},"DOI":"10.1145\/3696410.3714704","type":"proceedings-article","created":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T22:47:11Z","timestamp":1745362031000},"page":"4124-4133","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["<scp>UniDEC<\/scp>\n            : Unified Dual Encoder and Classifier Training for Extreme Multi-Label Classification"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-6847-5836","authenticated-orcid":false,"given":"Siddhant","family":"Kharbanda","sequence":"first","affiliation":[{"name":"University of California, Los Angeles, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3007-0109","authenticated-orcid":false,"given":"Devaansh","family":"Gupta","sequence":"additional","affiliation":[{"name":"University of California, Los Angeles, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9235-4815","authenticated-orcid":false,"given":"Gururaj","family":"K","sequence":"additional","affiliation":[{"name":"Microsoft, Bengaluru, 
India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4486-2879","authenticated-orcid":false,"given":"Pankaj","family":"Malhotra","sequence":"additional","affiliation":[{"name":"Microsoft, Bengaluru, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0669-5283","authenticated-orcid":false,"given":"Amit","family":"Singh","sequence":"additional","affiliation":[{"name":"Microsoft, Bengaluru, India"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3520-9627","authenticated-orcid":false,"given":"Cho-Jui","family":"Hsieh","sequence":"additional","affiliation":[{"name":"University of California, Los Angeles, CA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3787-8971","authenticated-orcid":false,"given":"Rohit","family":"Babbar","sequence":"additional","affiliation":[{"name":"University of Bath, Bath, United Kingdom and Aalto University, Espoo, Finland"}]}],"member":"320","published-online":{"date-parts":[[2025,4,22]]},"reference":[{"issue":"1","key":"e_1_3_2_1_1_1","first-page":"143","article-title":"Zipf's law and the internet","volume":"3","author":"Adamic L. A.","year":"2002","unstructured":"L. A. Adamic and B. A. Huberman. Zipf's law and the internet. Glottometrics, 3(1):143--150, 2002.","journal-title":"Glottometrics"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3018661.3018741"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-019-05791-5"},{"key":"e_1_3_2_1_4_1","volume-title":"The extreme classification repository: Multi-label datasets and code","author":"Bhatia K.","year":"2016","unstructured":"K. Bhatia, K. Dahiya, H. Jain, A. Mittal, Y. Prabhu, and M. Varma. The extreme classification repository: Multi-label datasets and code, 2016."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403368"},{"key":"e_1_3_2_1_6_1","first-page":"2330","volume-title":"International Conference on Machine Learning","author":"Dahiya K.","year":"2021","unstructured":"K. Dahiya, A. Agarwal, D. Saini, K. 
Gururaj, J. Jiao, A. Singh, S. Agarwal, P. Kar, and M. Varma. Siamesexml: Siamese networks meet extreme classifiers with 100m labels. In International Conference on Machine Learning, pages 2330--2340. PMLR, 2021."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539597.3570392"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441810"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599301"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611975673.32"},{"key":"e_1_3_2_1_11_1","volume-title":"Supervised contrastive learning for pre-trained language model fine-tuning","author":"Gunel B.","year":"2021","unstructured":"B. Gunel, J. Du, A. Conneau, and V. Stoyanov. Supervised contrastive learning for pre-trained language model fine-tuning, 2021."},{"key":"e_1_3_2_1_12_1","volume-title":"The Twelfth International Conference on Learning Representations","author":"Gupta N.","year":"2024","unstructured":"N. Gupta, F. Devvrit, A. S. Rawat, S. Bhojanapalli, P. Jain, and I. S. Dhillon. Dualencoders for extreme multi-label classification. In The Twelfth International Conference on Learning Representations, 2024."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2939672.2939756"},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of Machine Learning and Systems","author":"Jain V.","year":"2023","unstructured":"V. Jain, J. Prakash, D. Saini, J. Jiao, R. Ramjee, and M. Varma. Renee: End-to-end training of extreme classification models. Proceedings of Machine Learning and Systems, 2023."},{"key":"e_1_3_2_1_15_1","volume-title":"ICML","author":"Jasinska K.","year":"2016","unstructured":"K. Jasinska, K. Dembczynski, R. Busa-Fekete, K. Pfannschmidt, T. Klerx, and E. Hullermeier. Extreme F-measure Maximization using Sparse Probability Estimates. 
In ICML, June 2016."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i9.16974"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-020-05888-2"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591699"},{"key":"e_1_3_2_1_20_1","first-page":"2074","article-title":"Cascadexml: Rethinking transformers for end-to-end multi-resolution training in extreme multi-label classification","volume":"35","author":"Kharbanda S.","year":"2022","unstructured":"S. Kharbanda, A. Banerjee, E. Schultheis, and R. Babbar. Cascadexml: Rethinking transformers for end-to-end multi-resolution training in extreme multi-label classification. Advances in Neural Information Processing Systems, 35:2074--2087, 2022.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_21_1","volume-title":"Gandalf : Data augmentation is all you need for extreme classification","author":"Kharbanda S.","year":"2023","unstructured":"S. Kharbanda, D. Gupta, E. Schultheis, A. Banerjee, V. Verma, and R. Babbar. Gandalf : Data augmentation is all you need for extreme classification, 2023."},{"key":"e_1_3_2_1_22_1","volume-title":"Supervised contrastive learning. Advances in neural information processing systems, 33:18661--18673","author":"Khosla P.","year":"2020","unstructured":"P. Khosla, P. Teterwak, C. Wang, A. Sarna, Y. Tian, P. Isola, A. Maschinot, C. Liu, and D. Krishnan. Supervised contrastive learning. Advances in neural information processing systems, 33:18661--18673, 2020."},{"key":"e_1_3_2_1_23_1","first-page":"32","article-title":"Multilabel reductions: what is my loss optimising?","author":"Menon A. K.","year":"2019","unstructured":"A. K. Menon, A. S. Rawat, S. Reddi, and S. Kumar. Multilabel reductions: what is my loss optimising? 
Advances in Neural Information Processing Systems, 32, 2019.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441807"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449815"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3159652.3159660"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3185998"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450139"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.466"},{"key":"e_1_3_2_1_30_1","volume-title":"International Conference on Machine Learning","author":"Radford A.","year":"2021","unstructured":"A. Radford, J. W. Kim, C. Hallacy, A. Ramesh, G. Goh, S. Agarwal, G. Sastry, A. Askell, P. Mishkin, J. Clark, G. Krueger, and I. Sutskever. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning, 2021."},{"key":"e_1_3_2_1_31_1","volume-title":"CoRR","author":"Reddi S. J.","year":"2018","unstructured":"S. J. Reddi, S. Kale, F. Yu, D. N. H. Rice, J. Chen, and S. Kumar. Stochastic Negative Mining for Learning with Large Output Spaces. CoRR, 2018."},{"key":"e_1_3_2_1_32_1","volume-title":"Speeding-up one-vs-all training for extreme classification via smart initialization. arXiv preprint arXiv:2109.13122","author":"Schultheis E.","year":"2021","unstructured":"E. Schultheis and R. Babbar. Speeding-up one-vs-all training for extreme classification via smart initialization. arXiv preprint arXiv:2109.13122, 2021."},{"key":"e_1_3_2_1_33_1","volume-title":"Unbiased loss functions for multilabel classification with missing labels. arXiv preprint arXiv:2109.11282","author":"Schultheis E.","year":"2021","unstructured":"E. Schultheis and R. Babbar. Unbiased loss functions for multilabel classification with missing labels. 
arXiv preprint arXiv:2109.11282, 2021."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539466"},{"key":"e_1_3_2_1_35_1","volume-title":"NIPS","author":"Wydmuch M.","year":"2018","unstructured":"M. Wydmuch, K. Jasinska, M. Kuznetsov, R. Busa-Fekete, and K. Dembczynski. A no-regret generalization of hierarchical softmax to extreme multi-label classification. In NIPS, 2018."},{"key":"e_1_3_2_1_36_1","first-page":"2252","volume-title":"SIGIR","author":"Wydmuch M.","year":"2021","unstructured":"M. Wydmuch, K. Jasinska-Kobus, R. Babbar, and K. Dembczynski. Propensityscored probabilistic label trees. In SIGIR, pages 2252--2256, 2021."},{"key":"e_1_3_2_1_37_1","volume-title":"Approximate nearest neighbor negative contrastive learning for dense text retrieval. arXiv preprint arXiv:2007.00808","author":"Xiong L.","year":"2020","unstructured":"L. Xiong, C. Xiong, Y. Li, K.-F. Tang, J. Liu, P. Bennett, J. Ahmed, and A. Overwijk. Approximate nearest neighbor negative contrastive learning for dense text retrieval. arXiv preprint arXiv:2007.00808, 2020."},{"key":"e_1_3_2_1_38_1","volume-title":"NeurIPS","author":"You R.","year":"2019","unstructured":"R. You, Z. Zhang, Z.Wang, S. Dai, H. Mamitsuka, and S. Zhu. Attentionxml: Label tree-based attention-aware deep model for high-performance extreme multi-label text classification. In NeurIPS, 2019."},{"key":"e_1_3_2_1_39_1","first-page":"7267","article-title":"Fast multi-resolution transformer fine-tuning for extreme multi-label text classification","volume":"34","author":"Zhang J.","year":"2021","unstructured":"J. Zhang, W.-C. Chang, H.-F. Yu, and I. Dhillon. Fast multi-resolution transformer fine-tuning for extreme multi-label text classification. 
Advances in Neural Information Processing Systems, 34:7267--7280, 2021.","journal-title":"Advances in Neural Information Processing Systems"}],"event":{"name":"WWW '25: The ACM Web Conference 2025","location":"Sydney NSW Australia","acronym":"WWW '25","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714704","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696410.3714704","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:57Z","timestamp":1750295937000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714704"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,22]]},"references-count":39,"alternative-id":["10.1145\/3696410.3714704","10.1145\/3696410"],"URL":"https:\/\/doi.org\/10.1145\/3696410.3714704","relation":{},"subject":[],"published":{"date-parts":[[2025,4,22]]},"assertion":[{"value":"2025-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}