{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,17]],"date-time":"2026-04-17T19:04:12Z","timestamp":1776452652453,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,8,14]],"date-time":"2022-08-14T00:00:00Z","timestamp":1660435200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100002341","name":"Academy of Finland","doi-asserted-by":"publisher","award":["347707"],"award-info":[{"award-number":["347707"]}],"id":[{"id":"10.13039\/501100002341","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Poznan Supercomputing and Networking Center","award":["443"],"award-info":[{"award-number":["443"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,8,14]]},"DOI":"10.1145\/3534678.3539466","type":"proceedings-article","created":{"date-parts":[[2022,8,12]],"date-time":"2022-08-12T19:06:41Z","timestamp":1660331201000},"page":"1547-1557","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":21,"title":["On Missing Labels, Long-tails and Propensities in Extreme Multi-label Classification"],"prefix":"10.1145","author":[{"given":"Erik","family":"Schultheis","sequence":"first","affiliation":[{"name":"Aalto University, Helsinki, Finland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marek","family":"Wydmuch","sequence":"additional","affiliation":[{"name":"Poznan University of Technology, Poznan, Poland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rohit","family":"Babbar","sequence":"additional","affiliation":[{"name":"Aalto University, Helsinki, 
Finland"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Krzysztof","family":"Dembczynski","sequence":"additional","affiliation":[{"name":"Yahoo! Research & Poznan University of Technology, New York, NY, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,8,14]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"crossref","unstructured":"Rahul Agrawal Archit Gupta Yashoteja Prabhu and Manik Varma. 2013. Multilabel learning with millions of labels: Recommending advertiser bid phrases for web pages. In WWW. 13--24.","DOI":"10.1145\/2488388.2488391"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"crossref","unstructured":"Rohit Babbar and Bernhard Sch\u00f6lkopf. 2017. DiSMEC: Distributed Sparse Machines for Extreme Multi-label Classification. In WSDM. 721--729.","DOI":"10.1145\/3018661.3018741"},{"key":"e_1_3_2_2_3_1","volume-title":"Data scarcity, robustness and extreme multi-label classification. Machine Learning 108 (09","author":"Babbar Rohit","year":"2019","unstructured":"Rohit Babbar and Bernhard Sch\u00f6lkopf. 2019. Data scarcity, robustness and extreme multi-label classification. Machine Learning 108 (09 2019)."},{"key":"e_1_3_2_2_4_1","volume-title":"Shetty","author":"Bazaraa Mokhtar S.","year":"2006","unstructured":"Mokhtar S. Bazaraa, Hanif D. Sherali, and Chitharanjan M. Shetty. 2006. Nonlinear Programming: Theory and Algorithms. Wiley."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-020-05877-5"},{"key":"e_1_3_2_2_6_1","volume-title":"Strehl","author":"Beygelzimer Alina","year":"2009","unstructured":"Alina Beygelzimer, John Langford, Yury Lifshits, Gregory B. Sorkin, and Alexander L. Strehl. 2009. Conditional Probability Tree Estimation Analysis and Algorithms. In UAI. 51--58."},{"key":"e_1_3_2_2_7_1","unstructured":"Kush Bhatia Kunal Dahiya Himanshu Jain Anshul Mittal Yashoteja Prabhu and Manik Varma. 2016. 
The extreme classification repository: Multi-label datasets and code. http:\/\/manikvarma.org\/downloads\/XC\/XMLRepository.html"},{"key":"e_1_3_2_2_8_1","unstructured":"Kush Bhatia Himanshu Jain Purushottam Kar Manik Varma and Prateek Jain. 2015. Sparse Local Embeddings for Extreme Multi-label Classification. In NeurIPS. 730--738."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.5555\/1756006.1953028"},{"key":"e_1_3_2_2_10_1","volume-title":"Dhillon","author":"Chang Wei-Cheng","year":"2020","unstructured":"Wei-Cheng Chang, Hsiang-Fu Yu, Kai Zhong, Yiming Yang, and Inderjit S. Dhillon. 2020. Taming Pretrained Transformers for Extreme Multi-label Text Classification. In KDD. 3163--3171."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"crossref","unstructured":"Yin Cui Menglin Jia Tsung-Yi Lin Yang Song and Serge Belongie. 2019. Class-balanced loss based on effective number of samples. In CVPR. 9268--9277.","DOI":"10.1109\/CVPR.2019.00949"},{"key":"e_1_3_2_2_12_1","volume-title":"SiameseXML: Siamese Networks meet Extreme Classifiers with 100M Labels. In ICML. 2330--2340","author":"Dahiya Kunal","year":"2021","unstructured":"Kunal Dahiya, Ananye Agarwal, Deepak Saini, K Gururaj, Jian Jiao, Amit Singh, Sumeet Agarwal, Purushottam Kar, and Manik Varma. 2021. SiameseXML: Siamese Networks meet Extreme Classifiers with 100M Labels. In ICML. 2330--2340."},{"key":"e_1_3_2_2_13_1","unstructured":"Jia Deng Sanjeev Satheesh Alexander C. Berg and Fei-Fei Li. 2011. Fast and Balanced: Efficient Label Tree Learning for Large Scale Object Recognition. In NeurIPS. 567--575."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"crossref","unstructured":"Charles Elkan and Keith Noto. 2008. Learning classifiers from only positive and unlabeled data. In KDD. 
213--220.","DOI":"10.1145\/1401890.1401920"},{"key":"e_1_3_2_2_15_1","volume-title":"Learning from imbalanced data sets","author":"Fern\u00e1ndez Alberto","unstructured":"Alberto Fern\u00e1ndez, Salvador Garc\u00eda, Mikel Galar, Ronaldo C Prati, Bartosz Krawczyk, and Francisco Herrera. 2018. Learning from imbalanced data sets. Springer."},{"key":"e_1_3_2_2_16_1","unstructured":"Chuan Guo Ali Mousavi Xiang Wu Daniel N Holtmann-Rice Satyen Kale Sashank Reddi and Sanjiv Kumar. 2019. Breaking the Glass Ceiling for Embedding-Based Classifiers for Large Output Spaces. In NeurIPS."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3289600.3290979"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"crossref","unstructured":"Himanshu Jain Yashoteja Prabhu and Manik Varma. 2016. Extreme Multi-Label Loss Functions for Recommendation Tagging Ranking and Other Missing Label Applications. In KDD. 935--944.","DOI":"10.1145\/2939672.2939756"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"crossref","unstructured":"Thorsten Joachims Adith Swaminathan and Tobias Schnabel. 2018. Unbiased Learning-to-Rank with Biased Feedback. In IJCAI. 5284--5288.","DOI":"10.24963\/ijcai.2018\/738"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-020-05888-2"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"crossref","unstructured":"Julian McAuley Rahul Pandey and Jure Leskovec. 2015. Inferring Networks of Substitutable and Complementary Products. In KDD. 785--794.","DOI":"10.1145\/2783258.2783381"},{"key":"e_1_3_2_2_22_1","unstructured":"Tharun Kumar Reddy Medini Qixuan Huang Yiqiu Wang Vijai Mohan and Anshumali Shrivastava. 2019. Extreme Classification in Log Memory using Count-Min Sketch: A Case Study of Amazon Search with 50M Products. In NeurIPS. 13265--13275."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"crossref","unstructured":"Jorge J. Mor\u00e9. 1978. The Levenberg-Marquardt algorithm: Implementation and theory. In Numerical Analysis G. A. Watson (Ed.). 
Springer Berlin Heidelberg 105--116.","DOI":"10.1007\/BFb0067700"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.5555\/3122009.3242012"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"crossref","unstructured":"Yashoteja Prabhu and Manik Varma. 2014. FastXML: a fast accurate and stable tree-classifier for extreme multi-label learning. In KDD. 263--272.","DOI":"10.1145\/2623330.2623651"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"crossref","unstructured":"Mohammadreza Qaraei Erik Schultheis Priyanshu Gupta and Rohit Babbar. 2021. Convex Surrogates for Unbiased Loss Functions in Extreme Classification With Missing Labels. In WWW. 3711--3720.","DOI":"10.1145\/3442381.3450139"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390255"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1093\/jxb\/10.2.290"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"crossref","unstructured":"Yuta Saito Suguru Yaginuma Yuta Nishino Hayato Sakata and Kazuhide Nakata. 2020. Unbiased Recommender Learning from Missing-Not-At-Random Implicit Feedback. In WSDM. 501--509.","DOI":"10.1145\/3336191.3371783"},{"key":"e_1_3_2_2_30_1","volume-title":"Unbiased Loss Functions for Multilabel Classification with Missing Labels. CoRR abs\/2109.11282","author":"Schultheis Erik","year":"2021","unstructured":"Erik Schultheis and Rohit Babbar. 2021. Unbiased Loss Functions for Multilabel Classification with Missing Labels. CoRR abs\/2109.11282 (2021)."},{"key":"e_1_3_2_2_31_1","unstructured":"Shashank Singh and Justin Khim. 2021. Statistical Theory for Imbalanced Binary Classification. arXiv:2107.01777 [math.ST]"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"crossref","unstructured":"Yukihiro Tagami. 2017. AnnexML: Approximate Nearest Neighbor Search for Extreme Multi-label Classification. In KDD. 
455--464.","DOI":"10.1145\/3097983.3097987"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"crossref","unstructured":"Pawel Teisseyre Jan Mielniczuk and Ma\u0142gorzata \u0141az\u0119cka. 2020. Different Strategies of Fitting Logistic Regression for Positive and Unlabelled Data. In ICCS. 3--17.","DOI":"10.1007\/978-3-030-50423-6_1"},{"key":"e_1_3_2_2_34_1","volume-title":"A Framework to Generate Synthetic Multi-label Datasets. Electronic Notes in Theoretical Computer Science 302 (02","author":"Tom\u00e1s Jimena","year":"2014","unstructured":"Jimena Tom\u00e1s, Newton Spola\u00f4r, Everton Cherman, and Maria-Carolina Monard. 2014. A Framework to Generate Synthetic Multi-label Datasets. Electronic Notes in Theoretical Computer Science 302 (02 2014), 155--176."},{"key":"e_1_3_2_2_35_1","first-page":"8501","article-title":"A theory of learning with corrupted labels","volume":"18","author":"Rooyen Brendan Van","year":"2017","unstructured":"Brendan Van Rooyen and Robert C. Williamson. 2017. A theory of learning with corrupted labels. Journal of Machine Learning Research 18, 1 (2017), 8501--8550.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_36_1","first-page":"2315","article-title":"Does Tail Label Help for Large-Scale Multi-Label Learning","volume":"31","author":"Yu-Feng Li TongWei","year":"2020","unstructured":"Tong Wei and Yu-Feng Li. 2020. Does Tail Label Help for Large-Scale Multi-Label Learning? IEEE Transactions on Neural Networks and Learning Systems 31, 7 (2020), 2315--2324.","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"crossref","unstructured":"Marek Wydmuch Kalina Jasinska-Kobus Rohit Babbar and Krzysztof Dembczynski. 2021. Propensity-Scored Probabilistic Label Trees. In SIGIR. 2252--2256.","DOI":"10.1145\/3404835.3463084"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"crossref","unstructured":"Longqi Yang Yin Cui Yuan Xuan Chenyang Wang Serge Belongie and Deborah Estrin. 2018. 
Unbiased Offline Recommender Evaluation for Missing-Not-at-Random Implicit Feedback. In RecSys. 279--287.","DOI":"10.1145\/3240323.3240355"},{"key":"e_1_3_2_2_39_1","volume-title":"Xing","author":"En-Hsu Yen Ian","year":"2017","unstructured":"Ian En-Hsu Yen, Xiangru Huang, Wei Dai, Pradeep Ravikumar, Inderjit S. Dhillon, and Eric P. Xing. 2017. PPDsparse: A Parallel Primal-Dual Sparse Method for Extreme Classification. In KDD. 545--553."},{"key":"e_1_3_2_2_40_1","unstructured":"Ronghui You Zihan Zhang Ziye Wang Suyang Dai Hiroshi Mamitsuka and Shanfeng Zhu. 2019. AttentionXML: Label Tree-based Attention-Aware Deep Model for High-Performance Extreme Multi-Label Text Classification. In NeurIPS. 5812--5822."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"crossref","unstructured":"Ziwei Zhu Yun He Yin Zhang and James Caverlee. 2020. Unbiased Implicit Recommendation and Propensity Estimation via Combinational Joint Learning. In RecSys. 551--556.","DOI":"10.1145\/3383313.3412210"},{"key":"e_1_3_2_2_42_1","unstructured":"Jingwei Zhuo Ziru Xu Wei Dai Han Zhu Han Li Jian Xu and Kun Gai. 2020. Learning Optimal Tree Models under Beam Search. In ICML. 
11650--11659."}],"event":{"name":"KDD '22: The 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Washington DC USA","acronym":"KDD '22","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3539466","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3534678.3539466","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:03:03Z","timestamp":1750186983000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3539466"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,14]]},"references-count":42,"alternative-id":["10.1145\/3534678.3539466","10.1145\/3534678"],"URL":"https:\/\/doi.org\/10.1145\/3534678.3539466","relation":{},"subject":[],"published":{"date-parts":[[2022,8,14]]},"assertion":[{"value":"2022-08-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}