{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T13:07:12Z","timestamp":1765544832156,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,2,11]],"date-time":"2022-02-11T00:00:00Z","timestamp":1644537600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"NSF","award":["1750063"],"award-info":[{"award-number":["1750063"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,2,11]]},"DOI":"10.1145\/3488560.3498439","type":"proceedings-article","created":{"date-parts":[[2022,2,15]],"date-time":"2022-02-15T21:42:57Z","timestamp":1644961377000},"page":"842-850","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":11,"title":["On Sampling Collaborative Filtering Datasets"],"prefix":"10.1145","author":[{"given":"Noveen","family":"Sachdeva","sequence":"first","affiliation":[{"name":"University of California, San Diego, La Jolla, CA, USA"}]},{"given":"Carole-Jean","family":"Wu","sequence":"additional","affiliation":[{"name":"Facebook AI Research, Cambridge, MA, USA"}]},{"given":"Julian","family":"McAuley","sequence":"additional","affiliation":[{"name":"University of California, San Diego, La Jolla, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2022,2,15]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Scaling Up Collaborative Filtering Data Sets through Randomized Fractal Expansions. arxiv","author":"Belletti Francois","year":"1905","unstructured":"Francois Belletti, Karthik Lakshmanan, Walid Krichene, Nicolas Mayoraz, Yi-Fan Chen, John Anderson, Taylor Robie, Tayo Oguntebi, Dan Shirron, and Amit Bleiwess. 2019. Scaling Up Collaborative Filtering Data Sets through Randomized Fractal Expansions. arxiv: 1905.09874 [cs.LG]"},{"key":"e_1_3_2_2_2_1","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Borsos Zal\u00e1n","year":"2020","unstructured":"Zal\u00e1n Borsos, Mojmir Mutny, and Andreas Krause. 2020. Coresets via Bilevel Optimization for Continual Learning and Streaming. In Advances in Neural Information Processing Systems, Vol. 33. Curran Associates, Inc."},{"volume-title":"On Target Item Sampling In Offline Recommender System Evaluation. In 14th ACM Conference on Recommender Systems .","author":"Castells R. Ca","unstructured":"R. Ca namares and P. Castells. 2020. On Target Item Sampling In Offline Recommender System Evaluation. In 14th ACM Conference on Recommender Systems .","key":"e_1_3_2_2_3_1"},{"volume-title":"XGBoost: A Scalable Tree Boosting System. In KDD '16 .","author":"Chen Tianqi","unstructured":"Tianqi Chen and Carlos Guestrin. [n.d.]. XGBoost: A Scalable Tree Boosting System. In KDD '16 .","key":"e_1_3_2_2_4_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_5_1","DOI":"10.1145\/3097983.3098202"},{"unstructured":"Junyoung Chung Caglar Gulcehre KyungHyun Cho and Yoshua Bengio. 2014. Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling. arxiv: 1412.3555 [cs.NE]","key":"e_1_3_2_2_6_1"},{"unstructured":"Cody Coleman Christopher Yeh Stephen Mussmann Baharan Mirzasoleiman Peter Bailis Percy Liang Jure Leskovec and Matei Zaharia. 2020. Selection via Proxy: Efficient Data Selection for Deep Learning. In ICLR .","key":"e_1_3_2_2_7_1"},{"volume-title":"Proceedings of the 13th ACM Conference on Recommender Systems (RecSys '19)","author":"Dacrema M.","unstructured":"M. Dacrema, P. Cremonesi, and D. Jannach. 2019. Are We Really Making Much Progress? A Worrying Analysis of Recent Neural Recommendation Approaches. In Proceedings of the 13th ACM Conference on Recommender Systems (RecSys '19).","key":"e_1_3_2_2_8_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_9_1","DOI":"10.1109\/CVPR.2009.5206848"},{"unstructured":"Xavier Glorot and Yoshua Bengio. 2010. Understanding the difficulty of training deep feedforward neural networks. In AISTATS .","key":"e_1_3_2_2_10_1"},{"volume-title":"Chasing Carbon: The Elusive Environmental Footprint of Computing. In 2021 IEEE International Symposium on High-Performance Computer Architecture (HPCA) .","author":"Gupta U.","unstructured":"U. Gupta, Y. Kim, S. Lee, J. Tse, H. S. Lee, G. Wei, D. Brooks, and C. Wu. 2021. Chasing Carbon: The Elusive Environmental Footprint of Computing. In 2021 IEEE International Symposium on High-Performance Computer Architecture (HPCA) .","key":"e_1_3_2_2_11_1"},{"key":"e_1_3_2_2_12_1","volume-title":"The movielens datasets: History and context. Acm transactions on interactive intelligent systems (tiis)","author":"Maxwell Harper F","year":"2015","unstructured":"F Maxwell Harper and Joseph A Konstan. 2015. The movielens datasets: History and context. Acm transactions on interactive intelligent systems (tiis) (2015)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_13_1","DOI":"10.1145\/3038912.3052569"},{"volume-title":"Proceedings of the 12th ACM Conference on Web Search and Data Mining .","author":"Jain H.","unstructured":"H. Jain, V. Balasubramanian, B. Chunduri, and M. Varma. 2019. Slice: Scalable Linear Extreme Classifiers Trained on 100 Million Labels for Related Searches. In Proceedings of the 12th ACM Conference on Web Search and Data Mining .","key":"e_1_3_2_2_14_1"},{"volume-title":"Proceedings of the SIGKDD Conference on Knowledge Discovery and Data Mining .","author":"Jain H.","unstructured":"H. Jain, Y. Prabhu, and M. Varma. 2016. Extreme Multi-label Loss Functions for Recommendation, Tagging, Ranking and Other Missing Label Applications. In Proceedings of the SIGKDD Conference on Knowledge Discovery and Data Mining .","key":"e_1_3_2_2_15_1"},{"volume-title":"Self-Attentive Sequential Recommendation. In 2018 IEEE International Conference on Data Mining .","author":"Kang W.","unstructured":"W. Kang and J. McAuley. 2018. Self-Attentive Sequential Recommendation. In 2018 IEEE International Conference on Data Mining .","key":"e_1_3_2_2_16_1"},{"volume-title":"Learning From Less Data: A Unified Data Subset Selection and Active Learning Framework for Computer Vision. In 2019 IEEE Winter Conference on Applications of Computer Vision (WACV) .","author":"Kaushal V.","unstructured":"V. Kaushal, R. Iyer, S. Kothawade, R. Mahadev, K. Doctor, and G. Ramakrishnan. 2019. Learning From Less Data: A Unified Data Subset Selection and Active Learning Framework for Computer Vision. In 2019 IEEE Winter Conference on Applications of Computer Vision (WACV) .","key":"e_1_3_2_2_17_1"},{"volume-title":"Auto-Encoding Variational Bayes. In 2nd International Conference on Learning Representations, ICLR 2014 .showeprint[arXiv]http:\/\/arxiv.org\/abs\/1312","author":"Diederik","unstructured":"Diederik P. Kingma and Max Welling. 2014. Auto-Encoding Variational Bayes. In 2nd International Conference on Learning Representations, ICLR 2014 .showeprint[arXiv]http:\/\/arxiv.org\/abs\/1312.6114v10 [stat.ML]","key":"e_1_3_2_2_18_1"},{"key":"e_1_3_2_2_19_1","volume-title":"Kipf and Max Welling","author":"Thomas","year":"2017","unstructured":"Thomas N. Kipf and Max Welling. 2017. Semi-Supervised Classification with Graph Convolutional Networks. In ICLR ."},{"key":"e_1_3_2_2_20_1","volume-title":"Computer","volume":"42","author":"Koren Yehuda","year":"2009","unstructured":"Yehuda Koren, Robert Bell, and Chris Volinsky. 2009. Matrix Factorization Techniques for Recommender Systems. Computer , Vol. 42, 8 (2009)."},{"key":"e_1_3_2_2_21_1","volume-title":"2021 IEEE International Conference on Acoustics, Speech and Signal Processing .","author":"Krause Andreas","year":"2021","unstructured":"Andreas Krause, Marco Tagliasacchi, and Zal\u00e1n Borsos. 2021. Semi-supervised batch active learning via bilevel optimization. In 2021 IEEE International Conference on Acoustics, Speech and Signal Processing ."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_22_1","DOI":"10.1145\/3394486.3403226"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_24_1","DOI":"10.1145\/1150402.1150479"},{"volume-title":"Proceedings of the 11th ACM SIGKDD Conference on Knowledge Discovery in Data Mining (KDD '05)","author":"Leskovec J.","unstructured":"J. Leskovec, J. Kleinberg, and C. Faloutsos. 2005. Graphs over Time: Densification Laws, Shrinking Diameters and Possible Explanations. In Proceedings of the 11th ACM SIGKDD Conference on Knowledge Discovery in Data Mining (KDD '05).","key":"e_1_3_2_2_25_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_26_1","DOI":"10.1145\/3178876.3186150"},{"volume-title":"Proceedings of Machine Learning and Systems .","author":"Mattson P.","unstructured":"P. Mattson, C. Cheng, G. Diamos, C. Coleman, P. Micikevicius, D. Patterson, H. Tang, G. Wei, P. Bailis, V. Bittorf, D. Brooks, D. Chen, D. Dutta, U. Gupta, K. Hazelwood, A. Hock, X. Huang, D. Kang, D. Kanter, N. Kumar, J. Liao, D. Narayanan, T. Oguntebi, G. Pekhimenko, L. Pentecost, Vijay Janapa, R., T. Robie, T. St John, C. Wu, L. Xu, C. Young, and M. Zaharia. 2020. MLPerf Training Benchmark. In Proceedings of Machine Learning and Systems .","key":"e_1_3_2_2_27_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_28_1","DOI":"10.1109\/ICDM.2012.110"},{"key":"e_1_3_2_2_29_1","volume-title":"Exploring Data Splitting Strategies for the Evaluation of Recommendation Models. In Fourteenth ACM Conference on Recommender Systems (RecSys '20)","author":"Meng Zaiqiao","year":"2020","unstructured":"Zaiqiao Meng, Richard McCreadie, Craig Macdonald, and Iadh Ounis. 2020. Exploring Data Splitting Strategies for the Evaluation of Recommendation Models. In Fourteenth ACM Conference on Recommender Systems (RecSys '20)."},{"key":"e_1_3_2_2_30_1","volume-title":"Prediction-based decisions and fairness: A catalogue of choices, assumptions, and definitions. arXiv preprint arXiv:1811.07867","author":"Mitchell Shira","year":"2018","unstructured":"Shira Mitchell, Eric Potash, Solon Barocas, Alexander D'Amour, and Kristian Lum. 2018. Prediction-based decisions and fairness: A catalogue of choices, assumptions, and definitions. arXiv preprint arXiv:1811.07867 (2018)."},{"volume-title":"Proceedings of The ACM International World Wide Web Conference .","author":"Mittal A.","unstructured":"A. Mittal, N. Sachdeva, S. Agrawal, S. Agarwal, P. Kar, and M. Varma. 2021. ECLARE: Extreme classification with label graph correlations. In Proceedings of The ACM International World Wide Web Conference .","key":"e_1_3_2_2_31_1"},{"key":"e_1_3_2_2_32_1","volume-title":"Proceedings of the International AAAI Conference on Web and Social Media","volume":"7","author":"Morstatter Fred","year":"2013","unstructured":"Fred Morstatter, J\u00fcrgen Pfeffer, Huan Liu, and Kathleen Carley. 2013. Is the Sample Good Enough? Comparing Data from Twitter's Streaming API with Twitter's Firehose. Proceedings of the International AAAI Conference on Web and Social Media , Vol. 7, 1 (Jun. 2013). https:\/\/ojs.aaai.org\/index.php\/ICWSM\/article\/view\/14401"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_33_1","DOI":"10.18653\/v1\/D19-1018"},{"unstructured":"S. Nowozin B. Cseke and R. Tomioka. 2016. f-GAN: Training Generative Neural Samplers using Variational Divergence Minimization. In NeurIPS .","key":"e_1_3_2_2_34_1"},{"unstructured":"L. Page S. Brin R. Motwani and T. Winograd. 1999. The PageRank citation ranking: Bringing order to the web. Technical Report. Stanford InfoLab.","key":"e_1_3_2_2_35_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_36_1","DOI":"10.1145\/775047.775059"},{"unstructured":"David Patterson Joseph Gonzalez Quoc Le Chen Liang Lluis-Miquel Munguia Daniel Rothchild David So Maud Texier and Jeff Dean. 2021. Carbon Emissions and Large Neural Network Training. arxiv: 2104.10350 [cs.LG]","key":"e_1_3_2_2_37_1"},{"key":"e_1_3_2_2_38_1","volume-title":"Proceedings of the Twenty-Fifth Conference on Uncertainty in Artificial Intelligence (UAI '09)","author":"Rendle Steffen","year":"2009","unstructured":"Steffen Rendle, Christoph Freudenthaler, Zeno Gantner, and Lars Schmidt-Thieme. 2009. BPR: Bayesian Personalized Ranking from Implicit Feedback. In Proceedings of the Twenty-Fifth Conference on Uncertainty in Artificial Intelligence (UAI '09)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_39_1","DOI":"10.1145\/3289600.3291007"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_40_1","DOI":"10.1145\/3397271.3401281"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_41_1","DOI":"10.1145\/3394486.3403139"},{"volume-title":"Proceedings of The 33rd International Conference on Machine Learning .","author":"Schnabel T.","unstructured":"T. Schnabel, A. Swaminathan, A. Singh, N. Chandak, and T. Joachims. 2016. Recommendations as Treatments: Debiasing Learning and Evaluation. In Proceedings of The 33rd International Conference on Machine Learning .","key":"e_1_3_2_2_42_1"},{"key":"e_1_3_2_2_43_1","volume-title":"arxiv","author":"Schwartz Roy","year":"1907","unstructured":"Roy Schwartz, Jesse Dodge, Noah A. Smith, and Oren Etzioni. 2019. Green AI. arxiv: 1907.10597 [cs.CY]"},{"unstructured":"Ozan Sener and Silvio Savarese. 2018. Active Learning for Convolutional Neural Networks: A Core-Set Approach. In ICLR .","key":"e_1_3_2_2_44_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_45_1","DOI":"10.18653\/v1\/P19-1355"},{"unstructured":"Fan-Yun Sun Jordan Hoffman Vikas Verma and Jian Tang. 2020. InfoGraph: Unsupervised and Semi-supervised Graph-Level Representation Learning via Mutual Information Maximization. In ICLR .","key":"e_1_3_2_2_46_1"},{"unstructured":"M. Toneva A. Sordoni R. Combes A. Trischler Y. Bengio and G. Gordon. 2019. An Empirical Study of Example Forgetting during Deep Neural Network Learning. In ICLR .","key":"e_1_3_2_2_47_1"},{"unstructured":"A. Vaswani N. Shazeer N. Parmar J. Uszkoreit L. Jones A. Gomez \u0141. Kaiser and I. Polosukhin. 2017. Attention is All you Need. In NeurIPS .","key":"e_1_3_2_2_48_1"},{"volume-title":"Proceedings of the 12th ACM Conference on Recommender Systems .","author":"Wan M.","unstructured":"M. Wan and J. McAuley. 2018. Item Recommendation on Monotonic Behavior Chains. In Proceedings of the 12th ACM Conference on Recommender Systems .","key":"e_1_3_2_2_49_1"},{"key":"e_1_3_2_2_50_1","volume-title":"Sustainable AI: Environmental Implications, Challenges and Opportunities. arxiv: 2111.00364 [cs.LG]","author":"Wu C.","year":"2021","unstructured":"C. Wu, R. Raghavendra, U. Gupta, B. Acun, N. Ardalani, K. Maeng, G. Chang, F. A. Behram, J. Huang, C. Bai, M. Gschwind, A. Gupta, M. Ott, A. Melnikov, S. Candido, D. Brooks, G. Chauhan, B. Lee, H. S. Lee, B. Akyildiz, M. Balandat, J. Spisak, R. Jain, M. Rabbat, and K. Hazelwood. 2021. Sustainable AI: Environmental Implications, Challenges and Opportunities. arxiv: 2111.00364 [cs.LG]"},{"doi-asserted-by":"crossref","unstructured":"Amatriain X Jaimes A Oliver N and Pujol J.M. 2011. Data Mining Methods for Recommender Systems. In Recommender Systems Handbook . Springer.","key":"e_1_3_2_2_51_1","DOI":"10.1007\/978-0-387-85820-3_2"},{"volume-title":"Graph Convolutional Neural Networks for Web-Scale Recommender Systems. In KDD '18 .","author":"Ying R.","unstructured":"R. Ying, R. He, K. Chen, P. Eksombatchai, W. L. Hamilton, and J. Leskovec. 2018. Graph Convolutional Neural Networks for Web-Scale Recommender Systems. In KDD '18 .","key":"e_1_3_2_2_52_1"},{"doi-asserted-by":"crossref","unstructured":"Muhan Zhang Zhicheng Cui Marion Neumann and Yixin Chen. 2018. An End-to-End Deep Learning Architecture for Graph Classification. In AAAI .","key":"e_1_3_2_2_53_1","DOI":"10.1609\/aaai.v32i1.11782"},{"doi-asserted-by":"publisher","key":"e_1_3_2_2_54_1","DOI":"10.1145\/3383313.3412210"}],"event":{"sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"],"acronym":"WSDM '22","name":"WSDM '22: The Fifteenth ACM International Conference on Web Search and Data Mining","location":"Virtual Event AZ USA"},"container-title":["Proceedings of the Fifteenth ACM International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3488560.3498439","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3488560.3498439","content-type":"text\/html","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3488560.3498439","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3488560.3498439","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:18:51Z","timestamp":1750191531000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3488560.3498439"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,2,11]]},"references-count":53,"alternative-id":["10.1145\/3488560.3498439","10.1145\/3488560"],"URL":"https:\/\/doi.org\/10.1145\/3488560.3498439","relation":{},"subject":[],"published":{"date-parts":[[2022,2,11]]},"assertion":[{"value":"2022-02-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}