{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,7]],"date-time":"2026-07-07T15:36:01Z","timestamp":1783438561550,"version":"3.54.6"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","funder":[{"name":"Quantum Science and Technology-National Science and Technology Major Project &#x28;QNMP&#x29;","award":["2021ZD0302900"],"award-info":[{"award-number":["2021ZD0302900"]}]},{"name":"China National Natural Science Foundation","award":["62132018"],"award-info":[{"award-number":["62132018"]}]},{"name":"China National Natural Science Foundation","award":["62231015"],"award-info":[{"award-number":["62231015"]}]},{"name":"China National Natural Science Foundation","award":["62441228"],"award-info":[{"award-number":["62441228"]}]},{"name":"&#x5c;&quot;Pioneer&rdquo; and &ldquo;Leading Goose&rdquo; R&amp;D Program of Zhejiang","award":["2023C01029"],"award-info":[{"award-number":["2023C01029"]}]},{"name":"&#x5c;&quot;Pioneer&rdquo; and &ldquo;Leading Goose&rdquo; R&amp;D Program of Zhejiang","award":["2023C01143"],"award-info":[{"award-number":["2023C01143"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,2,22]]},"DOI":"10.1145\/3773966.3777937","type":"proceedings-article","created":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T17:50:01Z","timestamp":1771264201000},"page":"607-617","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Fast-DataShapley: Neural Modeling for Training Data Valuation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-4980-6263","authenticated-orcid":false,"given":"Haifeng","family":"Sun","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, Heifei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9656-6193","authenticated-orcid":false,"given":"Yu","family":"Xiong","sequence":"additional","affiliation":[{"name":"Fuxi AI Lab, NetEase Inc., Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6986-5825","authenticated-orcid":false,"given":"Runze","family":"Wu","sequence":"additional","affiliation":[{"name":"Fuxi AI Lab, NetEase Inc., Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-8274-0595","authenticated-orcid":false,"given":"Xinyu","family":"Cai","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Nanyang Avenue, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5420-0516","authenticated-orcid":false,"given":"Changjie","family":"Fan","sequence":"additional","affiliation":[{"name":"Fuxi AI Lab, NetEase Inc., Hangzhou, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1004-8588","authenticated-orcid":false,"given":"Lan","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Heifei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6070-6625","authenticated-orcid":false,"given":"Xiang-Yang","family":"Li","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Heifei, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,2,21]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Sample size planning for classification models. Analytica chimica acta","author":"Beleites Claudia","year":"2013","unstructured":"Claudia Beleites, Ute Neugebauer, Thomas Bocklitz, Christoph Krafft, and J\u00fcrgen Popp. 2013. Sample size planning for classification models. Analytica chimica acta, Vol. 760 (2013), 25-33."},{"key":"e_1_3_2_1_2_1","volume-title":"Evaluating and aggregating feature-based model explanations. arXiv preprint arXiv:2005.00631","author":"Bhatt Umang","year":"2020","unstructured":"Umang Bhatt, Adrian Weller, and Jos\u00e9 MF Moura. 2020. Evaluating and aggregating feature-based model explanations. arXiv preprint arXiv:2005.00631 (2020)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445894"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1142\/S0219198909002431"},{"key":"e_1_3_2_1_5_1","volume-title":"Extremal principle solutions of games in characteristic function form: core, Chebychev and Shapley value generalizations. Econometrics of planning and efficiency","author":"Charnes A","year":"1988","unstructured":"A Charnes, B Golany, M Keane, and J Rousseau. 1988. Extremal principle solutions of games in characteristic function form: core, Chebychev and Shapley value generalizations. Econometrics of planning and efficiency (1988), 123-133."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i8.16871"},{"key":"e_1_3_2_1_7_1","volume-title":"International Conference on Artificial Intelligence and Statistics. PMLR, 3457-3465","author":"Covert Ian","year":"2021","unstructured":"Ian Covert and Su-In Lee. 2021. Improving kernelshap: Practical shapley value estimation using linear regression. In International Conference on Artificial Intelligence and Statistics. PMLR, 3457-3465."},{"key":"e_1_3_2_1_8_1","first-page":"2881","article-title":"What neural networks memorize and why: Discovering the long tail via influence estimation","volume":"33","author":"Feldman Vitaly","year":"2020","unstructured":"Vitaly Feldman and Chiyuan Zhang. 2020. What neural networks memorize and why: Discovering the long tail via influence estimation. Advances in Neural Information Processing Systems, Vol. 33 (2020), 2881-2891.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_9_1","first-page":"554","volume-title":"Applications and Worksharing: 17th EAI International Conference, CollaborateCom 2021, Virtual Event, October 16-18, 2021, Proceedings, Part I 17","author":"Feng Zhenni","year":"2021","unstructured":"Zhenni Feng, Junchang Chen, and Tong Liu. 2021. An online truthful auction for IoT data trading with dynamic data owners. In Collaborative Computing: Networking, Applications and Worksharing: 17th EAI International Conference, CollaborateCom 2021, Virtual Event, October 16-18, 2021, Proceedings, Part I 17. Springer, 554-571."},{"key":"e_1_3_2_1_10_1","volume-title":"International conference on machine learning. PMLR, 2242-2251","author":"Ghorbani Amirata","year":"2019","unstructured":"Amirata Ghorbani and James Zou. 2019. Data shapley: Equitable valuation of data for machine learning. In International conference on machine learning. PMLR, 2242-2251."},{"key":"e_1_3_2_1_11_1","first-page":"45 45","volume-title":"Proceedings of the 8th International Conference on Operations Research and Enterprise Systems-Volume 1: ICORES","author":"Gim\u00e9nez Pradales Jos\u00e9 Miguel","year":"2019","unstructured":"Jos\u00e9 Miguel Gim\u00e9nez Pradales and Mar\u00eda Albina Puente del Campo. 2019. The Owen and the Owen-Banzhaf values applied to the study of the Madrid Assembly and the Andalusian Parliament in legislature 2015-2019. In Proceedings of the 8th International Conference on Operations Research and Enterprise Systems-Volume 1: ICORES, 45-52, 2019, Prague, Czech Republic. Scitepress, 45-52."},{"key":"e_1_3_2_1_12_1","volume-title":"Peter Hase, Mohit Bansal, and Caiming Xiong.","author":"Guo Han","year":"2020","unstructured":"Han Guo, Nazneen Fatema Rajani, Peter Hase, Mohit Bansal, and Caiming Xiong. 2020. Fastif: Scalable influence functions for efficient model interpretation and debugging. arXiv preprint arXiv:2012.15781 (2020)."},{"key":"e_1_3_2_1_13_1","volume-title":"Training data influence analysis and estimation: A survey. arXiv preprint arXiv:2212.04612","author":"Hammoudeh Zayd","year":"2022","unstructured":"Zayd Hammoudeh and Daniel Lowd. 2022. Training data influence analysis and estimation: A survey. arXiv preprint arXiv:2212.04612 (2022)."},{"key":"e_1_3_2_1_14_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Hara Satoshi","year":"2019","unstructured":"Satoshi Hara, Atsushi Nitanda, and Takanori Maehara. 2019. Data cleansing for models trained with SGD. Advances in Neural Information Processing Systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_15_1","volume-title":"A benchmark for interpretability methods in deep neural networks. Advances in neural information processing systems","author":"Hooker Sara","year":"2019","unstructured":"Sara Hooker, Dumitru Erhan, Pieter-Jan Kindermans, and Been Kim. 2019. A benchmark for interpretability methods in deep neural networks. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_16_1","volume-title":"Neural tangent kernel: Convergence and generalization in neural networks. Advances in neural information processing systems","author":"Jacot Arthur","year":"2018","unstructured":"Arthur Jacot, Franck Gabriel, and Cl\u00e9ment Hongler. 2018. Neural tangent kernel: Convergence and generalization in neural networks. Advances in neural information processing systems, Vol. 31 (2018)."},{"key":"e_1_3_2_1_17_1","volume-title":"International Conference on Learning Representations.","author":"Jethani Neil","year":"2021","unstructured":"Neil Jethani, Mukund Sudarshan, Ian Connick Covert, Su-In Lee, and Rajesh Ranganath. 2021. Fastshap: Real-time shapley value estimation. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_18_1","volume-title":"Nezihe Merve Gurel, Bo Li, Ce Zhang, Costas J Spanos, and Dawn Song.","author":"Jia Ruoxi","year":"2019","unstructured":"Ruoxi Jia, David Dao, Boxin Wang, Frances Ann Hubis, Nezihe Merve Gurel, Bo Li, Ce Zhang, Costas J Spanos, and Dawn Song. 2019a. Efficient task-specific data valuation for nearest neighbor algorithms. arXiv preprint arXiv:1908.08619 (2019)."},{"key":"e_1_3_2_1_19_1","volume-title":"The 22nd International Conference on Artificial Intelligence and Statistics. PMLR, 1167-1176","author":"Jia Ruoxi","year":"2019","unstructured":"Ruoxi Jia, David Dao, Boxin Wang, Frances Ann Hubis, Nick Hynes, Nezihe Merve G\u00fcrel, Bo Li, Ce Zhang, Dawn Song, and Costas J Spanos. 2019b. Towards efficient data valuation based on the shapley value. In The 22nd International Conference on Artificial Intelligence and Statistics. PMLR, 1167-1176."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00814"},{"key":"e_1_3_2_1_21_1","volume-title":"Characterizing structural regularities of labeled data in overparameterized models. arXiv preprint arXiv:2002.03206","author":"Jiang Ziheng","year":"2020","unstructured":"Ziheng Jiang, Chiyuan Zhang, Kunal Talwar, and Michael C Mozer. 2020. Characterizing structural regularities of labeled data in overparameterized models. arXiv preprint arXiv:2002.03206 (2020)."},{"key":"e_1_3_2_1_22_1","volume-title":"groupShapley: Efficient prediction explanation with Shapley values for feature groups. arXiv preprint arXiv:2106.12228","author":"Jullum Martin","year":"2021","unstructured":"Martin Jullum, Annabelle Redelmeier, and Kjersti Aas. 2021. groupShapley: Efficient prediction explanation with Shapley values for feature groups. arXiv preprint arXiv:2106.12228 (2021)."},{"key":"e_1_3_2_1_23_1","first-page":"583","volume-title":"Nature","volume":"596","author":"Jumper John","year":"2021","unstructured":"John Jumper, Richard Evans, Alexander Pritzel, Tim Green, Michael Figurnov, Olaf Ronneberger, Kathryn Tunyasuvunakool, Russ Bates, Augustin \u017d\u00eddek, Anna Potapenko, et al., 2021. Highly accurate protein structure prediction with AlphaFold. Nature, Vol. 596, 7873 (2021), 583-589."},{"key":"e_1_3_2_1_24_1","volume-title":"International Conference on Machine Learning. PMLR, 10697-10707","author":"Kandpal Nikhil","year":"2022","unstructured":"Nikhil Kandpal, Eric Wallace, and Colin Raffel. 2022. Deduplicating training data mitigates privacy risks in language models. In International Conference on Machine Learning. PMLR, 10697-10707."},{"key":"e_1_3_2_1_25_1","volume-title":"International conference on machine learning. PMLR","author":"Koh Pang Wei","year":"2017","unstructured":"Pang Wei Koh and Percy Liang. 2017. Understanding black-box predictions via influence functions. In International conference on machine learning. PMLR, 1885-1894."},{"key":"e_1_3_2_1_26_1","unstructured":"Alex Krizhevsky Geoffrey Hinton et al. 2009. Learning multiple layers of features from tiny images. (2009)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/5.726791"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Robert F Ling. 1984. Residuals and influence in regression.","DOI":"10.2307\/1269506"},{"key":"e_1_3_2_1_29_1","volume-title":"A unified approach to interpreting model predictions. Advances in neural information processing systems","author":"Lundberg Scott M","year":"2017","unstructured":"Scott M Lundberg and Su-In Lee. 2017. A unified approach to interpreting model predictions. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01216-8_12"},{"key":"e_1_3_2_1_31_1","first-page":"19920","article-title":"Estimating training data influence by tracing gradient descent","volume":"33","author":"Pruthi Garima","year":"2020","unstructured":"Garima Pruthi, Frederick Liu, Satyen Kale, and Mukund Sundararajan. 2020. Estimating training data influence by tracing gradient descent. Advances in Neural Information Processing Systems, Vol. 33 (2020), 19920-19930.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_32_1","volume-title":"Data engineering for everyone. arXiv preprint arXiv:2102.11447","author":"Reddi Vijay Janapa","year":"2021","unstructured":"Vijay Janapa Reddi, Greg Diamos, Pete Warden, Peter Mattson, and David Kanter. 2021. Data engineering for everyone. arXiv preprint arXiv:2102.11447 (2021)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1006\/game.1997.0622"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i8.20791"},{"key":"e_1_3_2_1_35_1","first-page":"34574","article-title":"CS-Shapley: Class-wise Shapley Values for Data Valuation in Classification","volume":"35","author":"Schoch Stephanie","year":"2022","unstructured":"Stephanie Schoch, Haifeng Xu, and Yangfeng Ji. 2022. CS-Shapley: Class-wise Shapley Values for Data Valuation in Classification. Advances in Neural Information Processing Systems, Vol. 35 (2022), 34574-34585.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Lloyd S Shapley et al. 1953. A value for n-person games. Contributions to the Theory ofGames 2(28):307\u2013 317 (1953).","DOI":"10.1515\/9781400881970-018"},{"key":"e_1_3_2_1_37_1","volume-title":"International Conference on Machine Learning. PMLR, 4577-4585","author":"Sharchilev Boris","year":"2018","unstructured":"Boris Sharchilev, Yury Ustinovskiy, Pavel Serdyukov, and Maarten Rijke. 2018. Finding influential training samples for gradient boosted decision trees. In International Conference on Machine Learning. PMLR, 4577-4585."},{"key":"e_1_3_2_1_38_1","volume-title":"Representer point selection via local jacobian expansion for post-hoc classifier explanation of deep neural networks and ensemble models. Advances in neural information processing systems","author":"Sui Yi","year":"2021","unstructured":"Yi Sui, Ga Wu, and Scott Sanner. 2021. Representer point selection via local jacobian expansion for post-hoc classifier explanation of deep neural networks and ensemble models. Advances in neural information processing systems, Vol. 34 (2021), 23347-23358."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00931"},{"key":"e_1_3_2_1_40_1","volume-title":"Proc. ICML.","author":"Wang Jingtan","year":"2024","unstructured":"Jingtan Wang, Xiaoqiang Lin, Rui Qiao, Chuan-Sheng Foo, and Bryan Kian Hsiang Low. 2024. Helpful or Harmful Data? Fine-tuning-free Shapley Attribution for Explaining Language Model Predictions. In Proc. ICML."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2016.7841024"},{"key":"e_1_3_2_1_42_1","volume-title":"International Conference on Machine Learning. PMLR, 24150-24176","author":"Wu Zhaoxuan","year":"2022","unstructured":"Zhaoxuan Wu, Yao Shu, and Bryan Kian Hsiang Low. 2022. Davinz: Data valuation using deep neural networks at initialization. In International Conference on Machine Learning. PMLR, 24150-24176."},{"key":"e_1_3_2_1_43_1","first-page":"223","volume-title":"Nature","volume":"602","author":"Wurman Peter R","year":"2022","unstructured":"Peter R Wurman, Samuel Barrett, Kenta Kawamoto, James MacGlashan, Kaushik Subramanian, Thomas J Walsh, Roberto Capobianco, Alisa Devlic, Franziska Eckert, Florian Fuchs, et al., 2022. Outracing champion Gran Turismo drivers with deep reinforcement learning. Nature, Vol. 602, 7896 (2022), 223-228."},{"key":"e_1_3_2_1_44_1","unstructured":"Shuangshuang Xue and Xiang-Yang Li. 2022. Competitive Online Truthful Time-Sensitive-Valued Data Auction. arXiv:2210.10945 [cs.GT]"},{"key":"e_1_3_2_1_45_1","volume-title":"Ian En-Hsu Yen, and Pradeep K Ravikumar","author":"Yeh Chih-Kuan","year":"2018","unstructured":"Chih-Kuan Yeh, Joon Kim, Ian En-Hsu Yen, and Pradeep K Ravikumar. 2018. Representer point selection for explaining deep neural networks. Advances in neural information processing systems, Vol. 31 (2018)."},{"key":"e_1_3_2_1_46_1","first-page":"32285","article-title":"First is Better Than Last for Language Data Influence","volume":"35","author":"Yeh Chih-Kuan","year":"2022","unstructured":"Chih-Kuan Yeh, Ankur Taly, Mukund Sundararajan, Frederick Liu, and Pradeep Ravikumar. 2022. First is Better Than Last for Language Data Influence. Advances in Neural Information Processing Systems, Vol. 35 (2022), 32285-32298.","journal-title":"Advances in Neural Information Processing Systems"}],"event":{"name":"WSDM '26:The Nineteenth ACM International Conference on Web Search and Data Mining","location":"Boise ID USA","sponsor":["SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGIR ACM Special Interest Group on Information Retrieval","SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the Nineteenth ACM International Conference on Web Search and Data Mining"],"original-title":[],"deposited":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T17:55:18Z","timestamp":1771264518000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3773966.3777937"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,21]]},"references-count":46,"alternative-id":["10.1145\/3773966.3777937","10.1145\/3773966"],"URL":"https:\/\/doi.org\/10.1145\/3773966.3777937","relation":{},"subject":[],"published":{"date-parts":[[2026,2,21]]},"assertion":[{"value":"2026-02-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}