{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T05:46:53Z","timestamp":1777873613514,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,3]]},"DOI":"10.1145\/3711896.3737056","type":"proceedings-article","created":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T21:07:39Z","timestamp":1754255259000},"page":"1891-1902","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["MOTTO: A Mixture-of-Experts Framework for Multi-Treatment, Multi-Outcome Treatment Effect Estimation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6872-0106","authenticated-orcid":false,"given":"Yiling","family":"Liu","sequence":"first","affiliation":[{"name":"Duke University, Durham, NC, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9106-1092","authenticated-orcid":false,"given":"Wei","family":"Shi","sequence":"additional","affiliation":[{"name":"AI at Meta, Sunnyvale, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6510-8249","authenticated-orcid":false,"given":"Chen","family":"Fu","sequence":"additional","affiliation":[{"name":"AI at Meta, Sunnyvale, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7096-4612","authenticated-orcid":false,"given":"Ziyang","family":"Jiang","sequence":"additional","affiliation":[{"name":"AI at Meta, Menlo Park, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2045-0701","authenticated-orcid":false,"given":"Zhigang","family":"Hua","sequence":"additional","affiliation":[{"name":"AI at Meta, Sunnyvale, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1005-6385","authenticated-orcid":false,"given":"David","family":"Carlson","sequence":"additional","affiliation":[{"name":"Duke University, Durham, NC, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,8,3]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"International Conference on Artificial Intelligence and Statistics. PMLR","author":"Assaad Serge","year":"2021","unstructured":"Serge Assaad, Shuxi Zeng, Chenyang Tao, Shounak Datta, Nikhil Mehta, Ricardo Henao, Fan Li, and Lawrence Carin. 2021. Counterfactual representation learning with balancing weights. In International Conference on Artificial Intelligence and Statistics. PMLR, 1972-1980."},{"key":"e_1_3_2_2_2_1","volume-title":"Estimating treatment effects with causal forests: An application. Observational studies","author":"Athey Susan","year":"2019","unstructured":"Susan Athey and Stefan Wager. 2019. Estimating treatment effects with causal forests: An application. Observational studies, Vol. 5, 2 (2019), 37-51."},{"key":"e_1_3_2_2_3_1","volume-title":"Deepseekmoe: Towards ultimate expert specialization in mixture-of-experts language models. arXiv preprint arXiv:2401.06066(2024).","author":"Dai Damai","year":"2024","unstructured":"Damai Dai, Chengqi Deng, Chenggang Zhao, RX Xu, Huazuo Gao, Deli Chen, Jiashi Li, Wangding Zeng, Xingkai Yu, Y Wu, et al., 2024. Deepseekmoe: Towards ultimate expert specialization in mixture-of-experts language models. arXiv preprint arXiv:2401.06066(2024)."},{"key":"e_1_3_2_2_4_1","volume-title":"The 2019 ACM SIGKDD Workshop on Causal Discovery. PMLR, 34-49","author":"Du Shuyang","year":"2019","unstructured":"Shuyang Du, James Lee, and Farzin Ghaffarizadeh. 2019. Improve user retention with causal learning. In The 2019 ACM SIGKDD Workshop on Causal Discovery. PMLR, 34-49."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41591-024-02902-1"},{"key":"e_1_3_2_2_6_1","volume-title":"International Conference on Learning Representations.","author":"Hassanpour Negar","year":"2019","unstructured":"Negar Hassanpour and Russell Greiner. 2019. Learning disentangled representations for counterfactual regression. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1198\/jcgs.2010.08162"},{"key":"e_1_3_2_2_8_1","volume-title":"Thomas PA Debray, and Johannes B Reitsma.","author":"Hoogland Jeroen","year":"2021","unstructured":"Jeroen Hoogland, Joanna IntHout, Michail Belias, Maroeska M Rovers, Richard D Riley, Frank E. Harrell Jr, Karel GM Moons, Thomas PA Debray, and Johannes B Reitsma. 2021. A tutorial on individualized treatment effect prediction from randomized trials with a binary endpoint. Statistics in medicine, Vol. 40, 26 (2021), 5961-5981."},{"key":"e_1_3_2_2_9_1","first-page":"226","volume-title":"Entire Chain Uplift Modeling with Context-Enhanced Learning for Intelligent Marketing. In Companion Proceedings of the ACM on Web Conference","author":"Huang Yinqiu","year":"2024","unstructured":"Yinqiu Huang, Shuli Wang, Min Gao, Xue Wei, Changhao Li, Chuan Luo, Yinhua Zhu, Xiong Xiao, and Yi Luo. 2024. Entire Chain Uplift Modeling with Context-Enhanced Learning for Intelligent Marketing. In Companion Proceedings of the ACM on Web Conference 2024. 226-234."},{"key":"e_1_3_2_2_10_1","volume-title":"Adaptive mixtures of local experts. Neural computation","author":"Jacobs Robert A","year":"1991","unstructured":"Robert A Jacobs, Michael I Jordan, Steven J Nowlan, and Geoffrey E Hinton. 1991. Adaptive mixtures of local experts. Neural computation, Vol. 3, 1 (1991), 79-87."},{"key":"e_1_3_2_2_11_1","volume-title":"Diego de las Casas, Emma Bou Hanna, Florian Bressand, et al.","author":"Jiang Albert Q","year":"2024","unstructured":"Albert Q Jiang, Alexandre Sablayrolles, Antoine Roux, Arthur Mensch, Blanche Savary, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Emma Bou Hanna, Florian Bressand, et al., 2024. Mixtral of experts. arXiv preprint arXiv:2401.04088(2024)."},{"key":"e_1_3_2_2_12_1","volume-title":"International Conference on Machine Learning. PMLR, 15023-15040","author":"Jiang Ziyang","year":"2023","unstructured":"Ziyang Jiang, Zhuoran Hou, Yiling Liu, Yiman Ren, Keyu Li, and David Carlson. 2023. Estimating causal effects using a multi-task deep ensemble. In International Conference on Machine Learning. PMLR, 15023-15040."},{"key":"e_1_3_2_2_13_1","volume-title":"International conference on machine learning. PMLR, 3020-3029","author":"Johansson Fredrik","year":"2016","unstructured":"Fredrik Johansson, Uri Shalit, and David Sontag. 2016. Learning representations for counterfactual inference. In International conference on machine learning. PMLR, 3020-3029."},{"key":"e_1_3_2_2_14_1","first-page":"1","article-title":"Generalization bounds and representation learning for estimation of potential outcomes and causal effects","volume":"23","author":"Johansson Fredrik D","year":"2022","unstructured":"Fredrik D Johansson, Uri Shalit, Nathan Kallus, and David Sontag. 2022. Generalization bounds and representation learning for estimation of potential outcomes and causal effects. Journal of Machine Learning Research, Vol. 23, 166 (2022), 1-50.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_15_1","volume-title":"The 22nd International Conference on Artificial Intelligence and Statistics. PMLR, 527-536","author":"Johansson Fredrik D","year":"2019","unstructured":"Fredrik D Johansson, David Sontag, and Rajesh Ranganath. 2019. Support and invertibility in domain-invariant representations. In The 22nd International Conference on Artificial Intelligence and Statistics. PMLR, 527-536."},{"key":"e_1_3_2_2_16_1","volume-title":"Hierarchical mixtures of experts and the EM algorithm. Neural computation","author":"Jordan Michael I","year":"1994","unstructured":"Michael I Jordan and Robert A Jacobs. 1994. Hierarchical mixtures of experts and the EM algorithm. Neural computation, Vol. 6, 2 (1994), 181-214."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599820"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1080\/10618600.2017.1356325"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220007"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1093\/biomet\/70.1.41"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1198\/016214504000001880"},{"key":"e_1_3_2_2_22_1","volume-title":"International conference on machine learning. PMLR, 3076-3085","author":"Shalit Uri","year":"2017","unstructured":"Uri Shalit, Fredrik D Johansson, and David Sontag. 2017. Estimating individual treatment effect: generalization bounds and algorithms. In International conference on machine learning. PMLR, 3076-3085."},{"key":"e_1_3_2_2_23_1","unstructured":"Noam Shazeer Azalia Mirhoseini Krzysztof Maziarz Andy Davis Quoc Le Geoffrey Hinton and Jeff Dean. 2017. Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. arXiv preprint arXiv:1701.06538(2017)."},{"key":"e_1_3_2_2_24_1","volume-title":"Adapting neural networks for the estimation of treatment effects. Advances in neural information processing systems","author":"Shi Claudia","year":"2019","unstructured":"Claudia Shi, David Blei, and Victor Veitch. 2019. Adapting neural networks for the estimation of treatment effects. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_2_25_1","volume-title":"ICASSP 2024-2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 5065-5069","author":"Sun Zexu","year":"2024","unstructured":"Zexu Sun and Xu Chen. 2024. M 3 TN: Multi-Gate Mixture-of-Experts Based Multi-Valued Treatment Network for Uplift Modeling. In ICASSP 2024-2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 5065-5069."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383313.3412236"},{"key":"e_1_3_2_2_27_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288(2023)."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450078"},{"key":"e_1_3_2_2_29_1","unstructured":"Yuxiang Wei Zhaoxin Qiu Yingjie Li Yuke Sun and Xiaoling Li. 2024. Multi-Treatment Multi-Task Uplift Modeling for Enhancing User Growth. arXiv preprint arXiv:2408.12803(2024)."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3714430"},{"key":"e_1_3_2_2_31_1","volume-title":"Representation learning for treatment effect estimation from observational data. Advances in neural information processing systems","author":"Yao Liuyi","year":"2018","unstructured":"Liuyi Yao, Sheng Li, Yaliang Li, Mengdi Huai, Jing Gao, and Aidong Zhang. 2018. Representation learning for treatment effect estimation from observational data. Advances in neural information processing systems, Vol. 31 (2018)."},{"key":"e_1_3_2_2_32_1","volume-title":"International Conference on Artificial Intelligence and Statistics. PMLR, 1005-1014","author":"Zhang Yao","year":"2020","unstructured":"Yao Zhang, Alexis Bellot, and Mihaela Schaar. 2020. Learning overlapping representations for the estimation of individualized treatment effects. In International Conference on Artificial Intelligence and Statistics. PMLR, 1005-1014."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/DSAA.2019.00057"}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Toronto ON Canada","acronym":"KDD '25","sponsor":["SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3711896.3737056","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T18:06:03Z","timestamp":1777572363000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3711896.3737056"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,3]]},"references-count":33,"alternative-id":["10.1145\/3711896.3737056","10.1145\/3711896"],"URL":"https:\/\/doi.org\/10.1145\/3711896.3737056","relation":{},"subject":[],"published":{"date-parts":[[2025,8,3]]},"assertion":[{"value":"2025-08-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}