{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:04:22Z","timestamp":1777655062098,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,24]],"date-time":"2024-08-24T00:00:00Z","timestamp":1724457600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Natural Science Foundation of China","award":["62102177"],"award-info":[{"award-number":["62102177"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100004608","name":"Natural Science Foundation of Jiangsu Province","doi-asserted-by":"publisher","award":["BK20210181"],"award-info":[{"award-number":["BK20210181"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100004608","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,25]]},"DOI":"10.1145\/3637528.3671715","type":"proceedings-article","created":{"date-parts":[[2024,8,25]],"date-time":"2024-08-25T04:54:55Z","timestamp":1724561695000},"page":"1290-1300","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Automatic Multi-Task Learning Framework with Neural Architecture Search in Recommendations"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9018-9649","authenticated-orcid":false,"given":"Shen","family":"Jiang","sequence":"first","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5069-5950","authenticated-orcid":false,"given":"Guanghui","family":"Zhu","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-8939-4290","authenticated-orcid":false,"given":"Yue","family":"Wang","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8746-8137","authenticated-orcid":false,"given":"Chunfeng","family":"Yuan","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1806-0936","authenticated-orcid":false,"given":"Yihua","family":"Huang","sequence":"additional","affiliation":[{"name":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,8,24]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Estimating or propagating gradients through stochastic neurons. arXiv preprint arXiv:1305.2982","author":"Bengio Yoshua","year":"2013","unstructured":"Yoshua Bengio. 2013. Estimating or propagating gradients through stochastic neurons. arXiv preprint arXiv:1305.2982 (2013)."},{"key":"e_1_3_2_2_2_1","volume-title":"Stabilizing darts with amended gradient estimation on architectural parameters. arXiv preprint arXiv:1910.11831","author":"Bi Kaifeng","year":"2019","unstructured":"Kaifeng Bi, Changping Hu, Lingxi Xie, Xin Chen, Longhui Wei, and Qi Tian. 2019. Stabilizing darts with amended gradient estimation on architectural parameters. arXiv preprint arXiv:1910.11831 (2019)."},{"key":"e_1_3_2_2_3_1","volume-title":"Gold-nas: Gradual, one-level, differentiable. arXiv preprint arXiv:2007.03331","author":"Bi Kaifeng","year":"2020","unstructured":"Kaifeng Bi, Lingxi Xie, Xin Chen, Longhui Wei, and Qi Tian. 2020. Gold-nas: Gradual, one-level, differentiable. arXiv preprint arXiv:2007.03331 (2020)."},{"key":"e_1_3_2_2_4_1","volume-title":"Proxylessnas: Direct neural architecture search on target task and hardware. arXiv preprint arXiv:1812.00332","author":"Cai Han","year":"2018","unstructured":"Han Cai, Ligeng Zhu, and Song Han. 2018. Proxylessnas: Direct neural architecture search on target task and hardware. arXiv preprint arXiv:1812.00332 (2018)."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/B978-1-55860-307-3.50012-5"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442442.3452323"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-020-01396-x"},{"key":"e_1_3_2_2_8_1","volume-title":"International conference on machine learning. PMLR, 794--803","author":"Chen Zhao","year":"2018","unstructured":"Zhao Chen, Vijay Badrinarayanan, Chen-Yu Lee, and Andrew Rabinovich. 2018. Gradnorm: Gradient normalization for adaptive loss balancing in deep multitask networks. In International conference on machine learning. PMLR, 794--803."},{"key":"e_1_3_2_2_9_1","first-page":"2039","article-title":"Just pick a sign: Optimizing deep multitask models with gradient sign dropout","volume":"33","author":"Chen Zhao","year":"2020","unstructured":"Zhao Chen, Jiquan Ngiam, Yanping Huang, Thang Luong, Henrik Kretzschmar, Yuning Chai, and Dragomir Anguelov. 2020. Just pick a sign: Optimizing deep multitask models with gradient sign dropout. Advances in Neural Information Processing Systems, Vol. 33 (2020), 2039--2050.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557559"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58555-6_28"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390177"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639344"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463022"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.5555\/3322706.3361996"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557624"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00197"},{"key":"e_1_3_2_2_19_1","unstructured":"Huifeng Guo Ruiming Tang Yunming Ye Zhenguo Li and Xiuqiang He. 2017. DeepFM: a factorization-machine based neural network for CTR prediction. arXiv preprint arXiv:1703.04247 (2017)."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58517-4_32"},{"key":"e_1_3_2_2_21_1","volume-title":"A joint many-task model: Growing a neural network for multiple NLP tasks. arXiv preprint arXiv:1611.01587","author":"Hashimoto Kazuma","year":"2016","unstructured":"Kazuma Hashimoto, Caiming Xiong, Yoshimasa Tsuruoka, and Richard Socher. 2016. A joint many-task model: Growing a neural network for multiple NLP tasks. arXiv preprint arXiv:1611.01587 (2016)."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512093"},{"key":"e_1_3_2_2_23_1","volume-title":"Rotograd: Gradient homogenization in multitask learning. arXiv preprint arXiv:2103.02631","author":"Javaloy Adri\u00e1n","year":"2021","unstructured":"Adri\u00e1n Javaloy and Isabel Valera. 2021. Rotograd: Gradient homogenization in multitask learning. arXiv preprint arXiv:2103.02631 (2021)."},{"key":"e_1_3_2_2_24_1","volume-title":"Proceedings of the Thirty-seventh Conference on Neural Information Processing Systems.","author":"Jiang Shen","year":"2023","unstructured":"Shen Jiang, Zipeng Ji, Guanghui Zhu, Chunfeng Yuan, and Yihua Huang. 2023. Operation-Level Early Stopping for Robustifying Differentiable NAS. In Proceedings of the Thirty-seventh Conference on Neural Information Processing Systems."},{"key":"e_1_3_2_2_25_1","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition. 7482--7491","author":"Kendall Alex","year":"2018","unstructured":"Alex Kendall, Yarin Gal, and Roberto Cipolla. 2018. Multi-task learning using uncertainty to weigh losses for scene geometry and semantics. In Proceedings of the IEEE conference on computer vision and pattern recognition. 7482--7491."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599769"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220023"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.07.045"},{"key":"e_1_3_2_2_29_1","volume-title":"Darts: Differentiable architecture search. arXiv preprint arXiv:1806.09055","author":"Liu Hanxiao","year":"2018","unstructured":"Hanxiao Liu, Karen Simonyan, and Yiming Yang. 2018. Darts: Differentiable architecture search. arXiv preprint arXiv:1806.09055 (2018)."},{"key":"e_1_3_2_2_30_1","volume-title":"Learnable embedding sizes for recommender systems. arXiv preprint arXiv:2101.07577","author":"Liu Siyi","year":"2021","unstructured":"Siyi Liu, Chen Gao, Yihong Chen, Depeng Jin, and Yong Li. 2021. Learnable embedding sizes for recommender systems. arXiv preprint arXiv:2101.07577 (2021)."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00197"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.3301216"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220007"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210104"},{"key":"e_1_3_2_2_35_1","unstructured":"Krzysztof Maziarz Efi Kokiopoulou Andrea Gesmundo Luciano Sbaiz Gabor Bartok and Jesse Berent. 2019. Gumbel-matrix routing for flexible multi-task learning. (2019)."},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.433"},{"key":"e_1_3_2_2_37_1","volume-title":"SAME: Scenario Adaptive Mixture-of-Experts for Promotion-Aware Click-Through Rate Prediction. arXiv preprint arXiv:2112.13747","author":"Pan Xiaofeng","year":"2021","unstructured":"Xiaofeng Pan, Yibin Shen, Jing Zhang, Keren Yu, Hong Wen, Shui Liu, Chengjun Mao, and Bo Cao. 2021. SAME: Scenario Adaptive Mixture-of-Experts for Promotion-Aware Click-Through Rate Prediction. arXiv preprint arXiv:2112.13747 (2021)."},{"key":"e_1_3_2_2_38_1","volume-title":"International Conference on Machine Learning. PMLR, 4095--4104","author":"Pham Hieu","year":"2018","unstructured":"Hieu Pham, Melody Guan, Barret Zoph, Quoc Le, and Jeff Dean. 2018. Efficient neural architecture search via parameters sharing. In International Conference on Machine Learning. PMLR, 4095--4104."},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403359"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2016.0151"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2010.127"},{"key":"e_1_3_2_2_42_1","volume-title":"An overview of multi-task learning in deep neural networks. arXiv preprint arXiv:1706.05098","author":"Ruder Sebastian","year":"2017","unstructured":"Sebastian Ruder. 2017. An overview of multi-task learning in deep neural networks. arXiv preprint arXiv:1706.05098 (2017)."},{"key":"e_1_3_2_2_43_1","volume-title":"Sluice networks: Learning what to share between loosely related tasks. arXiv preprint arXiv:1705.08142","author":"Ruder Sebastian","year":"2017","unstructured":"Sebastian Ruder, Joachim Bingel, Isabelle Augenstein, and Anders S\u00f8gaard. 2017. Sluice networks: Learning what to share between loosely related tasks. arXiv preprint arXiv:1705.08142, Vol. 2 (2017)."},{"key":"e_1_3_2_2_44_1","volume-title":"Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. arXiv preprint arXiv:1701.06538","author":"Shazeer Noam","year":"2017","unstructured":"Noam Shazeer, Azalia Mirhoseini, Krzysztof Maziarz, Andy Davis, Quoc Le, Geoffrey Hinton, and Jeff Dean. 2017. Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. arXiv preprint arXiv:1701.06538 (2017)."},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3357925"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383313.3412236"},{"key":"e_1_3_2_2_47_1","volume-title":"Rethinking architecture selection in differentiable NAS. arXiv preprint arXiv:2108.04392","author":"Wang Ruochen","year":"2021","unstructured":"Ruochen Wang, Minhao Cheng, Xiangning Chen, Xiaocheng Tang, and Cho-Jui Hsieh. 2021. Rethinking architecture selection in differentiable NAS. arXiv preprint arXiv:2108.04392 (2021)."},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3124749.3124754"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591767"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467071"},{"key":"e_1_3_2_2_51_1","volume-title":"SNAS: stochastic neural architecture search. arXiv preprint arXiv:1812.09926","author":"Xie Sirui","year":"2018","unstructured":"Sirui Xie, Hehui Zheng, Chunxiao Liu, and Liang Lin. 2018. SNAS: stochastic neural architecture search. arXiv preprint arXiv:1812.09926 (2018)."},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6143"},{"key":"e_1_3_2_2_53_1","volume-title":"Understanding straight-through estimator in training activation quantized neural nets. arXiv preprint arXiv:1903.05662","author":"Yin Penghang","year":"2019","unstructured":"Penghang Yin, Jiancheng Lyu, Shuai Zhang, Stanley Osher, Yingyong Qi, and Jack Xin. 2019. Understanding straight-through estimator in training activation quantized neural nets. arXiv preprint arXiv:1903.05662 (2019)."},{"key":"e_1_3_2_2_54_1","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Yu Tianhe","year":"2020","unstructured":"Tianhe Yu, Saurabh Kumar, Abhishek Gupta, Sergey Levine, Karol Hausman, and Chelsea Finn. 2020. Gradient Surgery for Multi-Task Learning. In Advances in Neural Information Processing Systems, Vol. 33. Curran Associates, Inc., 5824--5836."},{"key":"e_1_3_2_2_55_1","first-page":"11480","article-title":"Tenrec: A large-scale multipurpose benchmark dataset for recommender systems","volume":"35","author":"Yuan Guanghu","year":"2022","unstructured":"Guanghu Yuan, Fajie Yuan, Yudong Li, Beibei Kong, Shujie Li, Lei Chen, Min Yang, Chenyun Yu, Bo Hu, Zang Li, et al. 2022. Tenrec: A large-scale multipurpose benchmark dataset for recommender systems. Advances in Neural Information Processing Systems, Vol. 35 (2022), 11480--11493.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_56_1","volume-title":"Proceedings, Part VI 13","author":"Zhang Zhanpeng","year":"2014","unstructured":"Zhanpeng Zhang, Ping Luo, Chen Change Loy, and Xiaoou Tang. 2014. Facial landmark detection by deep multi-task learning. In Computer Vision--ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6--12, 2014, Proceedings, Part VI 13. Springer, 94--108."},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1145\/3298689.3346997"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3532030"},{"key":"e_1_3_2_2_59_1","volume-title":"Neural architecture search with reinforcement learning. arXiv preprint arXiv:1611.01578","author":"Zoph Barret","year":"2016","unstructured":"Barret Zoph and Quoc V Le. 2016. Neural architecture search with reinforcement learning. arXiv preprint arXiv:1611.01578 (2016)."},{"key":"e_1_3_2_2_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531942"}],"event":{"name":"KDD '24: The 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Barcelona Spain","acronym":"KDD '24","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3671715","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3637528.3671715","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:06:00Z","timestamp":1750291560000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3671715"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,24]]},"references-count":60,"alternative-id":["10.1145\/3637528.3671715","10.1145\/3637528"],"URL":"https:\/\/doi.org\/10.1145\/3637528.3671715","relation":{},"subject":[],"published":{"date-parts":[[2024,8,24]]},"assertion":[{"value":"2024-08-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}