{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,26]],"date-time":"2025-07-26T09:00:16Z","timestamp":1753520416421,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":19,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,7,18]],"date-time":"2023-07-18T00:00:00Z","timestamp":1689638400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,7,19]]},"DOI":"10.1145\/3539618.3591993","type":"proceedings-article","created":{"date-parts":[[2023,7,19]],"date-time":"2023-07-19T00:22:23Z","timestamp":1689726143000},"page":"2032-2036","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Gradient Coordination for Quantifying and Maximizing Knowledge Transference in Multi-Task Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-6644-627X","authenticated-orcid":false,"given":"Xuanhua","family":"Yang","sequence":"first","affiliation":[{"name":"Alibaba Group, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2227-8916","authenticated-orcid":false,"given":"Jianxin","family":"Zhao","sequence":"additional","affiliation":[{"name":"Alibaba Group, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3058-5383","authenticated-orcid":false,"given":"Shaoguo","family":"Liu","sequence":"additional","affiliation":[{"name":"Alibaba Group, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5353-7803","authenticated-orcid":false,"given":"Liang","family":"Wang","sequence":"additional","affiliation":[{"name":"Alibaba Group, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4037-6315","authenticated-orcid":false,"given":"Bo","family":"Zheng","sequence":"additional","affiliation":[{"name":"Alibaba Group, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,7,18]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Multitask learning. Machine learning","author":"Caruana Rich","year":"1997","unstructured":"Rich Caruana. 1997. Multitask learning. Machine learning, Vol. 28, 1 (1997), 41--75."},{"key":"e_1_3_2_1_2_1","volume-title":"International conference on machine learning. PMLR, 794--803","author":"Chen Zhao","year":"2018","unstructured":"Zhao Chen, Vijay Badrinarayanan, Chen-Yu Lee, and Andrew Rabinovich. 2018. Gradnorm: Gradient normalization for adaptive loss balancing in deep multitask networks. In International conference on machine learning. PMLR, 794--803."},{"key":"e_1_3_2_1_3_1","unstructured":"Christopher Fifty Ehsan Amid Zhe Zhao Tianhe Yu Rohan Anil and Chelsea Finn. 2021. Efficiently Identifying Task Groupings for Multi-Task Learning. In Neural Information Processing Systems."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/2872427.2883037"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512093"},{"key":"e_1_3_2_1_6_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_7_1","volume-title":"Juho Lee, and Sung Ju Hwang.","author":"Lee Seanie","year":"2021","unstructured":"Seanie Lee, Hae Beom Lee, Juho Lee, and Sung Ju Hwang. 2021. Sequential Reptile: Inter-Task Gradient Alignment for Multilingual Learning. arXiv preprint arXiv:2110.02600 (2021)."},{"key":"e_1_3_2_1_8_1","volume-title":"Liang Wang, and Bo Zheng.","author":"Lin Zihan","year":"2022","unstructured":"Zihan Lin, Xuanhua Yang, Shaoguo Liu, Xiaoyu Peng, Wayne Xin Zhao, Liang Wang, and Bo Zheng. 2022. Personalized Inter-Task Contrastive Learning for CTR&CVR Joint Estimation. arXiv preprint arXiv:2208.13442 (2022)."},{"key":"e_1_3_2_1_9_1","first-page":"18878","article-title":"Conflict-averse gradient descent for multi-task learning","volume":"34","author":"Liu Bo","year":"2021","unstructured":"Bo Liu, Xingchao Liu, Xiaojie Jin, Peter Stone, and Qiang Liu. 2021. Conflict-averse gradient descent for multi-task learning. Advances in Neural Information Processing Systems, Vol. 34 (2021), 18878--18890.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220007"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210104"},{"key":"e_1_3_2_1_12_1","volume-title":"An overview of multi-task learning in deep neural networks. arXiv preprint arXiv:1706.05098","author":"Ruder Sebastian","year":"2017","unstructured":"Sebastian Ruder. 2017. An overview of multi-task learning in deep neural networks. arXiv preprint arXiv:1706.05098 (2017)."},{"key":"e_1_3_2_1_13_1","volume-title":"Multi-task learning as multi-objective optimization. Advances in neural information processing systems","author":"Sener Ozan","year":"2018","unstructured":"Ozan Sener and Vladlen Koltun. 2018. Multi-task learning as multi-objective optimization. Advances in neural information processing systems, Vol. 31 (2018)."},{"key":"e_1_3_2_1_14_1","volume-title":"On negative interference in multilingual models: Findings and a meta-learning treatment. arXiv preprint arXiv:2010.03017","author":"Wang Zirui","year":"2020","unstructured":"Zirui Wang, Zachary C Lipton, and Yulia Tsvetkov. 2020a. On negative interference in multilingual models: Findings and a meta-learning treatment. arXiv preprint arXiv:2010.03017 (2020)."},{"key":"e_1_3_2_1_15_1","volume-title":"Gradient vaccine: Investigating and improving multi-task optimization in massively multilingual models. arXiv preprint arXiv:2010.05874","author":"Wang Zirui","year":"2020","unstructured":"Zirui Wang, Yulia Tsvetkov, Orhan Firat, and Yuan Cao. 2020b. Gradient vaccine: Investigating and improving multi-task optimization in massively multilingual models. arXiv preprint arXiv:2010.05874 (2020)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482061"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3404835.3463053"},{"key":"e_1_3_2_1_18_1","first-page":"5824","article-title":"Gradient surgery for multi-task learning","volume":"33","author":"Yu Tianhe","year":"2020","unstructured":"Tianhe Yu, Saurabh Kumar, Abhishek Gupta, Sergey Levine, Karol Hausman, and Chelsea Finn. 2020. Gradient surgery for multi-task learning. Advances in Neural Information Processing Systems, Vol. 33 (2020), 5824--5836.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_19_1","volume-title":"International Conference on Machine Learning. PMLR, 4120--4129","author":"Zheng Shuxin","year":"2017","unstructured":"Shuxin Zheng, Qi Meng, Taifeng Wang, Wei Chen, Nenghai Yu, Zhi-Ming Ma, and Tie-Yan Liu. 2017. Asynchronous stochastic gradient descent with delay compensation. In International Conference on Machine Learning. PMLR, 4120--4129."}],"event":{"name":"SIGIR '23: The 46th International ACM SIGIR Conference on Research and Development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Taipei Taiwan","acronym":"SIGIR '23"},"container-title":["Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539618.3591993","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3539618.3591993","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:37:48Z","timestamp":1750178268000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3539618.3591993"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,18]]},"references-count":19,"alternative-id":["10.1145\/3539618.3591993","10.1145\/3539618"],"URL":"https:\/\/doi.org\/10.1145\/3539618.3591993","relation":{},"subject":[],"published":{"date-parts":[[2023,7,18]]},"assertion":[{"value":"2023-07-18","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}