{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T19:10:06Z","timestamp":1755889806744,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,13]]},"DOI":"10.1145\/3726302.3729939","type":"proceedings-article","created":{"date-parts":[[2025,7,14]],"date-time":"2025-07-14T01:18:36Z","timestamp":1752455916000},"page":"2122-2131","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["CSMF: Cascaded Selective Mask Fine-Tuning for Multi-Objective Embedding-Based Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-6335-7405","authenticated-orcid":false,"given":"Hao","family":"Deng","sequence":"first","affiliation":[{"name":"Alibaba International Digital Commerce Group, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5786-7627","authenticated-orcid":false,"given":"Haibo","family":"Xing","sequence":"additional","affiliation":[{"name":"Alibaba International Digital Commerce Group, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-1365-5375","authenticated-orcid":false,"given":"Kanefumi","family":"Matsuyama","sequence":"additional","affiliation":[{"name":"Alibaba International Digital Commerce Group, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9104-1881","authenticated-orcid":false,"given":"Moyu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Alibaba International Digital Commerce Group, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7252-5207","authenticated-orcid":false,"given":"Jinxin","family":"Hu","sequence":"additional","affiliation":[{"name":"Alibaba International Digital Commerce Group, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6529-0692","authenticated-orcid":false,"given":"Hong","family":"Wen","sequence":"additional","affiliation":[{"name":"Unaffiliated, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6057-7886","authenticated-orcid":false,"given":"Yu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Alibaba International Digital Commerce Group, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3742-4910","authenticated-orcid":false,"given":"Xiaoyi","family":"Zeng","sequence":"additional","affiliation":[{"name":"Alibaba International Digital Commerce Group, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6595-7661","authenticated-orcid":false,"given":"Jing","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Computer Science, Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,7,13]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"12th USENIX symposium on operating systems design and implementation (OSDI 16)","author":"Abadi Mart\u00edn","year":"2016","unstructured":"Mart\u00edn Abadi, Paul Barham, Jianmin Chen, Zhifeng Chen, Andy Davis, Jeffrey Dean, Matthieu Devin, Sanjay Ghemawat, Geoffrey Irving, Michael Isard, et al. 2016. {TensorFlow}: a system for {Large-Scale} machine learning. In 12th USENIX symposium on operating systems design and implementation (OSDI 16). 265--283."},{"key":"e_1_3_2_1_2_1","volume-title":"Maged Saeed AlShaibani, and Irfan Ahmad","author":"Alyafeai Zaid","year":"2020","unstructured":"Zaid Alyafeai, Maged Saeed AlShaibani, and Irfan Ahmad. 2020. A survey on transfer learning in natural language processing. arXiv preprint arXiv:2007.04239 (2020)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/1273496.1273513"},{"key":"e_1_3_2_1_4_1","volume-title":"Multi-task learning with deep neural networks: A survey. arXiv preprint arXiv:2009.09796","author":"Crawshaw Michael","year":"2020","unstructured":"Michael Crawshaw. 2020. Multi-task learning with deep neural networks: A survey. arXiv preprint arXiv:2009.09796 (2020)."},{"key":"e_1_3_2_1_5_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_6_1","volume-title":"Fine-tuning pretrained language models: Weight initializations, data orders, and early stopping. arXiv preprint arXiv:2002.06305","author":"Dodge Jesse","year":"2020","unstructured":"Jesse Dodge, Gabriel Ilharco, Roy Schwartz, Ali Farhadi, Hannaneh Hajishirzi, and Noah Smith. 2020. Fine-tuning pretrained language models: Weight initializations, data orders, and early stopping. arXiv preprint arXiv:2002.06305 (2020)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488560.3498478"},{"key":"e_1_3_2_1_8_1","volume-title":"Simcse: Simple contrastive learning of sentence embeddings. arXiv preprint arXiv:2104.08821","author":"Gao Tianyu","year":"2021","unstructured":"Tianyu Gao, Xingcheng Yao, and Danqi Chen. 2021. Simcse: Simple contrastive learning of sentence embeddings. arXiv preprint arXiv:2104.08821 (2021)."},{"key":"e_1_3_2_1_9_1","volume-title":"Lora: Efficient low rank adaptation of large models. arXiv preprint arXiv:2402.12354","author":"Hayou Soufiane","year":"2024","unstructured":"Soufiane Hayou, Nikhil Ghosh, and Bin Yu. 2024. Lora: Efficient low rank adaptation of large models. arXiv preprint arXiv:2402.12354 (2024)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543873.3584633"},{"key":"e_1_3_2_1_11_1","volume-title":"Distilling the Knowledge in a Neural Network. arXiv preprint arXiv:1503.02531","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton. 2015. Distilling the Knowledge in a Neural Network. arXiv preprint arXiv:1503.02531 (2015)."},{"key":"e_1_3_2_1_12_1","volume-title":"Universal language model fine-tuning for text classification. arXiv preprint arXiv:1801.06146","author":"Howard Jeremy","year":"2018","unstructured":"Jeremy Howard and Sebastian Ruder. 2018. Universal language model fine-tuning for text classification. arXiv preprint arXiv:1801.06146 (2018)."},{"key":"e_1_3_2_1_13_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403305"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2505515.2505665"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557137"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2019.2921572"},{"key":"e_1_3_2_1_18_1","unstructured":"Morgan Kaufinann. 2006. Data mining: Concepts and techniques. (2006) 4."},{"key":"e_1_3_2_1_19_1","volume-title":"Convolutional neural networks for sentence classification. arXiv preprint arXiv:1408.5882","author":"Kim Yoon","year":"2014","unstructured":"Yoon Kim. 2014. Convolutional neural networks for sentence classification. arXiv preprint arXiv:1408.5882 (2014)."},{"key":"e_1_3_2_1_20_1","volume-title":"Kwang-Ting Cheng, and Min-Hung Chen.","author":"Liu Shih-Yang","year":"2024","unstructured":"Shih-Yang Liu, Chien-Yi Wang, Hongxu Yin, Pavlo Molchanov, Yu-Chiang Frank Wang, Kwang-Ting Cheng, and Min-Hung Chen. 2024. Dora: Weight-decomposed low-rank adaptation. arXiv preprint arXiv:2402.09353 (2024)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220007"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210104"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_5"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00810"},{"key":"e_1_3_2_1_25_1","volume-title":"Distributed representations of words and phrases and their compositionality. Advances in neural information processing systems","author":"Mikolov Tomas","year":"2013","unstructured":"Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg S Corrado, and Jeff Dean. 2013. Distributed representations of words and phrases and their compositionality. Advances in neural information processing systems (2013)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412713"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/371920.372071"},{"key":"e_1_3_2_1_28_1","volume-title":"A survey on understanding, visualizations, and explanation of deep neural networks. arXiv preprint arXiv:2102.01792","author":"Shahroudnejad Atefeh","year":"2021","unstructured":"Atefeh Shahroudnejad. 2021. A survey on understanding, visualizations, and explanation of deep neural networks. arXiv preprint arXiv:2102.01792 (2021)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383313.3412236"},{"key":"e_1_3_2_1_30_1","unstructured":"Xu Wang Jiangxia Cao Zhiyi Fu Kun Gai and Guorui Zhou. 2024. HoME: Hierarchy of Multi-Gate Experts for Multi-Task Learning at Kuaishou. arXiv preprint arXiv:2408.05430 (2024)."},{"key":"e_1_3_2_1_31_1","volume-title":"Yi Wong, Ziru Liu, Xiangyu Zhao, Yichao Wang, Bo Chen, Huifeng Guo, and Ruiming Tang.","author":"Wang Yuhao","year":"2023","unstructured":"Yuhao Wang, Ha Tsz Lam, Yi Wong, Ziru Liu, Xiangyu Zhao, Yichao Wang, Bo Chen, Huifeng Guo, and Ruiming Tang. 2023. Multi-task deep recommender systems: A survey. arXiv preprint arXiv:2302.03525 (2023)."},{"key":"e_1_3_2_1_32_1","volume-title":"Parameter-efficient fine-tuning for pre-trained vision models: A survey. arXiv preprint arXiv:2402.02242","author":"Xin Yi","year":"2024","unstructured":"Yi Xin, Siqi Luo, Haodi Zhou, Junlong Du, Xiaohong Liu, Yue Fan, Qing Li, and Yuntao Du. 2024. Parameter-efficient fine-tuning for pre-trained vision models: A survey. arXiv preprint arXiv:2402.02242 (2024)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539062"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2023.111236"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3298689.3346996"},{"key":"e_1_3_2_1_36_1","first-page":"5824","article-title":"Gradient surgery for multi-task learning","volume":"33","author":"Yu Tianhe","year":"2020","unstructured":"Tianhe Yu, Saurabh Kumar, Abhishek Gupta, Sergey Levine, Karol Hausman, and Chelsea Finn. 2020. Gradient surgery for multi-task learning. Advances in Neural Information Processing Systems, Vol. 33 (2020), 5824--5836.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539212"},{"key":"e_1_3_2_1_38_1","volume-title":"Distillation based multi-task learning: A candidate generation model for improving reading duration. arXiv preprint arXiv:2102.07142","author":"Zhao Zhong","year":"2021","unstructured":"Zhong Zhao, Yanmei Fu, Hanming Liang, Li Ma, Guangyao Zhao, and Hongwei Jiang. 2021. Distillation based multi-task learning: A candidate generation model for improving reading duration. arXiv preprint arXiv:2102.07142 (2021)."},{"key":"e_1_3_2_1_39_1","volume-title":"Multi-Objective Personalized Product Retrieval in Taobao Search. arXiv preprint arXiv:2210.04170","author":"Zheng Yukun","year":"2022","unstructured":"Yukun Zheng, Jiang Bian, Guanghao Meng, Chao Zhang, Honggang Wang, Zhixuan Zhang, Sen Li, Tao Zhuang, Qingwen Liu, and Xiaoyi Zeng. 2022. Multi-Objective Personalized Product Retrieval in Taobao Search. arXiv preprint arXiv:2210.04170 (2022)."},{"key":"e_1_3_2_1_40_1","volume-title":"LoRA-drop: Efficient LoRA Parameter Pruning based on Output Evaluation. arXiv preprint arXiv:2402.07721","author":"Zhou Hongyun","year":"2024","unstructured":"Hongyun Zhou, Xiangyu Lu, Wang Xu, Conghui Zhu, and Tiejun Zhao. 2024. LoRA-drop: Efficient LoRA Parameter Pruning based on Output Evaluation. arXiv preprint arXiv:2402.07721 (2024)."}],"event":{"name":"SIGIR '25: The 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Padua Italy","acronym":"SIGIR '25"},"container-title":["Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3726302.3729939","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T18:33:26Z","timestamp":1755887606000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3726302.3729939"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,13]]},"references-count":40,"alternative-id":["10.1145\/3726302.3729939","10.1145\/3726302"],"URL":"https:\/\/doi.org\/10.1145\/3726302.3729939","relation":{},"subject":[],"published":{"date-parts":[[2025,7,13]]},"assertion":[{"value":"2025-07-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}