{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:05:10Z","timestamp":1755907510858,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":54,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,7,10]],"date-time":"2024-07-10T00:00:00Z","timestamp":1720569600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"King Abdullah University of Science and Technology (KAUST)","award":["FCC\/1\/1976-44-01, FCC\/1\/1976-45-01, REI\/1\/5234-01-01, RGC\/3\/4816-01-01, REI\/1\/5414-01-01, REI\/1\/5289-01-01, and REI\/1\/5404-01-01"],"award-info":[{"award-number":["FCC\/1\/1976-44-01, FCC\/1\/1976-45-01, REI\/1\/5234-01-01, RGC\/3\/4816-01-01, REI\/1\/5414-01-01, REI\/1\/5289-01-01, and REI\/1\/5404-01-01"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,10]]},"DOI":"10.1145\/3626772.3657789","type":"proceedings-article","created":{"date-parts":[[2024,7,11]],"date-time":"2024-07-11T12:40:05Z","timestamp":1720701605000},"page":"2018-2027","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Flexible and Adaptable Summarization via Expertise Separation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6633-0796","authenticated-orcid":false,"given":"Xiuying","family":"Chen","sequence":"first","affiliation":[{"name":"CBRC, KAUST &amp; MBZUAI, Jeddah, Saudi Arabia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0883-3678","authenticated-orcid":false,"given":"Mingzhe","family":"Li","sequence":"additional","affiliation":[{"name":"Ant Group, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1301-3700","authenticated-orcid":false,"given":"Shen","family":"Gao","sequence":"additional","affiliation":[{"name":"Shandong University, Qingdao, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1033-6598","authenticated-orcid":false,"given":"Xin","family":"Cheng","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7326-0098","authenticated-orcid":false,"given":"Qingqing","family":"Zhu","sequence":"additional","affiliation":[{"name":"Peking University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3356-6823","authenticated-orcid":false,"given":"Rui","family":"Yan","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7108-3574","authenticated-orcid":false,"given":"Xin","family":"Gao","sequence":"additional","affiliation":[{"name":"CBRC, CEMSE, KAUST, Jeddah, Saudi Arabia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3574-5665","authenticated-orcid":false,"given":"Xiangliang","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Notre Dame &amp; KAUST, South Bend, IN, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,7,11]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proc. of NeurIPS","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, et al. 2020. Language models are few-shot learners. Proc. 
of NeurIPS (2020)."},{"unstructured":"Xiuying Chen Mingzhe Li Xin Gao and Xiangliang Zhang. 2023. Towards Improving Faithfulness in Abstractive Summarization. In Advances in Neural Information Processing Systems.","key":"e_1_3_2_1_2_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_3_1","DOI":"10.1109\/TASLP.2022.3230539"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_4_1","DOI":"10.18653\/v1\/2023.findings-acl.901"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_5_1","DOI":"10.18653\/v1\/N18-2097"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_6_1","DOI":"10.18653\/v1\/P18-1128"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_7_1","DOI":"10.18653\/v1\/P19-1102"},{"unstructured":"William Fedus Barret Zoph and Noam Shazeer. 2021. Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity.","key":"e_1_3_2_1_8_1"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_9_1","DOI":"10.18653\/v1\/2022.emnlp-main.426"},{"key":"e_1_3_2_1_10_1","volume-title":"Specializing Smaller Language Models towards Multi-Step Reasoning. arXiv preprint arXiv:2301.12726","author":"Fu Yao","year":"2023","unstructured":"Yao Fu, Hao Peng, Litu Ou, Ashish Sabharwal, and Tushar Khot. 2023. Specializing Smaller Language Models towards Multi-Step Reasoning. arXiv preprint arXiv:2301.12726 (2023)."},{"key":"e_1_3_2_1_11_1","volume-title":"Proc. of ICCL. 3263--3273","author":"Gao Ze-Feng","year":"2022","unstructured":"Ze-Feng Gao, Peiyu Liu, Wayne Xin Zhao, Zhong-Yi Lu, and Ji-Rong Wen. 2022. Parameter-Efficient Mixture-of-Experts Architecture for Pre-trained Language Models. In Proc. of ICCL. 3263--3273."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_12_1","DOI":"10.18653\/v1\/N18-1065"},{"key":"e_1_3_2_1_13_1","volume-title":"Sparsely activated mixture-of-experts are robust multi-task learners. arXiv preprint arXiv:2204.07689","author":"Gupta Shashank","year":"2022","unstructured":"Shashank Gupta, Subhabrata Mukherjee, Krishan Subudhi, Eduardo Gonzalez, Damien Jose, Ahmed H Awadallah, and Jianfeng Gao. 2022. Sparsely activated mixture-of-experts are robust multi-task learners. arXiv preprint arXiv:2204.07689 (2022)."},{"key":"e_1_3_2_1_14_1","volume-title":"Proc. of NIPS.","author":"Hermann Karl Moritz","year":"2015","unstructured":"Karl Moritz Hermann, Tomas Kocisky, Edward Grefenstette, Lasse Espeholt, Will Kay, Mustafa Suleyman, and Phil Blunsom. 2015. Teaching machines to read and comprehend. In Proc. of NIPS."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_15_1","DOI":"10.18653\/v1\/W17-4513"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_16_1","DOI":"10.1145\/3477495.3531908"},{"key":"e_1_3_2_1_17_1","volume-title":"Proc. of NAACL.","author":"Kim Byeongchang","year":"2019","unstructured":"Byeongchang Kim, Hyunwoo Kim, and Gunhee Kim. 2019. Abstractive Summarization of Reddit Posts with Multi-level Memory Networks. In Proc. of NAACL."},{"key":"e_1_3_2_1_18_1","volume-title":"MEAL: Stable and Active Learning for Few-Shot Prompting. arXiv preprint arXiv:2211.08358","author":"K\u00f6ksal Abdullatif","year":"2022","unstructured":"Abdullatif K\u00f6ksal, Timo Schick, and Hinrich Sch\u00fctze. 2022. MEAL: Stable and Active Learning for Few-Shot Prompting. arXiv preprint arXiv:2211.08358 (2022)."},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the 2nd Workshop on New Frontiers in Summarization.","author":"Kornilova Anastassia","year":"2019","unstructured":"Anastassia Kornilova and Vladimir Eidelman. 2019. BillSum: A Corpus for Automatic Summarization of US Legislation. 
In Proceedings of the 2nd Workshop on New Frontiers in Summarization."},{"key":"e_1_3_2_1_20_1","volume-title":"Wikihow: A large scale text summarization dataset. arXiv preprint arXiv:1810.09305","author":"Koupaee Mahnaz","year":"2018","unstructured":"Mahnaz Koupaee and William Yang Wang. 2018. Wikihow: A large scale text summarization dataset. arXiv preprint arXiv:1810.09305 (2018)."},{"key":"e_1_3_2_1_21_1","volume-title":"Proc. of ICML.","author":"Lewis Mike","year":"2021","unstructured":"Mike Lewis, Shruti Bhosale, Tim Dettmers, Naman Goyal, and Luke Zettlemoyer. 2021. Base layers: Simplifying training of large, sparse models. In Proc. of ICML."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_22_1","DOI":"10.18653\/v1\/2020.acl-main.703"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_23_1","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"e_1_3_2_1_24_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_25_1","DOI":"10.18653\/v1\/2023.acl-long.228"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_26_1","DOI":"10.18653\/v1\/2022.findings-acl.46"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_27_1","DOI":"10.18653\/v1\/2021.acl-long.268"},{"key":"e_1_3_2_1_28_1","volume-title":"Proc. of NeurIPS","author":"Mustafa Basil","year":"2022","unstructured":"Basil Mustafa, Carlos Riquelme, Joan Puigcerver, Rodolphe Jenatton, and Neil Houlsby. 2022. Multimodal Contrastive Learning with LIMoE: the Language- Image Mixture of Experts. Proc. of NeurIPS (2022)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_29_1","DOI":"10.5555\/2391200.2391218"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_30_1","DOI":"10.18653\/v1\/D18-1206"},{"key":"e_1_3_2_1_31_1","volume-title":"Proc. of NeurIPS","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida, Carroll Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, Alex Ray, et al. 2022. Training language models to follow instructions with human feedback. Proc. of NeurIPS (2022)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_32_1","DOI":"10.1145\/3477495.3531901"},{"key":"e_1_3_2_1_33_1","volume-title":"Proc. of AISTATS.","author":"Radiya-Dixit Evani","year":"2020","unstructured":"Evani Radiya-Dixit and Xin Wang. 2020. How fine can fine-tuning be? learning efficient language models. In Proc. of AISTATS."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_34_1","DOI":"10.18653\/v1\/2022.acl-long.309"},{"key":"e_1_3_2_1_35_1","volume-title":"Proc. of NeurIPS","author":"Roller Stephen","year":"2021","unstructured":"Stephen Roller, Sainbayar Sukhbaatar, Jason Weston, et al. 2021. Hash layers for large sparse models. Proc. of NeurIPS (2021)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_36_1","DOI":"10.18653\/v1\/E17-2007"},{"key":"e_1_3_2_1_37_1","volume-title":"Manning","author":"Liu Peter J.","year":"2017","unstructured":"Abigail See, Peter J. Liu, and Christopher D. Manning. 2017. Get To The Point: Summarization with Pointer-Generator Networks. In Proc. of ACL."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_38_1","DOI":"10.18653\/v1\/P19-1212"},{"key":"e_1_3_2_1_39_1","volume-title":"Proc. 
of ICLR.","author":"Shazeer Noam","year":"2017","unstructured":"Noam Shazeer, Azalia Mirhoseini, Krzysztof Maziarz, Andy Davis, Quoc Le, Geoffrey Hinton, and Jeff Dean. 2017. OUTRAGEOUSLY LARGE NEURAL NETWORKS: THE SPARSELY-GATED MIXTURE-OF-EXPERTS LAYER. In Proc. of ICLR."},{"doi-asserted-by":"crossref","unstructured":"Shubo Tian Qiao Jin Lana Yeganova Po-Ting Lai Qingqing Zhu Xiuying Chen Yifan Yang Qingyu Chen Won Kim Donald C Comeau et al. 2023. Opportunities and Challenges for ChatGPT and Large Language Models in Biomedicine and Health. arXiv preprint arXiv:2306.10070 (2023).","key":"e_1_3_2_1_40_1","DOI":"10.1093\/bib\/bbad493"},{"key":"e_1_3_2_1_41_1","volume-title":"Proc. of NIPS.","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. In Proc. of NIPS."},{"key":"e_1_3_2_1_42_1","volume-title":"Exploring domain shift in extractive text summarization. arXiv preprint arXiv:1908.11664","author":"Wang Danqing","year":"2019","unstructured":"Danqing Wang, Pengfei Liu, Ming Zhong, Jie Fu, Xipeng Qiu, and Xuanjing Huang. 2019. Exploring domain shift in extractive text summarization. arXiv preprint arXiv:1908.11664 (2019)."},{"key":"e_1_3_2_1_43_1","volume-title":"International Conference on Machine Learning.","author":"Wang Peng","year":"2022","unstructured":"Peng Wang, An Yang, Rui Men, Junyang Lin, Shuai Bai, Zhikang Li, Jianxin Ma, Chang Zhou, Jingren Zhou, and Hongxia Yang. 2022. Ofa: Unifying architectures, tasks, and modalities through a simple sequence-to-sequence learning framework. In International Conference on Machine Learning."},{"key":"e_1_3_2_1_44_1","volume-title":"Saksham Singhal, Subhojit Som, et al.","author":"Wang Wenhui","year":"2022","unstructured":"Wenhui Wang, Hangbo Bao, Li Dong, Johan Bjorck, Zhiliang Peng, Qiang Liu, Kriti Aggarwal, Owais Khan Mohammed, Saksham Singhal, Subhojit Som, et al. 2022. Image as a Foreign Language: BEiT Pretraining for All Vision and Vision- Language Tasks. arXiv preprint arXiv:2208.10442 (2022)."},{"key":"e_1_3_2_1_45_1","volume-title":"Exploring the limits of chatgpt for query or aspect-based text summarization. arXiv preprint arXiv:2302.08081","author":"Yang Xianjun","year":"2023","unstructured":"Xianjun Yang, Yan Li, Xinlu Zhang, Haifeng Chen, and Wei Cheng. 2023. Exploring the limits of chatgpt for query or aspect-based text summarization. arXiv preprint arXiv:2302.08081 (2023)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_46_1","DOI":"10.18653\/v1\/2021.naacl-main.471"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_47_1","DOI":"10.1007\/978-3-030-58580-8_41"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_48_1","DOI":"10.18653\/v1\/P19-1043"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_49_1","DOI":"10.1145\/3397271.3401205"},{"key":"e_1_3_2_1_50_1","volume-title":"Proc. of ICLR.","author":"Zhang Tianyi","year":"2020","unstructured":"Tianyi Zhang, Varsha Kishore, Felix Wu, Kilian Q Weinberger, and Yoav Artzi. 2020. BERTScore: Evaluating Text Generation with BERT. In Proc. of ICLR."},{"key":"e_1_3_2_1_51_1","volume-title":"Benchmarking large language models for news summarization. arXiv preprint arXiv:2301.13848","author":"Zhang Tianyi","year":"2023","unstructured":"Tianyi Zhang, Faisal Ladhak, Esin Durmus, Percy Liang, Kathleen McKeown, and Tatsunori B Hashimoto. 2023. Benchmarking large language models for news summarization. 
arXiv preprint arXiv:2301.13848 (2023)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_52_1","DOI":"10.18653\/v1\/2020.emnlp-main.174"},{"key":"e_1_3_2_1_53_1","volume-title":"Proc. of ICLR.","author":"Zuo Simiao","year":"2021","unstructured":"Simiao Zuo, Xiaodong Liu, Jian Jiao, Young Jin Kim, Hany Hassan, Ruofei Zhang, Jianfeng Gao, and Tuo Zhao. 2021. Taming Sparsely Activated Transformer with Stochastic Experts. In Proc. of ICLR."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_54_1","DOI":"10.18653\/v1\/2022.naacl-main.116"}],"event":{"sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"acronym":"SIGIR 2024","name":"SIGIR 2024: The 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","location":"Washington DC USA"},"container-title":["Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657789","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3626772.3657789","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:41:39Z","timestamp":1755841299000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3626772.3657789"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,10]]},"references-count":54,"alternative-id":["10.1145\/3626772.3657789","10.1145\/3626772"],"URL":"https:\/\/doi.org\/10.1145\/3626772.3657789","relation":{},"subject":[],"published":{"date-parts":[[2024,7,10]]},"assertion":[{"value":"2024-07-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}