{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T00:00:51Z","timestamp":1773619251513,"version":"3.50.1"},"reference-count":102,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2025,8,19]],"date-time":"2025-08-19T00:00:00Z","timestamp":1755561600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,19]],"date-time":"2025-08-19T00:00:00Z","timestamp":1755561600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Sci. China Inf. Sci."],"published-print":{"date-parts":[[2025,9]]},"DOI":"10.1007\/s11432-023-4128-3","type":"journal-article","created":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T02:32:53Z","timestamp":1755916373000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["OpenBA: an open-sourced 15B bilingual asymmetric Seq2Seq model pre-trained from scratch"],"prefix":"10.1007","volume":"68","author":[{"given":"Juntao","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zecheng","family":"Tang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuyang","family":"Ding","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pinzheng","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pei","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wangjie","family":"You","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dan","family":"Qiao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chenyu","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenliang","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guohong","family":"Fu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qiaoming","family":"Zhu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guodong","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Min","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,8,19]]},"reference":[{"key":"4128_CR1","unstructured":"Kaplan J, McCandlish S, Henighan T, et al. Scaling laws for neural language models. 2020. ArXiv:2001.08361"},{"key":"4128_CR2","first-page":"4057","volume-title":"Proceedings of International Conference on Machine Learning. PMLR","author":"A Clark","year":"2022","unstructured":"Clark A, De Las Casas D, Guy A, et al. Unified scaling laws for routed language models. In: Proceedings of International Conference on Machine Learning. PMLR, 2022. 4057\u20134086"},{"key":"4128_CR3","unstructured":"Hoffmann J, Borgeaud S, Mensch A, et al. Training compute-optimal large language models. 2022. ArXiv:2203.15556"},{"key":"4128_CR4","unstructured":"Touvron H, Lavril T, Izacard G, et al. LLaMA: open and efficient foundation language models. 2023. ArXiv:2302.13971"},{"key":"4128_CR5","unstructured":"Scao T L, Fan A, Akiki C, et al. Bloom: a 176B-parameter open-access multilingual language model. 2022. ArXiv:2211.05100"},{"key":"4128_CR6","unstructured":"Touvron H, Martin L, Stone K, et al. LLaMA 2: open foundation and fine-tuned chat models. 2023. ArXiv:2307.09288"},{"key":"4128_CR7","unstructured":"Chung H W, Hou L, Longpre S, et al. Scaling instruction-finetuned language models. 2022. ArXiv:2210.11416"},{"key":"4128_CR8","unstructured":"Soltan S, Ananthakrishnan S, FitzGerald J, et al. Alexatm 20B: few-shot learning using a large-scale multilingual Seq2Seq model. 2022. ArXiv:2208.01448"},{"key":"4128_CR9","unstructured":"Zeng A, Liu X, Du Z, et al. GLM-130B: an open bilingual pre-trained model. 2022. ArXiv:2210.02414"},{"key":"4128_CR10","volume-title":"Baichuan-7b","author":"Inc. B","year":"2023","unstructured":"Inc. B. Baichuan-7b. https:\/\/github.com\/baichuan-inc\/Baichuan-7B, 2023"},{"key":"4128_CR11","unstructured":"Wang H, Liu C, Xi N, et al. Huatuo: tuning LLaMA model with chinese medical knowledge. 2023. ArXiv:2304.06975"},{"key":"4128_CR12","volume-title":"Luotuo: an instruction-following chinese language model, lora tuning on LLaMA","author":"Z Leng","year":"2023","unstructured":"Leng Z, Chen Q, Li C. Luotuo: an instruction-following chinese language model, lora tuning on LLaMA. https:\/\/github.com\/LC1332\/Chinese-alpaca-lora, 2023"},{"key":"4128_CR13","unstructured":"Chen Z, Jiang F, Chen J, et al. Phoenix: democratizing ChatGPT across languages. 2023. ArXiv:2304.10453"},{"key":"4128_CR14","unstructured":"Cui Y, Yang Z, Yao X. Efficient and effective text encoding for chinese LLaMA and alpaca. 2023. ArXiv:2304.08177"},{"key":"4128_CR15","doi-asserted-by":"publisher","first-page":"888","DOI":"10.1007\/s11633-024-1502-8","volume":"21","author":"T X Sun","year":"2024","unstructured":"Sun T X, Zhang X T, He Z F, et al. MOSS: an open conversational large language model. Mach Intell Res, 2024, 21: 888\u2013905","journal-title":"Mach Intell Res"},{"key":"4128_CR16","first-page":"320","volume-title":"Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics","author":"Z Du","year":"2022","unstructured":"Du Z, Qian Y, Liu X, et al. GLM: general language model pretraining with autoregressive blank infilling. In: Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics. 2022. 320\u2013335"},{"key":"4128_CR17","unstructured":"Longpre S, Hou L, Vu T, et al. The flan collection: designing data and methods for effective instruction tuning. 2023. ArXiv:2301.13688"},{"key":"4128_CR18","volume-title":"Proceedings of the Eleventh International Conference on Learning Representations. Kigali","author":"Y Tay","year":"2022","unstructured":"Tay Y, Dehghani M, Tran V Q, et al. UL2: unifying language learning paradigms. In: Proceedings of the Eleventh International Conference on Learning Representations. Kigali, 2022"},{"key":"4128_CR19","first-page":"46595","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems. New Orleans","author":"L Zheng","year":"2023","unstructured":"Zheng L, Chiang W L, Sheng Y, et al. Judging LLM-as-a-judge with mT-bench and chatbot arena, In: Proceedings of the 37th International Conference on Neural Information Processing Systems. New Orleans, 2023. 46595\u201346623"},{"key":"4128_CR20","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1109\/6.591665","volume":"34","author":"R R Schaller","year":"1997","unstructured":"Schaller R R. Moore\u2019s law: past, present and future. IEEE Spectr, 1997, 34: 52\u201359","journal-title":"IEEE Spectr"},{"key":"4128_CR21","volume-title":"Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics. New Orleans","author":"A Radford","year":"2018","unstructured":"Radford A, Narasimhan K, Salimans T, et al. Improving language understanding by generative pre-training. In: Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics. New Orleans, 2018"},{"key":"4128_CR22","first-page":"1877","volume-title":"Proceedings of the 34th Conference on Neural Information Processing Systems (NeurIPS 2020). Vancouver","author":"T Brown","year":"2020","unstructured":"Brown T, Mann B, Ryder N, et al. Language models are few-shot learners. In: Proceedings of the 34th Conference on Neural Information Processing Systems (NeurIPS 2020). Vancouver, 2020. 1877\u20131901"},{"key":"4128_CR23","unstructured":"Zeng W, Ren X, Su T, et al. Pangu-alpha: large-scale autoregressive pretrained chinese language models with auto-parallel computation. 2021. ArXiv:2104.12369"},{"key":"4128_CR24","unstructured":"Sun Y, Wang S, Feng S, et al. ERNIE 3.0: large-scale knowledge enhanced pre-training for language understanding and generation. 2021. ArXiv:2107.02137"},{"key":"4128_CR25","doi-asserted-by":"publisher","first-page":"831","DOI":"10.1016\/j.fmre.2021.11.011","volume":"1","author":"M Zhang","year":"2021","unstructured":"Zhang M, Li J. A commentary of GPT-3 in MIT technology review 2021. Fundam Research, 2021, 1: 831\u2013833","journal-title":"Fundam Research"},{"key":"4128_CR26","doi-asserted-by":"publisher","first-page":"216","DOI":"10.1016\/j.aiopen.2021.12.003","volume":"2","author":"Z Zhang","year":"2021","unstructured":"Zhang Z, Gu Y, Han X, et al. CPM-2: large-scale cost-effective pre-trained language models. AI Open, 2021, 2: 216\u2013224","journal-title":"AI Open"},{"key":"4128_CR27","unstructured":"Zhang S, Roller S, Goyal N, et al. OPT: open pre-trained transformer language models. 2022. ArXiv:2205.01068"},{"key":"4128_CR28","first-page":"6000","volume-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems (NIPS\u201917). California","author":"A Vaswani","year":"2017","unstructured":"Vaswani A, Shazeer N, Parmar N, et al. Attention is all you need. In: Proceedings of the 31st International Conference on Neural Information Processing Systems (NIPS\u201917). California, 2017. 6000\u20136010"},{"key":"4128_CR29","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford A, Wu J, Child R, et al. Language models are unsupervised multitask learners. OpenAI blog, 2019, 1: 9","journal-title":"OpenAI blog"},{"key":"4128_CR30","first-page":"5485","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel C, Shazeer N, Roberts A, et al. Exploring the limits of transfer learning with a unified text-to-text transformer. J Mach Learn Res, 2020, 21: 5485\u20135551","journal-title":"J Mach Learn Res"},{"key":"4128_CR31","unstructured":"Rae J W, Borgeaud S, Cai T, et al. Scaling language models: methods, analysis & insights from training gopher. 2021. ArXiv:2112.11446"},{"key":"4128_CR32","unstructured":"Smith S, Patwary M, Norick B, et al. Using deepspeed and megatron to train megatron-turing NLG 530B, a large-scale generative language model. 2022. ArXiv:2201.11990"},{"key":"4128_CR33","first-page":"27730","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems. New Orleans","author":"L Ouyang","year":"2022","unstructured":"Ouyang L, Wu J, Jiang X, et al. Training language models to follow instructions with human feedback. In: Proceedings of the 36th International Conference on Neural Information Processing Systems. New Orleans, 2022. 27730\u201327744"},{"key":"4128_CR34","unstructured":"Bubeck S, Chandrasekaran V, Eldan R, et al. Sparks of artificial general intelligence: early experiments with GPT-4. 2023. ArXiv:2303.12712"},{"key":"4128_CR35","unstructured":"Penedo G, Malartic Q, Hesslow D, et al. The refinedweb dataset for falcon LLM: outperforming curated corpora with web data, and web data only. 2023. ArXiv:2306.01116"},{"key":"4128_CR36","doi-asserted-by":"publisher","first-page":"1314","DOI":"10.1109\/SP40000.2020.00095","volume-title":"Proceedings of 2020 IEEE Symposium on Security and Privacy (SP). IEEE","author":"X Pan","year":"2020","unstructured":"Pan X, Zhang M, Ji S, et al. Privacy risks of general-purpose language models. In: Proceedings of 2020 IEEE Symposium on Security and Privacy (SP). IEEE, 2020. 1314\u20131331"},{"key":"4128_CR37","unstructured":"Chowdhery A, Narang S, Devlin J, et al. PmLM: scaling language modeling with pathways. 2022. ArXiv:2204.02311"},{"key":"4128_CR38","unstructured":"Hendrycks D, Burns C, Basart S, et al. Measuring massive multitask language understanding. 2020. ArXiv:2009.03300"},{"key":"4128_CR39","unstructured":"Li H, Zhang Y, Koto F, et al. CMMLU: measuring massive multitask language understanding in chinese. 2023. ArXiv:2306.09212"},{"key":"4128_CR40","unstructured":"Fu Z, Lam W, Yu Q, et al. Decoder-only or encoder-decoder? Interpreting language model as a regularized encoder-decoder. 2023. ArXiv:2304.04052"},{"key":"4128_CR41","first-page":"3266","volume-title":"Proceedings of the 33rd Conference on Neural Information Processing Systems (NeurIPS 2019)","author":"A Wang","year":"2019","unstructured":"Wang A, Pruksachatkun Y, Nangia N, et al. SuperGLUE: a stickier benchmark for general-purpose language understanding systems. In: Proceedings of the 33rd Conference on Neural Information Processing Systems (NeurIPS 2019). 2019. 3266\u20133280"},{"key":"4128_CR42","unstructured":"Huang Y, Bai Y, Zhu Z, et al. C-Eval: a multi-level multi-discipline chinese evaluation suite for foundation models. 2023. ArXiv:2305.08322"},{"key":"4128_CR43","unstructured":"Zhang S, Dong L, Li X, et al. Instruction tuning for large language models: a survey. 2023. ArXiv:2308.10792"},{"key":"4128_CR44","unstructured":"Wei J, Bosma M, Zhao V Y, et al. Finetuned language models are zero-shot learners. 2021. ArXiv:2109.01652"},{"key":"4128_CR45","unstructured":"Sanh V, Webson A, Raffel C, et al. Multitask prompted training enables zero-shot task generalization. 2021. ArXiv:2110.08207"},{"key":"4128_CR46","unstructured":"Nye M, Andreassen A J, Gur-Ari G, et al. Show your work: scratchpads for intermediate computation with language models. 2021. ArXiv:2112.00114"},{"key":"4128_CR47","unstructured":"Wei J, Wang X, Schuurmans D, et al. Chain of thought prompting elicits reasoning in large language models. 2022. ArXiv:2201.11903"},{"key":"4128_CR48","unstructured":"Wang X, Wei J, Schuurmans D, et al. Self-consistency improves chain of thought reasoning in language models. 2022. ArXiv:2203.11171"},{"key":"4128_CR49","first-page":"15476","volume-title":"Proceedings of the 36th Conference on Neural Information Processing Systems (NeurIPS 2022). New Orleans","author":"E Zelikman","year":"2022","unstructured":"Zelikman E, Wu Y, Mu J, et al. STaR: bootstrapping reasoning with reasoning. In: Proceedings of the 36th Conference on Neural Information Processing Systems (NeurIPS 2022). New Orleans, 2022. 15476\u201315488"},{"key":"4128_CR50","unstructured":"Wu Y, Zhao Y, Li Z, et al. Improving cross-task generalization with step-by-step instructions. 2023. ArXiv:2305.04429"},{"key":"4128_CR51","unstructured":"Xu C, Sun Q, Zheng K, et al. WizardLM: empowering large language models to follow complex instructions. 2023. ArXiv:2304.12244"},{"key":"4128_CR52","volume-title":"Stanford alpaca: an instruction-following LLaMA model","author":"R Taori","year":"2023","unstructured":"Taori R, Gulrajani I, Zhang T, et al. Stanford alpaca: an instruction-following LLaMA model. https:\/\/github.com\/tatsu-lab\/stanford_alpaca, 2023"},{"key":"4128_CR53","volume-title":"Free dolly: introducing the world first truly open instruction-tuned LLM","author":"M Conover","year":"2023","unstructured":"Conover M, Hayes M, Mathur A, et al. Free dolly: introducing the world first truly open instruction-tuned LLM. Company Blog, 2023"},{"key":"4128_CR54","volume-title":"Code Alpaca: an instruction-following LLaMA model for code generation","author":"S Chaudhary","year":"2023","unstructured":"Chaudhary S. Code Alpaca: an instruction-following LLaMA model for code generation. https:\/\/github.com\/sahil280114\/codealpaca, 2023"},{"key":"4128_CR55","volume-title":"Proceedings of the Eleventh International Conference on Learning Representations (ICLR 2023)","author":"Z Zhang","year":"2023","unstructured":"Zhang Z, Zhang A, Li M, et al. Automatic chain of thought prompting in large language models. In: Proceedings of the Eleventh International Conference on Learning Representations (ICLR 2023). 2023"},{"key":"4128_CR56","unstructured":"Gao L, Biderman S, Black S, et al. The Pile: an 800GB dataset of diverse text for language modeling. 2020. ArXiv:2101.00027"},{"key":"4128_CR57","first-page":"19822","volume-title":"Proceedings of the 35th International Conference on Neural Information Processing System. Red Hook","author":"M Ding","year":"2021","unstructured":"Ding M, Yang Z, Hong W, et al. CogView: mastering text-to-image generation via transformers. In: Proceedings of the 35th International Conference on Neural Information Processing System. Red Hook, 2021. 19822\u201319835"},{"key":"4128_CR58","first-page":"12381","volume-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems. Red Hook","author":"B Zhang","year":"2019","unstructured":"Zhang B, Sennrich R. Root mean square layer normalization. In: Proceedings of the 33rd International Conference on Neural Information Processing Systems. Red Hook, 2019. 12381\u201312392"},{"key":"4128_CR59","unstructured":"Su J, Lu Y, Pan S, et al. RoFormer: enhanced transformer with rotary position embedding. 2021. ArXiv:2104.09864"},{"key":"4128_CR60","unstructured":"Shazeer N. GLU variants improve transformer. 2020. ArXiv:2002.05202"},{"key":"4128_CR61","doi-asserted-by":"crossref","unstructured":"Xue L, Constant N, Roberts A, et al. mT5: a massively multilingual pre-trained text-to-text transformer. 2020. ArXiv:2010.11934","DOI":"10.18653\/v1\/2021.naacl-main.41"},{"key":"4128_CR62","first-page":"49","volume-title":"Proceedings of Feature Extraction: Modern Questions and Challenges at NIPS 2015. PMLR","author":"E Barshan","year":"2015","unstructured":"Barshan E, Fieguth P. Stage-wise training: an improved feature learning strategy for deep models. In: Proceedings of Feature Extraction: Modern Questions and Challenges at NIPS 2015. PMLR, 2015. 49\u201359"},{"key":"4128_CR63","first-page":"7871","volume-title":"Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics","author":"M Lewis","year":"2019","unstructured":"Lewis M, Liu Y, Goyal N, et al. BART: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics. 2019, 7871\u20137880"},{"key":"4128_CR64","doi-asserted-by":"crossref","unstructured":"Min S, Lewis M, Zettlemoyer L, et al. MetaiCL: learning to learn in context. 2021. ArXiv:2110.15943","DOI":"10.18653\/v1\/2022.naacl-main.201"},{"key":"4128_CR65","unstructured":"Guan J, Mao X, Fan C, et al. Long text generation by modeling sentence-level and discourse-level coherence. 2021. ArXiv:2105.08963"},{"key":"4128_CR66","series-title":"White paper","first-page":"55","volume-title":"Introduction to infiniband for end users","author":"P Grun","year":"2010","unstructured":"Grun P. Introduction to infiniband for end users. White paper, InfiniBand Trade Association, 2010, 55"},{"key":"4128_CR67","unstructured":"Shoeybi M, Patwary M, Puri R, et al. Megatron-LM: training multi-billion parameter language models using model parallelism. 2019. ArXiv:1909.08053"},{"key":"4128_CR68","first-page":"1","volume-title":"Proceedings of SC20: International Conference for High Performance Computing, Networking, Storage and Analysis. IEEE","author":"S Rajbhandari","year":"2020","unstructured":"Rajbhandari S, Rasley J, Ruwase O, et al. ZeRO: memory optimizations toward training trillion parameter models. In: Proceedings of SC20: International Conference for High Performance Computing, Networking, Storage and Analysis. IEEE, 2020. 1\u201316"},{"key":"4128_CR69","unstructured":"Nijkamp E, Pang B, Hayashi H, et al. CodeGen: an open large language model for code with multi-turn program synthesis. 2022. ArXiv:2203.13474"},{"key":"4128_CR70","unstructured":"Zhou K, Zhu Y, Chen Z, et al. Don\u2019t make your LLM an evaluation benchmark cheater. 2023. ArXiv:2311.01964"},{"key":"4128_CR71","doi-asserted-by":"crossref","unstructured":"Bandarkar L, Liang D, Muller B, et al. The belebele benchmark: a parallel reading comprehension dataset in 122 language variants. 2023. ArXiv:2308.16884","DOI":"10.18653\/v1\/2024.acl-long.44"},{"key":"4128_CR72","doi-asserted-by":"crossref","unstructured":"Suzgun M, Scales N, Sch\u00e4rli N, et al. Challenging big-bench tasks and whether chain-of-thought can solve them. 2022. ArXiv:2210.09261","DOI":"10.18653\/v1\/2023.findings-acl.824"},{"key":"4128_CR73","first-page":"795","volume-title":"Proceedings of Machine Learning and Systems","author":"C J Wu","year":"2022","unstructured":"Wu C J, Raghavendra R, Gupta U, et al. Sustainable AI: environmental implications, challenges and opportunities. In: Proceedings of Machine Learning and Systems. 2022. 795\u2013813"},{"key":"4128_CR74","unstructured":"Li Z, Zhang S, Zhao H, et al. BatGPT: a bidirectional autoregessive talker from generative pre-trained transformer. 2023. ArXiv:2307.00360"},{"key":"4128_CR75","unstructured":"Devlin J, Chang M W, Lee K, et al. BERT: pre-training of deep bidirectional transformers for language understanding. 2018. ArXiv:1810.04805"},{"key":"4128_CR76","doi-asserted-by":"crossref","unstructured":"Liang D, Gonen H, Mao Y, et al. XLM-V: overcoming the vocabulary bottleneck in multilingual masked language models. 2023. ArXiv:2301.10472","DOI":"10.18653\/v1\/2023.emnlp-main.813"},{"key":"4128_CR77","doi-asserted-by":"crossref","unstructured":"Chi Z, Dong L, Wei F, et al. InfoXLM: an information-theoretic framework for cross-lingual language model pre-training. 2020. ArXiv:2007.07834","DOI":"10.18653\/v1\/2021.naacl-main.280"},{"key":"4128_CR78","doi-asserted-by":"publisher","first-page":"522","DOI":"10.1162\/tacl_a_00474","volume":"10","author":"N Goyal","year":"2022","unstructured":"Goyal N, Gao C, Chaudhary V, et al. The flores-101 evaluation benchmark for low-resource and multilingual machine translation. Trans Assoc Comput Linguistics, 2022, 10: 522\u2013538","journal-title":"Trans Assoc Comput Linguistics"},{"key":"4128_CR79","doi-asserted-by":"publisher","first-page":"531","DOI":"10.1007\/978-3-030-60450-9_42","volume-title":"Proceedings of CCF International Conference on Natural Language Processing and Chinese Computing. Springer","author":"X Liu","year":"2020","unstructured":"Liu X, Zhang C, Chen X, et al. CLTS: a new chinese long text summarization dataset. In: Proceedings of CCF International Conference on Natural Language Processing and Chinese Computing. Springer, 2020. 531\u2013542"},{"key":"4128_CR80","first-page":"665","volume-title":"Proceedings of Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies. Portland","author":"W Coster","year":"2011","unstructured":"Coster W, Kauchak D. Simple english wikipedia: a new text simplification task. In: Proceedings of Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies. Portland, 2011. 665\u2013669"},{"key":"4128_CR81","first-page":"839","volume-title":"Proceedings of Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. San Diego","author":"N Mostafazadeh","year":"2016","unstructured":"Mostafazadeh N, Chambers N, He X, et al. A corpus and cloze evaluation for deeper understanding of commonsense stories. In: Proceedings of Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. San Diego, 2016. 839\u2013849"},{"key":"4128_CR82","first-page":"74","volume-title":"Proceedings of Text Summarization Branches Out. Barcelona","author":"C Y Lin","year":"2004","unstructured":"Lin C Y. ROUGE: a package for automatic evaluation of summaries. In: Proceedings of Text Summarization Branches Out. Barcelona, 2004. 74\u201381"},{"key":"4128_CR83","doi-asserted-by":"publisher","first-page":"186","DOI":"10.18653\/v1\/W18-6319","volume-title":"Proceedings of Proceedings of the Third Conference on Machine Translation: Research Papers","author":"M Post","year":"2018","unstructured":"Post M. A call for clarity in reporting BLEU scores. In: Proceedings of Proceedings of the Third Conference on Machine Translation: Research Papers, 2018. 186\u2013191"},{"key":"4128_CR84","doi-asserted-by":"crossref","unstructured":"Li J, Galley M, Brockett C, et al. A diversity-promoting objective function for neural conversation models. 2015. ArXiv:1510.03055","DOI":"10.18653\/v1\/N16-1014"},{"key":"4128_CR85","unstructured":"Shao Z, Huang M, Wen J, et al. Long and diverse text generation with planning-based hierarchical variational model. 2019. ArXiv:1908.06605"},{"key":"4128_CR86","first-page":"4816","volume-title":"Proceedings of the 35th International Conference on Neural Information Processing Systems. Red Hook","author":"K Pillutla","year":"2021","unstructured":"Pillutla K, Swayamdipta S, Zellers R, et al. MAUVE: measuring the gap between neural text and human text using divergence frontiers. In: Proceedings of the 35th International Conference on Neural Information Processing Systems. Red Hook, 2021. 4816\u20134828"},{"key":"4128_CR87","volume-title":"GPT-J-6B: a 6 billion parameter autoregressive language model","author":"B Wang","year":"2021","unstructured":"Wang B, Komatsuzaki A. GPT-J-6B: a 6 billion parameter autoregressive language model. https:\/\/github.com\/kingoflolz\/mesh-transformer-jax, May 2021"},{"key":"4128_CR88","first-page":"23716","volume-title":"Proceedings of 36th Conference on Neural Information Processing Systems","author":"J B Alayrac","year":"2022","unstructured":"Alayrac J B, Donahue J, Luc P, et al. Flamingo: a visual language model for few-shot learning. In: Proceedings of 36th Conference on Neural Information Processing Systems. 2022. 23716\u201323736"},{"key":"4128_CR89","first-page":"46595","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems. New Orleans","author":"L Zheng","year":"2023","unstructured":"Zheng L, Chiang W L, Sheng Y, et al. Judging LLM-as-a-judge with mT-bench and chatbot arena. In: Proceedings of the 37th International Conference on Neural Information Processing Systems. New Orleans, 2023. 46595\u201346623"},{"key":"4128_CR90","unstructured":"Rawte V, Sheth A, Das A. A survey of hallucination in large foundation models. 2023. ArXiv:2309.05922"},{"key":"4128_CR91","doi-asserted-by":"publisher","first-page":"2299","DOI":"10.18653\/v1\/2024.findings-naacl.149","volume-title":"Proceedings of Findings of the Association for Computational Linguistics: EACL 2024. St. Julian\u2019s","author":"J Zhang","year":"2024","unstructured":"Zhang J, Qian K, Liu Z, et al. DialogStudio: towards richest and most diverse unified dataset collection for conversational AI. In: Proceedings of Findings of the Association for Computational Linguistics: EACL 2024. St. Julian\u2019s, 2024. 2299\u20132315"},{"key":"4128_CR92","volume-title":"BELLE: be everyone\u2019s large language model engine","author":"Y Ji","year":"2023","unstructured":"Ji Y, Deng Y, Gong Y, et al. BELLE: be everyone\u2019s large language model engine. https:\/\/github.com\/LianjiaTech\/BELLE, 2023"},{"key":"4128_CR93","unstructured":"Luo Z, Xu C, Zhao P, et al. WizardCoder: empowering code large language models with Evol-Instruct. 2023. ArXiv:2306.08568"},{"key":"4128_CR94","unstructured":"Li X, Yu P, Zhou C, et al. Self-alignment with instruction backtranslation. 2023. ArXiv:2308.06259"},{"key":"4128_CR95","volume-title":"Free dolly: introducing the world\u2019s first truly open instruction-tuned LLM","author":"M Conover","year":"2023","unstructured":"Conover M, Hayes M, Mathur A, et al. Free dolly: introducing the world\u2019s first truly open instruction-tuned LLM. Company Blog, 2023"},{"key":"4128_CR96","unstructured":"Zhou C, Liu P, Xu P, et al. LIMA: less is more for alignment. 2023. ArXiv:2305.11206"},{"key":"4128_CR97","unstructured":"Schick T, Dwivedi-Yu J, Dess\u00ec R, et al. Toolformer: language models can teach themselves to use tools. 2023. ArXiv:2302.04761"},{"key":"4128_CR98","unstructured":"Wu C, Yin S, Qi W, et al. Visual ChatGPT: talking, drawing and editing with visual foundation models. 2023. ArXiv:2303.04671"},{"key":"4128_CR99","unstructured":"Zhou W, Zhang S, Gu Y, et al. UniversalNER: targeted distillation from large language models for open named entity recognition. 2023. ArXiv:2308.03279"},{"key":"4128_CR100","first-page":"8386","volume-title":"Proceedings of 2013 IEEE International Conference on Acoustics, Speech and Signal Processing. IEEE","author":"J Liu","year":"2013","unstructured":"Liu J, Pasupat P, Cyphers S, et al. Asgard: a portable architecture for multilingual dialogue systems. In: Proceedings of 2013 IEEE International Conference on Acoustics, Speech and Signal Processing. IEEE, 2013. 8386\u20138390"},{"key":"4128_CR101","first-page":"13452","volume-title":"Proceedings of Proceedings of the AAAI Conference on Artificial Intelligence","author":"Z Liu","year":"2021","unstructured":"Liu Z, Xu Y, Yu T, et al. CrossNER: evaluating cross-domain named entity recognition. In: Proceedings of Proceedings of the AAAI Conference on Artificial Intelligence, 2021. 13452\u201313460"},{"key":"4128_CR102","unstructured":"Wang X, Zhou W, Zu C, et al. InstructUIE: multi-task instruction tuning for unified information extraction. 2023. ArXiv:2304.08085"}],"container-title":["Science China Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-023-4128-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11432-023-4128-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-023-4128-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T12:26:29Z","timestamp":1757420789000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11432-023-4128-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,19]]},"references-count":102,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2025,9]]}},"alternative-id":["4128"],"URL":"https:\/\/doi.org\/10.1007\/s11432-023-4128-3","relation":{},"ISSN":["1674-733X","1869-1919"],"issn-type":[{"value":"1674-733X","type":"print"},{"value":"1869-1919","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8,19]]},"assertion":[{"value":"5 October 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 February 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 June 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 August 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"192103"}}