{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T07:09:07Z","timestamp":1776150547870,"version":"3.50.1"},"reference-count":45,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,3,27]],"date-time":"2024-03-27T00:00:00Z","timestamp":1711497600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,3,27]],"date-time":"2024-03-27T00:00:00Z","timestamp":1711497600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Sci. China Inf. Sci."],"published-print":{"date-parts":[[2024,5]]},"DOI":"10.1007\/s11432-021-3536-5","type":"journal-article","created":{"date-parts":[[2024,4,12]],"date-time":"2024-04-12T11:02:01Z","timestamp":1712919721000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":41,"title":["CPT: a pre-trained unbalanced transformer for both Chinese language understanding and generation"],"prefix":"10.1007","volume":"67","author":[{"given":"Yunfan","family":"Shao","sequence":"first","affiliation":[]},{"given":"Zhichao","family":"Geng","sequence":"additional","affiliation":[]},{"given":"Yitao","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Junqi","family":"Dai","sequence":"additional","affiliation":[]},{"given":"Hang","family":"Yan","sequence":"additional","affiliation":[]},{"given":"Fei","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Zhe","family":"Li","sequence":"additional","affiliation":[]},{"given":"Hujun","family":"Bao","sequence":"additional","affiliation":[]},{"given":"Xipeng","family":"Qiu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,3,27]]},"reference":[{"key":"3536_CR1","doi-asserted-by":"publisher","first-page":"1872","DOI":"10.1007\/s11431-020-1647-3","volume":"63","author":"X P Qiu","year":"2020","unstructured":"Qiu X P, Sun T X, Xu Y G, et al. Pre-trained models for natural language processing: a survey. Sci China Tech Sci, 2020, 63: 1872\u20131897","journal-title":"Sci China Tech Sci"},{"key":"3536_CR2","unstructured":"Devlin J, Chang M, Lee K, et al. BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 2019. 4171\u20134186"},{"key":"3536_CR3","unstructured":"Liu Y, Ott M, Goyal N, et al. RoBERTa: a robustly optimized BERT pretraining approach. 2019. ArXiv:1907.11692"},{"key":"3536_CR4","doi-asserted-by":"crossref","unstructured":"Lewis M, Liu Y, Goyal N, et al. BART: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, 2020. 7871\u20137880","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"3536_CR5","unstructured":"Radford A, Narasimhan K, Salimans T, et al. Improving language understanding by generative pre-training. 2018. https:\/\/www.cs.ubc.ca\/\u223camuham01\/LING530\/papers\/radford2018improving.pdf"},{"key":"3536_CR6","unstructured":"Cui Y M, Che W X, Liu T, et al. 
Pre-training with whole word masking for Chinese BERT. 2019. ArXiv:1906.08101"},{"key":"3536_CR7","unstructured":"Sun Y, Wang S H, Li Y K, et al. ERNIE: enhanced representation through knowledge integration. 2019. ArXiv:1904.09223"},{"key":"3536_CR8","unstructured":"Wei J Q, Ren X Z, Li X G, et al. NEZHA: neural contextualized representation for Chinese language understanding. 2019. ArXiv:1909.00204"},{"key":"3536_CR9","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1016\/j.aiopen.2021.07.001","volume":"2","author":"Z Zhang","year":"2021","unstructured":"Zhang Z, Han X, Zhou H, et al. CPM: a large-scale generative Chinese pre-trained language model. AI Open, 2021, 2: 93\u201399","journal-title":"AI Open"},{"key":"3536_CR10","doi-asserted-by":"publisher","first-page":"216","DOI":"10.1016\/j.aiopen.2021.12.003","volume":"2","author":"Z Zhang","year":"2021","unstructured":"Zhang Z, Gu Y, Han X, et al. CPM-2: large-scale cost-effective pre-trained language models. AI Open, 2021, 2: 216\u2013224","journal-title":"AI Open"},{"key":"3536_CR11","unstructured":"Zeng W, Ren X Z, Su T, et al. Pangu-\u03b1: large-scale autoregressive pretrained Chinese language models with auto-parallel computation. 2021. ArXiv:2104.12369"},{"key":"3536_CR12","unstructured":"Dong L, Yang N, Wang W H, et al. Unified language model pre-training for natural language understanding and generation. In: Proceedings of the 33rd International Conference on Neural Information Processing Systems, 2019. 13063\u201313075"},{"key":"3536_CR13","unstructured":"Bao H B, Dong L, Wei F R, et al. UniLMv2: pseudo-masked language models for unified language model pre-training. In: Proceedings of the 37th International Conference on Machine Learning, 2020. 642\u2013652"},{"key":"3536_CR14","unstructured":"Du Z X, Qian Y J, Liu X, et al. All NLP tasks are generation tasks: a general pretraining framework. 2021. ArXiv:2103.10360"},{"key":"3536_CR15","doi-asserted-by":"crossref","unstructured":"Bi B, Li C L, Wu C, et al. PALM: pre-training an autoencoding&autoregressive language model for context-conditioned generation. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing, 2020. 8681\u20138691","DOI":"10.18653\/v1\/2020.emnlp-main.700"},{"key":"3536_CR16","unstructured":"Sun Y, Wang S H, Feng S K, et al. ERNIE 3.0: large-scale knowledge enhanced pre-training for language understanding and generation. 2021. ArXiv:2107.02137"},{"key":"3536_CR17","doi-asserted-by":"crossref","unstructured":"Diao S Z, Bai J X, Song Y, et al. ZEN: pre-training Chinese text encoder enhanced by n-gram representations. In: Proceedings of the Findings of the Association for Computational Linguistics, 2020. 4729\u20134740","DOI":"10.18653\/v1\/2020.findings-emnlp.425"},{"key":"3536_CR18","doi-asserted-by":"crossref","unstructured":"Sun Y, Wang S H, Li Y K, et al. ERNIE 2.0: a continual pre-training framework for language understanding. In: Proceedings of the AAAI Technical Track: Natural Language Processing, 2020. 8968\u20138975","DOI":"10.1609\/aaai.v34i05.6428"},{"key":"3536_CR19","doi-asserted-by":"crossref","unstructured":"Sun Z J, Li X Y, Sun X F, et al. Chinesebert: Chinese pretraining enhanced by glyph and pinyin information. 2021. ArXiv:2106.16038","DOI":"10.18653\/v1\/2021.acl-long.161"},{"key":"3536_CR20","first-page":"5485","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel C, Shazeer N, Roberts A, et al. Exploring the limits of transfer learning with a unified text-to-text transformer. 
J Mach Learn Res, 2020, 21: 5485\u20135551","journal-title":"J Mach Learn Res"},{"key":"3536_CR21","doi-asserted-by":"crossref","unstructured":"Dou Z, Liu P F, Hayashi H, et al. GSum: a general framework for guided neural abstractive summarization. In: Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 2021. 4830\u20134842","DOI":"10.18653\/v1\/2021.naacl-main.384"},{"key":"3536_CR22","doi-asserted-by":"crossref","unstructured":"Liu Y X, Liu P F. SimCLS: A simple framework for contrastive learning of abstractive summarization. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics, 2021. 1065\u20131072","DOI":"10.18653\/v1\/2021.acl-short.135"},{"key":"3536_CR23","doi-asserted-by":"crossref","unstructured":"Lin Z J, Madotto A, Winata G I, et al. MinTL: minimalist transfer learning for task-oriented dialogue systems. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP), 2020. 3391\u20133405","DOI":"10.18653\/v1\/2020.emnlp-main.273"},{"key":"3536_CR24","doi-asserted-by":"crossref","unstructured":"Liu X, He P, Chen W, et al. Multi-task deep neural networks for natural language understanding. In: Proceedings of the 57th Conference of the Association for Computational Linguistics, 2019. 4487\u20134496","DOI":"10.18653\/v1\/P19-1441"},{"key":"3536_CR25","doi-asserted-by":"crossref","unstructured":"Aghajanyan A, Gupta A, Shrivastava A, et al. Muppet: massive multi-task representations with pre-finetuning. 2021. ArXiv:2101.11038","DOI":"10.18653\/v1\/2021.emnlp-main.468"},{"key":"3536_CR26","unstructured":"Wei J, Bosma M, Zhao V Y, et al. Finetuned language models are zero-shot learners. 2021. ArXiv:2109.01652"},{"key":"3536_CR27","unstructured":"Kasai J, Pappas N, Peng H, et al. Deep encoder, shallow decoder: reevaluating non-autoregressive machine translation. In: Proceedings of International Conference on Learning Representations, 2021"},{"key":"3536_CR28","doi-asserted-by":"crossref","unstructured":"Sun X, Ge T, Wei F R, et al. Instantaneous grammatical error correction with shallow aggressive decoding. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics, 2021. 5937\u20135947","DOI":"10.18653\/v1\/2021.acl-long.462"},{"key":"3536_CR29","doi-asserted-by":"crossref","unstructured":"Schick T, Sch\u00fctze H. It\u2019s not just size that matters: small language models are also few-shot learners. In: Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 2021. 2339\u20132352","DOI":"10.18653\/v1\/2021.naacl-main.185"},{"key":"3536_CR30","doi-asserted-by":"crossref","unstructured":"Gao T Y, Fisch A, Chen D Q. Making pre-trained language models better few-shot learners. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics, 2021. 3816\u20133830","DOI":"10.18653\/v1\/2021.acl-long.295"},{"key":"3536_CR31","unstructured":"Liu P F, Yuan W Z, Fu J L, et al. Pre-train, prompt, and predict: a systematic survey of prompting methods in natural language processing. 2021. ArXiv:2107.13586"},{"key":"3536_CR32","doi-asserted-by":"crossref","unstructured":"Xu L, Hu H, Zhang X W, et al. CLUE: a Chinese language understanding evaluation benchmark. In: Proceedings of the 28th International Conference on Computational Linguistics, 2020. 
4762\u20134772","DOI":"10.18653\/v1\/2020.coling-main.419"},{"key":"3536_CR33","unstructured":"Zhang X S, Li P S, Li H. AMBERT: a pre-trained language model with multi-grained tokenization. 2020. ArXiv:2008.11869"},{"key":"3536_CR34","unstructured":"Emerson T. The second international Chinese word segmentation bakeoff. In: Proceedings of the 4th SIGHAN Workshop on Chinese Language Processing, 2005"},{"key":"3536_CR35","unstructured":"Levow G. The third international Chinese language processing bakeoff: word segmentation and named entity recognition. In: Proceedings of the 5th SIGHAN Workshop on Chinese Language Processing, 2006. 108\u2013117"},{"key":"3536_CR36","doi-asserted-by":"crossref","unstructured":"Li X N, Shao Y F, Sun T X, et al. Accelerating BERT inference for sequence labeling via early-exit. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics, 2021. 189\u2013199","DOI":"10.18653\/v1\/2021.acl-long.16"},{"key":"3536_CR37","doi-asserted-by":"crossref","unstructured":"Li X B, Yan H, Qiu X P, et al. FLAT: Chinese NER using flat-lattice transformer. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, 2020. 6836\u20136842","DOI":"10.18653\/v1\/2020.acl-main.611"},{"key":"3536_CR38","doi-asserted-by":"crossref","unstructured":"Qiu X P, Pei H Z, Yan H, et al. A concise model for multi-criteria Chinese word segmentation with transformer encoder. In: Proceedings of the Findings of the Association for Computational Linguistics, 2020. 2887\u20132897","DOI":"10.18653\/v1\/2020.findings-emnlp.260"},{"key":"3536_CR39","doi-asserted-by":"crossref","unstructured":"Cui Y M, Liu T, Che W X, et al. A span-extraction dataset for Chinese machine reading comprehension. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing, 2019. 5882\u20135888","DOI":"10.18653\/v1\/D19-1600"},{"key":"3536_CR40","unstructured":"Shao C C, Liu T, Lai Y T, et al. DRCD: a Chinese machine reading comprehension dataset. 2018. ArXiv:1806.00920"},{"key":"3536_CR41","doi-asserted-by":"crossref","unstructured":"Cui Y M, Che W X, Liu T, et al. Revisiting pre-trained models for Chinese natural language processing. In: Proceedings of the Findings of the Association for Computational Linguistics, 2020. 657\u2013668","DOI":"10.18653\/v1\/2020.findings-emnlp.58"},{"key":"3536_CR42","doi-asserted-by":"crossref","unstructured":"Hu B T, Chen Q C, Zhu F Z. LCSTS: a large scale Chinese short text summarization dataset. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing, 2015. 1967\u20131972","DOI":"10.18653\/v1\/D15-1229"},{"key":"3536_CR43","doi-asserted-by":"crossref","unstructured":"Shao Z H, Huang M L, Wen J T, et al. Long and diverse text generation with planning-based hierarchical variational model. In: Proceedings of the Conference on Empirical Methods in Natural Language Processing, 2019. 3255\u20133266","DOI":"10.18653\/v1\/D19-1321"},{"key":"3536_CR44","doi-asserted-by":"publisher","first-page":"726","DOI":"10.1162\/tacl_a_00343","volume":"8","author":"Y Liu","year":"2020","unstructured":"Liu Y, Gu J, Goyal N, et al. Multilingual denoising pre-training for neural machine translation. Trans Assoc Comput Linguist, 2020, 8: 726\u2013742","journal-title":"Trans Assoc Comput Linguist"},{"key":"3536_CR45","doi-asserted-by":"crossref","unstructured":"Xue L T, Constant N, Roberts A, et al. mT5: a massively multilingual pre-trained text-to-text transformer. 
In: Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 2021. 483\u2013498","DOI":"10.18653\/v1\/2021.naacl-main.41"}],"container-title":["Science China Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-021-3536-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11432-021-3536-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11432-021-3536-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T19:52:43Z","timestamp":1750362763000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11432-021-3536-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,27]]},"references-count":45,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2024,5]]}},"alternative-id":["3536"],"URL":"https:\/\/doi.org\/10.1007\/s11432-021-3536-5","relation":{},"ISSN":["1674-733X","1869-1919"],"issn-type":[{"value":"1674-733X","type":"print"},{"value":"1869-1919","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,3,27]]},"assertion":[{"value":"22 December 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 April 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 April 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 March 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"152102"}}
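The object above is a Crossref REST API work record for DOI 10.1007/s11432-021-3536-5. As a minimal sketch (not part of the record itself; it assumes network access and Python 3), the same record can be fetched from Crossref's public endpoint api.crossref.org/works/{DOI} and a few of the fields shown above read out:

    # Fetch the Crossref work record for the DOI given in the metadata above
    # and print a handful of its fields. Uses only the standard library.
    import json
    from urllib.request import urlopen

    DOI = "10.1007/s11432-021-3536-5"  # DOI taken from the record above

    with urlopen(f"https://api.crossref.org/works/{DOI}") as resp:
        # Top level mirrors the record above: {"status": ..., "message-type": "work", "message": {...}}
        work = json.load(resp)["message"]

    print(work["title"][0])                               # article title
    print(", ".join(f'{a["given"]} {a["family"]}'         # author list
                    for a in work["author"]))
    print("references:", work["references-count"])        # size of the reference list
    print("cited by:", work["is-referenced-by-count"])    # citation count at indexing time

Field names such as "title", "author", "references-count", and "is-referenced-by-count" are read directly from the record shown above; the live response may differ in counts and timestamps, since Crossref re-indexes records over time.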