{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,27]],"date-time":"2025-06-27T09:10:02Z","timestamp":1751015402512,"version":"3.41.0"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"19","license":[{"start":{"date-parts":[[2025,5,12]],"date-time":"2025-05-12T00:00:00Z","timestamp":1747008000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,5,12]],"date-time":"2025-05-12T00:00:00Z","timestamp":1747008000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100011246","name":"State Key Laboratory of Novel Software Technology","doi-asserted-by":"publisher","award":["KFKT2021B39"],"award-info":[{"award-number":["KFKT2021B39"]}],"id":[{"id":"10.13039\/501100011246","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1007\/s00521-025-11253-y","type":"journal-article","created":{"date-parts":[[2025,5,12]],"date-time":"2025-05-12T19:05:14Z","timestamp":1747076714000},"page":"14453-14470","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Learning economically for Chinese word segmentation: tuning pretrained model via active learning and N-gram preference"],"prefix":"10.1007","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2153-5824","authenticated-orcid":false,"given":"Zhiyuan","family":"Ma","sequence":"first","affiliation":[]},{"given":"Jiwei","family":"Qin","sequence":"additional","affiliation":[]},{"given":"Song","family":"Tang","sequence":"additional","affiliation":[]},{"given":"Jinpeng","family":"Mi","sequence":"additional","affiliation":[]},{"given":"Dan","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"11253_CR1","doi-asserted-by":"crossref","unstructured":"Bao Z, Li S, Gao S, et\u00a0al (2017) Neural domain adaptation with contextualized character embedding for Chinese word segmentation. In: proc. of natural language processing and Chinese computing (NLPCC), Springer. Springer, Dalian, China, pp 419\u2013430, doi 10.1007\/978-3-319-73618-1_35","DOI":"10.1007\/978-3-319-73618-1_35"},{"key":"11253_CR2","doi-asserted-by":"crossref","unstructured":"Cai T, Zhou Y, Zheng H (2020) Cost-quality adaptive active learning for Chinese clinical named entity recognition. In: proc. of IEEE international conference on bioinformatics and biomedicine (BIBM). IEEE, Seoul, Korea (South), pp 528\u2013533, doi 10.1109\/BIBM49941.2020.9313302","DOI":"10.1109\/BIBM49941.2020.9313302"},{"key":"11253_CR3","doi-asserted-by":"publisher","first-page":"12535","DOI":"10.1007\/s00521-021-05896-w","volume":"33","author":"T Cai","year":"2021","unstructured":"Cai T, Ma Z, Zheng H et al (2021) NE-LP: normalized entropy-and loss prediction-based sampling for active learning in Chinese word segmentation on EHRs. Neural Computing Appl 33:12535\u201312549. https:\/\/doi.org\/10.1007\/s00521-021-05896-w","journal-title":"Neural Computing Appl"},{"key":"11253_CR4","doi-asserted-by":"publisher","first-page":"1749","DOI":"10.1007\/s10994-020-05897-1","volume":"109","author":"HS Chang","year":"2020","unstructured":"Chang HS, Vembu S, Mohan S et al (2020) Using error decay prediction to overcome practical issues of deep active learning for named entity recognition. Mach Learning 109:1749\u20131778. https:\/\/doi.org\/10.1007\/s10994-020-05897-1","journal-title":"Mach Learning"},{"key":"11253_CR5","doi-asserted-by":"publisher","unstructured":"Chou TH, Lin CY, Kao HY (2023) Advancing multi-criteria Chinese word segmentation through criterion classification and denoising. In: Rogers A, Boyd-Graber J, Okazaki N (Eds) Proc. of the Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). Association for Computational Linguistics, Toronto, Canada, pp 6460\u20136476, https:\/\/doi.org\/10.18653\/v1\/2023.acl-long.356","DOI":"10.18653\/v1\/2023.acl-long.356"},{"key":"11253_CR6","doi-asserted-by":"publisher","unstructured":"Devlin J, Chang MW, Lee K, et\u00a0al (2019) BERT: Pre-training of deep bidirectional transformers for language understanding. In: proc. of the conference of the North American chapter of the association for computational linguistics (NAACL), pp 4171\u20134186, https:\/\/doi.org\/10.18653\/v1\/N19-1423, https:\/\/aclanthology.org\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"11253_CR7","doi-asserted-by":"publisher","unstructured":"Diao S, Bai J, Song Y, et\u00a0al (2020) ZEN: Pre-training Chinese text encoder enhanced by n-gram representations. In: proc. of findings of the association for computational linguistics: EMNLP, pp 4729\u20134740, https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.425, https:\/\/aclanthology.org\/2020.findings-emnlp.425","DOI":"10.18653\/v1\/2020.findings-emnlp.425"},{"key":"11253_CR8","doi-asserted-by":"crossref","unstructured":"Ding N, Long D, Xu G, et\u00a0al (2020) Coupling distant annotation and adversarial training for cross-domain Chinese word segmentation. In: proc. of the annual meeting of the association for computational linguistics (ACL). Association for Computational Linguistics, Online, pp 6662\u20136671 doi 10.18653\/v1\/2020.acl-main.595","DOI":"10.18653\/v1\/2020.acl-main.595"},{"issue":"9","key":"11253_CR9","doi-asserted-by":"publisher","first-page":"755","DOI":"10.1093\/bioinformatics\/14.9.755","volume":"14","author":"SR Eddy","year":"1998","unstructured":"Eddy SR (1998) Profile hidden Markov models. Bioinformatics 14(9):755\u2013763","journal-title":"Bioinformatics"},{"key":"11253_CR10","unstructured":"Emerson T (2005) The second international Chinese word segmentation bakeoff. In: proc. of the SIGHAN workshop on Chinese language processing (SIGHAN@IJCNLP\u201905), pp 123\u2013133, https:\/\/aclanthology.org\/I05-3017"},{"key":"11253_CR11","doi-asserted-by":"crossref","unstructured":"Fu J, Liu P, Zhang Q, et\u00a0al (2020) RethinkCWS: Is Chinese word segmentation a solved task? In: Proc. of Conference on Empirical Methods in Natural Language Processing (EMNLP). Association for Computational Linguistics, pp 5676\u20135686, 10.18653\/v1\/2020.emnlp-main.457","DOI":"10.18653\/v1\/2020.emnlp-main.457"},{"key":"11253_CR12","doi-asserted-by":"publisher","first-page":"2933","DOI":"10.1109\/TASLP.2020.3030487","volume":"28","author":"L Gan","year":"2020","unstructured":"Gan L, Zhang Y (2020) Investigating self-attention network for Chinese word segmentation. IEEE\/ACM Trans Audio, Speech, Lang Process 28:2933\u20132941","journal-title":"IEEE\/ACM Trans Audio, Speech, Lang Process"},{"key":"11253_CR13","doi-asserted-by":"crossref","unstructured":"Gong J, Chen X, Gui T, et\u00a0al (2019) Switch-LSTMs for multi-criteria Chinese word segmentation. In: proc. of the AAAI conference on artificial intelligence (AAAI), pp 6457\u20136464 doi 10.1609\/aaai.v33i01.33016457","DOI":"10.1609\/aaai.v33i01.33016457"},{"key":"11253_CR14","doi-asserted-by":"crossref","unstructured":"He R, Cai S, Ming Z, et\u00a0al (2022) Weighted self distillation for Chinese word segmentation. In: proc. of findings of the association for computational linguistics (Findings), pp 1757\u20131770 doi 10.18653\/v1\/2022.findings-acl.139","DOI":"10.18653\/v1\/2022.findings-acl.139"},{"key":"11253_CR15","doi-asserted-by":"crossref","unstructured":"Huang K, Huang D, Liu Z, et\u00a0al (2020) A joint multiple criteria model in transfer learning for cross-domain Chinese word segmentation. In: proc. of the conference on empirical methods in natural language processing (EMNLP). Association for computational linguistics, Online, pp 3873\u20133882 doi 10.18653\/v1\/2020.emnlp-main.318","DOI":"10.18653\/v1\/2020.emnlp-main.318"},{"key":"11253_CR16","doi-asserted-by":"crossref","unstructured":"Huang K, Yu H, Liu J, et\u00a0al (2021) Lexicon-based graph convolutional network for Chinese word segmentation. In: proc. of findings of the association for computational linguistics: EMNLP 2021. association for computational linguistics, pp 2908\u20132917, doi 10.18653\/v1\/2021.findings-emnlp.248","DOI":"10.18653\/v1\/2021.findings-emnlp.248"},{"issue":"2","key":"11253_CR17","doi-asserted-by":"publisher","first-page":"2109","DOI":"10.1109\/TNNLS.2022.3186855","volume":"35","author":"S Huang","year":"2024","unstructured":"Huang S, Wang T, Xiong H et al (2024) Temporal output discrepancy for loss estimation-based active learning. IEEE Trans Neural Netw Learning Syst 35(2):2109\u20132123. https:\/\/doi.org\/10.1109\/TNNLS.2022.3186855","journal-title":"IEEE Trans Neural Netw Learning Syst"},{"key":"11253_CR18","doi-asserted-by":"crossref","unstructured":"Jawahar G, Sagot B, Seddah D (2019) What does BERT learn about the structure of language? In: proc. of the annual meeting of the association for computational linguistics (ACL), pp 3651\u20133657 doi 10.18653\/v1\/P19-1356","DOI":"10.18653\/v1\/P19-1356"},{"key":"11253_CR19","doi-asserted-by":"crossref","unstructured":"Jiang P, Long D, Zhang Y, et\u00a0al (2022) Unsupervised boundary-aware language model pretraining for Chinese sequence labeling. In: Goldberg Y, Kozareva Z, Zhang Y (Eds) proceedings of the 2022 conference on empirical methods in natural language processing. association for computational linguistics, Abu Dhabi, United Arab Emirates, pp 526\u2013537 doi 10.18653\/v1\/2022.emnlp-main.34","DOI":"10.18653\/v1\/2022.emnlp-main.34"},{"key":"11253_CR20","doi-asserted-by":"crossref","unstructured":"Ke Z, Shi L, Sun S, et\u00a0al (2021) Pre-training with meta learning for Chinese word segmentation. In: proc. of the conference of the North American chapter of the association for computational linguistics (NAACL), pp 5514\u20135523, doi 10.18653\/v1\/2021.naacl-main.436","DOI":"10.18653\/v1\/2021.naacl-main.436"},{"key":"11253_CR21","unstructured":"Lafferty JD, McCallum A, Pereira F (2001) Conditional random fields: Probabilistic models for segmenting and labeling sequence data. In: proc. of conference on international conference on machine learning (ICML) doi 10.1145\/3605943"},{"key":"11253_CR22","doi-asserted-by":"publisher","first-page":"8928","DOI":"10.1007\/s10489-021-02269-7","volume":"51","author":"B Li","year":"2021","unstructured":"Li B, Xu W, Xu Z et al (2021) A two-domain coordinated sentence similarity scheme for question-answering robots regarding unpredictable outliers and non-orthogonal categories. Applied Intelligence 51:8928\u20138944. https:\/\/doi.org\/10.1007\/s10489-021-02269-7","journal-title":"Applied Intelligence"},{"key":"11253_CR23","unstructured":"Li S, Zhou G, Huang CR (2012) Active learning for Chinese word segmentation. In: proc. of international conference on computational linguistics (COLING), pp 683\u2013692, https:\/\/aclanthology.org\/C12-2067"},{"key":"11253_CR24","doi-asserted-by":"crossref","unstructured":"Li W, Song Y, Su Q, et\u00a0al (2022) Chinese word segmentation with BERT oriented probing and transformation. In: proc. of findings of the association for computational linguistics (Findings), pp 3935\u20133940 doi 10.18653\/v1\/2022.findings-acl.310","DOI":"10.18653\/v1\/2022.findings-acl.310"},{"key":"11253_CR25","doi-asserted-by":"publisher","first-page":"46","DOI":"10.1016\/j.neucom.2019.01.085","volume":"338","author":"J Liu","year":"2019","unstructured":"Liu J, Wu F, Wu C et al (2019) Neural Chinese word segmentation with dictionary. Neurocomputing 338:46\u201354","journal-title":"Neurocomputing"},{"key":"11253_CR26","doi-asserted-by":"publisher","DOI":"10.1145\/3593023","author":"Y Ma","year":"2023","unstructured":"Ma Y, Zhang Y, Sangaiah AK et al (2023) Active learning for name entity recognition with external knowledge. ACM Trans Asian and Low-Resour Lang Inf Process. https:\/\/doi.org\/10.1145\/3593023","journal-title":"ACM Trans Asian and Low-Resour Lang Inf Process"},{"key":"11253_CR27","doi-asserted-by":"crossref","unstructured":"Maimaiti M, Liu Y, Zheng Y, et\u00a0al (2021) Segment, mask, and predict: Augmenting Chinese word segmentation with self-supervision. In: proc. of conference on empirical methods in natural language processing. association for computational linguistics, Online and Punta Cana, Dominican Republic, pp 2068\u20132077, doi 10.18653\/v1\/2021.emnlp-main.158","DOI":"10.18653\/v1\/2021.emnlp-main.158"},{"key":"11253_CR28","doi-asserted-by":"crossref","unstructured":"Nguyen DV, Vo LB, Thin DV, et\u00a0al (2021) Span labeling approach for vietnamese and Chinese word segmentation. In: Proc. of Pacific Rim international conference on artificial intelligence (PRICAI), Springer. Springer, Hanoi, Vietnam, pp 244\u2013258, doi 10.1007\/978-3-030-89363-7_19","DOI":"10.1007\/978-3-030-89363-7_19"},{"key":"11253_CR29","doi-asserted-by":"crossref","unstructured":"Qiu L, Zhang Y (2015) Word segmentation for Chinese novels. In: Proc. of the AAAI conference on artificial intelligence (AAAI), pp 2440\u20132446 doi 10.1609\/aaai.v29i1.9523","DOI":"10.1609\/aaai.v29i1.9523"},{"key":"11253_CR30","doi-asserted-by":"crossref","unstructured":"Schr\u00f6der C, Niekler A, Potthast M (2022) Revisiting uncertainty-based query strategies for active learning with transformers. In: findings of the association for computational linguistics: ACL 2022. association for computational linguistics, Dublin, Ireland, pp 2194\u20132203, 10.18653\/v1\/2022.findings-acl.172","DOI":"10.18653\/v1\/2022.findings-acl.172"},{"key":"11253_CR31","unstructured":"Shen S, Li Z, Qi G (2021) Active learning for event extraction with memory-based loss prediction model. arXiv preprint https:\/\/arxiv.org\/abs\/2112.03073"},{"issue":"11","key":"11253_CR32","doi-asserted-by":"publisher","first-page":"21201","DOI":"10.1109\/TITS.2022.3182371","volume":"23","author":"S Su","year":"2022","unstructured":"Su S, Qu J, Cao Y et al (2022) Adversarial training lattice LSTM for named entity recognition of rail fault texts. IEEE Trans Intell Transp Syst 23(11):21201\u201321215","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"11253_CR33","unstructured":"Tang X, Wang J, Su Q (2022) Chinese word segmentation with heterogeneous graph neural network. ArXiv preprint abs\/2201.08975. https:\/\/api.semanticscholar.org\/CorpusID:246240867"},{"key":"11253_CR34","doi-asserted-by":"crossref","unstructured":"Tian Y, Song Y, Ao X, et\u00a0al (2020a) Joint Chinese word segmentation and part-of-speech tagging via two-way attentions of auto-analyzed knowledge. In: Proc. of the annual meeting of the association for computational linguistics (ACL), pp 8286\u20138296, doi 10.18653\/v1\/2020.acl-main.735","DOI":"10.18653\/v1\/2020.acl-main.735"},{"key":"11253_CR35","doi-asserted-by":"crossref","unstructured":"Tian Y, Song Y, Xia F, et\u00a0al (2020b) Improving Chinese word segmentation with wordhood memory networks. In: Proc. of the Annual Meeting of the Association for Computational Linguistics (ACL), pp 8274\u20138285, doi 10.18653\/v1\/2020.acl-main.734","DOI":"10.18653\/v1\/2020.acl-main.734"},{"key":"11253_CR36","doi-asserted-by":"crossref","unstructured":"Tian Y, Chen G, Qin H, et\u00a0al (2021) Federated Chinese word segmentation with global character associations. In: findings of the association for computational linguistics: ACL-IJCNLP 2021. Association for Computational Linguistics, pp 4306\u20134313, https:\/\/aclanthology.org\/2021.findings-acl.376","DOI":"10.18653\/v1\/2021.findings-acl.376"},{"key":"11253_CR37","unstructured":"Wan C, Jin F, Qiao Z, et\u00a0al (2021) Unsupervised active learning with loss prediction. Neural Computing and Applications pp 1\u20139. https:\/\/link.springer.com\/article\/10.1007\/s00521-021-06480-y"},{"key":"11253_CR38","doi-asserted-by":"publisher","unstructured":"Wertz L, Bogojeska J, Mirylenka K, et\u00a0al (2023) Reinforced active learning for low-resource, domain-specific, multi-label text classification. In: findings of the association for computational linguistics: ACL 2023. Association for Computational Linguistics, Toronto, Canada, pp 10959\u201310977, https:\/\/doi.org\/10.18653\/v1\/2023.findings-acl.697, https:\/\/aclanthology.org\/2023.findings-acl.697","DOI":"10.18653\/v1\/2023.findings-acl.697"},{"key":"11253_CR39","doi-asserted-by":"crossref","unstructured":"Wu F, Liu J, Wu C, et\u00a0al (2019) Neural Chinese named entity recognition via cnn-lstm-crf and joint training with word segmentation. In: Proc. of The World Wide Web Conference (WWW), pp 3342\u20133348, doi 10.1145\/3308558.3313743","DOI":"10.1145\/3308558.3313743"},{"issue":"5","key":"11253_CR40","doi-asserted-by":"publisher","first-page":"384","DOI":"10.1109\/TAI.2021.3087116","volume":"2","author":"D Yan","year":"2021","unstructured":"Yan D, Cao H, Wang T et al (2021) Graph-based knowledge acquisition with convolutional networks for distribution network patrol robots. IEEE Trans Artif Intell 2(5):384\u2013393","journal-title":"IEEE Trans Artif Intell"},{"key":"11253_CR41","doi-asserted-by":"publisher","first-page":"78","DOI":"10.1162\/tacl_a_00301","volume":"8","author":"H Yan","year":"2020","unstructured":"Yan H, Qiu X, Huang X (2020) A graph-based model for joint Chinese word segmentation and dependency parsing. Trans Assoc Computational Linguist 8:78\u201392","journal-title":"Trans Assoc Computational Linguist"},{"issue":"1","key":"11253_CR42","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1186\/s12911-023-02127-1","volume":"23","author":"Q Ye","year":"2023","unstructured":"Ye Q, Cai T, Ji X et al (2023) Subsequence and distant supervision based active learning for relation extraction of Chinese medical texts. BMC Med Inform Decis Mak 23(1):34\u201345. https:\/\/doi.org\/10.1186\/s12911-023-02127-1","journal-title":"BMC Med Inform Decis Mak"},{"key":"11253_CR43","unstructured":"Ye Y, Zhang Y, Li W, et\u00a0al (2019) Improving cross-domain Chinese word segmentation with word embeddings. In: proc. of annual conference of the North American chapter of the association for computational linguistics (NAACL). Association for computational linguistics, Minneapolis, USA, pp 2726\u20132735, 10.18653\/v1\/N19-1279"},{"key":"11253_CR44","doi-asserted-by":"crossref","unstructured":"Yoo D, Kweon IS (2019) Learning loss for active learning. In: Proc. of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 93\u2013102","DOI":"10.1109\/CVPR.2019.00018"},{"issue":"5","key":"11253_CR45","doi-asserted-by":"publisher","first-page":"949","DOI":"10.1049\/cje.2021.00.363","volume":"31","author":"H Yu","year":"2022","unstructured":"Yu H, Huang K, Wang Y et al (2022) Lexicon-augmented cross-domain Chinese word segmentation with graph convolutional network. Chin J Electron 31(5):949\u2013957","journal-title":"Chin J Electron"},{"key":"11253_CR46","doi-asserted-by":"crossref","unstructured":"Zhang Z, Strubell E, Hovy E (2022) A survey of active learning for natural language processing. In: proc. of the conference on empirical methods in natural language processing (EMNLP). Association for Computational Linguistics, pp 6166\u20136190, 10.18653\/v1\/2022.emnlp-main.414","DOI":"10.18653\/v1\/2022.emnlp-main.414"},{"key":"11253_CR47","doi-asserted-by":"crossref","unstructured":"Zhao X, Yang M, Qu Q, et\u00a0al (2020) Improving neural Chinese word segmentation with lexicon-enhanced adaptive attention. In: Proc. international conference on research and development in information retrieval (SIGIR), pp 1953\u20131956, 10.1145\/3397271.3401328","DOI":"10.1145\/3397271.3401328"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11253-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-025-11253-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-025-11253-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,27]],"date-time":"2025-06-27T08:29:57Z","timestamp":1751012997000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-025-11253-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,12]]},"references-count":47,"journal-issue":{"issue":"19","published-print":{"date-parts":[[2025,7]]}},"alternative-id":["11253"],"URL":"https:\/\/doi.org\/10.1007\/s00521-025-11253-y","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2025,5,12]]},"assertion":[{"value":"24 January 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 April 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 May 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"All authors certify that they have no affiliations with or involvement in any organization or entity with any financial interest or non-financial interest in the subject matter or materials discussed in this manuscript.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}