{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,5]],"date-time":"2026-03-05T02:26:43Z","timestamp":1772677603485,"version":"3.50.1"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T00:00:00Z","timestamp":1760054400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T00:00:00Z","timestamp":1760054400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100004543","name":"China Scholarship Council","doi-asserted-by":"publisher","award":["No.202406410051"],"award-info":[{"award-number":["No.202406410051"]}],"id":[{"id":"10.13039\/501100004543","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Earth Sci Inform"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s12145-025-02007-4","type":"journal-article","created":{"date-parts":[[2025,10,10]],"date-time":"2025-10-10T09:07:17Z","timestamp":1760087237000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["LLM-GeoCWS: a semi-supervised chinese word segmentation method using large language model for geoscience domain"],"prefix":"10.1007","volume":"18","author":[{"given":"Deping","family":"Chu","sequence":"first","affiliation":[]},{"given":"Zhuo","family":"Tan","sequence":"additional","affiliation":[]},{"given":"Bo","family":"Wan","sequence":"additional","affiliation":[]},{"given":"Fang","family":"Fang","sequence":"additional","affiliation":[]},{"given":"Shunping","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Yuanjian","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Mingqing","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Yan","family":"Wu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,10]]},"reference":[{"key":"2007_CR1","unstructured":"Achiam J, Adler S, Agarwal S et al (2023) Gpt-4 technical report. arXiv preprint arXiv:230308774"},{"key":"2007_CR2","doi-asserted-by":"publisher","first-page":"2318","DOI":"10.18653\/v1\/2024.findings-acl.137","volume-title":"Findings of the association for computational linguistics: ACL 2024","author":"J Chen","year":"2024","unstructured":"Chen J, Xiao S, Zhang P et al (2024) M3-Embedding: Multi-Linguality, Multi-Functionality, Multi-Granularity text embeddings through Self-Knowledge distillation. In: Ku L-W, Martins A, Srikumar V (eds) Findings of the association for computational linguistics: ACL 2024. Association for Computational Linguistics, Bangkok, Thailand, pp 2318\u20132335"},{"key":"2007_CR3","first-page":"3039","volume":"46","author":"DP Chu","year":"2021","unstructured":"Chu DP, Wan B, Li H et al (2021) Geological entity recognition based on ELMO-CNN-BiLSTM-CRF model. Earth Sci 46:3039\u20133048","journal-title":"Earth Sci"},{"key":"2007_CR4","doi-asserted-by":"publisher","first-page":"2169","DOI":"10.1080\/13658816.2022.2087224","volume":"36","author":"D Chu","year":"2022","unstructured":"Chu D, Wan B, Li H et al (2022) A machine learning approach to extracting spatial information from geological texts in Chinese. Int J Geogr Inf Sci 36:2169\u20132193. https:\/\/doi.org\/10.1080\/13658816.2022.2087224","journal-title":"Int J Geogr Inf Sci"},{"key":"2007_CR5","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2024.126378","volume":"268","author":"D Chu","year":"2025","unstructured":"Chu D, Wan B, Ni H et al (2025) Geosmie: an event extraction framework for document-level spatial morphological information extraction. Expert Syst Appl 268:126378","journal-title":"Expert Syst Appl"},{"key":"2007_CR6","doi-asserted-by":"crossref","unstructured":"Diao S, Bai J, Song Y et al (2020) ZEN: Pre-training Chinese text encoder enhanced by N-gram representations. In: Findings of the Association for Computational Linguistics: EMNLP 2020. pp 4729\u20134740","DOI":"10.18653\/v1\/2020.findings-emnlp.425"},{"key":"2007_CR7","doi-asserted-by":"publisher","first-page":"2933","DOI":"10.1109\/TASLP.2020.3030487","volume":"28","author":"L Gan","year":"2020","unstructured":"Gan L, Zhang Y (2020) Investigating self-attention network for Chinese word segmentation. IEEE ACM Trans Audio Speech Lang Process 28:2933\u20132941","journal-title":"IEEE ACM Trans Audio Speech Lang Process"},{"key":"2007_CR8","doi-asserted-by":"crossref","unstructured":"He R, Cai S, Ming Z, Zhang J (2022) Weighted self distillation for chinese word segmentation. In: Findings of the Association for Computational Linguistics: ACL 2022. pp 1757\u20131770","DOI":"10.18653\/v1\/2022.findings-acl.139"},{"key":"2007_CR9","doi-asserted-by":"crossref","unstructured":"Huang K, Yu H, Liu J et al (2021) Lexicon-based graph convolutional network for Chinese word segmentation. In: Findings of the Association for Computational Linguistics: EMNLP 2021. pp 2908\u20132917","DOI":"10.18653\/v1\/2021.findings-emnlp.248"},{"key":"2007_CR10","unstructured":"Kenton JDM-WC, Toutanova LK (2019) BERT: Pre-training of deep bidirectional transformers for language understanding. In: Proceedings of NAACL-HLT. pp 4171\u20134186"},{"key":"2007_CR11","doi-asserted-by":"crossref","unstructured":"Li X, Meng Y, Sun X et al (2019) Is word segmentation necessary for deep learning of Chinese representations? In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics. pp 3242\u20133252","DOI":"10.18653\/v1\/P19-1314"},{"key":"2007_CR12","doi-asserted-by":"publisher","DOI":"10.1029\/2021EA001673","volume":"8","author":"W Li","year":"2021","unstructured":"Li W, Ma K, Qiu Q et al (2021) Chinese word segmentation based on self-learning model and geological knowledge for the geoscience domain. Earth Space Sci 8:e2021EA001673. https:\/\/doi.org\/10.1029\/2021EA001673","journal-title":"Earth Space Sci"},{"key":"2007_CR13","unstructured":"Liu Y, Ott M, Goyal N et al (2019) RoBERTa: a robustly optimized BERT pretraining approach"},{"key":"2007_CR14","doi-asserted-by":"publisher","first-page":"166488","DOI":"10.1109\/ACCESS.2021.3136567","volume":"9","author":"BH Ngo","year":"2021","unstructured":"Ngo BH, Kim JH, Chae YJ, Cho SI (2021a) Multi-view collaborative learning for semi-supervised domain adaptation. IEEE Access 9:166488\u2013166501","journal-title":"IEEE Access"},{"key":"2007_CR15","doi-asserted-by":"publisher","first-page":"128467","DOI":"10.1109\/ACCESS.2021.3110605","volume":"9","author":"BH Ngo","year":"2021","unstructured":"Ngo BH, Park JH, Park SJ, Cho SI (2021b) Semi-supervised domain adaptation using explicit class-wise matching for domain-invariant and class-discriminative feature learning. IEEE Access 9:128467\u2013128480","journal-title":"IEEE Access"},{"key":"2007_CR16","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i6.32662","author":"BH Ngo","year":"2025","unstructured":"Ngo BH, Bui DC, Do-Tran N-T, Choi TJ (2025a) Higda: hierarchical graph of nodes to learn local-to-global topology for semi-supervised domain adaptation. Proceedings of the AAAI Conference on Artificial Intelligence. https:\/\/doi.org\/10.1609\/aaai.v39i6.32662","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"2007_CR17","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2025.126597","volume":"271","author":"BH Ngo","year":"2025","unstructured":"Ngo BH, Bui DC, Choi TJ (2025b) How to enrich cross-domain representations? Data augmentation, cycle-pseudo labeling, and category-aware graph learning. Expert Syst Appl 271:126597","journal-title":"Expert Syst Appl"},{"key":"2007_CR18","doi-asserted-by":"publisher","first-page":"26797","DOI":"10.1007\/s10489-023-04950-5","volume":"53","author":"JH Park","year":"2023","unstructured":"Park JH, Kim JH, Ngo BH et al (2023) Adversarial representation teaching with perturbation-agnostic student-teacher structure for semi-supervised learning. Appl Intell 53:26797\u201326809. https:\/\/doi.org\/10.1007\/s10489-023-04950-5","journal-title":"Appl Intell"},{"key":"2007_CR19","doi-asserted-by":"crossref","unstructured":"Pei W, Ge T, Chang B (2014) Max-margin tensor neural network for Chinese word segmentation. In: Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers). pp 293\u2013303","DOI":"10.3115\/v1\/P14-1028"},{"key":"2007_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.cageo.2018.08.006","volume":"121","author":"Q Qiu","year":"2018","unstructured":"Qiu Q, Xie Z, Wu L, Li W (2018) Dgeosegmenter: a dictionary-based Chinese word segmenter for the geoscience domain. Comput Geosci 121:1\u201311","journal-title":"Comput Geosci"},{"key":"2007_CR21","doi-asserted-by":"publisher","first-page":"565","DOI":"10.1007\/s12145-019-00390-3","volume":"12","author":"Q Qiu","year":"2019","unstructured":"Qiu Q, Xie Z, Wu L et al (2019) BiLSTM-CRF for geological named entity recognition from the geoscience literature. Earth Sci Inform 12:565\u2013579. https:\/\/doi.org\/10.1007\/s12145-019-00390-3","journal-title":"Earth Sci Inform"},{"key":"2007_CR22","doi-asserted-by":"publisher","first-page":"1393","DOI":"10.1007\/s12145-020-00527-9","volume":"13","author":"Q Qiu","year":"2020","unstructured":"Qiu Q, Xie Z, Wu L, Tao L (2020) Automatic spatiotemporal and semantic information extraction from unstructured geoscience reports using text mining techniques. Earth Sci Inf 13:1393\u20131410. https:\/\/doi.org\/10.1007\/s12145-020-00527-9","journal-title":"Earth Sci Inf"},{"key":"2007_CR23","doi-asserted-by":"publisher","first-page":"387","DOI":"10.1080\/19475683.2023.2186487","volume":"29","author":"Q Qiu","year":"2023","unstructured":"Qiu Q, Xie Z, Ma K, Tian M (2023) Bertcws: unsupervised multi-granular Chinese word segmentation based on a BERT method for the geoscience domain. Ann GIS 29:387\u2013399. https:\/\/doi.org\/10.1080\/19475683.2023.2186487","journal-title":"Ann GIS"},{"key":"2007_CR24","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1016\/j.patrec.2021.11.010","volume":"155","author":"AC Rouhou","year":"2022","unstructured":"Rouhou AC, Dhiaf M, Kessentini Y, Salem SB (2022) Transformer-based approach for joint handwriting and named entity recognition in historical document. Pattern Recognit Lett 155:128\u2013134","journal-title":"Pattern Recognit Lett"},{"key":"2007_CR25","doi-asserted-by":"publisher","first-page":"9652","DOI":"10.18653\/v1\/2024.findings-acl.575","volume-title":"Findings of the association for computational linguistics: ACL 2024","author":"J Santoso","year":"2024","unstructured":"Santoso J, Sutanto P, Cahyadi B, Setiawan E (2024) Pushing the limits of Low-Resource NER using LLM artificial data generation. In: Ku L-W, Martins A, Srikumar V (eds) Findings of the association for computational linguistics: ACL 2024. Association for Computational Linguistics, Bangkok, Thailand, pp 9652\u20139667"},{"key":"2007_CR26","doi-asserted-by":"crossref","unstructured":"Tian Y, Song Y, Ao X et al (2020a) Joint Chinese word segmentation and part-of-speech tagging via two-way attentions of auto-analyzed knowledge. In: Proceedings of the 58th annual meeting of the association for computational linguistics. pp 8286\u20138296","DOI":"10.18653\/v1\/2020.acl-main.735"},{"key":"2007_CR27","doi-asserted-by":"crossref","unstructured":"Tian Y, Song Y, Xia F et al (2020b) Improving Chinese word segmentation with wordhood memory networks. In: Proceedings of the 58th annual meeting of the association for computational linguistics. pp 8274\u20138285","DOI":"10.18653\/v1\/2020.acl-main.734"},{"key":"2007_CR28","doi-asserted-by":"publisher","DOI":"10.1016\/j.cageo.2024.105571","volume":"187","author":"M Tian","year":"2024","unstructured":"Tian M, Ma K, Wu Q et al (2024) Joint extraction of entity relations from geological reports based on a novel relation graph convolutional network. Comput Geosci 187:105571","journal-title":"Comput Geosci"},{"key":"2007_CR29","unstructured":"Van der Maaten L, Hinton G (2008) Visualizing data using t-SNE. J Mach Learn Res 9:2579\u20132605"},{"key":"2007_CR30","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2023.103268","volume":"60","author":"B Wan","year":"2023","unstructured":"Wan B, Dong S, Chu D et al (2023) A deep neural network model for coreference resolution in geological domain. Inf Process Manag 60:103268","journal-title":"Inf Process Manag"},{"key":"2007_CR31","doi-asserted-by":"publisher","first-page":"2076","DOI":"10.3390\/app15031404","volume":"15","author":"B Wan","year":"2025","unstructured":"Wan B, Tan Z, Chu D et al (2025) Semi-supervised Chinese word segmentation in geological domain using pseudo-lexicon and self-training strategy. Appl Sci 15:2076\u20133417. https:\/\/doi.org\/10.3390\/app15031404","journal-title":"Appl Sci"},{"key":"2007_CR32","unstructured":"Wang C, Xu B (2017) Convolutional neural network with word embeddings for Chinese word segmentation. In: Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers). pp 163\u2013172"},{"key":"2007_CR33","doi-asserted-by":"publisher","DOI":"10.1016\/j.cageo.2022.105229","volume":"168","author":"B Wang","year":"2022","unstructured":"Wang B, Wu L, Xie Z et al (2022) Understanding geological reports based on knowledge graphs using a deep learning approach. Comput Geosci 168:105229","journal-title":"Comput Geosci"},{"key":"2007_CR34","doi-asserted-by":"crossref","unstructured":"Xie T, Li Q, Zhang J et al (2023) Empirical study of zero-shot NER with ChatGPT. In: Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing. pp 7935\u20137956","DOI":"10.18653\/v1\/2023.emnlp-main.493"},{"key":"2007_CR35","unstructured":"Xue N (2003) Chinese word segmentation as character tagging. In: International Journal of Computational Linguistics & Chinese Language Processing, Volume 8, Number 1, February 2003: Special Issue on Word Formation and Chinese Language Processing. pp 29\u201348"},{"key":"2007_CR36","doi-asserted-by":"crossref","unstructured":"Zhang H-P, Yu H-K, Xiong D, Liu Q (2003) HHMM-based Chinese lexical analyzer ICTCLAS. In: Proceedings of the second SIGHAN workshop on Chinese language processing. pp 184\u2013187","DOI":"10.3115\/1119250.1119280"},{"key":"2007_CR37","doi-asserted-by":"publisher","first-page":"1723","DOI":"10.1016\/j.patrec.2004.06.015","volume":"25","author":"L Zheng","year":"2004","unstructured":"Zheng L, Hassin AH, Tang X (2004) A new algorithm for machine printed Arabic character segmentation. Pattern Recognit Lett 25:1723\u20131729","journal-title":"Pattern Recognit Lett"}],"container-title":["Earth Science Informatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12145-025-02007-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s12145-025-02007-4","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12145-025-02007-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,26]],"date-time":"2025-12-26T06:01:03Z","timestamp":1766728863000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s12145-025-02007-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,10]]},"references-count":37,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["2007"],"URL":"https:\/\/doi.org\/10.1007\/s12145-025-02007-4","relation":{},"ISSN":["1865-0473","1865-0481"],"issn-type":[{"value":"1865-0473","type":"print"},{"value":"1865-0481","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,10]]},"assertion":[{"value":"23 April 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 August 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 October 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"525"}}