{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,12]],"date-time":"2025-07-12T01:05:28Z","timestamp":1752282328590,"version":"3.37.3"},"reference-count":48,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2024,11,12]],"date-time":"2024-11-12T00:00:00Z","timestamp":1731369600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,12]],"date-time":"2024-11-12T00:00:00Z","timestamp":1731369600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100003708","name":"Korea Institute of Science and Technology Information","doi-asserted-by":"crossref","award":["K-22-L03-C03-S01","K-22-L03-C03-S01","K-22-L03-C03-S01","K-22-L03-C03-S01","K-22-L03-C03-S01"],"award-info":[{"award-number":["K-22-L03-C03-S01","K-22-L03-C03-S01","K-22-L03-C03-S01","K-22-L03-C03-S01","K-22-L03-C03-S01"]}],"id":[{"id":"10.13039\/501100003708","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"crossref","award":["2021R1F1A1060117","2021R1F1A1060117"],"award-info":[{"award-number":["2021R1F1A1060117","2021R1F1A1060117"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100003569","name":"Ministry of Food and Drug Safety","doi-asserted-by":"crossref","award":["22183MFDS431"],"award-info":[{"award-number":["22183MFDS431"]}],"id":[{"id":"10.13039\/501100003569","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100002573","name":"Yonsei University","doi-asserted-by":"crossref","award":["2023-22-0123"],"award-info":[{"award-number":["2023-22-0123"]}],"id":[{"id":"10.13039\/501100002573","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1007\/s11227-024-06597-6","type":"journal-article","created":{"date-parts":[[2024,11,12]],"date-time":"2024-11-12T11:03:27Z","timestamp":1731409407000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["TC-BERT: large-scale language model for Korean technology commercialization documents"],"prefix":"10.1007","volume":"81","author":[{"given":"Taero","family":"Kim","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Changdae","family":"Oh","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hyeji","family":"Hwang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eunkyeong","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yewon","family":"Kim","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yunjeong","family":"Choi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sungjin","family":"Kim","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hosik","family":"Choi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kyungwoo","family":"Song","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,12]]},"reference":[{"key":"6597_CR1","unstructured":"Kenton JDM-WC, Toutanova LK (2019) Bert: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of NAACL-HLT, pp 4171\u20134186"},{"key":"6597_CR2","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown T, Mann B, Ryder N, Subbiah M, Kaplan JD, Dhariwal P, Neelakantan A, Shyam P, Sastry G, Askell A et al (2020) Language models are few-shot learners. Adv Neural Inf Process Syst 33:1877\u20131901","journal-title":"Adv Neural Inf Process Syst"},{"key":"6597_CR3","unstructured":"Chowdhery A, Narang S, Devlin J, Bosma M, Mishra G, Roberts A, Barham P, Chung HW, Sutton C, Gehrmann S, et\u00a0al (2022) Palm: scaling language modeling with pathways, arXiv preprint arXiv:2204.02311"},{"key":"6597_CR4","unstructured":"Hoffmann J, Borgeaud S, Mensch A, Buchatskaya E, Cai T, Rutherford E, Casas DdL, Hendricks LA, Welbl J, Clark A, et\u00a0al (2022) Training compute-optimal large language models, arXiv preprint arXiv:2203.15556"},{"key":"6597_CR5","doi-asserted-by":"crossref","unstructured":"Goel V, Sahnan D, Venktesh V, Sharma G, Dwivedi D, Mohania M (2022) K-12bert: bert for k-12 education. In: Artificial Intelligence in Education. Posters and Late Breaking Results, Workshops and Tutorials, Industry and Innovation Tracks, Practitioners\u2019 and Doctoral Consortium: 23rd International Conference, AIED 2022, Durham, UK, July 27\u201331, 2022, Proceedings, Part II, Springer, pp 595\u2013598","DOI":"10.1007\/978-3-031-11647-6_123"},{"issue":"2","key":"6597_CR6","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2022.103194","volume":"60","author":"M Suzuki","year":"2023","unstructured":"Suzuki M, Sakaji H, Hirano M, Izumi K (2023) Constructing and analyzing domain-specific language model for financial text mining. Inf Process Manag 60(2):103194","journal-title":"Inf Process Manag"},{"issue":"1","key":"6597_CR7","first-page":"1","volume":"3","author":"Y Gu","year":"2021","unstructured":"Gu Y, Tinn R, Cheng H, Lucas M, Usuyama N, Liu X, Naumann T, Gao J, Poon H (2021) Domain-specific language model pretraining for biomedical natural language processing. ACM Trans Comput Healthcare (HEALTH) 3(1):1\u201323","journal-title":"ACM Trans Comput Healthcare (HEALTH)"},{"issue":"4","key":"6597_CR8","doi-asserted-by":"publisher","first-page":"1234","DOI":"10.1093\/bioinformatics\/btz682","volume":"36","author":"J Lee","year":"2020","unstructured":"Lee J, Yoon W, Kim S, Kim D, Kim S, So CH, Kang J (2020) Biobert: a pre-trained biomedical language representation model for biomedical text mining. Bioinformatics 36(4):1234\u20131240","journal-title":"Bioinformatics"},{"issue":"11","key":"6597_CR9","doi-asserted-by":"publisher","first-page":"12766","DOI":"10.1007\/s11227-023-05160-z","volume":"79","author":"E Al-Bashabsheh","year":"2023","unstructured":"Al-Bashabsheh E, Alaiad A, Al-Ayyoub M, Beni-Yonis O, Zitar RA, Abualigah L (2023) Improving clinical documentation: automatic inference of icd-10 codes from patient notes using bert model. J Supercomput 79(11):12766\u201312790","journal-title":"J Supercomput"},{"key":"6597_CR10","unstructured":"Shen JT, Yamashita M, Prihar E, Heffernan N, Wu X, Graff B, Lee D (2021) Mathbert: a pre-trained language model for general nlp tasks in mathematics education. In: NeurIPS 2021 Math AI for Education Workshop"},{"issue":"11","key":"6597_CR11","doi-asserted-by":"publisher","first-page":"11871","DOI":"10.1007\/s11227-023-05099-1","volume":"79","author":"A Siagh","year":"2023","unstructured":"Siagh A, Laallam FZ, Kazar O, Salem H (2023) An improved sentiment classification model based on data quality and word embeddings. J Supercomput 79(11):11871\u201311894","journal-title":"J Supercomput"},{"key":"6597_CR12","unstructured":"Wan C-X, Li B (2022) Financial causal sentence recognition based on bert-cnn text classification. J Supercomput 1\u201325"},{"key":"6597_CR13","doi-asserted-by":"publisher","DOI":"10.1016\/j.wpi.2020.101965","volume":"61","author":"J-S Lee","year":"2020","unstructured":"Lee J-S, Hsiang J (2020) Patent classification by fine-tuning bert language model. World Patent Inf 61:101965","journal-title":"World Patent Inf"},{"key":"6597_CR14","doi-asserted-by":"crossref","unstructured":"Freunek M, Bodmer A (2021) Bert based patent novelty search by training claims to their own description, arXiv preprint arXiv:2103.01126","DOI":"10.11114\/aef.v8i5.5182"},{"issue":"16","key":"6597_CR15","doi-asserted-by":"publisher","first-page":"7994","DOI":"10.3390\/app12167994","volume":"12","author":"Y Kim","year":"2022","unstructured":"Kim Y, Park S, Kang J (2022) Technology commercialization activation model using imagification of variables. Appl Sci 12(16):7994","journal-title":"Appl Sci"},{"key":"6597_CR16","doi-asserted-by":"publisher","DOI":"10.2478\/amns.2023.2.00100","author":"X Ji","year":"2024","unstructured":"Ji X, Qin J, Wu J, Zhang Y (2024) The mechanism of innovation-driven emerging technology generation based on big data fusion in the perspective of technological self-reliance and self-improvement. Appl Math Nonlinear Sci. https:\/\/doi.org\/10.2478\/amns.2023.2.00100","journal-title":"Appl Math Nonlinear Sci"},{"key":"6597_CR17","doi-asserted-by":"publisher","DOI":"10.1016\/j.est.2024.112894","volume":"98","author":"X Zhao","year":"2024","unstructured":"Zhao X, Wu W, Wu D (2024) Technological trajectory analysis in lithium battery manufacturing: Based on patent claims perspective. J Energy Storage 98:112894","journal-title":"J Energy Storage"},{"key":"6597_CR18","doi-asserted-by":"crossref","unstructured":"Wortsman M, Ilharco G, Kim JW, Li M, Kornblith S, Roelofs R, Lopes RG, Hajishirzi H, Farhadi A, Namkoong H, et\u00a0al (2022) Robust fine-tuning of zero-shot models, in: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 7959\u20137971","DOI":"10.1109\/CVPR52688.2022.00780"},{"key":"6597_CR19","unstructured":"Kumar A, Raghunathan A, Jones R, Ma T, Liang P (2022) Fine-tuning can distort pretrained features and underperform out-of-distribution, arXiv preprint arXiv:2202.10054"},{"key":"6597_CR20","unstructured":"Kaplan J, McCandlish S, Henighan T, Brown TB, Chess B, Child R, Gray S, Radford A, Wu J, Amodei D (2020) Scaling laws for neural language models, arXiv preprint arXiv:2001.08361"},{"key":"6597_CR21","unstructured":"Shoeybi M, Patwary M, Puri R, LeGresley P, Casper J, Catanzaro B (2019) Megatron-lm: Training multi-billion parameter language models using model parallelism, arXiv preprint arXiv:1909.08053"},{"key":"6597_CR22","unstructured":"Scao TL, Fan A., Akiki C, Pavlick E, Ili\u0107 S, Hesslow D, Castagn\u00e9 R, Luccioni AS, Yvon F, Gall\u00e9 M, et\u00a0al (2022) Bloom: a 176b-parameter open-access multilingual language model, arXiv preprint arXiv:2211.05100"},{"issue":"5","key":"6597_CR23","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2022.103029","volume":"59","author":"G Jiang","year":"2022","unstructured":"Jiang G, Liu S, Zhao Y, Sun Y, Zhang M (2022) Fake news detection via knowledgeable prompt learning. Inf Process Manag 59(5):103029. https:\/\/doi.org\/10.1016\/j.ipm.2022.103029","journal-title":"Inf Process Manag"},{"issue":"6","key":"6597_CR24","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2022.103107","volume":"59","author":"E G\u00fcndogan","year":"2022","unstructured":"G\u00fcndogan E, Kaya M (2022) Deep learning based conference program organization system from determining articles in session to scheduling. Inf Process Manag 59(6):103107","journal-title":"Inf Process Manag"},{"issue":"1","key":"6597_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1038\/s41746-021-00455-y","volume":"4","author":"L Rasmy","year":"2021","unstructured":"Rasmy L, Xiang Y, Xie Z, Tao C, Zhi D (2021) Med-bert: pretrained contextualized embeddings on large-scale structured electronic health records for disease prediction. NPJ Digital Med 4(1):1\u201313","journal-title":"NPJ Digital Med"},{"key":"6597_CR26","doi-asserted-by":"crossref","unstructured":"Beltagy I, Lo K, Cohan A (2019) Scibert: a pretrained language model for scientific text, in: Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pp 3615\u20133620","DOI":"10.18653\/v1\/D19-1371"},{"key":"6597_CR27","unstructured":"Latif E, Lee G-G, Neuman K, Kastorff T, Zhai X (2024) G-sciedbert: a contextualized llm for science assessment tasks in german, arXiv preprint arXiv:2402.06584"},{"issue":"4","key":"6597_CR28","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1147\/rd.14.0309","volume":"1","author":"HP Luhn","year":"1957","unstructured":"Luhn HP (1957) A statistical approach to mechanized encoding and searching of literary information. IBM J Res Dev 1(4):309\u2013317. https:\/\/doi.org\/10.1147\/rd.14.0309","journal-title":"IBM J Res Dev"},{"key":"6597_CR29","unstructured":"Jones KS (1972) A statistical interpretation of term specificity and its application in retrieval. J Doc"},{"key":"6597_CR30","doi-asserted-by":"crossref","unstructured":"Witten IH, Paynter GW, Frank E, Gutwin C, Nevill-Manning CG (1999) Kea: practical automatic keyphrase extraction. In: Proceedings of the fourth ACM conference on Digital libraries, pp 254\u2013255","DOI":"10.1145\/313238.313437"},{"issue":"4","key":"6597_CR31","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1023\/A:1009976227802","volume":"2","author":"PD Turney","year":"2000","unstructured":"Turney PD (2000) Learning algorithms for keyphrase extraction. Inf Retr 2(4):303\u2013336","journal-title":"Inf Retr"},{"key":"6597_CR32","doi-asserted-by":"crossref","unstructured":"Hulth A (2003) Improved automatic keyword extraction given more linguistic knowledge. In: Proceedings of the 2003 conference on Empirical methods in natural language processing, pp 216\u2013223","DOI":"10.3115\/1119355.1119383"},{"key":"6597_CR33","doi-asserted-by":"crossref","unstructured":"Zhang K, Xu H, Tang J, Li J (2006) Keyword extraction using support vector machine. In: international conference on web-age information management, Springer, pp 85\u201396","DOI":"10.1007\/11775300_8"},{"key":"6597_CR34","doi-asserted-by":"crossref","unstructured":"Sahrawat D, Mahata D, Zhang H, Kulkarni M, Sharma A, Gosangi R, Stent A, Kumar Y, Shah RR, Zimmermann R (2020) Keyphrase extraction as sequence labeling using contextualized embeddings. In: European Conference on Information Retrieval, Springer, pp 328\u2013335","DOI":"10.1007\/978-3-030-45442-5_41"},{"issue":"4","key":"6597_CR35","doi-asserted-by":"publisher","first-page":"409","DOI":"10.1017\/S1351324921000127","volume":"28","author":"M Martinc","year":"2022","unstructured":"Martinc M, \u0160krlj B, Pollak S (2022) Tnt-kid: transformer-based neural tagger for keyword identification. Nat Lang Eng 28(4):409\u2013448","journal-title":"Nat Lang Eng"},{"key":"6597_CR36","unstructured":"Koloski B, IPS, Pollak S, \u0160krlj B, Martinc M (2021) Extending neural keyword extraction with tf-idf tagset matching, EACL Hackashop on News Media Content Analysis and Automated Report Generation 22"},{"key":"6597_CR37","unstructured":"Cui Z, Ke R, Pu Z, Wang Y (2018) Deep bidirectional and unidirectional lstm recurrent neural network for network-wide traffic speed prediction, arXiv preprint arXiv:1801.02143"},{"key":"6597_CR38","doi-asserted-by":"crossref","unstructured":"Peters ME, Neumann M, Iyyer M, Gardner M, Clark C, Lee K, Zettlemoyer L (2018) Deep contextualized word representations. In: Proceedings of NAACL-HLT, pp 2227\u20132237","DOI":"10.18653\/v1\/N18-1202"},{"key":"6597_CR39","unstructured":"Wu Y, Schuster M, Chen Z, Le QV, Norouzi M, Macherey W, Krikun M, Cao Y, Gao Q, Macherey K, Klingner J, Shah A, Johnson M, Liu X, Kaiser L, Gouws S, Kato Y, Kudo T, Kazawa H, Stevens K, Kurian G, Patil N, Wang W, Young C, Smith J, Riesa J, Rudnick A, Vinyals O, Corrado G, Hughes M, Dean J (2016) Google\u2019s neural machine translation system: Bridging the gap between human and machine translation. CoRR abs\/1609.08144. arXiv:1609.08144"},{"key":"6597_CR40","unstructured":"Hendrycks D, Gimpel K (2016) Gaussian error linear units (gelus), arXiv preprint arXiv:1606.08415"},{"key":"6597_CR41","doi-asserted-by":"crossref","unstructured":"Ghaemmaghami A, Schiffauerova A, Ebadi A (2022) Which keyword extraction method performs better for emerging technology detection?. In: (2022) International Symposium on Multidisciplinary Studies and Innovative Technologies (ISMSIT). IEEE :613\u2013618","DOI":"10.1109\/ISMSIT56059.2022.9932656"},{"key":"6597_CR42","doi-asserted-by":"crossref","unstructured":"Gupta A, Chadha A, Tewari V (2024) A natural language processing model on bert and yake technique for keyword extraction on sustainability reports, IEEE Access","DOI":"10.1109\/ACCESS.2024.3352742"},{"key":"6597_CR43","doi-asserted-by":"publisher","DOI":"10.1016\/j.techfore.2022.122130","volume":"186","author":"E Jeon","year":"2023","unstructured":"Jeon E, Yoon N, Sohn SY (2023) Exploring new digital therapeutics technologies for psychiatric disorders using bertopic and patentsberta. Technol Forecast Soc Chang 186:122130","journal-title":"Technol Forecast Soc Chang"},{"issue":"5","key":"6597_CR44","doi-asserted-by":"publisher","first-page":"91","DOI":"10.3390\/asi7050091","volume":"7","author":"A Ali","year":"2024","unstructured":"Ali A, Tufail A, De Silva LC, Abas PE (2024) Innovating patent retrieval: a comprehensive review of techniques, trends, and challenges in prior art searches. Appl Syst Innov 7(5):91","journal-title":"Appl Syst Innov"},{"key":"6597_CR45","doi-asserted-by":"crossref","unstructured":"Reswara CG, Nicolas J, Widyatama I, David D, Arisaputra P (2024) Book recommendation system using tf-idf and cosine similarity. In: AIP Conference Proceedings, Vol. 3135, AIP Publishing","DOI":"10.1063\/5.0212477"},{"key":"6597_CR46","doi-asserted-by":"publisher","DOI":"10.1016\/j.sasc.2024.200136","volume":"6","author":"Y Luo","year":"2024","unstructured":"Luo Y, Lu C (2024) Tf-idf combined rank factor naive bayesian algorithm for intelligent language classification recommendation systems. Syst Soft Comput 6:200136","journal-title":"Syst Soft Comput"},{"key":"6597_CR47","doi-asserted-by":"crossref","unstructured":"Chen L-C (2024) An extended tf-idf method for improving keyword extraction in traditional corpus-based research: an example of a climate change corpus. Data Knowl Eng 102322","DOI":"10.1016\/j.datak.2024.102322"},{"issue":"8","key":"6597_CR48","doi-asserted-by":"publisher","first-page":"9073","DOI":"10.1007\/s11227-022-05022-0","volume":"79","author":"S Liao","year":"2023","unstructured":"Liao S, Yang Z, Liao Q, Zheng Z (2023) Topiclprank: a keyphrase extraction method based on improved topicrank. J Supercomput 79(8):9073\u20139092","journal-title":"J Supercomput"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-024-06597-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-024-06597-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-024-06597-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,12]],"date-time":"2024-11-12T12:09:08Z","timestamp":1731413348000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-024-06597-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,12]]},"references-count":48,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,1]]}},"alternative-id":["6597"],"URL":"https:\/\/doi.org\/10.1007\/s11227-024-06597-6","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"type":"print","value":"0920-8542"},{"type":"electronic","value":"1573-0484"}],"subject":[],"published":{"date-parts":[[2024,11,12]]},"assertion":[{"value":"7 October 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 November 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"This paper has no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"163"}}