{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T20:11:23Z","timestamp":1769112683384,"version":"3.49.0"},"reference-count":42,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,6,4]],"date-time":"2022-06-04T00:00:00Z","timestamp":1654300800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,6,4]],"date-time":"2022-06-04T00:00:00Z","timestamp":1654300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100014188","name":"ministry of science and ict","doi-asserted-by":"crossref","award":["NRF-2021R1F1A1053194"],"award-info":[{"award-number":["NRF-2021R1F1A1053194"]}],"id":[{"id":"10.13039\/501100014188","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2023,2]]},"DOI":"10.1007\/s10489-022-03655-5","type":"journal-article","created":{"date-parts":[[2022,6,4]],"date-time":"2022-06-04T00:02:26Z","timestamp":1654300946000},"page":"4047-4062","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["Impact of preprocessing and word embedding on extreme multi-label patent classification tasks"],"prefix":"10.1007","volume":"53","author":[{"given":"Guik","family":"Jung","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Junghoon","family":"Shin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4251-7177","authenticated-orcid":false,"given":"Sangjun","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,6,4]]},"reference":[{"issue":"3","key":"3655_CR1","doi-asserted-by":"publisher","first-page":"893\u2014916","DOI":"10.5465\/amj.2010.0844","volume":"56","author":"Q Li","year":"2013","unstructured":"Li Q, Maggitti P G, Smith K G, Tesluk P E, Katila R (2013) Top management attention to innovation: The role of search selection and intensity in new product introductions. Academy of Management Journal 56(3):893\u2014916","journal-title":"Academy of Management Journal"},{"issue":"5","key":"3655_CR2","doi-asserted-by":"publisher","first-page":"1091","DOI":"10.1016\/j.respol.2016.02.006","volume":"45","author":"S Wagner","year":"2016","unstructured":"Wagner S, Wakeman S (2016) What do patent-based measures tell us about product commericalization? Evidence from the pharmaceutical industry. Res Policy 45(5):1091\u20131102","journal-title":"Res Policy"},{"key":"3655_CR3","unstructured":"WIPO (2019). World Intellectual Property Indicators 2019, Geneva: World Intellectual Property Organization"},{"issue":"5","key":"3655_CR4","doi-asserted-by":"publisher","first-page":"1216","DOI":"10.1016\/j.ipm.2006.11.011","volume":"43","author":"Y-H Tseng","year":"2007","unstructured":"Tseng Y-H, Lin C-J, Lin Y u -I (2007) Text mining techniques for patent analysis. Information Processing&Management 43(5):1216\u20131247","journal-title":"Information Processing&Management"},{"issue":"9","key":"3655_CR5","doi-asserted-by":"publisher","first-page":"4348","DOI":"10.1016\/j.eswa.2015.01.050","volume":"42","author":"H Noh","year":"2015","unstructured":"Noh H, Jo Y, Lee S (2015) Keyword selection and processing strategy for applying text mining to patent analysis. Expert Systems with Application 42(9):4348\u20134360","journal-title":"Expert Systems with Application"},{"key":"3655_CR6","doi-asserted-by":"crossref","unstructured":"Grawe M F, Martins C A, Bonfante A G (2017) Automated Patent Classification Using Word Embedding. In: 2017 16th IEEE International Conference on Machine Learning and Applications (ICMLA), pp 408\u2013411","DOI":"10.1109\/ICMLA.2017.0-127"},{"issue":"6","key":"3655_CR7","first-page":"1930","volume":"2","author":"AG Jivani","year":"2011","unstructured":"Jivani A G (2011) A comparative study of stemming algorithms. Int J Comput Appl Technol 2 (6):1930\u20131938","journal-title":"Int J Comput Appl Technol"},{"key":"3655_CR8","doi-asserted-by":"crossref","unstructured":"Wang B, Wang A, Chen F, Wang Y, Kuo C C J (2019) Evaluating word embedding models: Methods and experimental results","DOI":"10.1017\/ATSIP.2019.12"},{"key":"3655_CR9","unstructured":"WIPO (2020). International Patent Classification, Guide, Geneva : World Intellectual Property Organization"},{"key":"3655_CR10","first-page":"18","volume":"5","author":"H Taherdoost","year":"2016","unstructured":"Taherdoost H (2016) Sampling methods in research methodology; how to choose a sampling technique for research. International Journal of Academic Research in Management 5:18\u201327","journal-title":"International Journal of Academic Research in Management"},{"key":"3655_CR11","doi-asserted-by":"crossref","unstructured":"Sechidis K, Tsoumakas G, Vlahavas I (2011) On the stratification of multi-label data. In: In Joint European Conference on Machine Learning and Knowledge Discovery in Databases (pp. 145-158). Springer, Berlin, Heidelberg","DOI":"10.1007\/978-3-642-23808-6_10"},{"issue":"1","key":"3655_CR12","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/1471-2105-8-326","volume":"8","author":"BJ Parker","year":"2007","unstructured":"Parker B J , G\u00fcnter S , Bedo J (2007) Stratification bias in low signal microarray studies. BMC Bioinforma 8(1):1\u201316","journal-title":"BMC Bioinforma"},{"key":"3655_CR13","doi-asserted-by":"crossref","unstructured":"Sechidis K, Tsoumakas G (2011) Vlahavas, Ioannis, Hamed \u201cOn the Stratification of Multi-label Data,\u201d","DOI":"10.1007\/978-3-642-23808-6_10"},{"issue":"1","key":"3655_CR14","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1016\/j.ipm.2013.08.006","volume":"50","author":"AK Uysal","year":"2014","unstructured":"Uysal A K, Gunal S (2014) The impact of preprocessing on text classification. Information Processing and Management, \u201d 50(1):104\u2013112","journal-title":"Information Processing and Management, \u201d"},{"key":"3655_CR15","doi-asserted-by":"publisher","first-page":"273","DOI":"10.1016\/j.procs.2017.08.368","volume":"113","author":"W Etaiwi","year":"2017","unstructured":"Etaiwi W, Naymat G (2017) The Impact of applying Different Preprocessing Steps on Review Spam Detection. Procedia Computer Science 113:273\u2013279","journal-title":"Procedia Computer Science"},{"key":"3655_CR16","doi-asserted-by":"crossref","unstructured":"Dharavath K, Amarnath G, Talukdar F A, Laskar R H (2014) Impact of image preprocessing on face recognition: A comparative analysis. In: 2014 International Conference on Communication and Signal Processing, pp 631\u2013635","DOI":"10.1109\/ICCSP.2014.6949918"},{"issue":"11","key":"3655_CR17","doi-asserted-by":"publisher","first-page":"613","DOI":"10.1145\/361219.361220","volume":"18","author":"G Salton","year":"1975","unstructured":"Salton G, Wong A, Yang C S (1975) A vector space model for automatic indexing. Commun ACM 18(11):613\u2013620","journal-title":"Commun ACM"},{"key":"3655_CR18","doi-asserted-by":"crossref","unstructured":"Pennington J, Socher R, Manning, Christopher D (2014) Glove Global vectors for word representation. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp 1532\u20131543","DOI":"10.3115\/v1\/D14-1162"},{"key":"3655_CR19","unstructured":"Mikolov T, Chen K, Corrado G, Dean J (2013) Efficient estimation of word representations in vector space. arXiv:1301.3781"},{"key":"3655_CR20","unstructured":"Zhou C, Sun C, Liu Z, lau F (2015) A C-LSTM neural network for text classification. arXiv:1511.08630"},{"key":"3655_CR21","doi-asserted-by":"publisher","first-page":"7370","DOI":"10.1609\/aaai.v33i01.33017370","volume":"33","author":"L Yao","year":"2019","unstructured":"Yao L, Mao C (2019) Luo, Yuan \u201cGraph convolutional networks for text classification,\u201d. Proceedings of the AAAI Conference on Artificial Intelligence 33:7370\u20137377","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"3655_CR22","doi-asserted-by":"crossref","unstructured":"Yu S, Su J, Luo D (2019). Improving BERT-based text classification with auxiliary sentence and domain knowledge, IEEE Access, 7, pp 176600\u2013176612","DOI":"10.1109\/ACCESS.2019.2953990"},{"key":"3655_CR23","doi-asserted-by":"publisher","first-page":"127913","DOI":"10.1109\/ACCESS.2020.3009217","volume":"8","author":"HA Almuzaini","year":"2020","unstructured":"Almuzaini H A, Azmi A M (2020) Impact of stemming and word embedding on deep learning-based Arabic text categorization. IEEE Access 8:127913\u2013127928","journal-title":"IEEE Access"},{"issue":"5","key":"3655_CR24","doi-asserted-by":"publisher","first-page":"298","DOI":"10.1504\/IJCAT.2019.101171","volume":"60","author":"M Bounabi","year":"2019","unstructured":"Bounabi M, Moutaouakil K E, Satori K (2019) A comparison of text classification methods using different stemming techniques. International Journal of Computer Application in Technology 60(5):298\u2013306","journal-title":"International Journal of Computer Application in Technology"},{"key":"3655_CR25","doi-asserted-by":"publisher","first-page":"131522","DOI":"10.1109\/ACCESS.2020.3009058","volume":"8","author":"S Amin","year":"2020","unstructured":"Amin S, Uddin M I, Hassan S, Khan A, Nasser N, Alharbi A, Alyami H (2020) Recurrent neural networks with TF-IDF embedding technique for detection and classification in tweets of dengue disease. IEEE Access 8:131522\u2013131533","journal-title":"IEEE Access"},{"issue":"6334","key":"3655_CR26","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1126\/science.aal4230","volume":"356","author":"A Caliskan","year":"2017","unstructured":"Caliskan A, Bryson J J, Narayanan A (2017) Semantics derived automatically from language corpora contain human-like biases. Science 356(6334):183\u2013186","journal-title":"Science"},{"key":"3655_CR27","first-page":"175","volume":"47","author":"C Cerisara","year":"2018","unstructured":"Cerisara C, Kral P (2018) Lenc, Ladislav \u201cOn the effects of using word2vec representations in neural networks for dialogue act recognition,\u201d. Computer Speech&Language 47:175\u2013 193","journal-title":"Computer Speech&Language"},{"key":"3655_CR28","doi-asserted-by":"crossref","unstructured":"Benzineb K, Guyot J (2011) Automated patent classification. Current challenges in patent information retrieval","DOI":"10.1007\/978-3-642-19231-9_12"},{"key":"3655_CR29","unstructured":"You R, Zhang Z, Wang Z, Dai S, Mamitsuka H, Zhu S (2019) attentionXM: Label tree-based attention-aware deep model for high-performance extreme multi-label text classification"},{"key":"3655_CR30","unstructured":"Huang X, Chen B, Xiao L, Jing L (1905) Label-aware document representation via hybrid attention for extreme multi-label text classification"},{"issue":"2-3","key":"3655_CR31","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1007\/s10791-017-9319-5","volume":"21","author":"X Yang","year":"2018","unstructured":"Yang X, Macdonald C, Ounis I (2018) Using word embeddings in twitter election classification. Information Retrieval Journal 21(2-3):183\u2013207","journal-title":"Information Retrieval Journal"},{"key":"3655_CR32","doi-asserted-by":"publisher","first-page":"123288","DOI":"10.1016\/j.physa.2019.123288","volume":"541","author":"M Aydo\u0121an","year":"2020","unstructured":"Aydo\u0121an M, Karci Ali (2020) Improving the accuracy using pre-trained word embeddings on deep neural networks for Turkish text classification. Physica A:, Statistical Mechanics and its Applications 541:123288","journal-title":"Physica A:, Statistical Mechanics and its Applications"},{"issue":"1","key":"3655_CR33","doi-asserted-by":"publisher","first-page":"219","DOI":"10.3390\/su10010219","volume":"10","author":"J Hu","year":"2018","unstructured":"Hu J, Li S, Hu J, Yang G (2018) A hierarchical feature extraction model for multi-label mechanical patent classification. Sustainability 10(1):219","journal-title":"Sustainability"},{"issue":"2","key":"3655_CR34","doi-asserted-by":"publisher","first-page":"721","DOI":"10.1007\/s11192-018-2905-5","volume":"117","author":"S Li","year":"2018","unstructured":"Li S, Hu J, Cui Y, Hu J (2018) deeppatent: patent classification with convolutional neural networks and word embedding. Scientometrics 117(2):721\u2013744","journal-title":"Scientometrics"},{"key":"3655_CR35","unstructured":"Lee J-S, Hsiang J (1906) PatentBERT:, Patent classification with fine-tuning a pre-trained bert model"},{"key":"3655_CR36","doi-asserted-by":"publisher","first-page":"429","DOI":"10.1016\/j.ins.2019.11.004","volume":"513","author":"F Thabtah","year":"2020","unstructured":"Thabtah F, Hammoud S, Kamalov F, Gonsalves A (2020) Data imbalance in classification: Experimental evaluation. Inf Sci 513:429\u2013441","journal-title":"Inf Sci"},{"key":"3655_CR37","first-page":"1661","volume":"3","author":"C Silva","year":"2003","unstructured":"Silva C, Ribeiro B (2003) The importance of stop word removal on recall values in text categorization. Proceedings of the International Joint Conference on Neural Networks 3:1661\u20131666","journal-title":"Proceedings of the International Joint Conference on Neural Networks"},{"key":"3655_CR38","unstructured":"Hooper R, Paice C The Lancaster Stemming Algorithm. Available at: http:\/\/www.comp.lancs.ac.uk\/computing\/research\/stemming\/"},{"key":"3655_CR39","unstructured":"Porter MF Snowball: a language for stemming algorithms, http:\/\/www.snowball.tartarus.org\/texts\/introduction.html."},{"key":"3655_CR40","doi-asserted-by":"crossref","unstructured":"Hagberg AA, Schult DA, Swart PJ (2008) Exploring network structure, dynamics, and function using NetworkX. In: proceedings of the 7th Python in Science Conference (SciPy2008), G\u00e4,el Varoquaux, Travis Vaught, and Jarrod Millman (Eds), (Pasadena, CA USA), pp 11\u201315","DOI":"10.25080\/TCWV9851"},{"key":"3655_CR41","unstructured":"Wu H, Qin S, Nie R, Cao J, Gorbachev S Effective collaborative representation learning for multilabel text categorization, IEEE Transactions on Neural Networks and Learning Systems"},{"key":"3655_CR42","doi-asserted-by":"crossref","unstructured":"Zhang W, Yan J, Wang X, Zha H (2018) Deep extreme multi-label learning. In: Inproceedings of the 2018 ACM on International Conference on Multimedia Retrieval (ICMR \u201918), Association for Computing Machinery, New York, NY, USA, 100\u2013107","DOI":"10.1145\/3206025.3206030"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-03655-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-022-03655-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-03655-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,26]],"date-time":"2024-09-26T12:33:15Z","timestamp":1727353995000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-022-03655-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,4]]},"references-count":42,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,2]]}},"alternative-id":["3655"],"URL":"https:\/\/doi.org\/10.1007\/s10489-022-03655-5","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,6,4]]},"assertion":[{"value":"19 April 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 June 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}