{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,28]],"date-time":"2026-03-28T06:53:35Z","timestamp":1774680815091,"version":"3.50.1"},"reference-count":91,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2021,7,7]],"date-time":"2021-07-07T00:00:00Z","timestamp":1625616000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,7,7]],"date-time":"2021-07-07T00:00:00Z","timestamp":1625616000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"name":"Fundamental Research Funds for the Central Universities, SCUT","award":["D2200150, D2201300"],"award-info":[{"award-number":["D2200150, D2201300"]}]},{"name":"Science and Technology Programs of Guangzhou","award":["201704030076, 201802010027, 201902010046"],"award-info":[{"award-number":["201704030076, 201802010027, 201902010046"]}]},{"DOI":"10.13039\/501100001809","name":"onal Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62076100"],"award-info":[{"award-number":["62076100"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Key Research and Development Program of China"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Knowl Inf Syst"],"published-print":{"date-parts":[[2021,9]]},"DOI":"10.1007\/s10115-021-01581-5","type":"journal-article","created":{"date-parts":[[2021,7,7]],"date-time":"2021-07-07T11:02:52Z","timestamp":1625655772000},"page":"2313-2346","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["On entropy-based term weighting schemes for text categorization"],"prefix":"10.1007","volume":"63","author":[{"given":"Tao","family":"Wang","sequence":"first","affiliation":[]},{"given":"Yi","family":"Cai","sequence":"additional","affiliation":[]},{"given":"Ho-fung","family":"Leung","sequence":"additional","affiliation":[]},{"given":"Raymond Y. K.","family":"Lau","sequence":"additional","affiliation":[]},{"given":"Haoran","family":"Xie","sequence":"additional","affiliation":[]},{"given":"Qing","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,7,7]]},"reference":[{"key":"1581_CR1","doi-asserted-by":"crossref","unstructured":"Alshawabkeh M, Aslam JA, Dy JG, Kaeli D (2012) Feature weighting and selection using hypothesis margin of boosting. In: 2012 IEEE 12th international conference on data mining. IEEE","DOI":"10.1109\/ICDM.2012.143"},{"issue":"3","key":"1581_CR2","doi-asserted-by":"publisher","first-page":"233","DOI":"10.1145\/183422.183423","volume":"12","author":"C Apt\u00e9","year":"1994","unstructured":"Apt\u00e9 C, Damerau F, Weiss SM (1994) Automated learning of decision rules for text categorization. ACM Trans Inf Syst (TOIS) 12(3):233\u2013251","journal-title":"ACM Trans Inf Syst (TOIS)"},{"key":"1581_CR3","unstructured":"Arora S, Liang Y, Ma T (2019) A simple but tough-to-beat baseline for sentence embeddings. In: 5th international conference on learning representations, ICLR 2017"},{"key":"1581_CR4","doi-asserted-by":"crossref","unstructured":"Batal I, Hauskrecht M (2009) Boosting knn text classification accuracy by using supervised term weighting schemes. In: CIKM","DOI":"10.1145\/1645953.1646296"},{"key":"1581_CR5","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1162\/tacl_a_00051","volume":"5","author":"P Bojanowski","year":"2017","unstructured":"Bojanowski P, Grave E, Joulin A, Mikolov T (2017) Enriching word vectors with subword information. Trans Assoc Comput Linguist 5:135\u2013146","journal-title":"Trans Assoc Comput Linguist"},{"key":"1581_CR6","unstructured":"Bolukbasi T, Chang K-W, Zou J, Saligrama V, Kalai A (2016) Man is to computer programmer as woman is to homemaker? debiasing word embeddings. arXiv preprint arXiv:1607.06520"},{"key":"1581_CR7","doi-asserted-by":"crossref","unstructured":"Buckley C, Salton G, Allan J, Singhal A (1995) Automatic query expansion using smart: Trec 3. NIST special publication sp","DOI":"10.6028\/NIST.SP.500-225.routing-cornell"},{"key":"1581_CR8","unstructured":"Chang Y, Li Y, Ding A, Dy J (2016) A robust-equitable copula dependence measure for feature selection. In: Proceedings of the 19th international conference on artificial intelligence and statistics, pp 84\u201392"},{"key":"1581_CR9","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1016\/j.eswa.2016.09.009","volume":"66","author":"K Chen","year":"2016","unstructured":"Chen K, Zhang Z, Long J, Zhang H (2016) Turning from tf-idf to tf-igm for term weighting in text classification. Expert Syst Appl 66:245\u2013260","journal-title":"Expert Syst Appl"},{"key":"1581_CR10","unstructured":"Chen M (2017) Efficient vector representation for documents through corruption. arXiv preprint arXiv:1707.02377"},{"key":"1581_CR11","doi-asserted-by":"crossref","unstructured":"Chen SF, Goodman J (1996) An empirical study of smoothing techniques for language modeling. In: Proceedings of the 34th annual meeting on Association for Computational Linguistics. Association for Computational Linguistics, pp 310\u2013318","DOI":"10.3115\/981863.981904"},{"key":"1581_CR12","doi-asserted-by":"crossref","unstructured":"Chen W, Yuan X, Zhang S, Wu J, Zhang Y, Wang Y (2020) Ferryman at semeval-2020 task 3: bert with tfidf-weighting for predicting the effect of context in word similarity. In: Proceedings of the fourteenth workshop on semantic evaluation, pp 281\u2013285","DOI":"10.18653\/v1\/2020.semeval-1.35"},{"issue":"3","key":"1581_CR13","first-page":"273","volume":"20","author":"C Cortes","year":"1995","unstructured":"Cortes C, Vapnik V (1995) Support-vector networks. Mach Learn 20(3):273\u2013297","journal-title":"Mach Learn"},{"key":"1581_CR14","volume-title":"Elements of information theory","author":"M Cover Thomas","year":"2012","unstructured":"Cover Thomas M, Thomas Joy A (2012) Elements of information theory. Wiley, Hoboken"},{"key":"1581_CR15","first-page":"265","volume":"2","author":"K Crammer","year":"2002","unstructured":"Crammer K, Singer Y (2002) On the algorithmic implementation of multiclass kernel-based vector machines. J Mach Learn Res 2:265\u2013292","journal-title":"J Mach Learn Res"},{"key":"1581_CR16","doi-asserted-by":"crossref","unstructured":"Debole F, Sebastiani F (2004) Supervised term weighting for automated text categorization. In: Text mining and its applications","DOI":"10.1007\/978-3-540-45219-5_7"},{"key":"1581_CR17","doi-asserted-by":"crossref","unstructured":"Deng Z-H, Tang S-W, Yang D-Q et al (2004) A comparative study on feature weight in text categorization. In: Advanced web technologies and applications. Springer, pp 588\u2013597","DOI":"10.1007\/978-3-540-24655-8_64"},{"key":"1581_CR18","unstructured":"Devlin J, Chang M-W, Lee K, Toutanova K (2018) Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805"},{"key":"1581_CR19","unstructured":"Dhillon IS, Modha DS (2001) Concept decompositions for large sparse text data using clustering. Mach Learn"},{"key":"1581_CR20","doi-asserted-by":"crossref","unstructured":"Dietterich TG (1998) Approximate statistical tests for comparing supervised classification learning algorithms. Neural Comput","DOI":"10.1162\/089976698300017197"},{"key":"1581_CR21","doi-asserted-by":"crossref","unstructured":"Dumais S, Platt J, Heckerman D, Sahami M (1998) Inductive learning algorithms and representations for text categorization. In: Proceedings of the seventh international conference on information and knowledge management. ACM, pp 148\u2013155","DOI":"10.1145\/288627.288651"},{"key":"1581_CR22","doi-asserted-by":"crossref","unstructured":"Dumais ST (1991) Improving the retrieval of information from external sources. Behav Res Methods Instrum Comput","DOI":"10.3758\/BF03203370"},{"issue":"1","key":"1581_CR23","first-page":"61","volume":"19","author":"T Dunning","year":"1993","unstructured":"Dunning T (1993) Accurate methods for the statistics of surprise and coincidence. Comput Linguist 19(1):61\u201374","journal-title":"Comput Linguist"},{"key":"1581_CR24","doi-asserted-by":"crossref","unstructured":"Efstathiou V, Chatzilenas C, Spinellis D (2018) Word embeddings for the software engineering domain. In: Proceedings of the 15th international conference on mining software repositories, pp 38\u201341","DOI":"10.1145\/3196398.3196448"},{"key":"1581_CR25","first-page":"1871","volume":"9","author":"R-E Fan","year":"2008","unstructured":"Fan R-E, Chang K-W, Hsieh C-J (2008) Liblinear: a library for large linear classification. J Mach Learn Res 9:1871\u20131874","journal-title":"J Mach Learn Res"},{"issue":"1","key":"1581_CR26","first-page":"525","volume":"9","author":"MA Fauzi","year":"2019","unstructured":"Fauzi MA (2019) Word2vec model for sentiment analysis of product reviews in indonesian language. Int J Electr Comput Eng 9(1):525","journal-title":"Int J Electr Comput Eng"},{"key":"1581_CR27","doi-asserted-by":"crossref","unstructured":"Ferrero J, Agnes F, Besacier L, Schwab D (2017) Using word embedding for cross-language plagiarism detection. EACL 2017, pp 415","DOI":"10.18653\/v1\/W17-2502"},{"key":"1581_CR28","unstructured":"Fisher MJ, Fieldsend JE, Everson RM (2004) Precision and recall optimisation for information access tasks"},{"key":"1581_CR29","doi-asserted-by":"crossref","unstructured":"Gonen H, Goldberg Y (2019) Lipstick on a pig: debiasing methods cover up systematic gender biases in word embeddings but do not remove them. arXiv preprint arXiv:1903.03862","DOI":"10.18653\/v1\/N19-1061"},{"key":"1581_CR30","unstructured":"Guyon I, Elisseeff A (2003) An introduction to variable and feature selection. J Mach Learn Res"},{"key":"1581_CR31","doi-asserted-by":"crossref","unstructured":"Haddoud M, Mokhtari A, Lecroq T, Abdedda\u00efm S (2016) Combining supervised term-weighting metrics for svm text classification with extended term representation. Knowl Inf Syst, pp 1\u201323","DOI":"10.1007\/s10115-016-0924-1"},{"key":"1581_CR32","doi-asserted-by":"crossref","unstructured":"Han E-H et al (2001) Text categorization using weight adjusted k-nearest neighbor classification","DOI":"10.1007\/3-540-45357-1_9"},{"key":"1581_CR33","unstructured":"Hsu CW, Lin CJ (2002) A comparison of methods for multiclass support vector machines. IEEE Trans Neural Networks"},{"issue":"4","key":"1581_CR34","doi-asserted-by":"publisher","first-page":"620","DOI":"10.1103\/PhysRev.106.620","volume":"106","author":"ET Jaynes","year":"1957","unstructured":"Jaynes ET (1957) Information theory and statistical mechanics. Phys Rev 106(4):620","journal-title":"Phys Rev"},{"key":"1581_CR35","volume-title":"Text categorization with support vector machines: learning with many relevant features","author":"T Joachims","year":"1998","unstructured":"Joachims T (1998) Text categorization with support vector machines: learning with many relevant features. Springer, Berlin"},{"issue":"1","key":"1581_CR36","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1108\/eb026526","volume":"28","author":"Jones Karen Sparck","year":"1972","unstructured":"Jones Karen Sparck (1972) A statistical interpretation of term specificity and its application in retrieval. J Doc 28(1):11\u201321","journal-title":"J Doc"},{"key":"1581_CR37","unstructured":"Kiros R, Zhu Y, Salakhutdinov R, Zemel RS, Torralba A, Urtasun R, Fidler S (2015) Skip-thought vectors. arXiv preprint arXiv:1506.06726"},{"key":"1581_CR38","doi-asserted-by":"crossref","unstructured":"Ko Y (2012) A study of term weighting schemes using class information for text classification. ACM, In SIGIR","DOI":"10.1145\/2348283.2348453"},{"key":"1581_CR39","doi-asserted-by":"crossref","unstructured":"Ko Y (2015) A new term-weighting scheme for text classification using the odds of positive and negative class probabilities. J Assoc Inf Sci Technol","DOI":"10.1002\/asi.23338"},{"key":"1581_CR40","unstructured":"Lan M, Tan CL, Low HB (2006) Proposing a new term weighting scheme for text categorization. In: AAAI"},{"key":"1581_CR41","unstructured":"Lan M, Tan CL, Su J, Lu Y (2009) Supervised and traditional term weighting methods for automatic text categorization. IEEE Trans Pattern Anal Mach Intell"},{"key":"1581_CR42","doi-asserted-by":"crossref","unstructured":"Largeron C, Moulin C, G\u00e9ry M (2011) Entropy based feature selection for text categorization. In: Proceedings of the 2011 ACM symposium on applied computing. ACM, pp 924\u2013928","DOI":"10.1145\/1982185.1982389"},{"key":"1581_CR43","first-page":"1188","volume":"14","author":"V Le Quoc","year":"2014","unstructured":"Le Quoc V, Tomas M (2014) Distributed representations of sentences and documents. ICML 14:1188\u20131196","journal-title":"ICML"},{"issue":"1\u20133","key":"1581_CR44","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1023\/A:1012491419635","volume":"46","author":"E Leopold","year":"2002","unstructured":"Leopold E, Kindermann J (2002) Text categorization with support vector machines. How to represent texts in input space? Mac Learn 46(1\u20133):423\u2013444","journal-title":"Mac Learn"},{"key":"1581_CR45","doi-asserted-by":"crossref","unstructured":"Lewis DD (1991) Evaluating text categorization. In: Proceedings of speech and natural language workshop. Defense Advanced Research Projects Agency, Morgan Kaufmann, February, pages 312\u2013318","DOI":"10.3115\/112405.112471"},{"key":"1581_CR46","unstructured":"Li Y, Zheng R, Tian T, Hu Z, Iyer R, Sycara K (2016) Joint embedding of hierarchical categories and entities for concept categorization and dataless classification. In: The 26th international conference on computational linguistics (COLING)"},{"key":"1581_CR47","doi-asserted-by":"crossref","unstructured":"Luhn HP (1957) A statistical approach to mechanized encoding and searching of literary information. IBM J Res Dev","DOI":"10.1147\/rd.14.0309"},{"key":"1581_CR48","doi-asserted-by":"crossref","unstructured":"Luo J, Shan H, Zhang G, Yuan G, Zhang S, Yan F, Li Z (2021) Exploiting syntactic and semantic information for textual similarity estimation. Math Probl Eng","DOI":"10.1155\/2021\/4186750"},{"key":"1581_CR49","doi-asserted-by":"crossref","unstructured":"Luo Q, Chen E, Xiong H (2011) A semantic term weighting scheme for text categorization. Expert Syst Appl","DOI":"10.1016\/j.eswa.2011.04.058"},{"key":"1581_CR50","unstructured":"Mikolov T, Chen K, Corrado G, Dean J (2013) Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.03781"},{"key":"1581_CR51","unstructured":"Mikolov T, Sutskever I, Chen K, Corrado GS, Dean J (2013) Distributed representations of words and phrases and their compositionality. In: Advances in neural information processing systems, pp 3111\u20133119"},{"key":"1581_CR52","unstructured":"Mladeni\u2019c D, Grobelnik M (1998) Feature selection for classification based on text hierarchy. In: Text and the web, conference on automated learning and discovery CONALD-98. Citeseer"},{"issue":"3","key":"1581_CR53","doi-asserted-by":"publisher","first-page":"e0230442","DOI":"10.1371\/journal.pone.0230442","volume":"15","author":"M Manal","year":"2020","unstructured":"Manal M, Nazlia O (2020) Question classification based on bloom\u2019s taxonomy cognitive domain using modified tf-idf and word2vec. PLoS ONE 15(3):e0230442","journal-title":"PLoS ONE"},{"key":"1581_CR54","doi-asserted-by":"crossref","unstructured":"Nam J, Menc\u00eda ELJ (2016) All-in text: learning document, label, and word representations jointly. In: Proceedings of the thirtieth AAAI conference on artificial intelligence. AAAI Press, pp 1948\u20131954","DOI":"10.1609\/aaai.v30i1.10241"},{"key":"1581_CR55","doi-asserted-by":"crossref","unstructured":"Nguyen XV, Chan J, Romano S, Bailey J (2014) Effective global approaches for mutual information based feature selection. In: Proceedings of the 20th ACM SIGKDD international conference on knowledge discovery and data mining. ACM, pp 512\u2013521","DOI":"10.1145\/2623330.2623611"},{"key":"1581_CR56","doi-asserted-by":"crossref","unstructured":"Paik JH (2013) A novel tf-idf weighting scheme for effective ranking. In: SIGIR. ACM, pp 343\u2013352","DOI":"10.1145\/2484028.2484070"},{"key":"1581_CR57","doi-asserted-by":"crossref","unstructured":"Papakyriakopoulos O, Hegelich S, Serrano JCM, Marco F (2020) Bias in word embeddings. In: Proceedings of the 2020 conference on fairness, accountability, and transparency, pp 446\u2013457","DOI":"10.1145\/3351095.3372843"},{"issue":"8","key":"1581_CR58","doi-asserted-by":"publisher","first-page":"1226","DOI":"10.1109\/TPAMI.2005.159","volume":"27","author":"H Peng","year":"2005","unstructured":"Peng H, Long F, Ding C (2005) Feature selection based on mutual information criteria of max-dependency, max-relevance, and min-redundancy. IEEE Trans Pattern Anal Mach Intell 27(8):1226\u20131238","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1581_CR59","doi-asserted-by":"crossref","unstructured":"Pennington J, Socher R, Manning C (2014) Glove: global vectors for word representation. In: Proceedings of the 2014 conference on empirical methods in natural language processing (EMNLP), pp 1532\u20131543","DOI":"10.3115\/v1\/D14-1162"},{"key":"1581_CR60","doi-asserted-by":"crossref","unstructured":"Phan X-H, Nguyen L-M, Horiguchi S (2008) Learning to classify short and sparse text & web with hidden topics from large-scale data collections. In: Proceedings of the 17th international conference on World Wide Web. ACM, pp 91\u2013100","DOI":"10.1145\/1367497.1367510"},{"key":"1581_CR61","unstructured":"Powers DM (2011) Evaluation: from precision, recall and f-measure to roc, informedness, markedness and correlation"},{"key":"1581_CR62","doi-asserted-by":"crossref","unstructured":"Qiu X, Sun T, Xu Y, Shao Y, Dai N, Huang X (2020) Pre-trained models for natural language processing: a survey. arXiv preprint arXiv:2003.08271","DOI":"10.1007\/s11431-020-1647-3"},{"issue":"5","key":"1581_CR63","doi-asserted-by":"publisher","first-page":"1009","DOI":"10.1109\/TPAMI.2010.154","volume":"33","author":"X Quan","year":"2011","unstructured":"Quan X, Wenyin L, Qiu B (2011) Term weighting schemes for question categorization. IEEE Trans Pattern Anal Mach Intell 33(5):1009\u20131021","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1581_CR64","unstructured":"Stephen R (2004) Understanding inverse document frequency: on theoretical arguments for idf. J Doc"},{"key":"1581_CR65","doi-asserted-by":"crossref","unstructured":"Salton G, Buckley C (1988) Term-weighting approaches in automatic text retrieval. Inf Manag","DOI":"10.1016\/0306-4573(88)90021-0"},{"key":"1581_CR66","doi-asserted-by":"crossref","unstructured":"Salton G, Wong A, Yang CS (1975) A vector space model for automatic indexing. Commun ACM","DOI":"10.1145\/361219.361220"},{"issue":"4","key":"1581_CR67","doi-asserted-by":"publisher","first-page":"351","DOI":"10.1108\/eb026562","volume":"29","author":"G Salton","year":"1973","unstructured":"Salton G, Yang CS (1973) On the specification of term values in automatic indexing. J Doc 29(4):351\u2013372","journal-title":"J Doc"},{"issue":"1","key":"1581_CR68","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/505282.505283","volume":"34","author":"F Sebastiani","year":"2002","unstructured":"Sebastiani F (2002) Machine learning in automated text categorization. ACM Comput Surv (CSUR) 34(1):1\u201347","journal-title":"ACM Comput Surv (CSUR)"},{"key":"1581_CR69","doi-asserted-by":"crossref","unstructured":"Shannon CE (2001) A mathematical theory of communication. ACM SIGMOBILE Mobile Comput Commun (review)","DOI":"10.1145\/584091.584093"},{"key":"1581_CR70","unstructured":"Socher R, Perelygin A, Wu JY, Chuang J, Manning CD, Ng AY, Potts C et\u00a0al (2013) Recursive deep models for semantic compositionality over a sentiment treebank. In: Proceedings of the conference on empirical methods in natural language processing (EMNLP), vol 1631. Citeseer, pp 1642"},{"issue":"May","key":"1581_CR71","first-page":"1393","volume":"13","author":"L Song","year":"2012","unstructured":"Song L, Smola A, Gretton A, Bedo J, Borgwardt K (2012) Feature selection via dependence maximization. J Mach Learn Res 13(May):1393\u20131434","journal-title":"J Mach Learn Res"},{"key":"1581_CR72","unstructured":"Soucy P, Mineau GW (2005) Beyond tfidf weighting for text categorization in the vector space model. In: IJCAI"},{"key":"1581_CR73","doi-asserted-by":"crossref","unstructured":"Swinger N, De-Arteaga M et al (2019) What are the biases in my word embedding? In: Proceedings of the 2019 AAAI\/ACM conference on AI, ethics, and society, pp 305\u2013311","DOI":"10.1145\/3306618.3314270"},{"key":"1581_CR74","doi-asserted-by":"crossref","unstructured":"Tang J, Qu M, Mei Q (2015) Pte: Predictive text embedding through large-scale heterogeneous text networks. In: Proceedings of the 21th ACM SIGKDD international conference on knowledge discovery and data mining. ACM, pp 1165\u20131174","DOI":"10.1145\/2783258.2783307"},{"key":"1581_CR75","doi-asserted-by":"crossref","unstructured":"Wang T, Cai Y, Leung H-F, Cai Z, Min H (2015) Entropy-based term weighting schemes for text categorization in vsm. In: 2015 IEEE 27th international conference on tools with artificial intelligence (ICTAI). IEEE, pp 325\u2013332","DOI":"10.1109\/ICTAI.2015.57"},{"key":"1581_CR76","doi-asserted-by":"crossref","unstructured":"Warrens MJ (2008) On association coefficients for 2$$\\times $$ 2 tables and properties that do not depend on the marginal distributions. Psychometrika","DOI":"10.1007\/s11336-008-9070-3"},{"key":"1581_CR77","doi-asserted-by":"crossref","unstructured":"Wei B, Feng B, He F, Fu X (2011) An extended supervised term weighting method for text categorization. In: Proceedings of the international conference on human-centric computing 2011 and embedded and multimedia computing 2011. Springer","DOI":"10.1007\/978-94-007-2105-0_11"},{"key":"1581_CR78","doi-asserted-by":"crossref","unstructured":"Wu H, Gu X (2016) Balancing between over-weighting and under-weighting in supervised term weighting. arXiv preprint arXiv:1604.04007","DOI":"10.1016\/j.ipm.2016.10.003"},{"key":"1581_CR79","doi-asserted-by":"crossref","unstructured":"Wu H, Salton G (1981) A comparison of search term weighting: term relevance vs. inverse document frequency. In: ACM SIGIR Forum, vol 16. ACM, pp 30\u201339","DOI":"10.1145\/1013228.511759"},{"key":"1581_CR80","doi-asserted-by":"crossref","unstructured":"Wu L, Yen IEH, Xu K, Xu F, Balakrishnan A, Chen P-Y, Ravikumar P, Witbrock MJ (2018) Word mover\u2019s embedding: from word2vec to document embedding. arXiv preprint arXiv:1811.01713","DOI":"10.18653\/v1\/D18-1482"},{"key":"1581_CR81","doi-asserted-by":"crossref","unstructured":"Xiong M, Li R, Li Y, Yang Q (2018) Self-inhibition residual convolutional networks for Chinese sentence classification. In: International conference on neural information processing. Springer, pp 425\u2013436","DOI":"10.1007\/978-3-030-04167-0_39"},{"key":"1581_CR82","doi-asserted-by":"crossref","unstructured":"Yang Y, Liu X (1999) A re-examination of text categorization methods. In: SIGIR. ACM, pp 42\u201349","DOI":"10.1145\/312624.312647"},{"key":"1581_CR83","unstructured":"Yang Y, Pedersen JO (1997) A comparative study on feature selection in text categorization. In: ICML"},{"key":"1581_CR84","unstructured":"Yu L, Liu H (2004) Efficient feature selection via analysis of relevance and redundancy. J Mach Learn Res"},{"key":"1581_CR85","doi-asserted-by":"crossref","unstructured":"Yuan H, Wang Y, Feng X, Sun S (2018) Sentiment analysis based on weighted word2vec and att-lstm. In: Proceedings of the 2018 2nd international conference on computer science and artificial intelligence, pp 420\u2013424","DOI":"10.1145\/3297156.3297228"},{"key":"1581_CR86","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1145\/984321.984322","volume":"22","author":"C Zhai","year":"2004","unstructured":"Zhai C, Lafferty J (2004) A study of smoothing methods for language models applied to information retrieval. ACM Trans Inf Syst 22:179\u2013214","journal-title":"ACM Trans Inf Syst"},{"key":"1581_CR87","unstructured":"Zhang D, Yin J, Zhu X, Chengqi Z (2018) A survey. IEEE Trans Big Data Netw Represent Learn"},{"key":"1581_CR88","doi-asserted-by":"crossref","unstructured":"Zhang S, Jin X, Shen D, Cao B, Ding X, Zhang X (2013) Short text classification by detecting information path. In: Proceedings of the 22nd ACM international conference on conference on information & knowledge management. ACM, pp 727\u2013732","DOI":"10.1145\/2505515.2505638"},{"key":"1581_CR89","doi-asserted-by":"crossref","unstructured":"Zhao J, Wang T, Yatskar M, Cotterell R, Ordonez V, Chang K-W (2019) Gender bias in contextualized word embeddings. arXiv preprint arXiv:1904.03310","DOI":"10.18653\/v1\/N19-1064"},{"key":"1581_CR90","doi-asserted-by":"crossref","unstructured":"Zhao J, Zhou Y, Li Z, Wang W, Chang K-W (2018) Learning gender-neutral word embeddings. arXiv preprint arXiv:1809.01496","DOI":"10.18653\/v1\/D18-1521"},{"key":"1581_CR91","doi-asserted-by":"crossref","unstructured":"Zhao K, Hassan H, Auli M (2015) Learning translation models from monolingual continuous representations. In: Proceedings of NAACL","DOI":"10.3115\/v1\/N15-1176"}],"container-title":["Knowledge and Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-021-01581-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10115-021-01581-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10115-021-01581-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,3]],"date-time":"2024-09-03T15:54:33Z","timestamp":1725378873000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10115-021-01581-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,7]]},"references-count":91,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2021,9]]}},"alternative-id":["1581"],"URL":"https:\/\/doi.org\/10.1007\/s10115-021-01581-5","relation":{},"ISSN":["0219-1377","0219-3116"],"issn-type":[{"value":"0219-1377","type":"print"},{"value":"0219-3116","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,7,7]]},"assertion":[{"value":"6 January 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 May 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 May 2021","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 July 2021","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}