{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T19:33:01Z","timestamp":1780687981550,"version":"3.54.1"},"reference-count":21,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2018,3,16]],"date-time":"2018-03-16T00:00:00Z","timestamp":1521158400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Comput Math Organ Theory"],"published-print":{"date-parts":[[2019,9]]},"DOI":"10.1007\/s10588-018-9266-8","type":"journal-article","created":{"date-parts":[[2018,3,16]],"date-time":"2018-03-16T03:48:56Z","timestamp":1521172136000},"page":"319-335","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":115,"title":["The impact of preprocessing steps on the accuracy of machine learning algorithms in sentiment analysis"],"prefix":"10.1007","volume":"25","author":[{"given":"Saqib","family":"Alam","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Nianmin","family":"Yao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2018,3,16]]},"reference":[{"key":"9266_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1371\/journal.pone.0171649","volume":"12","author":"MZ Asghar","year":"2017","unstructured":"Asghar MZ, Khan A, Ahmad S, Qasim M, Khan A (2017a) Lexicon-enhanced sentiment analysis framework using rule-based classification scheme. PLoS ONE 12:1\u201323","journal-title":"PLoS ONE"},{"issue":"6","key":"9266_CR2","doi-asserted-by":"publisher","first-page":"868","DOI":"10.1007\/s12559-017-9503-3","volume":"9","author":"MZ Asghar","year":"2017","unstructured":"Asghar MZ, Khan A, Bibi A, Kundi FM, Ahmad H (2017b) Sentence-level emotion detection framework using rule-based classification. Cogn Comput 9(6):868\u2013894","journal-title":"Cogn Comput"},{"key":"9266_CR3","unstructured":"Baradad VP, Mugabushaka A (2015) Corpus specific stop words to improve the textual analysis in scientometrics. In: International Conference on Science in Information, pp 999\u20131005"},{"key":"9266_CR4","doi-asserted-by":"crossref","unstructured":"Bhavitha BK, Rodrigues AP, Chiplunkar NN (2017) Comparative study of machine learning techniques in sentimental analysis. In: Proceedings of International Conference Inventory Communication Computing Technology ICICCT 2017, No. Icicct, pp 216\u2013221","DOI":"10.1109\/ICICCT.2017.7975191"},{"issue":"3","key":"9266_CR5","doi-asserted-by":"publisher","first-page":"5432","DOI":"10.1016\/j.eswa.2008.06.054","volume":"36","author":"J Chen","year":"2009","unstructured":"Chen J, Huang H, Tian S, Qu Y (2009) Expert systems with applications feature selection for text classification with Na\u00efve Bayes. Expert Syst Appl 36(3):5432\u20135435","journal-title":"Expert Syst Appl"},{"key":"9266_CR6","unstructured":"Clark A (2003) Pre-processing very noisy text. In: Proceeding of Work Shallow Process Large Corpora, p 11"},{"issue":"25","key":"9266_CR7","first-page":"36","volume":"96","author":"O Das","year":"2014","unstructured":"Das O, Balabantaray RC (2014) Sentiment analysis of movie reviews using POS tags and term frequencies. Int J Ldots 96(25):36\u201341","journal-title":"Int J Ldots"},{"issue":"12","key":"9266_CR8","first-page":"1","volume":"150","author":"A Go","year":"2009","unstructured":"Go A, Bhayani R, Huang L (2009) Twitter sentiment classification using distant supervision. Processing 150(12):1\u20136","journal-title":"Processing"},{"issue":"6","key":"9266_CR9","doi-asserted-by":"publisher","first-page":"1445","DOI":"10.1166\/jmihi.2017.2208","volume":"7","author":"A Khan","year":"2017","unstructured":"Khan A, Asghar MZ, Ahmad H, Kundi FM, Ismail S (2017) A rule-based sentiment classification framework for health reviews on mobile social media. J Med Imaging Health Inf 7(6):1445\u20131453","journal-title":"J Med Imaging Health Inf"},{"key":"9266_CR10","first-page":"22","volume":"11","author":"JB Lovins","year":"1968","unstructured":"Lovins JB (1968) Development of a stemming algorithm. Mech Transl Comput Linguist 11:22\u201331","journal-title":"Mech Transl Comput Linguist"},{"issue":"2","key":"9266_CR11","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1007\/s11280-015-0381-x","volume":"20","author":"AS Manek","year":"2017","unstructured":"Manek AS, Shenoy PD, Mohan MC, Venugopal KR (2017) Aspect term extraction for sentiment analysis in large movie reviews using Gini Index feature selection method and SVM classifier. World Wide Web 20(2):135\u2013154","journal-title":"World Wide Web"},{"key":"9266_CR12","unstructured":"Mikolov T, Chen K, Corrado G, Dean J (2013) Efficient estimation of word representations in vector space. Arxiv, pp 1\u201312"},{"key":"9266_CR13","unstructured":"Minanovic A, Gabelica H, Krstic Z (2014) Big data and sentiment analysis using KNIME: online reviews vs. social media. In: 2014 37th International Convention on Information and Communication Technology, Electronics and Microelectronics (MIPRO), pp 1464\u20131468"},{"key":"9266_CR14","doi-asserted-by":"crossref","unstructured":"Mubarok MS, Adiwijaya, Aldhi MD (2017) Aspect-based sentiment analysis to review products using Na\u00efve Bayes. In: AIP Conference Proceedings, vol. 020060, p 020060","DOI":"10.1063\/1.4994463"},{"key":"9266_CR15","unstructured":"Nigam K, Lafferty J, Mccallum A (1999) Using maximum entropy for text classification. In: IJCAI-99 workshop on machine learning for information filtering, pp 61\u201367"},{"key":"9266_CR16","unstructured":"Pak A, Paroubek P (2010) Twitter as a corpus for sentiment analysis and opinion mining. In: Proceedings of the Seventh conference on International Language Resources and Evaluation, pp 1320\u20131326"},{"key":"9266_CR17","doi-asserted-by":"crossref","unstructured":"Riloff E, Qadir A, Surve P, Silva LD, Gilbert N, Huang R (2013) Sarcasm as contrast between a positive sentiment and negative situation. In: Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing, No. Emnlp","DOI":"10.18653\/v1\/D13-1066"},{"key":"9266_CR18","unstructured":"Rong X (2014) word2vec parameter learning explained continuous bag-of-word model, pp 1\u201321"},{"key":"9266_CR19","first-page":"45","volume":"2","author":"S Tong","year":"2001","unstructured":"Tong S, Koller D (2001) Support vector machine active learning with applications to text classification. J Mach Learn Res 2:45\u201366","journal-title":"J Mach Learn Res"},{"key":"9266_CR20","doi-asserted-by":"publisher","DOI":"10.1007\/s00500-017-2904-0","author":"X Xie","year":"2017","unstructured":"Xie X, Ge S, Hu F, Xie M, Jiang N (2017) An improved algorithm for sentiment analysis based on maximum entropy. Soft Comput. https:\/\/doi.org\/10.1007\/s00500-017-2904-0","journal-title":"Soft Comput"},{"key":"9266_CR21","unstructured":"Yadav MP, Pandya D (2017) SentiReview: sentiment analysis based on text and emoticons. In: International Conference Innovation Mechanical Industry Application ICIMIA 2017 SentiReview, no. Icimia, pp 467\u2013472"}],"container-title":["Computational and Mathematical Organization Theory"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10588-018-9266-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10588-018-9266-8\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10588-018-9266-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,2]],"date-time":"2025-07-02T20:49:20Z","timestamp":1751489360000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10588-018-9266-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,3,16]]},"references-count":21,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2019,9]]}},"alternative-id":["9266"],"URL":"https:\/\/doi.org\/10.1007\/s10588-018-9266-8","relation":{},"ISSN":["1381-298X","1572-9346"],"issn-type":[{"value":"1381-298X","type":"print"},{"value":"1572-9346","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,3,16]]},"assertion":[{"value":"16 March 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}