{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T11:56:47Z","timestamp":1768996607744,"version":"3.49.0"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,12,10]],"date-time":"2020-12-10T00:00:00Z","timestamp":1607558400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,12,10]],"date-time":"2020-12-10T00:00:00Z","timestamp":1607558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,12,10]],"date-time":"2020-12-10T00:00:00Z","timestamp":1607558400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,12,10]]},"DOI":"10.1109\/bigdata50022.2020.9378226","type":"proceedings-article","created":{"date-parts":[[2021,3,19]],"date-time":"2021-03-19T21:10:21Z","timestamp":1616188221000},"page":"2639-2648","source":"Crossref","is-referenced-by-count":16,"title":["Accelerating Text Mining Using Domain-Specific Stop Word Lists"],"prefix":"10.1109","author":[{"given":"Farah","family":"Alshanik","sequence":"first","affiliation":[]},{"given":"Amy","family":"Apon","sequence":"additional","affiliation":[]},{"given":"Alexander","family":"Herzog","sequence":"additional","affiliation":[]},{"given":"Ilya","family":"Safro","sequence":"additional","affiliation":[]},{"given":"Justin","family":"Sybrandt","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref32","article-title":"Efficient estimation of word representations in vector space","author":"mikolov","year":"2013"},{"key":"ref31","first-page":"2825","article-title":"Scikit-learn: Machine learning in Python","volume":"12","author":"pedregosa","year":"2011","journal-title":"Journal of Machine Learning Research"},{"key":"ref30","author":"breiman","year":"1984","journal-title":"Classification and Regression Trees"},{"key":"ref10","article-title":"A systematic review on stopword removal algorithms","volume":"4","author":"kaur","year":"2018","journal-title":"Int J Futur Revolut Comput Sci Commun Eng"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/WI.2003.1241221"},{"key":"ref12","article-title":"Effect of stop word removal on document similarity for hindi text","volume":"2","author":"garg","year":"2014","journal-title":"An Int Jounal Eng Sci"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-19403-0_43"},{"key":"ref14","first-page":"119","article-title":"Automated stopwords identification in punjabi documents","volume":"8","author":"puri","year":"2013","journal-title":"International Journal of Engineering Science"},{"key":"ref15","first-page":"35","article-title":"A comparative study on between mongolian stop words and english stop words","volume":"4","author":"zheng","year":"2011","journal-title":"Journal of Chinese Information Processing"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2018.05.196"},{"key":"ref17","first-page":"8887","article-title":"Automatic generation of stopwords in the amharic text","volume":"975","author":"miretie","year":"2018","journal-title":"International Journal of Computer Applications"},{"key":"ref18","first-page":"1036","article-title":"Stop word list construction and application in chinese language processing","volume":"3","author":"zou","year":"2006","journal-title":"WSEAS Transactions on Information Science and Applications"},{"key":"ref19","first-page":"8","article-title":"Toward an arabic stop-words list generation","volume":"46","author":"alajmi","year":"2012","journal-title":"International Journal of Computer Applications"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1038\/nmeth.3904"},{"key":"ref4","author":"bell","year":"1990","journal-title":"Text Compression"},{"key":"ref27","first-page":"797","article-title":"ThunderSVM: A fast SVM library on GPUs and CPUs","volume":"19","author":"wen","year":"2018","journal-title":"Journal of Machine Learning Research"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/MicroCom.2016.7522593"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/BigData47090.2019.9005964"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.1995.598994"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2017.8258097"},{"key":"ref8","first-page":"142","article-title":"Learning word vectors for sentiment analysis","author":"maas","year":"0"},{"key":"ref7","year":"2016"},{"key":"ref2","first-page":"30","article-title":"An empirical evaluation of stop word removal in statistical machine translation","author":"chong","year":"2012","journal-title":"Proceedings of the Joint Workshop on Exploiting Synergies Between Information Retrieval and Machine Translation (ESIRMT) and Hybrid Approaches to Machine Translation (HyTra)"},{"key":"ref9","article-title":"Hansard Speeches V2.6.0 [dataset]","author":"odell","year":"0"},{"key":"ref1","first-page":"1010","article-title":"Automatic construction of chinese stop word list","author":"zou","year":"2006","journal-title":"Proceedings of the 5th WSEAS International Conference on Applied Computer Science"},{"key":"ref20","first-page":"17","article-title":"Automatically building a stopword list for an information retrieval system","volume":"5","author":"lo","year":"2005","journal-title":"Journal on Digital Information Management Special Issue on the 5th Dutch-Belgian Information Retrieval Workshop (DIR)"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-10-3153-3_79"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"15","DOI":"10.5120\/ijca2016911462","article-title":"Stop-word removal algorithm and its implementation for sanskrit language","volume":"150","author":"raulji","year":"2016","journal-title":"International Journal of Computer Applications"},{"key":"ref24","first-page":"746","article-title":"Linguistic regularities in continuous space word representations","author":"mikolov","year":"2013","journal-title":"Proceedings of the 2013 Conference of the North American Chapter of the Association for Computational Linguistics Human Language Technologies"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CSSE.2008.829"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-019-05800-7"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/BFb0026666"}],"event":{"name":"2020 IEEE International Conference on Big Data (Big Data)","location":"Atlanta, GA, USA","start":{"date-parts":[[2020,12,10]]},"end":{"date-parts":[[2020,12,13]]}},"container-title":["2020 IEEE International Conference on Big Data (Big Data)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9377717\/9377728\/09378226.pdf?arnumber=9378226","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,27]],"date-time":"2022-06-27T15:45:33Z","timestamp":1656344733000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9378226\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,12,10]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/bigdata50022.2020.9378226","relation":{},"subject":[],"published":{"date-parts":[[2020,12,10]]}}}