{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T06:20:10Z","timestamp":1742970010078,"version":"3.40.3"},"publisher-location":"Cham","reference-count":19,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030335816"},{"type":"electronic","value":"9783030335823"}],"license":[{"start":{"date-parts":[[2019,11,2]],"date-time":"2019-11-02T00:00:00Z","timestamp":1572652800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-33582-3_12","type":"book-chapter","created":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T07:46:34Z","timestamp":1572594394000},"page":"118-126","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Morphosyntactic Preprocessing Impact on Document Embedding: An Empirical Study on Semantic Similarity"],"prefix":"10.1007","author":[{"given":"Nourelhouda","family":"Yahi","sequence":"first","affiliation":[]},{"given":"Hacene","family":"Belhadef","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,11,2]]},"reference":[{"key":"12_CR1","doi-asserted-by":"crossref","unstructured":"Batet, M., Sanchez, D.: A review on semantic similarity. In: Encyclopedia of Information Science and Technology, Third Edition, pp. 7575\u20137583. IGI Global (2015)","DOI":"10.4018\/978-1-4666-5888-2.ch746"},{"key":"12_CR2","first-page":"1137","volume":"3","author":"Y Bengio","year":"2003","unstructured":"Bengio, Y., et al.: A neural probabilistic language model. J. Mach. Learn. Res. 3, 1137\u20131155 (2003)","journal-title":"J. Mach. Learn. Res."},{"key":"12_CR3","doi-asserted-by":"crossref","unstructured":"Camacho-Collados, J., Pilehvar, M.T.: On the role of text preprocessing in neural network architectures: an evaluation study on text categorization and sentiment analysis. arXiv preprint \n                    arXiv:1707-1780\n                    \n                   (2017)","DOI":"10.18653\/v1\/W18-5406"},{"key":"12_CR4","volume-title":"Information Representation and Retrieval in the Digital Age","author":"H Chu","year":"2003","unstructured":"Chu, H.: Information Representation and Retrieval in the Digital Age. Information Today, Inc., Medford (2003)"},{"key":"12_CR5","doi-asserted-by":"crossref","unstructured":"Dolan, B., Quirk, C., Brockett, C.: Unsupervised construction of large paraphrase corpora: exploiting massively parallel news sources. In: Proceedings of the 20th International Conference on Computational Linguistics, p. 350. Association for Computational Linguistics (2004)","DOI":"10.3115\/1220355.1220406"},{"issue":"4","key":"12_CR6","doi-asserted-by":"publisher","first-page":"501","DOI":"10.1177\/0165551514534143","volume":"40","author":"R Duwairi","year":"2014","unstructured":"Duwairi, R., El-Orfali, M.: A study of the effects of preprocessing strategies on sentiment analysis for Arabic text. J. Inform. Sci. 40(4), 501\u2013513 (2014)","journal-title":"J. Inform. Sci."},{"key":"12_CR7","doi-asserted-by":"crossref","unstructured":"Ibrahim, A., Katz, B., Lin, J.: Extracting structural paraphrases from aligned monolingual corpora. In: Proceedings of the Second International Workshop on Paraphrasing, vol. 16, pp. 57\u201364. Association for Computational Linguistics (2003)","DOI":"10.3115\/1118984.1118992"},{"key":"12_CR8","doi-asserted-by":"crossref","unstructured":"Kenter, T., De Rijke, M.: Short text similarity with word embeddings. In: Proceedings of the 24th ACM International on Conference on Information and Knowledge Management, pp. 1411\u20131420. ACM (2015)","DOI":"10.1145\/2806416.2806475"},{"key":"12_CR9","doi-asserted-by":"crossref","unstructured":"Kiela, D., Clark, S.: A systematic study of semantic vector space model parameters. In: Proceedings of the 2nd Workshop on Continuous Vector Space Models and their Compositionality (CVSC), pp. 21\u201330 (2014)","DOI":"10.3115\/v1\/W14-1503"},{"issue":"1","key":"12_CR10","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/360402.360406","volume":"2","author":"R Kosala","year":"2000","unstructured":"Kosala, R., Blockeel, H.: Web mining research: a survey. ACM Sigkdd Explor. Newsl. 2(1), 1\u201315 (2000)","journal-title":"ACM Sigkdd Explor. Newsl."},{"key":"12_CR11","unstructured":"Le, Q., Mikolov, T.: Distributed representations of sentences and douments. In: International Conference on Machine Learning, pp. 1188\u20131196 (2014)"},{"issue":"Feb","key":"12_CR12","first-page":"419","volume":"2","author":"H Lodhi","year":"2002","unstructured":"Lodhi, H., et al.: Text classification using string kernels. J. Mach. Learn. Res. 2(Feb), 419\u2013444 (2002)","journal-title":"J. Mach. Learn. Res."},{"key":"12_CR13","unstructured":"Mikolov, T., Yih, W., Zweig, G.: Linguistic regularities in continuous space word representations. In: Proceedings of the 2013 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 746\u2013751 (2013)"},{"key":"12_CR14","unstructured":"Mikolov, T., et al.: Efficient estimation of word representations in vector space. arXiv preprint \n                    arXiv:1301.3781\n                    \n                   (2013)"},{"issue":"1","key":"12_CR15","first-page":"218","volume":"18","author":"MY Pak","year":"2017","unstructured":"Pak, M.Y., Gunal, S.: The impact of text representation and preprocessing on author identification. Anadolu \u00dcniversitesi Bilim Ve Teknoloji Dergisi A-Uygulamal\u0131 Bilimler ve M\u00fchendislik 18(1), 218\u2013224 (2017)","journal-title":"Anadolu \u00dcniversitesi Bilim Ve Teknoloji Dergisi A-Uygulamal\u0131 Bilimler ve M\u00fchendislik"},{"issue":"5","key":"12_CR16","doi-asserted-by":"publisher","first-page":"1207","DOI":"10.1016\/j.ipm.2004.08.002","volume":"41","author":"E-K Park","year":"2005","unstructured":"Park, E.-K., Ra, D.-Y., Jang, M.-G.: Techniques for improving web retrieval effectiveness. Inf. Process. Manage. 41(5), 1207\u20131223 (2005)","journal-title":"Inf. Process. Manage."},{"issue":"3","key":"12_CR17","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1108\/eb046814","volume":"14","author":"MF Porter","year":"1980","unstructured":"Porter, M.F.: An algorithm for suffix stripping. Program 14(3), 130\u2013137 (1980)","journal-title":"Program"},{"key":"12_CR18","unstructured":"Sergienko, R., Shan, M., Minker, W.: A comparative study of text preprocessing approaches for topic detection of user utterances. In: Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016), pp. 1826\u20131831 (2016)"},{"issue":"1","key":"12_CR19","doi-asserted-by":"publisher","first-page":"104","DOI":"10.1016\/j.ipm.2013.08.006","volume":"50","author":"AK Uysal","year":"2014","unstructured":"Uysal, A.K., Gunal, S.: The impact of preprocessing on text classification. Inform. Process. Manage. 50(1), 104\u2013112 (2014)","journal-title":"Inform. Process. Manage."}],"container-title":["Advances in Intelligent Systems and Computing","Emerging Trends in Intelligent Computing and Informatics"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-33582-3_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,11,1]],"date-time":"2019-11-01T17:03:48Z","timestamp":1572627828000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-33582-3_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,11,2]]},"ISBN":["9783030335816","9783030335823"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-33582-3_12","relation":{},"ISSN":["2194-5357","2194-5365"],"issn-type":[{"type":"print","value":"2194-5357"},{"type":"electronic","value":"2194-5365"}],"subject":[],"published":{"date-parts":[[2019,11,2]]},"assertion":[{"value":"2 November 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IRICT","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference of Reliable Information and Communication Technology","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Johor","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Malaysia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"irict2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/irict.co\/irict2019\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}