{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T13:06:06Z","timestamp":1776776766440,"version":"3.51.2"},"reference-count":27,"publisher":"Springer Science and Business Media LLC","issue":"15","license":[{"start":{"date-parts":[[2021,6,24]],"date-time":"2021-06-24T00:00:00Z","timestamp":1624492800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,6,24]],"date-time":"2021-06-24T00:00:00Z","timestamp":1624492800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Soft Comput"],"published-print":{"date-parts":[[2021,8]]},"DOI":"10.1007\/s00500-021-05754-w","type":"journal-article","created":{"date-parts":[[2021,6,24]],"date-time":"2021-06-24T20:02:16Z","timestamp":1624564936000},"page":"10089-10101","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Arabic sentence similarity based on similarity features and machine learning"],"prefix":"10.1007","volume":"25","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6358-059X","authenticated-orcid":false,"given":"Marwah","family":"Alian","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Arafa","family":"Awajan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,6,24]]},"reference":[{"key":"5754_CR1","doi-asserted-by":"crossref","unstructured":"Alian M, Awajan A (2018) Semantic similarity approaches\u2014review. In: 2018 international Arab conference on information technology (ACIT2018). Werdanye, Lebanon","DOI":"10.1109\/ACIT.2018.8672665"},{"key":"5754_CR2","doi-asserted-by":"publisher","first-page":"851","DOI":"10.1007\/s10772-020-09753-4","volume":"23","author":"M Alian","year":"2020","unstructured":"Alian M, Awajan A (2020a) Factors affecting sentence similarity and paraphrasing identification. Int J Speech Technol 23:851\u2013859","journal-title":"Int J Speech Technol"},{"issue":"4","key":"5754_CR3","doi-asserted-by":"publisher","first-page":"2050033","DOI":"10.1142\/S0219649220500331","volume":"19","author":"M Alian","year":"2020","unstructured":"Alian M, Awajan A (2020b) Semantic similarity for English and Arabic texts: a review. J Inf Knowl Manag 19(4):2050033","journal-title":"J Inf Knowl Manag"},{"key":"5754_CR4","doi-asserted-by":"crossref","unstructured":"Alian M, Awajan A (2020c) Sense inventories for Arabic texts. In: The international Arab conference on information technology (ACIT'2020), Egypt","DOI":"10.1109\/ACIT50332.2020.9300054"},{"key":"5754_CR5","doi-asserted-by":"crossref","unstructured":"Alian M, Awajan A, Al-Hasan A, Akuzhia R (2019) Towards building Arabic paraphrasing benchmark. In: the 2nd international conference on data science, E-learning and Information Systems (DATA'2019). ACM, Dubai","DOI":"10.1145\/3368691.3368708"},{"key":"5754_CR6","doi-asserted-by":"publisher","first-page":"921","DOI":"10.1007\/s10579-020-09504-6","volume":"54","author":"M Alian","year":"2020","unstructured":"Alian M, Al-Naymat G, Ramadan B (2020) Arabic real time entity resolution using inverted indexing. Lang Resour Eval 54:921\u2013941","journal-title":"Lang Resour Eval"},{"issue":"1","key":"5754_CR7","doi-asserted-by":"publisher","first-page":"1","DOI":"10.3844\/jcssp.2016.1.18","volume":"12","author":"S Alzahrani","year":"2016","unstructured":"Alzahrani S (2016) Cross-language semantic similarity of Arabic\u2013English short phrases and sentences. J Comput Sci 12(1):1\u201318","journal-title":"J Comput Sci"},{"key":"5754_CR8","unstructured":"Beel J, Langer S, Gipp B (2017) TF-IDuF: a novel term-weighting scheme for user modeling based on users\u2019 personal document collections. In: The conference 2017, Wuhan, China, 2017. Urbana-Champaign: University of Illinois"},{"key":"5754_CR9","doi-asserted-by":"crossref","unstructured":"Cer D, Diab M, Agirre E, Lopez-Gazpio I, Specia L (2017) SemEval-2017 task 1: semantic textual similarity multilingual and crosslingual focused evaluation. In: Proceedings of the 11th international workshop on semantic evaluation (SemEval-2017), Vancouver, Canada","DOI":"10.18653\/v1\/S17-2001"},{"key":"5754_CR10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-31164-2","volume-title":"Data matching: concepts and techniques for record link-age, entity resolution, and duplicate detection","author":"P Christen","year":"2012","unstructured":"Christen P (2012) Data matching: concepts and techniques for record link-age, entity resolution, and duplicate detection. Springer"},{"key":"5754_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.csl.2016.01.003","volume":"39","author":"R Ferreira","year":"2016","unstructured":"Ferreira R, Lins RD, Simske SJ, Freitas F, Riss M (2016) Assessing sentence similarity through lexical, syntactic and semantic analysis. Comput Speech Lang 39:1\u201328","journal-title":"Comput Speech Lang"},{"key":"5754_CR12","unstructured":"Hatzivassiloglou V, Klavans J, Eskin E (1999) Detecting text similarity over short passages: exploring linguistic feature combinations via machine learning. In: Joint SIGDAT conference: empirical methods in NLP and very large corpora"},{"key":"5754_CR13","first-page":"241","volume":"37","author":"P Jaccard","year":"1901","unstructured":"Jaccard P (1901) Distribution de la flore alpine dans le bassin des Dranses et dans quelques r\u00e9gions voisines. Bulletin de la Soci\u00e9t\u00e9 Vaudoise des Sciences Naturelles 37:241\u2013272","journal-title":"Bulletin de la Soci\u00e9t\u00e9 Vaudoise des Sciences Naturelles"},{"issue":"1","key":"5754_CR28","first-page":"152","volume":"58","author":"SM Kadhem","year":"2017","unstructured":"Kadhem SM, Abd Alameer AQ (2017) Finding the similarity between two Arabic texts. Iraqi J Sci 58(1):152\u2013162","journal-title":"Iraqi J Sci"},{"key":"5754_CR14","unstructured":"Landauer TK, Laham D, Rehder B, Schreiner ME (1997) How well can passage meaning be derived without using word order? A comparison of latent semantic analysis and humans. In: 19th annual meeting of the cognitive science society"},{"key":"5754_CR15","doi-asserted-by":"crossref","unstructured":"Le Y, Wang Z, Quan Z, He J, Yao B (2018) ACV-tree: a new method for sentence similarity modeling. In: Proceedings of the 27th international joint conference on artificial intelligence (IJCAI-18)","DOI":"10.24963\/ijcai.2018\/575"},{"issue":"8","key":"5754_CR16","doi-asserted-by":"publisher","first-page":"1138","DOI":"10.1109\/TKDE.2006.130","volume":"18","author":"Y Li","year":"2006","unstructured":"Li Y, McLean D, Bandar ZA, O\u2019shea JD, Crockett K (2006) Sentence similarity based on semantic nets and corpus statistics. IEEE Trans Knowl Data Eng 18(8):1138\u20131150","journal-title":"IEEE Trans Knowl Data Eng"},{"key":"5754_CR17","doi-asserted-by":"crossref","unstructured":"Lilleberg J, Zhu Y, Zhang Y (2015) Support vector machines and Word2Vec for text classification with semantic features. In: 14th international conference on cognitive informatics and cognitive computing (ICCI*CC), 2015. IEEE","DOI":"10.1109\/ICCI-CC.2015.7259377"},{"key":"5754_CR18","unstructured":"Lintean M, Rus V (2012) Measuring semantic similarity in short texts through greedy pairing and word semantics. In: The 5th international Florida artificial intelligence research society conference"},{"key":"5754_CR19","unstructured":"Logacheva V, Teslenko D, Shelmanov A, Remus S, Ustalov D, Kutuzov A, Artemova E, Biemann C, Ponzetto SP, Panchenko A (2020) Word sense disambiguation for languages using word embeddings only. In: The 12th conference on language resources and evaluation (LREC 2020), Marseille, pp 5943\u20135952"},{"key":"5754_CR20","doi-asserted-by":"publisher","first-page":"256","DOI":"10.1016\/j.procs.2017.10.117","volume":"117","author":"ABS Mohammad","year":"2017","unstructured":"Mohammad ABS, Eissa K, El-Beltagy SR (2017) AraVec: a set of arabic word embedding models for use in Arabic NLP. Proc Comput Sci 117:256\u2013265","journal-title":"Proc Comput Sci"},{"key":"5754_CR23","doi-asserted-by":"crossref","unstructured":"Nagoudi EMB, Schwab D (2017) Semantic similarity of arabic sentences with word embeddings. In: Proceedings of the 3rd Arabic natural language processing workshop (WANLP), Valencia, Spain","DOI":"10.18653\/v1\/W17-1303"},{"issue":"1","key":"5754_CR24","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1145\/375360.375365","volume":"33","author":"G Navarro","year":"2001","unstructured":"Navarro G (2001) A guided tour to approximate string matching. ACM Comput Surv 33(1):31\u201388","journal-title":"ACM Comput Surv"},{"key":"5754_CR25","unstructured":"O\u2019Shea J, Bandar Z, Crockett K, McLean D (2008) A comparative study of two short text semantic similarity measures. In: Nguyen NT, Jo GS, Howlett RJ, Jain LC (eds) Agent and multi-agent systems: technologies and applications. Lecture notes in computer science. Springer, Berlin"},{"key":"5754_CR26","unstructured":"Ozates SB, Ozgur A, Radev DR (2016) Sentence similarity based on dependency tree kernels for multi-document summarization. In: 10th edition of language resources and evaluation conference (LREC 2016). Portoro\u017e, Slovenia"},{"key":"5754_CR27","doi-asserted-by":"crossref","unstructured":"\u0218tef\u0103nescu D, Banjade R, Rus V (2014) A sentence similarity method based on parsing and information content. In: A G Computational linguistics and intelligent text processing, CICLing 2014, Lecture notes in computer science. Springer, Berlin, pp 442\u2013453","DOI":"10.1007\/978-3-642-54906-9_36"},{"key":"5754_CR29","doi-asserted-by":"crossref","unstructured":"Wu Z, Palmer M (1994) Verb semantics and lexical selection. In: The 32nd annual meeting of the associations for computational linguistics","DOI":"10.3115\/981732.981751"}],"container-title":["Soft Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00500-021-05754-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00500-021-05754-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00500-021-05754-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,7,14]],"date-time":"2021-07-14T11:27:46Z","timestamp":1626262066000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00500-021-05754-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,24]]},"references-count":27,"journal-issue":{"issue":"15","published-print":{"date-parts":[[2021,8]]}},"alternative-id":["5754"],"URL":"https:\/\/doi.org\/10.1007\/s00500-021-05754-w","relation":{},"ISSN":["1432-7643","1433-7479"],"issn-type":[{"value":"1432-7643","type":"print"},{"value":"1433-7479","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,6,24]]},"assertion":[{"value":"15 March 2021","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 June 2021","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This article does not contain any studies with human participants or animals performed by any of the authors.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}]}}