{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T07:52:45Z","timestamp":1761897165468,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","license":[{"start":{"date-parts":[[2019,9,23]],"date-time":"2019-09-23T00:00:00Z","timestamp":1569196800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100010663","name":"H2020 European Research Council","doi-asserted-by":"publisher","award":["780247"],"award-info":[{"award-number":["780247"]}],"id":[{"id":"10.13039\/100010663","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008530","name":"European Regional Development Fund","doi-asserted-by":"publisher","award":["TIN2016-78011-C4-4-R"],"award-info":[{"award-number":["TIN2016-78011-C4-4-R"]}],"id":[{"id":"10.13039\/501100008530","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Ministerio de Econom\u00eda, Industria y Competitividad, Gobierno de Espa\u00f1a","award":["TIN2016-78011-C4-4-R"],"award-info":[{"award-number":["TIN2016-78011-C4-4-R"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2019,9,23]]},"DOI":"10.1145\/3360901.3364444","type":"proceedings-article","created":{"date-parts":[[2019,11,14]],"date-time":"2019-11-14T22:18:50Z","timestamp":1573769930000},"page":"147-153","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Scalable Cross-lingual Document Similarity through Language-specific Concept Hierarchies"],"prefix":"10.1145","author":[{"given":"Carlos","family":"Badenes-Olmedo","sequence":"first","affiliation":[{"name":"Universidad Polit\u00e9cnica de Madrid, Boadilla del Monte, Spain"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jos\u00e9 Luis","family":"Redondo-Garc\u00eda","sequence":"additional","affiliation":[{"name":"Amazon Research, Cambridge, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Oscar","family":"Corcho","sequence":"additional","affiliation":[{"name":"Universidad Polit\u00e9cnica de Madrid, Boadilla del Monte, Spain"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2019,9,23]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of the 17th ACM Symposium on Document Engineering (DocEng) . https:\/\/doi.org\/10","author":"Badenes-Olmedo Carlos","year":"2017","unstructured":"Carlos Badenes-Olmedo , Jose Luis Redondo-Garcia , and Oscar Corcho . 2017 a. Distributing Text Mining tasks with librAIry . In Proceedings of the 17th ACM Symposium on Document Engineering (DocEng) . https:\/\/doi.org\/10 .1145\/3103010.3121040 10.1145\/3103010.3121040 Carlos Badenes-Olmedo, Jose Luis Redondo-Garcia, and Oscar Corcho. 2017a. Distributing Text Mining tasks with librAIry. In Proceedings of the 17th ACM Symposium on Document Engineering (DocEng) . https:\/\/doi.org\/10.1145\/3103010.3121040"},{"key":"e_1_3_2_1_2_1","volume-title":"Jose Luis Redondo-Garcia, and Oscar Corcho","author":"Badenes-Olmedo Carlos","year":"2017","unstructured":"Carlos Badenes-Olmedo , Jose Luis Redondo-Garcia, and Oscar Corcho . 2017 b. librAIry\/eval-similarity-calculation . (2017). https:\/\/doi.org\/10.5281\/zenodo.931305 10.5281\/zenodo.931305 Carlos Badenes-Olmedo, Jose Luis Redondo-Garcia, and Oscar Corcho. 2017b. librAIry\/eval-similarity-calculation . (2017). https:\/\/doi.org\/10.5281\/zenodo.931305"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Bahman Bahmani Benjamin Moseley Andrea Vattani Ravi Kumar and Sergei Vassilvitskii. 2012. Scalable K-Means  Bahman Bahmani Benjamin Moseley Andrea Vattani Ravi Kumar and Sergei Vassilvitskii. 2012. Scalable K-Means","DOI":"10.14778\/2180912.2180915"},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of the VLDB Endowment (PVLDB)","volume":"5","year":"2012","unstructured":". Proceedings of the VLDB Endowment (PVLDB) , Vol. 5 ( 2012 ), 622--633. https:\/\/doi.org\/10.14778\/2180912.2180915 10.14778\/2180912.2180915 . Proceedings of the VLDB Endowment (PVLDB) , Vol. 5 (2012), 622--633. https:\/\/doi.org\/10.14778\/2180912.2180915"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2011.79"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2133806.2133826"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2009.934715"},{"key":"e_1_3_2_1_8_1","volume-title":"Probabilistic Topic Models: A focus on graphical model design and applications to document and image analysis","author":"Blei David","year":"2010","unstructured":"David Blei , Lawrence Carin , and David Dunson . 2010b. Probabilistic Topic Models: A focus on graphical model design and applications to document and image analysis . IEEE signal processing magazine , Vol. 27 , 6 ( 2010 ), 55--65. https:\/\/doi.org\/10.1109\/MSP.2010.938079 10.1109\/MSP.2010.938079 David Blei, Lawrence Carin, and David Dunson. 2010b. Probabilistic Topic Models: A focus on graphical model design and applications to document and image analysis. IEEE signal processing magazine , Vol. 27, 6 (2010), 55--65. https:\/\/doi.org\/10.1109\/MSP.2010.938079"},{"key":"e_1_3_2_1_9_1","volume-title":"Modeling Annotated Data University of California Computer Science Division and Department of Statistics University of California Modeling Annotated Data . Science","author":"Blei David M","year":"2003","unstructured":"David M Blei and Michael I Jordan . 2003. Modeling Annotated Data University of California Computer Science Division and Department of Statistics University of California Modeling Annotated Data . Science ( 2003 ). David M Blei and Michael I Jordan. 2003. Modeling Annotated Data University of California Computer Science Division and Department of Statistics University of California Modeling Annotated Data . Science (2003)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1214\/07-AOAS114"},{"key":"e_1_3_2_1_11_1","first-page":"4","article-title":"Latent Dirichlet Allocation","volume":"3","author":"Blei David M","year":"2003","unstructured":"David M Blei , Andrew Y Ng , and Michael I Jordan . 2003 . Latent Dirichlet Allocation . Journal of Machine Learning Research , Vol. 3 , 4 -- 5 (2003), 993--1022. https:\/\/doi.org\/10.1162\/jmlr.2003.3.4--5.993 10.1162\/jmlr.2003.3.4--5.993 David M Blei, Andrew Y Ng, and Michael I Jordan. 2003. Latent Dirichlet Allocation . Journal of Machine Learning Research , Vol. 3, 4--5 (2003), 993--1022. https:\/\/doi.org\/10.1162\/jmlr.2003.3.4--5.993","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings of the 2010 Conference on Empirical Methods in Natural Language Processing","author":"Boyd-Graber Jordan","year":"2010","unstructured":"Jordan Boyd-Graber and Philip Resnik . 2010 . Holistic Sentiment Analysis Across Languages: Multilingual Supervised Latent Dirichlet Allocation . Proceedings of the 2010 Conference on Empirical Methods in Natural Language Processing October (2010), 45--55. https:\/\/doi.org\/10.1017\/CBO9781107415324.004 10.1017\/CBO9781107415324.004 Jordan Boyd-Graber and Philip Resnik. 2010. Holistic Sentiment Analysis Across Languages: Multilingual Supervised Latent Dirichlet Allocation . Proceedings of the 2010 Conference on Empirical Methods in Natural Language Processing October (2010), 45--55. https:\/\/doi.org\/10.1017\/CBO9781107415324.004"},{"key":"e_1_3_2_1_13_1","volume-title":"Proceedings of the NAACL HLT 2010 Workshop on Semantic Search. 1--9.","author":"Hakkani-Tur D","year":"2010","unstructured":"a Celikyilmaz, D Hakkani-Tur , and Gokhan Tur . 2010 . LDA Based Similarity Modeling for Question Answering . In Proceedings of the NAACL HLT 2010 Workshop on Semantic Search. 1--9. a Celikyilmaz, D Hakkani-Tur, and Gokhan Tur. 2010. LDA Based Similarity Modeling for Question Answering. In Proceedings of the NAACL HLT 2010 Workshop on Semantic Search. 1--9."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1007537716579"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1002\/(SICI)1097-4571(199009)41:6<391::AID-ASI1>3.0.CO;2-9"},{"key":"e_1_3_2_1_16_1","unstructured":"Martin Ester Hans-peter Kriegel J\u00f6rg S and Xiaowei Xu. 1996. A density-based algorithm for discovering clusters in large spatial databases with noise . (1996) 226--231. https:\/\/doi.org\/citeulike-article-id:3509601  Martin Ester Hans-peter Kriegel J\u00f6rg S and Xiaowei Xu. 1996. A density-based algorithm for discovering clusters in large spatial databases with noise . (1996) 226--231. https:\/\/doi.org\/citeulike-article-id:3509601"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1012801612483"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.5555\/1613715.1613763"},{"key":"e_1_3_2_1_19_1","volume-title":"Untangling Text Data Mining. In the 37th Annual Meeting of the Association for Computational Linguistics. 1--13","author":"Hearst Marti","year":"1999","unstructured":"Marti a Hearst and South Hall . 1999 . Untangling Text Data Mining. In the 37th Annual Meeting of the Association for Computational Linguistics. 1--13 . Marti a Hearst and South Hall. 1999. Untangling Text Data Mining. In the 37th Annual Meeting of the Association for Computational Linguistics. 1--13."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.5555\/599609.599631"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2014.2368273"},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of the 24th ACM International on Conference on Information and Knowledge Management (CIKM 2015)","author":"Kenter Tom","year":"2015","unstructured":"Tom Kenter and Maarten de Rijke . 2015 . Short Text Similarity with Word Embeddings Categories and Subject Descriptors . Proceedings of the 24th ACM International on Conference on Information and Knowledge Management (CIKM 2015) (2015), 1411--1420. https:\/\/doi.org\/10.1145\/2806416.2806475 10.1145\/2806416.2806475 Tom Kenter and Maarten de Rijke. 2015. Short Text Similarity with Word Embeddings Categories and Subject Descriptors . Proceedings of the 24th ACM International on Conference on Information and Knowledge Management (CIKM 2015) (2015), 1411--1420. https:\/\/doi.org\/10.1145\/2806416.2806475"},{"key":"e_1_3_2_1_23_1","volume-title":"IEEE Computer Society Conference on Computer Vision and Pattern Recognition. 3336--3343","author":"Wang Chong","year":"2010","unstructured":"Li-jia Li, Chong Wang , Yongwhan Lim , David M Blei , and Li Fei-fei. 2010 . Building and Using a Semantivisual Image Hierarchy . In IEEE Computer Society Conference on Computer Vision and Pattern Recognition. 3336--3343 . Li-jia Li, Chong Wang, Yongwhan Lim, David M Blei, and Li Fei-fei. 2010. Building and Using a Semantivisual Image Hierarchy. In IEEE Computer Society Conference on Computer Vision and Pattern Recognition. 3336--3343."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/18.61115"},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of the Twenty-Ninth AAAI COnference on Artificial Intelligence. 3820--3826","author":"Luo Wenhan","year":"2015","unstructured":"Wenhan Luo , Bj\u00f6rn Stenger , Xiaowei Zhao , and Tae-Kyun Kim . 2015 . Automatic Topic Discovery for Multi-Object Tracking . In Proceedings of the Twenty-Ninth AAAI COnference on Artificial Intelligence. 3820--3826 . Wenhan Luo, Bj\u00f6rn Stenger, Xiaowei Zhao, and Tae-Kyun Kim. 2015. Automatic Topic Discovery for Multi-Object Tracking. In Proceedings of the Twenty-Ninth AAAI COnference on Artificial Intelligence. 3820--3826."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1093\/genetics\/155.2.945"},{"key":"e_1_3_2_1_27_1","first-page":"1","article-title":"Diversity: Its Measurement, Decomposition, Apportionment and Analysis . Sankhy=a : The Indian Journal of Statistics","volume":"44","author":"Rao C Radhakrishna","year":"1982","unstructured":"C Radhakrishna Rao . 1982 . Diversity: Its Measurement, Decomposition, Apportionment and Analysis . Sankhy=a : The Indian Journal of Statistics , Series A , Vol. 44 , 1 (1982), 1 -- 22 . C Radhakrishna Rao. 1982. Diversity: Its Measurement, Decomposition, Apportionment and Analysis . Sankhy=a : The Indian Journal of Statistics, Series A , Vol. 44, 1 (1982), 1--22.","journal-title":"Series A"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Vasile Rus Nobal Niraula and Rajendra Banjade. 2013. Similarity Measures Based on Latent Dirichlet Allocation . Computational Linguistics and Intelligent Text Processing. 459--470.  Vasile Rus Nobal Niraula and Rajendra Banjade. 2013. Similarity Measures Based on Latent Dirichlet Allocation . Computational Linguistics and Intelligent Text Processing. 459--470.","DOI":"10.1007\/978-3-642-37247-6_37"},{"key":"e_1_3_2_1_29_1","volume-title":"Latent Semantic Analysis: A Road to Meaning","author":"Steyvers M","year":"2006","unstructured":"M Steyvers , Grif Ths , and T. 2006. Probabilistic topic models . In Landauer, T., McNamara, D., Dennis, S., and Kintsch, W., editors, Latent Semantic Analysis: A Road to Meaning . Laurence Erlbaum . Tang, Z. and MacLennan, J ( 2006 ). M Steyvers, Grif Ths, and T. 2006. Probabilistic topic models . In Landauer, T., McNamara, D., Dennis, S., and Kintsch, W., editors, Latent Semantic Analysis: A Road to Meaning. Laurence Erlbaum. Tang, Z. and MacLennan, J (2006)."},{"key":"e_1_3_2_1_30_1","first-page":"1","article-title":"Measuring Similarity Similarly: LDA and Human Perception","volume":"7","author":"Towne W Ben","year":"2016","unstructured":"W Ben Towne , Carolyn P Ros\u00e9 , and James Herbsleb . 2016 . Measuring Similarity Similarly: LDA and Human Perception . ACM Transactions on Intelligent Systems and Technology ACM Reference Format ACM Trans. Intell. Syst. Technol , Vol. 7 , 2 (2016), 1 -- 25 . W Ben Towne, Carolyn P Ros\u00e9 , and James Herbsleb. 2016. Measuring Similarity Similarly: LDA and Human Perception . ACM Transactions on Intelligent Systems and Technology ACM Reference Format ACM Trans. Intell. Syst. Technol , Vol. 7, 2 (2016), 1--25.","journal-title":"Intell. Syst. Technol"}],"event":{"name":"K-CAP '19: Knowledge Capture Conference","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence"],"location":"Marina Del Rey CA USA","acronym":"K-CAP '19"},"container-title":["Proceedings of the 10th International Conference on Knowledge Capture"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3360901.3364444","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3360901.3364444","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:44:37Z","timestamp":1750203877000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3360901.3364444"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,9,23]]},"references-count":30,"alternative-id":["10.1145\/3360901.3364444","10.1145\/3360901"],"URL":"https:\/\/doi.org\/10.1145\/3360901.3364444","relation":{},"subject":[],"published":{"date-parts":[[2019,9,23]]},"assertion":[{"value":"2019-09-23","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}