{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T21:32:35Z","timestamp":1757626355393,"version":"3.44.0"},"publisher-location":"Cham","reference-count":47,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030865221"},{"type":"electronic","value":"9783030865238"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-86523-8_37","type":"book-chapter","created":{"date-parts":[[2021,9,10]],"date-time":"2021-09-10T06:05:16Z","timestamp":1631253916000},"page":"612-627","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["TaxoRef: Embeddings Evaluation for AI-driven Taxonomy Refinement"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0222-9365","authenticated-orcid":false,"given":"Lorenzo","family":"Malandri","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6864-2702","authenticated-orcid":false,"given":"Fabio","family":"Mercorio","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0399-2810","authenticated-orcid":false,"given":"Mario","family":"Mezzanzanica","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9964-097X","authenticated-orcid":false,"given":"Navid","family":"Nobani","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,9,11]]},"reference":[{"key":"37_CR1","doi-asserted-by":"crossref","unstructured":"Agirre, E., Alfonseca, E., Hall, K., Kravalova, J., Pasca, M., Soroa, A.: A study on similarity and relatedness using distributional and wordnet-based approaches (2009)","DOI":"10.3115\/1620754.1620758"},{"issue":"7","key":"37_CR2","doi-asserted-by":"publisher","first-page":"eaao6030","DOI":"10.1126\/sciadv.aao6030","volume":"4","author":"A Alabdulkareem","year":"2018","unstructured":"Alabdulkareem, A., Frank, M.R., Sun, L., AlShebli, B., Hidalgo, C., Rahwan, I.: Unpacking the polarization of workplace skills. Sci. Adv. 4(7), eaao6030 (2018)","journal-title":"Sci. Adv."},{"key":"37_CR3","doi-asserted-by":"crossref","unstructured":"Aly, R., Acharya, S., Ossa, A., K\u00f6hn, A., Biemann, C., Panchenko, A.: Every child should have parents: a taxonomy refinement algorithm based on hyperbolic term embeddings. In: ACL, pp. 4811\u20134817 (2019)","DOI":"10.18653\/v1\/P19-1474"},{"key":"37_CR4","doi-asserted-by":"crossref","unstructured":"Baroni, M., Dinu, G., Kruszewski, G.: Don\u2019t count, predict! a systematic comparison of context-counting vs. context-predicting semantic vectors. In: ACL (2014)","DOI":"10.3115\/v1\/P14-1023"},{"key":"37_CR5","doi-asserted-by":"crossref","unstructured":"Bordea, G., Buitelaar, P., Faralli, S., Navigli, R.: Semeval-2015 task 17: taxonomy extraction evaluation (texeval). In: SemEval, p. 902\u2013910 (2015)","DOI":"10.18653\/v1\/S15-2151"},{"key":"37_CR6","doi-asserted-by":"crossref","unstructured":"Bordea, G., Lefever, E., Buitelaar, P.: Semeval-2016 task 13: taxonomy extraction evaluation (texeval-2). In: SemEval, pp. 1081\u20131091 (2016)","DOI":"10.18653\/v1\/S16-1168"},{"key":"37_CR7","doi-asserted-by":"crossref","unstructured":"Boselli, R., Cesarini, M., Mercorio, F., Mezzanzanica, M.: Using machine learning for labour market intelligence. In: ECML-PKDD, vol. 10536, pp. 330\u2013342 (2017)","DOI":"10.1007\/978-3-319-71273-4_27"},{"key":"37_CR8","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1016\/j.future.2018.03.035","volume":"86","author":"R Boselli","year":"2018","unstructured":"Boselli, R., Cesarini, M., Mercorio, F., Mezzanzanica, M.: Classifying online job advertisements through machine learning. Future Gener. Comput. Syst. 86, 319\u2013328 (2018)","journal-title":"Future Gener. Comput. Syst."},{"key":"37_CR9","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1613\/jair.4135","volume":"49","author":"E Bruni","year":"2014","unstructured":"Bruni, E., Tran, N.K., Baroni, M.: Multimodal distributional semantics. JAIR 49, 1\u201347 (2014)","journal-title":"JAIR"},{"key":"37_CR10","doi-asserted-by":"publisher","first-page":"743","DOI":"10.1613\/jair.1.11259","volume":"63","author":"J Camacho-Collados","year":"2018","unstructured":"Camacho-Collados, J., Pilehvar, M.T.: From word to sense embeddings: a survey on vector representations of meaning. JAIR 63, 743\u2013788 (2018)","journal-title":"JAIR"},{"key":"37_CR11","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1613\/jair.953","volume":"16","author":"NV Chawla","year":"2002","unstructured":"Chawla, N.V., Bowyer, K.W., Hall, L.O., Kegelmeyer, W.P.: Smote: synthetic minority over-sampling technique. JAIR 16, 321\u2013357 (2002)","journal-title":"JAIR"},{"key":"37_CR12","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1016\/j.infoecopol.2019.05.003","volume":"47","author":"E Colombo","year":"2019","unstructured":"Colombo, E., Mercorio, F., Mezzanzanica, M.: Ai meets labor market: exploring the link between automation and skills. Inf. Econ. Policy 47, 27\u201337 (2019)","journal-title":"Inf. Econ. Policy"},{"key":"37_CR13","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint (2018)"},{"key":"37_CR14","doi-asserted-by":"crossref","unstructured":"Faruqui, M., Dodge, J., Jauhar, S.K., Dyer, C., Hovy, E., Smith, N.A.: Retrofitting word vectors to semantic lexicons. arXiv preprint arXiv:1411.4166 (2014)","DOI":"10.3115\/v1\/N15-1184"},{"key":"37_CR15","doi-asserted-by":"crossref","unstructured":"Frey, C.B., Osborne, M.A.: The future of employment: How susceptible are jobs to computerisation? technological forecasting and social change (2017)","DOI":"10.1016\/j.techfore.2016.08.019"},{"key":"37_CR16","doi-asserted-by":"crossref","unstructured":"Fu, R., Guo, J., Qin, B., Che, W., Wang, H., Liu, T.: Learning semantic hierarchies via word embeddings. In: ACL, pp. 1199\u20131209 (2014)","DOI":"10.3115\/v1\/P14-1113"},{"key":"37_CR17","unstructured":"Ghannay, S., Favre, B., Esteve, Y., Camelin, N.: Word embedding evaluation and combination. In: LREC (2016)"},{"key":"37_CR18","doi-asserted-by":"crossref","unstructured":"Giabelli, A., Malandri, L., Mercorio, F., Mezzanzanica, M., Seveso, A.: NEO: a tool for taxonomy enrichment with new emerging occupations. In: ISWC (2020)","DOI":"10.1007\/978-3-030-62466-8_35"},{"key":"37_CR19","doi-asserted-by":"crossref","unstructured":"Giabelli, A., Malandri, L., Mercorio, F., Mezzanzanica, M., Seveso, A.: NEO: a system for identifying new emerging occupation from job ads. In: AAAI (2021)","DOI":"10.1609\/aaai.v35i18.18004"},{"key":"37_CR20","doi-asserted-by":"publisher","unstructured":"Giabelli, A., Malandri, L., Mercorio, F., Mezzanzanica, M., Seveso, A.: Skills2job: a recommender system that encodes job offer embeddings on graph databases. Appl. Soft Comput. 101, 107049 (2021). https:\/\/doi.org\/10.1016\/j.asoc.2020.107049","DOI":"10.1016\/j.asoc.2020.107049"},{"key":"37_CR21","doi-asserted-by":"publisher","first-page":"675","DOI":"10.1613\/jair.1.11675","volume":"65","author":"T Jauhiainen","year":"2019","unstructured":"Jauhiainen, T., Lui, M., Zampieri, M., Baldwin, T., Lind\u00e9n, K.: Automatic language identification in texts: a survey. JAIR 65, 675\u2013782 (2019)","journal-title":"JAIR"},{"key":"37_CR22","unstructured":"Jiang, J.J., Conrath, D.W.: Semantic similarity based on corpus statistics and lexical taxonomy. arXiv preprint cmp-lg\/9709008 (1997)"},{"key":"37_CR23","doi-asserted-by":"crossref","unstructured":"Kiela, D., Hill, F., Clark, S.: Specializing word embeddings for similarity or relatedness. In: EMNLP, pp. 2044\u20132048 (2015)","DOI":"10.18653\/v1\/D15-1242"},{"key":"37_CR24","doi-asserted-by":"crossref","unstructured":"K\u00f6hn, A.: What\u2019s in an embedding? analyzing word embeddings through multilingual evaluation. In: EMNLP (2015)","DOI":"10.18653\/v1\/D15-1246"},{"key":"37_CR25","doi-asserted-by":"crossref","unstructured":"Lau, J.H., Baldwin, T.: An empirical evaluation of doc2vec with practical insights into document embedding generation. arXiv preprint arXiv:1607.05368 (2016)","DOI":"10.18653\/v1\/W16-1609"},{"issue":"2","key":"37_CR26","doi-asserted-by":"crossref","first-page":"265","DOI":"10.7551\/mitpress\/7287.003.0018","volume":"49","author":"C Leacock","year":"1998","unstructured":"Leacock, C., Chodorow, M.: Combining local context and wordnet similarity for word sense identification. WordNet Electron. Lexical Database 49(2), 265\u2013283 (1998)","journal-title":"WordNet Electron. Lexical Database"},{"key":"37_CR27","doi-asserted-by":"publisher","unstructured":"Malandri, L., Mercorio, F., Mezzanzanica, M., Nobani, N.: MEET-LM: a method for embeddings evaluation for taxonomic data in the labour market. Comput. Ind. 124 (2021). https:\/\/doi.org\/10.1016\/j.compind.2020.103341","DOI":"10.1016\/j.compind.2020.103341"},{"key":"37_CR28","doi-asserted-by":"crossref","unstructured":"Mann, H.B., Whitney, D.R.: On a test of whether one of two random variables is stochastically larger than the other. Ann. Math. Stat. (1947)","DOI":"10.1214\/aoms\/1177730491"},{"key":"37_CR29","unstructured":"Meng, Y., et al.: Spherical text embedding. In: NIPS (2019)"},{"issue":"2","key":"37_CR30","doi-asserted-by":"publisher","first-page":"144","DOI":"10.1016\/j.ipm.2014.07.007","volume":"51","author":"M Mezzanzanica","year":"2015","unstructured":"Mezzanzanica, M., Boselli, R., Cesarini, M., Mercorio, F.: A model-based evaluation of data quality activities in KDD. Inf. Process. Manag. 51(2), 144\u2013166 (2015)","journal-title":"Inf. Process. Manag."},{"key":"37_CR31","unstructured":"Mikolov, T., Sutskever, I., Chen, K., Corrado, G.S., Dean, J.: Distributed representations of words and phrases and their compositionality. In: NIPS (2013)"},{"key":"37_CR32","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1145\/219717.219748","volume":"38","author":"GA Miller","year":"1995","unstructured":"Miller, G.A.: Wordnet: a lexical database for English. Comm. ACM 38, 39\u201341 (1995)","journal-title":"Comm. ACM"},{"key":"37_CR33","doi-asserted-by":"crossref","unstructured":"Nguyen, K.A., K\u00f6per, M., Walde, S.S.i., Vu, N.T.: Hierarchical embeddings for hypernymy detection and directionality. arXiv preprint arXiv:1707.07273 (2017)","DOI":"10.18653\/v1\/D17-1022"},{"key":"37_CR34","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1016\/j.ympev.2016.12.006","volume":"107","author":"TD O\u2019Hara","year":"2017","unstructured":"O\u2019Hara, T.D., Hugall, A.F., Thuy, B., St\u00f6hr, S., Martynov, A.V.: Restructuring higher taxonomy using broad-scale phylogenomics: the living ophiuroidea. Molec. Phylogenet. Evol. 107, 415\u2013430 (2017)","journal-title":"Molec. Phylogenet. Evol."},{"key":"37_CR35","unstructured":"Pedersen, T., Patwardhan, S., Michelizzi, J., et al.: Wordnet: Similarity-measuring the relatedness of concepts. In: AAAI, vol. 4, pp. 25\u201329 (2004)"},{"key":"37_CR36","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.: Glove: global vectors for word representation. In: EMNLP, pp. 1532\u20131543 (2014)","DOI":"10.3115\/v1\/D14-1162"},{"key":"37_CR37","unstructured":"Ponzetto, S.P., Navigli, R.: Large-scale taxonomy mapping for restructuring and integrating wikipedia. In: IJCAI (2009)"},{"key":"37_CR38","doi-asserted-by":"crossref","unstructured":"Press, O., Wolf, L.: Using the output embedding to improve language models. In: EACL, p. 157 (2017)","DOI":"10.18653\/v1\/E17-2025"},{"key":"37_CR39","doi-asserted-by":"crossref","unstructured":"Radinsky, K., Agichtein, E., Gabrilovich, E., Markovitch, S.: A word at a time: computing word relatedness using temporal semantic analysis. In: WWW (2011)","DOI":"10.1145\/1963405.1963455"},{"key":"37_CR40","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1613\/jair.514","volume":"11","author":"P Resnik","year":"1999","unstructured":"Resnik, P.: Semantic similarity in a taxonomy: an information-based measure and its application to problems of ambiguity in natural language. JAIR 11, 95\u2013130 (1999)","journal-title":"JAIR"},{"key":"37_CR41","doi-asserted-by":"crossref","unstructured":"Schnabel, T., Labutov, I., Mimno, D., Joachims, T.: Evaluation methods for unsupervised word embeddings. In: EMNLP (2015)","DOI":"10.18653\/v1\/D15-1036"},{"key":"37_CR42","series-title":"Communications in Computer and Information Science","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/978-3-030-34518-1_7","volume-title":"Artificial Intelligence and Natural Language","author":"M Vinel","year":"2019","unstructured":"Vinel, M., Ryazanov, I., Botov, D., Nikolaev, I.: Experimental comparison of unsupervised approaches in the task of separating specializations within professions in job vacancies. In: Ustalov, D., Filchenkov, A., Pivovarova, L. (eds.) AINL 2019. CCIS, vol. 1119, pp. 99\u2013112. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-34518-1_7"},{"key":"37_CR43","doi-asserted-by":"crossref","unstructured":"Wang, B., Wang, A., Chen, F., Wang, Y., Kuo, C.C.J.: Evaluating word embedding models: methods and experimental results. In: APSIPA TSIP (2019)","DOI":"10.1017\/ATSIP.2019.12"},{"key":"37_CR44","doi-asserted-by":"crossref","unstructured":"Wang, C., He, X., Zhou, A.: A short survey on taxonomy learning from text corpora: issues, resources and recent advances. In: EMNLP (2017)","DOI":"10.18653\/v1\/D17-1123"},{"issue":"4","key":"37_CR45","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1080\/07421222.1996.11518099","volume":"12","author":"RY Wang","year":"1996","unstructured":"Wang, R.Y., Strong, D.M.: Beyond accuracy: what data quality means to data consumers. J. Manag. Inf. Syst. 12(4), 5\u201333 (1996)","journal-title":"J. Manag. Inf. Syst."},{"key":"37_CR46","doi-asserted-by":"crossref","unstructured":"Wu, Z., Palmer, M.: Verbs semantics and lexical selection. In: ACL (1994)","DOI":"10.3115\/981732.981751"},{"key":"37_CR47","doi-asserted-by":"crossref","unstructured":"Zhang, D., Liu, J., Zhu, H., Liu, Y., Wang, L., Xiong, H.: Job2vec: job title benchmarking with collective multi-view representation learning. In: CIKM (2019)","DOI":"10.1145\/3357384.3357825"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-86523-8_37","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T22:04:00Z","timestamp":1757455440000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-86523-8_37"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030865221","9783030865238"],"references-count":47,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-86523-8_37","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"11 September 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bilbao","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Spain","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 September 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 September 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2021.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"869","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"210","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3-9","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held online due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}