{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,3]],"date-time":"2026-03-03T00:49:30Z","timestamp":1772498970837,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,3,31]],"date-time":"2025-03-31T00:00:00Z","timestamp":1743379200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100010661","name":"Horizon 2020 Framework Programme","doi-asserted-by":"publisher","award":["101004703"],"award-info":[{"award-number":["101004703"]}],"id":[{"id":"10.13039\/100010661","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,3,31]]},"DOI":"10.1145\/3672608.3707718","type":"proceedings-article","created":{"date-parts":[[2025,5,14]],"date-time":"2025-05-14T18:30:17Z","timestamp":1747247417000},"page":"928-935","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["JobSet: Synthetic Job Advertisements Dataset for Labour Market Intelligence"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-4958-927X","authenticated-orcid":false,"given":"Samuele","family":"Colombo","sequence":"first","affiliation":[{"name":"University of Milano-Bicocca, Milan, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-2820-0277","authenticated-orcid":false,"given":"Simone","family":"D'Amico","sequence":"additional","affiliation":[{"name":"Department of Economics, Management and Statistics, University of Milano-Bicocca, Milan, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0222-9365","authenticated-orcid":false,"given":"Lorenzo","family":"Malandri","sequence":"additional","affiliation":[{"name":"Department of Statistics and Quantitative Methods, University of Milano-Bicocca, Milan, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6864-2702","authenticated-orcid":false,"given":"Fabio","family":"Mercorio","sequence":"additional","affiliation":[{"name":"Department of Statistics and Quantitative Methods, University of Milano-Bicocca, Milan, Italy"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7132-7703","authenticated-orcid":false,"given":"Andrea","family":"Seveso","sequence":"additional","affiliation":[{"name":"Department of Statistics and Quantitative Methods, University of Milano-Bicocca, Milan, Italy"}]}],"member":"320","published-online":{"date-parts":[[2025,5,14]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"AI@Meta. 2024. Llama 3 Model Card. (2024). https:\/\/github.com\/meta-llama\/llama3\/blob\/main\/MODEL_CARD.md"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.coling-main.513"},{"key":"e_1_3_2_1_3_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877\u20131901."},{"key":"e_1_3_2_1_4_1","unstructured":"CEDEFOP. 2014. Real-time Labour Market information on skill requirements: feasibility study and working prototype\". https:\/\/goo.gl\/qNjmrn."},{"key":"e_1_3_2_1_5_1","unstructured":"CEDEFOP. 2016. Real-time Labour Market information on Skill Requirements: Setting up the EU system for online vacancy analysis. https:\/\/goo.gl\/5FZS3E."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.34"},{"key":"e_1_3_2_1_7_1","volume-title":"Large language models as batteries-included zero-shot ESCO skills matchers. arXiv preprint arXiv:2307.03539","author":"Clavi\u00e9 Benjamin","year":"2023","unstructured":"Benjamin Clavi\u00e9 and Guillaume Souli\u00e9. 2023. Large language models as batteries-included zero-shot ESCO skills matchers. arXiv preprint arXiv:2307.03539 (2023)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.infoecopol.2019.05.003"},{"key":"e_1_3_2_1_9_1","volume-title":"Johannes Deleu, Chris Develder, and Thomas Demeester.","author":"Decorte Jens-Joris","year":"2023","unstructured":"Jens-Joris Decorte, Severine Verlinden, Jeroen Van Hautte, Johannes Deleu, Chris Develder, and Thomas Demeester. 2023. Extreme multi-label skill extraction training using large language models. arXiv preprint arXiv:2307.10778 (2023)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","unstructured":"European Organization For Nuclear Research and OpenAIRE. 2013. Zenodo. 10.25495\/7GXK-RD71","DOI":"10.25495\/7GXK-RD71"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2023.107779"},{"key":"e_1_3_2_1_12_1","volume-title":"Proceedings, Part II 19","author":"Giabelli Anna","year":"2020","unstructured":"Anna Giabelli, Lorenzo Malandri, Fabio Mercorio, Mario Mezzanzanica, and Andrea Seveso. 2020. NEO: A tool for taxonomy enrichment with new emerging occupations. In The Semantic Web-ISWC 2020: 19th International Semantic Web Conference, Athens, Greece, November 2\u20136, 2020, Proceedings, Part II 19, a (Ed.). Springer, 568\u2013584."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2020.107049"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i08.7038"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3571730"},{"key":"e_1_3_2_1_16_1","first-page":"34586","article-title":"Factuality enhanced language models for open-ended text generation","volume":"35","author":"Lee Nayeon","year":"2022","unstructured":"Nayeon Lee, Wei Ping, Peng Xu, Mostofa Patwary, Pascale N Fung, Mohammad Shoeybi, and Bryan Catanzaro. 2022. Factuality enhanced language models for open-ended text generation. Advances in Neural Information Processing Systems 35 (2022), 34586\u201334599.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_17_1","volume-title":"AnglE-optimized Text Embeddings. arXiv preprint arXiv:2309.12871","author":"Li Xianming","year":"2023","unstructured":"Xianming Li and Jing Li. 2023. AnglE-optimized Text Embeddings. arXiv preprint arXiv:2309.12871 (2023)."},{"key":"e_1_3_2_1_18_1","volume-title":"Towards general text embeddings with multi-stage contrastive learning. arXiv preprint arXiv:2308.03281","author":"Li Zehan","year":"2023","unstructured":"Zehan Li, Xin Zhang, Yanzhao Zhang, Dingkun Long, Pengjun Xie, and Meishan Zhang. 2023. Towards general text embeddings with multi-stage contrastive learning. arXiv preprint arXiv:2308.03281 (2023)."},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the First Workshop on Natural Language Processing for Human Resources (NLP4HR","author":"Magron Antoine","year":"2024","unstructured":"Antoine Magron, Anna Dai, Mike Zhang, Syrielle Montariol, and Antoine Bosselut. 2024. JobSkape: A Framework for Generating Synthetic Job Postings to Enhance Skill Matching. In Proceedings of the First Workshop on Natural Language Processing for Human Resources (NLP4HR 2024). 43\u201358."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-43458-7_34"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.eacl-main.148"},{"key":"e_1_3_2_1_22_1","volume-title":"Synthetic data for deep learning","author":"Nikolenko Sergey I","unstructured":"Sergey I Nikolenko. 2021. Synthetic data for deep learning. Vol. 174. Springer."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2024.3352100"},{"key":"e_1_3_2_1_24_1","volume-title":"Social Networks and Texts: 7th International Conference, AIST 2018","author":"Sayfullina Luiza","year":"2018","unstructured":"Luiza Sayfullina, Eric Malmi, and Juho Kannala. 2018. Learning representations for soft skill matching. In Analysis of Images, Social Networks and Texts: 7th International Conference, AIST 2018, Moscow, Russia, July 5\u20137, 2018, Revised Selected Papers 7. Springer, 141\u2013152."},{"key":"e_1_3_2_1_25_1","unstructured":"Darius Koenig Julius Lipp Sean Lee Aamir Shakir. 2024. Open Source Strikes Bread - New Fluffy Embeddings Model. https:\/\/www.mixedbread.ai\/blog\/mxbai-embed-large-v1"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/SDS.2019.000-3"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/IRI49571.2020.00063"},{"key":"e_1_3_2_1_28_1","volume-title":"Akhil Arora, Martin Josifoski, Ashton Anderson, and Robert West.","author":"Veselovsky Veniamin","year":"2023","unstructured":"Veniamin Veselovsky, Manoel Horta Ribeiro, Akhil Arora, Martin Josifoski, Ashton Anderson, and Robert West. 2023. Generating Faithful Synthetic Data with Large Language Models: A Case Study in Computational Social Science. arXiv preprint arXiv:2305.15041 (2023)."},{"key":"e_1_3_2_1_29_1","volume-title":"C-Pack: Packaged Resources To Advance General Chinese Embedding. arXiv preprint arXiv:2309.07597","author":"Xiao Shitao","year":"2023","unstructured":"Shitao Xiao, Zheng Liu, Peitian Zhang, and Niklas Muennighof. 2023. C-Pack: Packaged Resources To Advance General Chinese Embedding. arXiv preprint arXiv:2309.07597 (2023)."},{"key":"e_1_3_2_1_30_1","unstructured":"Shitao Xiao Zheng Liu Peitian Zhang and Niklas Muennighoff. 2023. C-Pack: Packaged Resources To Advance General Chinese Embedding. arXiv:2309.07597 [cs.CL]"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3512467"},{"key":"e_1_3_2_1_32_1","volume-title":"Large language model as attributed training data generator: A tale of diversity and bias. Advances in Neural Information Processing Systems 36","author":"Yu Yue","year":"2024","unstructured":"Yue Yu, Yuchen Zhuang, Jieyu Zhang, Yu Meng, Alexander J Ratner, Ranjay Krishna, Jiaming Shen, and Chao Zhang. 2024. Large language model as attributed training data generator: A tale of diversity and bias. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.366"},{"key":"e_1_3_2_1_34_1","volume-title":"Rob van der Goot, and Barbara Plank.","author":"Zhang Mike","year":"2022","unstructured":"Mike Zhang, Kristian N\u00f8rgaard Jensen, Rob van der Goot, and Barbara Plank. 2022. Skill extraction from job postings using weak supervision. arXiv preprint arXiv:2209.08071 (2022)."},{"key":"e_1_3_2_1_35_1","volume-title":"ESCOXLM-R: Multilingual Taxonomy-driven Pre-training for the Job Market Domain. arXiv preprint arXiv:2305.12092","author":"Zhang Mike","year":"2023","unstructured":"Mike Zhang, Rob van der Goot, and Barbara Plank. 2023. ESCOXLM-R: Multilingual Taxonomy-driven Pre-training for the Job Market Domain. arXiv preprint arXiv:2305.12092 (2023)."}],"event":{"name":"SAC '25: 40th ACM\/SIGAPP Symposium on Applied Computing","location":"Catania International Airport Catania Italy","acronym":"SAC '25","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"]},"container-title":["Proceedings of the 40th ACM\/SIGAPP Symposium on Applied Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3672608.3707718","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3672608.3707718","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:06:14Z","timestamp":1750291574000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3672608.3707718"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,31]]},"references-count":35,"alternative-id":["10.1145\/3672608.3707718","10.1145\/3672608"],"URL":"https:\/\/doi.org\/10.1145\/3672608.3707718","relation":{},"subject":[],"published":{"date-parts":[[2025,3,31]]},"assertion":[{"value":"2025-05-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}