{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T08:06:16Z","timestamp":1761897976573,"version":"3.40.3"},"publisher-location":"Cham","reference-count":36,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031564345"},{"type":"electronic","value":"9783031564352"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-56435-2_4","type":"book-chapter","created":{"date-parts":[[2024,3,19]],"date-time":"2024-03-19T06:51:26Z","timestamp":1710831086000},"page":"42-56","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["WikiMuTe: A Web-Sourced Dataset of\u00a0Semantic Descriptions for\u00a0Music Audio"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5760-6922","authenticated-orcid":false,"given":"Benno","family":"Weck","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0008-4655-729X","authenticated-orcid":false,"given":"Holger","family":"Kirchhoff","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5525-5233","authenticated-orcid":false,"given":"Peter","family":"Grosche","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1395-2345","authenticated-orcid":false,"given":"Xavier","family":"Serra","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,3,20]]},"reference":[{"key":"4_CR1","unstructured":"Agostinelli, A., Denk, T.I., Borsos, Z., Engel, J.H., et al.: MusicLM: generating music from Text. CoRR abs\/2301.11325 (2023)"},{"key":"4_CR2","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1016\/j.ipm.2015.03.004","volume":"52","author":"A Aljanaki","year":"2016","unstructured":"Aljanaki, A., Wiering, F., Veltkamp, R.C.: Studying emotion induced by music through a crowdsourcing game. Inf. Process. Manage. 52, 115\u2013128 (2016)","journal-title":"Inf. Process. Manage."},{"key":"4_CR3","doi-asserted-by":"crossref","unstructured":"Bertin-Mahieux, T., Eck, D., Mandel, M.: Automatic tagging of audio: the state-of-the-art. In: Machine Audition: Principles, Algorithms and Systems, pp. 334\u2013352. IGI Global (2011)","DOI":"10.4018\/978-1-61520-919-4.ch014"},{"key":"4_CR4","unstructured":"Bertin-Mahieux, T., Ellis, D., Whitman, B., Lamere, P.: The million song dataset. In: 12th International Society for Music Information Retrieval Conference, ISMIR 2011 (2011)"},{"key":"4_CR5","unstructured":"Bogdanov, D., Won, M., Tovstogan, P., Porter, A., Serra, X.: The MTG-Jamendo dataset for automatic music tagging. In: Machine Learning for Music Discovery Workshop, International Conference on Machine Learning (ICML 2019) (2019)"},{"key":"4_CR6","unstructured":"Defferrard, M., Benzi, K., Vandergheynst, P., Bresson, X.: FMA: a dataset for music analysis. In: 18th International Society for Music Information Retrieval Conference, ISMIR 2017. pp. 316\u2013323, Suzhou, China (2017)"},{"key":"4_CR7","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1. Minnesota (2019)"},{"key":"4_CR8","unstructured":"Doh, S., Choi, K., Lee, J., Nam, J.: LP-MusicCaps: LLM-based pseudo music captioning. In: 24th International Society for Music Information Retrieval Conference, ISMIR 2023. Milan, Italy (2023)"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"Doh, S., Won, M., Choi, K., Nam, J.: Toward universal text-to-music retrieval. In: 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2023)","DOI":"10.1109\/ICASSP49357.2023.10094670"},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Gemmeke, J.F., Ellis, D., Freedman, D., Jansen, A., et al.: Audio set: an ontology and human-labeled dataset for audio events. In: Proceedings of IEEE ICASSP 2017 (2017)","DOI":"10.1109\/ICASSP.2017.7952261"},{"issue":"10s","key":"4_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3505245","volume":"54","author":"R Gruetzemacher","year":"2022","unstructured":"Gruetzemacher, R., Paradice, D.: Deep Transfer learning & beyond: transformer language models in information systems research. ACM Comput. Surv. 54(10s), 1\u201335 (2022)","journal-title":"ACM Comput. Surv."},{"key":"4_CR12","unstructured":"Huang, Q., Jansen, A., Lee, J., Ganti, R., et al.: Mulan: a joint embedding of music audio and natural language. In: 23rd International Society for Music Information Retrieval Conference (ISMIR), Bengaluru, India (2022)"},{"key":"4_CR13","unstructured":"Huang, Q., Park, D.S., Wang, T., Denk, T.I., et al.: Noise2Music: text-conditioned Music Generation with Diffusion Models. CoRR abs\/2302.03917 (2023)"},{"issue":"2","key":"4_CR14","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1080\/09298210802479284","volume":"37","author":"P Lamere","year":"2008","unstructured":"Lamere, P.: Social tagging and music information retrieval. J. New Music Res. 37(2), 101\u2013114 (2008). https:\/\/doi.org\/10.1080\/09298210802479284","journal-title":"J. New Music Res."},{"key":"4_CR15","unstructured":"Law, E., West, K., Mandel, M.I., Bay, M., Downie, J.S.: Evaluation of algorithms using games: the case of music tagging. In: 10th International Society for Music Information Retrieval Conference, ISMIR 2009, Japan (2009)"},{"key":"4_CR16","doi-asserted-by":"crossref","unstructured":"Manco, I., Benetos, E., Quinton, E., Fazekas, G.: Learning music audio representations via weak language supervision. In: 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, Singapore (2022)","DOI":"10.1109\/ICASSP43922.2022.9746996"},{"key":"4_CR17","unstructured":"Manco, I., Benetos, E., Quinton, E., Fazekas, G.: Contrastive audio-language learning for music. In: 23rd Internationall Society for Music Information Retrieval Conference (ISMIR), Bengaluru, India (2022)"},{"key":"4_CR18","unstructured":"Manco, I., Weck, B., Doh, S., Won, M., Zhang, Y., Bogdanov, D., et al.: The Song Describer Dataset: a Corpus of Audio Captions for Music-and-Language Evaluation. In: Machine Learning for Audio Workshop at NeurIPS 2023 (2023)"},{"key":"4_CR19","unstructured":"Manco, I., Weck, B., Tovstogan, P., Bogdanov, D.: Song Describer: a Platform for Collecting Textual Descriptions of Music Recordings. In: Late-Breaking Demo Session of the 23rd Int\u2019l Society for Music Information Retrieval Conf. India (2022)"},{"key":"4_CR20","doi-asserted-by":"crossref","unstructured":"McKee, D., Salamon, J., Sivic, J., Russell, B.: Language-Guided Music Recommendation for Video via Prompt Analogies. In: 2023 IEEE\/CVF Conf. on Computer Vision and Pattern Recognition (CVPR). Canada (Jun 2023)","DOI":"10.1109\/CVPR52729.2023.01420"},{"issue":"1","key":"4_CR21","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1109\/MSP.2018.2874383","volume":"36","author":"J Nam","year":"2019","unstructured":"Nam, J., Choi, K., Lee, J., Chou, S.Y., Yang, Y.H.: Deep Learning for Audio-Based Music Classification and Tagging: Teaching Computers to Distinguish Rock from Bach. IEEE Signal Process. Mag. 36(1), 41\u201351 (2019)","journal-title":"IEEE Signal Process. Mag."},{"key":"4_CR22","doi-asserted-by":"publisher","first-page":"4","DOI":"10.5334\/tismir.10","volume":"1","author":"S Oramas","year":"2018","unstructured":"Oramas, S., Barbieri, F., Nieto, O., Serra, X.: Multimodal Deep Learning for Music Genre Classification. Transactions of the International Society for Music Information Retrieval 1, 4\u201321 (2018). https:\/\/doi.org\/10.5334\/tismir.10","journal-title":"Transactions of the International Society for Music Information Retrieval"},{"key":"4_CR23","unstructured":"Qi, D., Su, L., Song, J., Cui, E., et al.: ImageBERT: Cross-modal Pre-training with Large-scale Weak-supervised Image-Text Data. CoRR abs\/2001.07966 (2020)"},{"key":"4_CR24","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevych, I.: Sentence-BERT: sentence embeddings using Siamese BERT-networks. In: 2019 Conference on Empirical Methods in Natural Language Processing (2019)","DOI":"10.18653\/v1\/D19-1410"},{"key":"4_CR25","unstructured":"Sanh, V., Debut, L., Chaumond, J., Wolf, T.: Distilbert, a distilled version of BERT: smaller, faster, cheaper and lighter. CoRR abs\/1910.01108 (2019)"},{"key":"4_CR26","doi-asserted-by":"crossref","unstructured":"Schedl, M., Orio, N., Liem, C.C.S., Peeters, G.: A professionally annotated and enriched multimodal data set on popular music. In: 4th ACM Multimedia Systems Conference, pp. 78\u201383. ACM, Oslo Norway, February 2013","DOI":"10.1145\/2483977.2483985"},{"key":"4_CR27","unstructured":"Sohn, K.: improved deep metric learning with multi-class N-pair loss objective. In: Advances in Neural Information Processing Systems, vol. 29. Curran Associates, Inc. (2016)"},{"key":"4_CR28","doi-asserted-by":"crossref","unstructured":"Srinivasan, K., Raman, K., Chen, J., Bendersky, M., Najork, M.: WIT: Wikipedia-based image text dataset for multimodal multilingual machine learning. In: 44th International ACM SIGIR Conference on Research and Development in Information Retrieval. Virtual Event Canada (2021)","DOI":"10.1145\/3404835.3463257"},{"issue":"2","key":"4_CR29","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1080\/09298215.2014.894533","volume":"43","author":"BL Sturm","year":"2014","unstructured":"Sturm, B.L.: The state of the art ten years after a state of the art: future research in music information retrieval. J. New Music Res. 43(2), 147\u2013172 (2014)","journal-title":"J. New Music Res."},{"key":"4_CR30","unstructured":"Turnbull, D., Barrington, L., Lanckriet, G.: Five approaches to collecting tags for music. In: ISMIR 2008, 9th International Conference on Music Information Retrieval (2008)"},{"issue":"5","key":"4_CR31","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1109\/TSA.2002.800560","volume":"10","author":"G Tzanetakis","year":"2002","unstructured":"Tzanetakis, G., Cook, P.: Musical genre classification of audio signals. IEEE Trans. Speech Audio Process. 10(5), 293\u2013302 (2002)","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"4_CR32","unstructured":"Wang, W., Wei, F., Dong, L., Bao, H., et al.: MiniLM: deep self-attention distillation for task-agnostic compression of pre-trained transformers. In: Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020. Virtual (2020)"},{"key":"4_CR33","unstructured":"Wikipedia contributors: Take On Me - Wikipedia, The Free Encyclopedia (Sep 2023). https:\/\/en.wikipedia.org\/w\/index.php?title=Take_On_Me &oldid=1173253296"},{"key":"4_CR34","unstructured":"Won, M., Choi, K., Serra, X.: Semi-supervised music tagging transformer. In: 22nd International Society for Music Information Retrieval Conference, ISMIR 2021 (2021)"},{"key":"4_CR35","doi-asserted-by":"crossref","unstructured":"Won, M., Oramas, S., Nieto, O., Gouyon, F., Serra, X.: Multimodal metric learning for tag-based music retrieval. In: 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 591\u2013595, June 2021","DOI":"10.1109\/ICASSP39728.2021.9413514"},{"key":"4_CR36","doi-asserted-by":"crossref","unstructured":"Wu, Y., Chen, K., Zhang, T., Hui, Y., et al.: Large-scale contrastive language-audio pretraining with feature fusion and keyword-to-caption augmentation. In: 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), June 2023","DOI":"10.1109\/ICASSP49357.2023.10095969"}],"container-title":["Lecture Notes in Computer Science","MultiMedia Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-56435-2_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,19]],"date-time":"2024-03-19T07:03:27Z","timestamp":1710831807000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-56435-2_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031564345","9783031564352"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-56435-2_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"20 March 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MMM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Multimedia Modeling","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Amsterdam","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"The Netherlands","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 January 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 February 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mmm2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"ConfTool Pro","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"297","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"120","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"40% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.2","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}