{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,25]],"date-time":"2026-06-25T11:21:18Z","timestamp":1782386478308,"version":"3.54.5"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,4,14]],"date-time":"2024-04-14T00:00:00Z","timestamp":1713052800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,4,14]],"date-time":"2024-04-14T00:00:00Z","timestamp":1713052800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,4,14]]},"DOI":"10.1109\/icasspw62465.2024.10625960","type":"proceedings-article","created":{"date-parts":[[2024,8,15]],"date-time":"2024-08-15T17:19:18Z","timestamp":1723742358000},"page":"505-509","source":"Crossref","is-referenced-by-count":12,"title":["Benchmarking Representations for Speech, Music, and Acoustic Events"],"prefix":"10.1109","author":[{"given":"Moreno","family":"La Quatra","sequence":"first","affiliation":[{"name":"Kore University of Enna"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Alkis","family":"Koudounas","sequence":"additional","affiliation":[{"name":"Politecnico di Torino"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lorenzo","family":"Vaiani","sequence":"additional","affiliation":[{"name":"Politecnico di Torino"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Elena","family":"Baralis","sequence":"additional","affiliation":[{"name":"Politecnico di Torino"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Luca","family":"Cagliero","sequence":"additional","affiliation":[{"name":"Politecnico di Torino"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Paolo","family":"Garza","sequence":"additional","affiliation":[{"name":"Politecnico di Torino"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sabato Marco","family":"Siniscalchi","sequence":"additional","affiliation":[{"name":"Universit&#x00E0; Degli Studi di Palermo"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","article-title":"wav2vec 2.0: a framework for self-supervised learning of speech representations","volume-title":"NeurIPS","author":"Baevski"},{"key":"ref2","article-title":"Codified audio language modeling learns useful representations for music information retrieval","volume-title":"ISMIR","author":"Castellon"},{"key":"ref3","article-title":"The efficacy of self-supervised speech models for audio representations","volume-title":"PMLR","author":"Wu"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2605"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3195430"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2022.3221007"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746790"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-1775"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-556"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3202093"},{"key":"ref12","article-title":"Hear: Holistic evaluation of audio representations","volume-title":"NeurIPS Competitions and Demonstrations Track","author":"Turian"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2806390"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2655045"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3133208"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1037\/emo0001048"},{"key":"ref17","article-title":"Fma: A dataset for music analysis","volume-title":"ISMIR","author":"Defferrard"},{"key":"ref18","article-title":"Evaluation of algorithms using games: The case of music tagging","volume-title":"ISMIR","author":"Law"},{"key":"ref19","article-title":"A comparison of sound segregation techniques for predominant instrument recognition in musical audio signals","volume-title":"ISMIR","author":"Bosch"},{"key":"ref20","article-title":"Deep convolutional networks on the pitch spiral for musical instrument recognition","volume-title":"ISMIR","author":"Lostanlen"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.1188976"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/j.jfranklin.2023.11.038"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.588"},{"key":"ref24","article-title":"EMOVO corpus: an Italian emotional speech database","author":"Costantini","journal-title":"LREC. 2014, European Language Resources Association (ELRA)"},{"key":"ref25","article-title":"Decoupled weight decay regularization","volume-title":"ICLR","author":"Loshchilov"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"ref27","article-title":"data2vec: A general framework for self-supervised learning in speech, vision and language","volume-title":"ICML","author":"Baevski"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-143"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-227"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i10.21315"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9052942"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1965"},{"key":"ref34","article-title":"Voxpopuli: A large-scale multilingual speech corpus for representation learning, semi-supervised learning and interpretation","volume-title":"ACL","author":"Changhan"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01011"}],"event":{"name":"2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","location":"Seoul, Korea, Republic of","start":{"date-parts":[[2024,4,14]]},"end":{"date-parts":[[2024,4,19]]}},"container-title":["2024 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10625769\/10625780\/10625960.pdf?arnumber=10625960","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,16]],"date-time":"2024-08-16T05:30:54Z","timestamp":1723786254000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10625960\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,14]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/icasspw62465.2024.10625960","relation":{},"subject":[],"published":{"date-parts":[[2024,4,14]]}}}