{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T16:49:11Z","timestamp":1765039751626,"version":"3.28.0"},"reference-count":45,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,9,30]]},"DOI":"10.1109\/is262782.2024.10704165","type":"proceedings-article","created":{"date-parts":[[2024,10,7]],"date-time":"2024-10-07T17:42:20Z","timestamp":1728322940000},"page":"1-8","source":"Crossref","is-referenced-by-count":1,"title":["Large-Scale Room Impulse Response Dataset Compression with Neural Audio Codecs"],"prefix":"10.1109","author":[{"given":"Alessandro Ilic","family":"Mezza","sequence":"first","affiliation":[{"name":"DEIB, Politecnico di Milano,Milan,Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alberto","family":"Bernardini","sequence":"additional","affiliation":[{"name":"DEIB, Politecnico di Milano,Milan,Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fabio","family":"Antonacci","sequence":"additional","affiliation":[{"name":"DEIB, Politecnico di Milano,Milan,Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"journal-title":"ISO 3382\u20131:2009, International Organization for Standardization, Geneva, Switzerland","article-title":"Acoustics - measurement of room acoustic parameters. Part 1: performance spaces","year":"2009","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.2998299"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1002\/9781119279860"},{"volume-title":"Speech enhancement","year":"2006","author":"Benesty","key":"ref4"},{"key":"ref5","first-page":"861","article-title":"Auralization-an overview","volume":"41","author":"Kleiner","year":"1993","journal-title":"Journal of the Audio Engineering Society"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.3389\/frsip.2022.904866"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/SCVT.2011.6101302"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/IEEECONF59510.2023.10335480"},{"journal-title":"High fidelity neural audio compression","year":"2022","author":"D\u00e9fossez","key":"ref9"},{"issue":"8942","key":"ref10","article-title":"High-quality, low-delay music coding in the Opus codec","author":"Valin","year":"2013","journal-title":"Journal of the Audio Engineering Society"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7179063"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461487"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3129994"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747419"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746296"},{"journal-title":"HiFi-Codec: Group-residual vector quantization for high fidelity audio codec","year":"2023","author":"Yang","key":"ref16"},{"journal-title":"SpeechTokenizer: Unified speech tokenizer for speech large language models","year":"2023","author":"Zhang","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.314"},{"key":"ref19","article-title":"High-fidelity audio compression with improved RVQGAN","volume":"36","author":"Kumar","year":"2024","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10445966"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2024.3417347"},{"journal-title":"SemantiCodec: An ultra low bitrate semantic audio codec for general sound","year":"2024","author":"Liu","key":"ref22"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10448454"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA58266.2023.10248189"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2023.3306619"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1612524113"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSPW62465.2024.10626753"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICI2ST57350.2022.00017"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.61782\/fa.2023.1177"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2012.2189567"},{"issue":"3\u20131","key":"ref31","article-title":"Scattering delay network: An interactive reverberator for computer games","author":"De Sena","year":"2011","journal-title":"Journal of the Audio Engineering society"},{"journal-title":"Data-driven room acoustic modeling via differentiable feedback delay networks with learnable delay lines","year":"2024","author":"Mezza","key":"ref32"},{"article-title":"Modeling the frequency-dependent sound energy decay of acoustic environments with differentiable feedback delay networks","volume-title":"Proceedings of the 27th International Conference on Digital Audio Effects, 2024","author":"Mezza","key":"ref33"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.2514\/3.20031"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611974508"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1981.1056282"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2022.3202128"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2023.3240650"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1121\/1.1939454"},{"key":"ref40","article-title":"CSTR VCTK Corpus: English multi-speaker corpus for CSTR voice cloning toolkit (version 0.92)","author":"Yamagishi","year":"2019","journal-title":"University of Edinburgh. The Centre for Speech Technology Research (CSTR)"},{"key":"ref41","article-title":"Anechoic audio and 3D-video content database of small ensemble performances for virtual concerts","author":"Thery","year":"2019","journal-title":"International Congress on Acoustics (ICA)"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.5334\/jors.187"},{"journal-title":"Rec. ITU-R BS.1534\u20133, International Telecommunications Union, Geneva, Switzerland","article-title":"Method for the subjective assessment of intermediate quality level of audio systems","year":"2021","key":"ref43"},{"journal-title":"Rec. ITU-R BS.1770\u20134, International Telecommunications Union, Geneva, Switzerland","article-title":"Algorithms to measure audio programme loudness and true-peak audio level","year":"2023","key":"ref44"},{"issue":"10483","key":"ref45","article-title":"pyloudnorm: A simple yet flexible loudness meter in Python","author":"Steinmetz","year":"2021","journal-title":"Journal of the Audio Engineering Society"}],"event":{"name":"2024 IEEE 5th International Symposium on the Internet of Sounds (IS2)","start":{"date-parts":[[2024,9,30]]},"location":"Erlangen, Germany","end":{"date-parts":[[2024,10,2]]}},"container-title":["2024 IEEE 5th International Symposium on the Internet of Sounds (IS2)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10704037\/10704076\/10704165.pdf?arnumber=10704165","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T04:59:28Z","timestamp":1728363568000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10704165\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,30]]},"references-count":45,"URL":"https:\/\/doi.org\/10.1109\/is262782.2024.10704165","relation":{},"subject":[],"published":{"date-parts":[[2024,9,30]]}}}