{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:52:41Z","timestamp":1740102761776,"version":"3.37.3"},"reference-count":18,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,4,14]],"date-time":"2024-04-14T00:00:00Z","timestamp":1713052800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,4,14]],"date-time":"2024-04-14T00:00:00Z","timestamp":1713052800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,4,14]]},"DOI":"10.1109\/icassp48485.2024.10448509","type":"proceedings-article","created":{"date-parts":[[2024,3,18]],"date-time":"2024-03-18T18:56:31Z","timestamp":1710788191000},"page":"7990-7994","source":"Crossref","is-referenced-by-count":0,"title":["Binauralmusic: A Diverse Dataset for Improving Cross-Modal Binaural Audio Generation"],"prefix":"10.1109","author":[{"given":"Yunqi","family":"Li","sequence":"first","affiliation":[{"name":"Communication University of China,School of Data Science and Intelligent Media,Beijing,China,100024"}]},{"given":"Shulin","family":"Liu","sequence":"additional","affiliation":[{"name":"Communication University of China,School of Information and Communication Engineering,Beijing,China,100024"}]},{"given":"Haonan","family":"Cheng","sequence":"additional","affiliation":[{"name":"Communication University of China,State Key Laboratory of Media Convergence and Communication,Beijing,China,100024"}]},{"given":"Long","family":"Ye","sequence":"additional","affiliation":[{"name":"Communication University of China,State Key Laboratory of Media Convergence and Communication,Beijing,China,100024"}]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1109\/cvpr.2019.00041"},{"key":"ref2","first-page":"570","article-title":"The sound of pixels","volume-title":"Proceedings of the European Conference on Computer Vision","author":"Gan"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1109\/TMM.2018.2856090"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.5334\/tismir.146"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1109\/TASL.2009.2038819"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1109\/ICASSP.2013.6637776"},{"key":"ref7","article-title":"Self-supervised generation of spatial audio for 360 video","volume":"31","author":"Langlois","year":"2018","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref8","first-page":"52","article-title":"Sep-stereo: Visually guided stereophonic audio generation by associating source separation","volume-title":"Proceedings of the European Conference on Computer Vision","author":"Xu"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1145\/3478513.3480560"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1109\/iccv48922.2021.00194"},{"doi-asserted-by":"publisher","key":"ref11","DOI":"10.1109\/wacv51458.2022.00221"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.1109\/cvpr46437.2021.01523"},{"key":"ref13","article-title":"Faster r-cnn: Towards real-time object detection with region proposal networks","volume":"28","author":"Girshick","year":"2015","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"3","key":"ref14","first-page":"18","article-title":"Openimages: A public dataset for large-scale multi-label and multi-class image classification","volume-title":"Dataset available from https:\/\/github.com\/openimages","volume":"2","author":"Alldrin","year":"2017"},{"doi-asserted-by":"publisher","key":"ref15","DOI":"10.5555\/3454287.3455008"},{"volume-title":"Mathematics of the discrete fourier transform (dft): with audio applications","year":"2008","author":"Smith","key":"ref16"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1109\/CVPR46437.2021.00277"},{"key":"ref18","first-page":"2014","article-title":"Mir_eval: A transparent implementation of common mir metrics","volume":"10","author":"Humphrey","year":"2014","journal-title":"The International Society for Music Information Retrieval"}],"event":{"name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2024,4,14]]},"location":"Seoul, Korea, Republic of","end":{"date-parts":[[2024,4,19]]}},"container-title":["ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10445798\/10445803\/10448509.pdf?arnumber=10448509","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,3]],"date-time":"2024-08-03T04:55:02Z","timestamp":1722660902000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10448509\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,14]]},"references-count":18,"URL":"https:\/\/doi.org\/10.1109\/icassp48485.2024.10448509","relation":{},"subject":[],"published":{"date-parts":[[2024,4,14]]}}}