{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:08:10Z","timestamp":1777655290468,"version":"3.51.4"},"reference-count":27,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,4,14]],"date-time":"2024-04-14T00:00:00Z","timestamp":1713052800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,4,14]],"date-time":"2024-04-14T00:00:00Z","timestamp":1713052800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,4,14]]},"DOI":"10.1109\/icassp48485.2024.10447063","type":"proceedings-article","created":{"date-parts":[[2024,3,18]],"date-time":"2024-03-18T18:56:31Z","timestamp":1710788191000},"page":"936-940","source":"Crossref","is-referenced-by-count":18,"title":["Syncfusion: Multimodal Onset-Synchronized Video-to-Audio Foley Synthesis"],"prefix":"10.1109","author":[{"given":"Marco","family":"Comunit\u00e0","sequence":"first","affiliation":[{"name":"Queen Mary University of London,Centre for Digital Music,UK"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Riccardo F.","family":"Gramaccioni","sequence":"additional","affiliation":[{"name":"Sapienza University of Rome,Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Emilian","family":"Postolache","sequence":"additional","affiliation":[{"name":"Sapienza University of Rome,Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Emanuele","family":"Rodol\u00e0","sequence":"additional","affiliation":[{"name":"Sapienza University of Rome,Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Danilo","family":"Comminiello","sequence":"additional","affiliation":[{"name":"Sapienza University of Rome,Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Joshua D.","family":"Reiss","sequence":"additional","affiliation":[{"name":"Queen Mary University of London,Centre for Digital Music,UK"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Sparse in space and time: Audio-visual synchronisation with trainable selectors","volume-title":"British Machine Vision Conference","author":"Iashin"},{"key":"ref2","article-title":"Audio-visual synchronisation in the wild","volume":"abs\/2112.04432","author":"Chen","year":"2021"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10861"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3359998.3369405"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01385"},{"key":"ref6","article-title":"Poserac: Pose saliency transformer for repetitive action counting","volume":"abs\/2303.08450","author":"Yao","year":"2023"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01040"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/tmm.2022.3177894"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00940"},{"key":"ref10","article-title":"Diff-foley: Synchronized video-to-audio synthesis with latent diffusion models","volume":"abs\/2306.17203","author":"Luo","year":"2023"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i14.29475"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00240"},{"key":"ref13","article-title":"Multi-source diffusion models for simultaneous music generation and separation","volume":"abs\/2302.02257","author":"Mariani","year":"2023"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/icassp49357.2023.10096760"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN52387.2021.9534474"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413528"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095889"},{"key":"ref18","article-title":"Score-based generative modeling through stochastic differential equations","volume-title":"International Conference on Learning Representations","author":"Song"},{"key":"ref19","article-title":"Progressive distillation for fast sampling of diffusion models","volume-title":"International Conference on Learning Representations","author":"Salimans"},{"key":"ref20","article-title":"Denoising diffusion implicit models","volume-title":"International Conference on Learning Representations","author":"Song"},{"key":"ref21","article-title":"Mo\\\u02c6usai: Text-to-music generation with long-context latent diffusion","author":"Schneider","year":"2023"},{"key":"ref22","article-title":"Classifier-free diffusion guidance","volume-title":"NeurIPS 2021 Workshop on Deep Generative Models and Downstream Applications","author":"Ho"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.264"},{"key":"ref24","article-title":"Fr\\\u2019echet audio distance: A metric for evaluating music enhancement algorithms","author":"Kilgour","year":"2018"},{"key":"ref25","article-title":"The effectiveness of data augmentation in image classification using deep learning","volume":"abs\/1712.04621","author":"Perez","year":"2017"},{"key":"ref26","article-title":"Taming visually guided sound generation","volume-title":"British Machine Vision Conference (BMVC)","author":"Iashin"},{"key":"ref27","article-title":"Melgan: Generative adversarial networks for conditional waveform synthesis","volume-title":"Neural Information Processing Systems","author":"Kumar"}],"event":{"name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Seoul, Korea, Republic of","start":{"date-parts":[[2024,4,14]]},"end":{"date-parts":[[2024,4,19]]}},"container-title":["ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10445798\/10445803\/10447063.pdf?arnumber=10447063","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,2]],"date-time":"2024-08-02T04:39:57Z","timestamp":1722573597000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10447063\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,14]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/icassp48485.2024.10447063","relation":{},"subject":[],"published":{"date-parts":[[2024,4,14]]}}}