{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T21:14:19Z","timestamp":1776114859372,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,3,18]],"date-time":"2024-03-18T00:00:00Z","timestamp":1710720000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Sciences and Engineering Research Council of Canada (NSERC)","award":["IRCPJ 545100 - 18"],"award-info":[{"award-number":["IRCPJ 545100 - 18"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,3,18]]},"DOI":"10.1145\/3640543.3645158","type":"proceedings-article","created":{"date-parts":[[2024,4,5]],"date-time":"2024-04-05T18:23:12Z","timestamp":1712341392000},"page":"51-65","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["SynthScribe: Deep Multimodal Tools for Synthesizer Sound Retrieval and Exploration"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-3182-1498","authenticated-orcid":false,"given":"Stephen","family":"Brade","sequence":"first","affiliation":[{"name":"Department of Computer Science, University of Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9016-038X","authenticated-orcid":false,"given":"Bryan","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1438-2882","authenticated-orcid":false,"given":"Mauricio","family":"Sousa","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6474-7509","authenticated-orcid":false,"given":"Gregory Lee","family":"Newsome","sequence":"additional","affiliation":[{"name":"Faculty of Music, University of Toronto, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8130-3569","authenticated-orcid":false,"given":"Sageev","family":"Oore","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Dalhousie University, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0494-5373","authenticated-orcid":false,"given":"Tovi","family":"Grossman","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Toronto, Canada"}]}],"member":"320","published-online":{"date-parts":[[2024,4,5]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"[n. d.]. Max. Software. Available at https:\/\/cycling74.com\/products\/max."},{"key":"e_1_3_2_2_2_1","volume-title":"Clip Retrieval: Easily compute clip embeddings and build a clip retrieval system with them. https:\/\/github.com\/rom1504\/clip-retrieval.","author":"Beaumont Romain","year":"2022","unstructured":"Romain Beaumont. 2022. Clip Retrieval: Easily compute clip embeddings and build a clip retrieval system with them. https:\/\/github.com\/rom1504\/clip-retrieval."},{"key":"e_1_3_2_2_3_1","volume-title":"Dawdreamer: Bridging the gap between digital audio workstations and python interfaces. arXiv preprint arXiv:2111.09931","author":"Braun David","year":"2021","unstructured":"David Braun. 2021. Dawdreamer: Bridging the gap between digital audio workstations and python interfaces. arXiv preprint arXiv:2111.09931 (2021)."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.5555\/1921427.1921443"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1504\/JDR.2017.086749"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2501988.2502008"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2851581.2892414"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290605.3300599"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1423"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","unstructured":"Philippe Esling Naotake Masuda Adrien Bardet Romeo Despres and Axel Chemla-Romeu-Santos. 2019. Universal audio synthesizer control with normalizing flows. https:\/\/doi.org\/10.48550\/arXiv.1907.00971 arXiv:1907.00971 [cs eess stat].","DOI":"10.48550\/arXiv.1907.00971"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","unstructured":"Philippe Esling Naotake Masuda and Axel Chemla-Romeu-Santos. 2020. FlowSynth: Simplifying Complex Audio Generation Through Explorable Latent Spaces with Normalizing Flows Vol.\u00a05. 5273\u20135275. https:\/\/doi.org\/10.24963\/ijcai.2020\/767 ISSN: 1045-0823.","DOI":"10.24963\/ijcai.2020\/767"},{"key":"e_1_3_2_2_12_1","volume-title":"Flask web development: developing web applications with python. \" O\u2019Reilly Media","author":"Grinberg Miguel","unstructured":"Miguel Grinberg. 2018. Flask web development: developing web applications with python. \" O\u2019Reilly Media, Inc.\"."},{"key":"e_1_3_2_2_13_1","volume-title":"Development of NASA-TLX (Task Load Index): Results of empirical and theoretical research.[W]: PA Hancock","author":"Hart G","unstructured":"Sandra\u00a0G Hart and LE Staveland. 1988. Development of NASA-TLX (Task Load Index): Results of empirical and theoretical research.[W]: PA Hancock, N. Meshkati (Eds.): Human Mental Workload."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.2307\/3680541"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2557500.2557544"},{"key":"e_1_3_2_2_16_1","volume-title":"\u00a0W. Ellis","author":"Huang Qingqing","year":"2022","unstructured":"Qingqing Huang, Aren Jansen, Joonseok Lee, Ravi Ganti, Judith\u00a0Yue Li, and Daniel P.\u00a0W. Ellis. 2022. MuLan: A Joint Embedding of Music Audio and Natural Language. arxiv:2208.12415\u00a0[eess.AS]"},{"key":"e_1_3_2_2_17_1","volume-title":"International Conference on Learning Representations.","author":"Huang Sicong","year":"2018","unstructured":"Sicong Huang, Qiyang Li, Cem Anil, Xuchan Bao, Sageev Oore, and Roger\u00a0B Grosse. 2018. TimbreTron: A WaveNet (CycleGAN (CQT (Audio))) Pipeline for Musical Timbre Transfer. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_18_1","volume-title":"\u00a0T. Sturm","author":"Jonason Nicolas","year":"2022","unstructured":"Nicolas Jonason and Bob L.\u00a0T. Sturm. 2022. TimbreCLIP: Connecting Timbre to Text and Images. arxiv:2211.11225\u00a0[cs.SD]"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173735"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3526113.3545664"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/2642918.2647386"},{"key":"e_1_3_2_2_22_1","volume-title":"Transient attributes for high-level understanding and editing of outdoor scenes. ACM Transactions on graphics (TOG) 33, 4","author":"Laffont Pierre-Yves","year":"2014","unstructured":"Pierre-Yves Laffont, Zhile Ren, Xiaofeng Tao, Chao Qian, and James Hays. 2014. Transient attributes for high-level understanding and editing of outdoor scenes. ACM Transactions on graphics (TOG) 33, 4 (2014), 1\u201311."},{"key":"e_1_3_2_2_23_1","volume-title":"International Workshop on Computer Music and Audio Technology. Citeseer, 205","author":"Lai Yuyo","year":"2006","unstructured":"Yuyo Lai, Shyh-Kang Jeng, Der-Tzung Liu, and Yo-Chung Liu. 2006. Automated optimization of parameters for FM sound synthesis with genetic algorithms. In International Workshop on Computer Music and Audio Technology. Citeseer, 205."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/18.61115"},{"key":"e_1_3_2_2_25_1","unstructured":"Ilaria Manco. 2023. Multimodal Machine Learning for Music (MML4Music). https:\/\/github.com\/ilaria-manco\/multimodal-ml-music original-date: 2022-12-29T15:19:48Z."},{"key":"e_1_3_2_2_26_1","unstructured":"Naotake Masuda and Daisuke Saito. 2021. Synthesizer Sound Matching with Differentiable DSP.. In ISMIR. 428\u2013434."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3173943"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","unstructured":"Kyosuke Nakanishi Paul Haimes Tetsuaki Baba and Kumiko Kushiyama. 2016. NAKANISYNTH: An Intuitive Freehand Drawing Waveform Synthesiser Application for iOS Devices. https:\/\/doi.org\/10.5281\/zenodo.1176086 Pages: 143-145 Publication Title: Proceedings of the International Conference on New Interfaces for Musical Expression Publisher: Zenodo.","DOI":"10.5281\/zenodo.1176086"},{"key":"e_1_3_2_2_29_1","unstructured":"Alec Radford Jong\u00a0Wook Kim Chris Hallacy Aditya Ramesh Gabriel Goh Sandhini Agarwal Girish Sastry Amanda Askell Pamela Mishkin Jack Clark Gretchen Krueger and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. arxiv:2103.00020\u00a0[cs.CV]"},{"key":"e_1_3_2_2_30_1","unstructured":"Aditya Ramesh Prafulla Dhariwal Alex Nichol Casey Chu and Mark Chen. 2022. Hierarchical Text-Conditional Image Generation with CLIP Latents. arxiv:2204.06125\u00a0[cs.CV]"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"crossref","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2022. High-Resolution Image Synthesis with Latent Diffusion Models. arxiv:2112.10752\u00a0[cs.CV]","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_2_32_1","unstructured":"Kevin Schlei. 2012. TC-11: A Programmable Multi-Touch Synthesizer for the iPad.. In NIME."},{"key":"e_1_3_2_2_33_1","volume-title":"A Critical Analysis of Synthesizer User Interfaces for Timbre. Vol.\u00a02","author":"Seago Allan","unstructured":"Allan Seago, Simon Holland, and Paul Mulholland. 2004. A Critical Analysis of Synthesizer User Interfaces for Timbre. Vol.\u00a02. Research Press International, Bristol, UK, 105\u2013108. http:\/\/oro.open.ac.uk\/5688\/ Num Pages: 4."},{"key":"e_1_3_2_2_34_1","volume-title":"An intuitive control space for material appearance. arXiv preprint arXiv:1806.04950","author":"Serrano Ana","year":"2018","unstructured":"Ana Serrano, Diego Gutierrez, Karol Myszkowski, Hans-Peter Seidel, and Belen Masia. 2018. An intuitive control space for material appearance. arXiv preprint arXiv:1806.04950 (2018)."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3356590.3356598"},{"key":"e_1_3_2_2_36_1","volume-title":"Large-scale Contrastive Language-Audio Pretraining with Feature Fusion and Keyword-to-Caption Augmentation. In IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP.","author":"Yusong","year":"2023","unstructured":"Yusong Wu*, Ke Chen*, Tianyu Zhang*, Yuchen Hui*, Taylor Berg-Kirkpatrick, and Shlomo Dubnov. 2023. Large-scale Contrastive Language-Audio Pretraining with Feature Fusion and Keyword-to-Caption Augmentation. In IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2766908"}],"event":{"name":"IUI '24: 29th International Conference on Intelligent User Interfaces","location":"Greenville SC USA","acronym":"IUI '24","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["Proceedings of the 29th International Conference on Intelligent User Interfaces"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640543.3645158","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3640543.3645158","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T00:54:33Z","timestamp":1764550473000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640543.3645158"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,18]]},"references-count":37,"alternative-id":["10.1145\/3640543.3645158","10.1145\/3640543"],"URL":"https:\/\/doi.org\/10.1145\/3640543.3645158","relation":{},"subject":[],"published":{"date-parts":[[2024,3,18]]},"assertion":[{"value":"2024-04-05","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}