{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,9]],"date-time":"2025-10-09T20:58:08Z","timestamp":1760043488123,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":92,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100000001","name":"NSF GRFP","doi-asserted-by":"publisher","award":["1745302"],"award-info":[{"award-number":["1745302"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,3]]},"DOI":"10.1145\/3680528.3687679","type":"proceedings-article","created":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T08:14:37Z","timestamp":1733213677000},"page":"1-11","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Sketching With Your Voice: \"Non-Phonorealistic\" Rendering of Sounds via Vocal Imitation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-5061-4599","authenticated-orcid":false,"given":"Matthew","family":"Caren","sequence":"first","affiliation":[{"name":"Massachusetts Institute of Technology, Cambridge, Massachusetts, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1835-3707","authenticated-orcid":false,"given":"Kartik","family":"Chandra","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology, Cambridge, Massachusetts, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1925-2035","authenticated-orcid":false,"given":"Joshua","family":"Tenenbaum","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology, Cambridge, Massachusetts, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6243-9543","authenticated-orcid":false,"given":"Jonathan","family":"Ragan-Kelley","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology, Cambridge, Massachusetts, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4180-6433","authenticated-orcid":false,"given":"Karima","family":"Ma","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology, Cambridge, Massachusetts, United States of America"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,12,3]]},"reference":[{"key":"e_1_3_3_2_2_1","unstructured":"Sam Acquaviva Yewen Pu Marta Kryven Theodoros Sechopoulos Catherine Wong Gabrielle Ecanow Maxwell Nye Michael Tessler and Josh Tenenbaum. 2022. Communicating natural programs to humans and machines. Advances in Neural Information Processing Systems 35 (2022) 3731\u20133743."},{"key":"e_1_3_3_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/383259.383286"},{"key":"e_1_3_3_2_4_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1125"},{"key":"e_1_3_3_2_5_1","doi-asserted-by":"crossref","unstructured":"Pierre Badin. 1991. Fricative consonants: acoustic and X-ray measurements. Journal of phonetics 19 3-4 (1991) 397\u2013408.","DOI":"10.1016\/S0095-4470(19)30331-6"},{"key":"e_1_3_3_2_6_1","doi-asserted-by":"crossref","unstructured":"Pierre B\u00e9nard and Aaron Hertzmann. 2019. Line drawings from 3D models: A tutorial. Foundations and Trends\u00ae in Computer Graphics and Vision 11 1-2 (2019) 1\u2013159.","DOI":"10.1561\/0600000075"},{"key":"e_1_3_3_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2006.1660160"},{"key":"e_1_3_3_2_8_1","volume-title":"Audio Engineering Society Conference: 53rd International Conference: Semantic Audio","author":"Blancas David\u00a0Sanchez","year":"2014","unstructured":"David\u00a0Sanchez Blancas and Jordi Janer. 2014. Sound retrieval from voice imitation queries in collaborative databases. In Audio Engineering Society Conference: 53rd International Conference: Semantic Audio. Audio Engineering Society."},{"key":"e_1_3_3_2_9_1","first-page":"349","volume-title":"Symposium on Computer Animation","author":"Cardle Marc","year":"2003","unstructured":"Marc Cardle, Stephen Brooks, Ziv Bar-Joseph, and Peter Robinson. 2003. Sound-by-numbers: motion-driven sound synthesis.. In Symposium on Computer Animation. 349\u2013356."},{"key":"e_1_3_3_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/2702123.2702387"},{"key":"e_1_3_3_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00776"},{"key":"e_1_3_3_2_12_1","volume-title":"Proceedings of the Annual Meeting of the Cognitive Science Society","volume":"46","author":"Chandra Kartik","year":"2024","unstructured":"Kartik Chandra, Anne\u00a0HK Harrington, Katherine\u00a0M Collins, Christopher Kymn, Kushin Mukherjee, Sean\u00a0P Anderson, Arnav Verma, and Judith\u00a0E Fan. 2024. COGGRAPH: Building bridges between cognitive science and computer graphics. In Proceedings of the Annual Meeting of the Cognitive Science Society , Vol.\u00a046."},{"key":"e_1_3_3_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3528233.3530715"},{"key":"e_1_3_3_2_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3588432.3591510"},{"key":"e_1_3_3_2_15_1","doi-asserted-by":"crossref","unstructured":"Tao Chen Ming-Ming Cheng Ping Tan Ariel Shamir and Shi-Min Hu. 2009. Sketch2photo: Internet image montage. ACM transactions on graphics (TOG) 28 5 (2009) 1\u201310.","DOI":"10.1145\/1618452.1618470"},{"key":"e_1_3_3_2_16_1","volume-title":"Proceedings of the Annual Meeting of the Cognitive Science Society","volume":"46","author":"Chen Tony","year":"2024","unstructured":"Tony Chen, Sean\u00a0Dae Houlihan, Kartik Chandra, Josh Tenenbaum, and Rebecca Saxe. 2024. Intervening on Emotions by Planning Over a Theory of Mind. In Proceedings of the Annual Meeting of the Cognitive Science Society , Vol.\u00a046."},{"key":"e_1_3_3_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00981"},{"key":"e_1_3_3_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/1399504.1360687"},{"key":"e_1_3_3_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/1576246.1531334"},{"key":"e_1_3_3_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/1185657.1185776"},{"key":"e_1_3_3_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/1201775.882339"},{"key":"e_1_3_3_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/508530.508550"},{"key":"e_1_3_3_2_23_1","doi-asserted-by":"crossref","unstructured":"Mathias Eitz Kristian Hildebrand Tamy Boubekeur and Marc Alexa. 2010. Sketch-based image retrieval: Benchmark and bag-of-features descriptors. IEEE transactions on visualization and computer graphics 17 11 (2010) 1624\u20131636.","DOI":"10.1109\/TVCG.2010.266"},{"key":"e_1_3_3_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/1858171.1858195"},{"key":"e_1_3_3_2_25_1","doi-asserted-by":"crossref","unstructured":"Judith\u00a0E Fan Wilma\u00a0A Bainbridge Rebecca Chamberlain and Jeffrey\u00a0D Wammes. 2023. Drawing as a versatile cognitive tool. Nature Reviews Psychology 2 9 (2023) 556\u2013568.","DOI":"10.1038\/s44159-023-00212-w"},{"key":"e_1_3_3_2_26_1","unstructured":"Gunnar Fant. 1981. The source filter concept in voice production. STL-QPSR 1 1981 (1981) 21\u201337."},{"key":"e_1_3_3_2_27_1","doi-asserted-by":"crossref","unstructured":"Ila\u00a0R Fiete Michale\u00a0S Fee and H\u00a0Sebastian Seung. 2007. Model of birdsong learning based on gradient estimation by dynamic perturbation of neural conductances. Journal of neurophysiology 98 4 (2007) 2038\u20132057.","DOI":"10.1152\/jn.01311.2006"},{"key":"e_1_3_3_2_28_1","doi-asserted-by":"crossref","unstructured":"Jim Foley. 2000. Getting there: The ten top problems left. IEEE Computer Graphics and Applications 20 01 (2000) 66\u201368.","DOI":"10.1109\/38.814569"},{"key":"e_1_3_3_2_29_1","doi-asserted-by":"crossref","unstructured":"Eduardo Fonseca Xavier Favory Jordi Pons Frederic Font and Xavier Serra. 2021. FSD50K: an open dataset of human-labeled sound events. IEEE\/ACM Transactions on Audio Speech and Language Processing 30 (2021) 829\u2013852.","DOI":"10.1109\/TASLP.2021.3133208"},{"key":"e_1_3_3_2_30_1","unstructured":"Christian Frank. 2020. The machine learning behind hum to search. https:\/\/blog.research.google\/2020\/11\/the-machine-learning-behind-hum-to.html"},{"key":"e_1_3_3_2_31_1","doi-asserted-by":"publisher","unstructured":"Michael\u00a0C. Frank and Noah\u00a0D. Goodman. 2012. Predicting Pragmatic Reasoning in Language Games. Science 336 6084 (2012) 998\u2013998. 10.1126\/science.1218633","DOI":"10.1126\/science.1218633"},{"key":"e_1_3_3_2_32_1","unstructured":"Leon Gatys Alexander\u00a0S Ecker and Matthias Bethge. 2015. Texture synthesis using convolutional neural networks. Advances in neural information processing systems 28 (2015)."},{"key":"e_1_3_3_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"e_1_3_3_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/217279.215273"},{"key":"e_1_3_3_2_35_1","unstructured":"Bernard Gold. 1990. A History of Vocoder Research at Lincoln Laboratory. https:\/\/api.semanticscholar.org\/CorpusID:13577927"},{"key":"e_1_3_3_2_36_1","doi-asserted-by":"crossref","unstructured":"Noah\u00a0D Goodman and Michael\u00a0C Frank. 2016. Pragmatic language interpretation as probabilistic inference. Trends in cognitive sciences 20 11 (2016) 818\u2013829.","DOI":"10.1016\/j.tics.2016.08.005"},{"key":"e_1_3_3_2_37_1","doi-asserted-by":"crossref","unstructured":"Noah\u00a0D Goodman and Daniel Lassiter. 2015. Probabilistic semantics and pragmatics uncertainty in language and thought. The handbook of contemporary semantic theory (2015) 655\u2013686.","DOI":"10.1002\/9781118882139.ch21"},{"key":"e_1_3_3_2_38_1","doi-asserted-by":"publisher","DOI":"10.1163\/9789004368811_003"},{"key":"e_1_3_3_2_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/218380.218446"},{"key":"e_1_3_3_2_40_1","doi-asserted-by":"crossref","unstructured":"Aaron Hertzmann. 2024. New Insights in Smooth Occluding Contours for Nonphotorealistic Rendering. IEEE Computer Graphics and Applications 44 1 (2024) 76\u201385.","DOI":"10.1109\/MCG.2023.3338784"},{"key":"e_1_3_3_2_41_1","unstructured":"Matthias Hofer Tuan\u00a0Anh Le Roger Levy and Josh Tenenbaum. 2021. Learning evolved combinatorial symbols with a neuro-symbolic generative model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2104.08274 (2021)."},{"key":"e_1_3_3_2_42_1","doi-asserted-by":"crossref","unstructured":"Matthias Hofer and Roger\u00a0Philip Levy. 2019. Iconicity and structure in the emergence of combinatoriality. (2019).","DOI":"10.31234\/osf.io\/vsjkt"},{"key":"e_1_3_3_2_43_1","doi-asserted-by":"crossref","unstructured":"Rui Hu and John Collomosse. 2013. A performance evaluation of gradient field hog descriptor for sketch based image retrieval. Computer Vision and Image Understanding 117 7 (2013) 790\u2013806.","DOI":"10.1016\/j.cviu.2013.02.005"},{"key":"e_1_3_3_2_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1996.541110"},{"key":"e_1_3_3_2_45_1","doi-asserted-by":"crossref","unstructured":"Richard Hunt. 1923. The phonetics of bird-sound. The Condor 25 6 (1923) 202\u2013208.","DOI":"10.2307\/1362681"},{"key":"e_1_3_3_2_46_1","volume-title":"Proceedings of the Sixth Sound and Music Computing Conference (SMC 2010)","author":"Huq Arefin","year":"2010","unstructured":"Arefin Huq, Mark Cartwright, and Bryan Pardo. 2010. Crowdsourcing a real-world on-line query by humming system. In Proceedings of the Sixth Sound and Music Computing Conference (SMC 2010)."},{"key":"e_1_3_3_2_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/1185657.1185772"},{"key":"e_1_3_3_2_48_1","doi-asserted-by":"crossref","unstructured":"Doug\u00a0L James Jernej Barbi\u010d and Dinesh\u00a0K Pai. 2006. Precomputed acoustic transfer: output-sensitive accurate sound generation for geometrically complex vibration sources. ACM Transactions on Graphics (TOG) 25 3 (2006) 987\u2013995.","DOI":"10.1145\/1141911.1141983"},{"key":"e_1_3_3_2_49_1","volume-title":"Acoustic and Auditory Phonetics","author":"Johnson Keith","year":"2011","unstructured":"Keith Johnson. 2011. Acoustic and Auditory Phonetics. John Wiley & Sons."},{"key":"e_1_3_3_2_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/1179352.1141928"},{"key":"e_1_3_3_2_51_1","first-page":"148","volume-title":"DCASE","author":"Kim Bongjun","year":"2018","unstructured":"Bongjun Kim, Madhav Ghei, Bryan Pardo, and Zhiyao Duan. 2018. Vocal Imitation Set: a dataset of vocally imitated sound events using the AudioSet ontology. In DCASE. 148\u2013152."},{"key":"e_1_3_3_2_52_1","volume-title":"International Conference on Learning Representations","author":"Kong Zhifeng","year":"2020","unstructured":"Zhifeng Kong, Wei Ping, Jiaji Huang, Kexin Zhao, and Bryan Catanzaro. 2020. DiffWave: A Versatile Diffusion Model for Audio Synthesis. In International Conference on Learning Representations."},{"key":"e_1_3_3_2_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/2413097.2413104"},{"key":"e_1_3_3_2_54_1","doi-asserted-by":"crossref","unstructured":"Patricia\u00a0K Kuhl and Andrew\u00a0N Meltzoff. 1996. Infant vocalizations in response to speech: Vocal imitation and developmental change. The journal of the Acoustical Society of America 100 4 (1996) 2425\u20132438.","DOI":"10.1121\/1.417951"},{"key":"e_1_3_3_2_55_1","volume-title":"Thirty-seventh Conference on Neural Information Processing Systems","author":"Le Matthew","year":"2023","unstructured":"Matthew Le, Apoorv Vyas, Bowen Shi, Brian Karrer, Leda Sari, Rashel Moritz, Mary Williamson, Vimal Manohar, Yossi Adi, Jay Mahadeokar, and Wei-Ning Hsu. 2023. Voicebox: Text-Guided Multilingual Universal Speech Generation at Scale. In Thirty-seventh Conference on Neural Information Processing Systems. https:\/\/openreview.net\/forum?id=gzCS252hCO"},{"key":"e_1_3_3_2_56_1","doi-asserted-by":"crossref","unstructured":"Guillaume Lemaitre Olivier Houix Fr\u00e9d\u00e9ric Voisin Nicolas Misdariis and Patrick Susini. 2016. Vocal imitations of non-vocal sounds. PloS one 11 12 (2016) e0168167.","DOI":"10.1371\/journal.pone.0168167"},{"key":"e_1_3_3_2_57_1","doi-asserted-by":"crossref","unstructured":"Guillaume Lemaitre and Davide Rocchesso. 2014. On the effectiveness of vocal imitations and verbal descriptions of sounds. The journal of the Acoustical Society of America 135 2 (2014) 862\u2013873.","DOI":"10.1121\/1.4861245"},{"key":"e_1_3_3_2_58_1","doi-asserted-by":"crossref","unstructured":"Shiguang Liu Haonan Cheng and Yiying Tong. 2019. Physically-based statistical simulation of rain sound. ACM Transactions on Graphics (TOG) 38 4 (2019) 1\u201314.","DOI":"10.1145\/3306346.3323045"},{"key":"e_1_3_3_2_59_1","doi-asserted-by":"publisher","unstructured":"Josh\u00a0H. McDermott and Eero\u00a0P. Simoncelli. 2011. Sound Texture Perception via Statistics of the Auditory Periphery: Evidence from Sound Synthesis. Neuron 71 5 (2011) 926\u2013940. 10.1016\/j.neuron.2011.06.032","DOI":"10.1016\/j.neuron.2011.06.032"},{"key":"e_1_3_3_2_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASPAA.2009.5346467"},{"key":"e_1_3_3_2_61_1","first-page":"19","volume-title":"Proceedings of the 14th International Conference on Digital Audio Effects (DAFx-11), Paris, France","author":"Michon Romain","year":"2011","unstructured":"Romain Michon and Julius\u00a0O Smith. 2011. Faust-STK: a set of linear and nonlinear physical models for the Faust programming language. In Proceedings of the 14th International Conference on Digital Audio Effects (DAFx-11), Paris, France. 19\u201323."},{"key":"e_1_3_3_2_62_1","unstructured":"Kushin Mukherjee Holly Huey Xuanchen Lu Yael Vinker Rio Aguina-Kang Ariel Shamir and Judith Fan. 2024. SEVA: Leveraging sketches to evaluate alignment between human and machine visual abstraction. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_3_2_63_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-69808-9"},{"key":"e_1_3_3_2_64_1","doi-asserted-by":"publisher","DOI":"10.1145\/545261.545290"},{"key":"e_1_3_3_2_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096517"},{"key":"e_1_3_3_2_66_1","doi-asserted-by":"crossref","unstructured":"Yuki Okamoto Keisuke Imoto Shinnosuke Takamichi Ryosuke Yamanishi Takahiro Fukumori Yoichi Yamashita et\u00a0al. 2022. Onoma-to-wave: Environmental sound synthesis from onomatopoeic words. APSIPA Transactions on Signal and Information Processing 11 1 (2022).","DOI":"10.1561\/116.00000049"},{"key":"e_1_3_3_2_67_1","doi-asserted-by":"crossref","unstructured":"Luke Olsen Faramarz\u00a0F Samavati Mario\u00a0Costa Sousa and Joaquim\u00a0A Jorge. 2009. Sketch-based modeling: A survey. Computers & Graphics 33 1 (2009) 85\u2013103.","DOI":"10.1016\/j.cag.2008.09.013"},{"key":"e_1_3_3_2_68_1","first-page":"542","volume-title":"International computer music conference","author":"Orlarey Yann","year":"2002","unstructured":"Yann Orlarey, Dominique Fober, and St\u00e9phane Letz. 2002. An algebra for block diagram languages. In International computer music conference. 542\u2013547."},{"key":"e_1_3_3_2_69_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.264"},{"key":"e_1_3_3_2_70_1","doi-asserted-by":"crossref","unstructured":"Bryan Pardo Jonah Shifrin and William Birmingham. 2004. Name that tune: A pilot study in finding a melody from a sung query. Journal of the American Society for Information Science and Technology 55 4 (2004) 283\u2013300.","DOI":"10.1002\/asi.10373"},{"key":"e_1_3_3_2_71_1","doi-asserted-by":"crossref","unstructured":"Michelle\u00a0L Patterson and Janet\u00a0F Werker. 1999. Matching phonetic information in lips and voice is robust in 4.5-month-old infants. Infant Behavior and Development 22 2 (1999) 237\u2013247.","DOI":"10.1016\/S0163-6383(99)00003-X"},{"key":"e_1_3_3_2_72_1","unstructured":"Yewen Pu Kevin Ellis Marta Kryven Josh Tenenbaum and Armando Solar-Lezama. 2020. Program synthesis with pragmatic communication. Advances in Neural Information Processing Systems 33 (2020) 13249\u201313259."},{"key":"e_1_3_3_2_73_1","doi-asserted-by":"publisher","DOI":"10.1145\/1833349.1778805"},{"key":"e_1_3_3_2_74_1","unstructured":"Resemble AI. 2024. Resemble Enhance (GitHub repository). https:\/\/github.com\/resemble-ai\/resemble-enhance"},{"key":"e_1_3_3_2_75_1","volume-title":"Proc. the 1st Web Audio Conference (WAC)","author":"Roma Gerard","year":"2015","unstructured":"Gerard Roma and Xavier Serra. 2015. Querying Freesound with a microphone. In Proc. the 1st Web Audio Conference (WAC)."},{"key":"e_1_3_3_2_76_1","doi-asserted-by":"publisher","DOI":"10.1515\/9783110814750-033"},{"key":"e_1_3_3_2_77_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095702"},{"key":"e_1_3_3_2_78_1","first-page":"293","volume-title":"Computational auditory scene analysis","author":"Saint-Arnaud Nicolas","year":"1995","unstructured":"Nicolas Saint-Arnaud and Kris Popat. 1995. Analysis and synthesis of sound textures. In Computational auditory scene analysis. CRC Press, 293\u2013308."},{"key":"e_1_3_3_2_79_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.723"},{"key":"e_1_3_3_2_80_1","doi-asserted-by":"crossref","unstructured":"Vishnu Sarukkai Lu Yuan Mia Tang Maneesh Agrawala and Kayvon Fatahalian. 2024. Block and Detail: Scaffolding Sketch-to-Image Generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2402.18116 (2024).","DOI":"10.1145\/3654777.3676444"},{"key":"e_1_3_3_2_81_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581754.3584139"},{"key":"e_1_3_3_2_82_1","unstructured":"Neil Thapen. 2017. Pink Trombone. https:\/\/experiments.withgoogle.com\/pink-trombone"},{"key":"e_1_3_3_2_83_1","doi-asserted-by":"crossref","unstructured":"Barbara Tversky Masaki Suwa Maneesh Agrawala Julie Heiser Chris Stolte Pat Hanrahan Doantam Phan Jeff Klingner Marie-Paule Daniel Paul Lee et\u00a0al. 2003. Sketches for design and design of sketches. Human Behaviour in Design: Individuals Teams Tools (2003) 79\u201386.","DOI":"10.1007\/978-3-662-07811-2_9"},{"key":"e_1_3_3_2_84_1","unstructured":"Priyan Vaithilingam Yewen Pu and Elena\u00a0L Glassman. 2023. The Usability of Pragmatic Communication in Regular Expression Synthesis. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.06656 (2023)."},{"key":"e_1_3_3_2_85_1","doi-asserted-by":"publisher","DOI":"10.1145\/383259.383322"},{"key":"e_1_3_3_2_86_1","unstructured":"Aaron Van Den\u00a0Oord Sander Dieleman Heiga Zen Karen Simonyan Oriol Vinyals Alex Graves Nal Kalchbrenner Andrew Senior Koray Kavukcuoglu et\u00a0al. 2016. Wavenet: A generative model for raw audio. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1609.03499 12 (2016)."},{"key":"e_1_3_3_2_87_1","doi-asserted-by":"crossref","unstructured":"Yael Vinker Ehsan Pajouheshgar Jessica\u00a0Y Bo Roman\u00a0Christian Bachmann Amit\u00a0Haim Bermano Daniel Cohen-Or Amir Zamir and Ariel Shamir. 2022. Clipasso: Semantically-aware object sketching. ACM Transactions on Graphics (TOG) 41 4 (2022) 1\u201311.","DOI":"10.1145\/3528223.3530068"},{"key":"e_1_3_3_2_88_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298797"},{"key":"e_1_3_3_2_89_1","doi-asserted-by":"crossref","unstructured":"Jui-Hsien Wang Ante Qu Timothy\u00a0R Langlois and Doug\u00a0L James. 2018. Toward wave-based sound synthesis for computer animation. ACM Trans. Graph. 37 4 (2018) 109.","DOI":"10.1145\/3197517.3201318"},{"key":"e_1_3_3_2_90_1","doi-asserted-by":"crossref","unstructured":"Kangrui Xue Ryan\u00a0M Aronson Jui-Hsien Wang Timothy\u00a0R Langlois and Doug\u00a0L James. 2023. Improved Water Sound Synthesis using Coupled Bubbles. ACM Transactions on Graphics (TOG) 42 4 (2023) 1\u201313.","DOI":"10.1145\/3592424"},{"key":"e_1_3_3_2_91_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.93"},{"key":"e_1_3_3_2_92_1","doi-asserted-by":"crossref","unstructured":"Yichi Zhang and Zhiyao Duan. 2016. Supervised and unsupervised sound retrieval by vocal imitation. Journal of the Audio Engineering Society 64 7\/8 (2016) 533\u2013543.","DOI":"10.17743\/jaes.2016.0013"},{"key":"e_1_3_3_2_93_1","doi-asserted-by":"publisher","DOI":"10.1145\/1964921.1964933"}],"event":{"name":"SA '24: SIGGRAPH Asia 2024 Conference Papers","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"],"location":"Tokyo Japan","acronym":"SA '24"},"container-title":["SIGGRAPH Asia 2024 Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3680528.3687679","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3680528.3687679","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3680528.3687679","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:20Z","timestamp":1750295900000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3680528.3687679"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,3]]},"references-count":92,"alternative-id":["10.1145\/3680528.3687679","10.1145\/3680528"],"URL":"https:\/\/doi.org\/10.1145\/3680528.3687679","relation":{},"subject":[],"published":{"date-parts":[[2024,12,3]]},"assertion":[{"value":"2024-12-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}