{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,2]],"date-time":"2024-10-02T04:09:11Z","timestamp":1727842151538},"reference-count":25,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T00:00:00Z","timestamp":1721001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T00:00:00Z","timestamp":1721001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,7,15]]},"DOI":"10.1109\/icme57554.2024.10688330","type":"proceedings-article","created":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T17:24:16Z","timestamp":1727717056000},"page":"1-6","source":"Crossref","is-referenced-by-count":0,"title":["TEAdapter: Supply Vivid Guidance for Controllable Text-to-Music Generation"],"prefix":"10.1109","author":[{"given":"Jialing","family":"Zou","sequence":"first","affiliation":[{"name":"East China Normal University,Shanghai,China"}]},{"given":"Jiahao","family":"Mei","sequence":"additional","affiliation":[{"name":"East China Normal University,Shanghai,China"}]},{"given":"XuDong","family":"Nan","sequence":"additional","affiliation":[{"name":"East China Normal University,Shanghai,China"}]},{"given":"Jinghua","family":"Li","sequence":"additional","affiliation":[{"name":"East China Normal University,Shanghai,China"}]},{"given":"Daoguo","family":"Dong","sequence":"additional","affiliation":[{"name":"East China Normal University,Shanghai,China"}]},{"given":"Liang","family":"He","sequence":"additional","affiliation":[{"name":"East China Normal University,Shanghai,China"}]}],"member":"263","reference":[{"article-title":"Adding conditional control to text-to-image diffusion models","volume-title":"ICCV","author":"Zhang","key":"ref1"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i5.28226"},{"journal-title":"Simple and controllable music generation","year":"2023","author":"Copet","key":"ref3"},{"article-title":"AudioLDM: Text-to-audio generation with latent diffusion models","volume-title":"ICML","author":"Liu","key":"ref4"},{"article-title":"Symphony generation with permutation invariant language model","volume-title":"ISMIR","author":"Liu","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3414032"},{"journal-title":"Noise2music: Text-conditioned music generation with diffusion models","year":"2023","author":"Huang","key":"ref7"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"journal-title":"Uni-controlnet: All-in-one control to text-to-image diffusion models","year":"2023","author":"Zhao","key":"ref9"},{"journal-title":"Learning interpretable representation for controllable polyphonic music generation","year":"2020","author":"Wang","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/taslp.2024.3399026"},{"journal-title":"Audioldm 2: Learning holistic audio generation with self-supervised pretraining","year":"2023","author":"Liu","key":"ref12"},{"journal-title":"Masked autoencoders that listen","year":"2022","author":"Huang","key":"ref13"},{"article-title":"librosa: Audio and music signal analysis in python","year":"2023","author":"McFee","key":"ref14"},{"article-title":"A feature smoothing method for chord recognition using recurrence plots","volume-title":"ISMIR","author":"Cho","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096110"},{"article-title":"Openmic-2018: An open data-set for multiple instrument recognition","volume-title":"ISMIR","author":"Humphrey","key":"ref17"},{"article-title":"Design and creation of a large-scale database of structural annotations","volume-title":"ISMIR","author":"Smith","key":"ref18"},{"article-title":"Systematic exploration of computational music structure research","volume-title":"ISMIR","author":"Nieto","key":"ref19"},{"article-title":"FMA: A dataset for music analysis","volume-title":"ISMIR","author":"Defferrard","key":"ref20"},{"journal-title":"Musiclm: Generating music from text","year":"2023","author":"Agostinelli","key":"ref21"},{"key":"ref22","doi-asserted-by":"crossref","DOI":"10.1109\/TASL.2010.2096216","article-title":"Extracting predominant local pulse information from music recordings","volume-title":"TASLP","author":"Grosche"},{"journal-title":"Fr\u00e9chet audio distance: A metric for evaluating music enhancement algorithms","year":"2018","author":"Kilgour","key":"ref23"},{"journal-title":"Efficient training of audio transformers with patchout","year":"2021","author":"Koutini","key":"ref24"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095969"}],"event":{"name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","start":{"date-parts":[[2024,7,15]]},"location":"Niagara Falls, ON, Canada","end":{"date-parts":[[2024,7,19]]}},"container-title":["2024 IEEE International Conference on Multimedia and Expo (ICME)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10685847\/10687354\/10688330.pdf?arnumber=10688330","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T06:42:30Z","timestamp":1727764950000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10688330\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,15]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/icme57554.2024.10688330","relation":{},"subject":[],"published":{"date-parts":[[2024,7,15]]}}}