{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T07:01:57Z","timestamp":1764399717797,"version":"3.46.0"},"reference-count":23,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,22]]},"DOI":"10.1109\/apsipaasc65261.2025.11249391","type":"proceedings-article","created":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T18:40:26Z","timestamp":1764355226000},"page":"310-315","source":"Crossref","is-referenced-by-count":0,"title":["Singing MIDI Transcription with Music Language Models: Formulation and Comparison"],"prefix":"10.1109","author":[{"given":"Yu","family":"Sugimoto","sequence":"first","affiliation":[{"name":"Kyushu University,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jun-You","family":"Wang","sequence":"additional","affiliation":[{"name":"National Taiwan Normal University,Taiwan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Li","family":"Su","sequence":"additional","affiliation":[{"name":"Academia Sinica,Taiwan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eita","family":"Nakamura","sequence":"additional","affiliation":[{"name":"Kyushu University,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ASPAA.2005.1540233"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.23919\/EUSIPCO.2019.8902550"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1561\/116.20240016"},{"key":"ref4","first-page":"293","article-title":"VOCANO: A note transcription framework for singing voice in polyphonic music","author":"Hsu","year":"2021","journal-title":"ISMIR"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/taslp.2022.3224297"},{"key":"ref6","first-page":"454","article-title":"Mel-RoFormer for vocal separation and vocal melody transcription","author":"Wang","year":"2024","journal-title":"ISMIR"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TASLPRO.2025.3544064"},{"key":"ref8","first-page":"50","article-title":"Onsets and frames: Dual-objective piano transcription","author":"Hawthorne","year":"2018","journal-title":"ISMIR"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/taslp.2023.3328283"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462128"},{"key":"ref11","first-page":"359","article-title":"Tatum-level drum transcription based on a convolutional recurrent neural network with language model-based regularized training","author":"Ishizuka","year":"2020","journal-title":"APSIPA ASC"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1017\/ATSIP.2021.4"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref14","article-title":"Sequence transduction with recurrent neural networks","author":"Graves","year":"2012","journal-title":"preprint"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682336"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2533858"},{"key":"ref17","first-page":"454","article-title":"Blending acoustic and language model predictions for automatic music transcription","author":"Ycart","year":"2019","journal-title":"ISMIR"},{"key":"ref18","first-page":"421","article-title":"A study on LSTM networks for polyphonic music sequence modelling","author":"Ycart","year":"2017","journal-title":"ISMIR"},{"volume-title":"Demucs(v4)","year":"2022","author":"D\u00e9fossez","key":"ref19"},{"key":"ref20","first-page":"63","article-title":"Deep salience representations for F0 estimation in polyphonic music","author":"Bittner","year":"2017","journal-title":"ISMIR"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414601"},{"key":"ref22","first-page":"367","article-title":"MIR_EVAL: A transparent implementation of common MIR metrics","author":"Raffel","year":"2014","journal-title":"ISMIR"},{"key":"ref23","first-page":"567","article-title":"Evaluation framework for automatic singing transcription","author":"Molina","year":"2014","journal-title":"ISMIR"}],"event":{"name":"2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","start":{"date-parts":[[2025,10,22]]},"location":"Singapore, Singapore","end":{"date-parts":[[2025,10,24]]}},"container-title":["2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11248853\/11248968\/11249391.pdf?arnumber=11249391","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T07:00:32Z","timestamp":1764399632000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11249391\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,22]]},"references-count":23,"URL":"https:\/\/doi.org\/10.1109\/apsipaasc65261.2025.11249391","relation":{},"subject":[],"published":{"date-parts":[[2025,10,22]]}}}