{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T16:27:08Z","timestamp":1781108828445,"version":"3.54.1"},"reference-count":46,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"JST SPRING","award":["JPMJSP2140"],"award-info":[{"award-number":["JPMJSP2140"]}]},{"name":"JSPS KAKENHI","award":["JP21H05054"],"award-info":[{"award-number":["JP21H05054"]}]},{"name":"JSPS KAKENHI","award":["JP21H03500"],"award-info":[{"award-number":["JP21H03500"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/taslp.2023.3343614","type":"journal-article","created":{"date-parts":[[2023,12,15]],"date-time":"2023-12-15T19:49:34Z","timestamp":1702669774000},"page":"906-916","source":"Crossref","is-referenced-by-count":4,"title":["Improving Speech Translation Accuracy and Time Efficiency With Fine-Tuned wav2vec 2.0-Based Speech Segmentation"],"prefix":"10.1109","volume":"32","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-6213-3241","authenticated-orcid":false,"given":"Ryo","family":"Fukuda","sequence":"first","affiliation":[{"name":"Graduate School of Science and Technology, Nara Institute of Science and Technology, Ikoma, Japan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2122-9846","authenticated-orcid":false,"given":"Katsuhito","family":"Sudoh","sequence":"additional","affiliation":[{"name":"Graduate School of Science and Technology and the Data Science Center, Nara Institute of Science and Technology, Ikoma, Japan"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6956-3803","authenticated-orcid":false,"given":"Satoshi","family":"Nakamura","sequence":"additional","affiliation":[{"name":"Graduate School of Science and Technology and the Data Science Center, Nara Institute of Science and Technology, Ikoma, Japan"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-59"},{"key":"ref2","first-page":"2012","article-title":"MuST-C: A multilingual speech translation corpus","volume-title":"Proc. Conf. North Amer. Chapter Assoc. Comput. Linguistics: Hum. Lang. Technol.","author":"Gangi","year":"2019"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.248"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2014-511"},{"key":"ref5","first-page":"55","article-title":"Beyond voice activity detection: Hybrid audio segmentation for direct speech translation","volume-title":"Proc. 4th Int. Conf. Natural Lang. Speech Process.","author":"Gaido","year":"2021"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2008.4518807"},{"key":"ref7","first-page":"252","article-title":"Segmentation and punctuation prediction in speech language translation using a monolingual translation system","volume-title":"Proc. 9th Int. Workshop Spoken Lang. Transl.: Papers, Hong Kong, Table contents","author":"Cho","year":"2012"},{"key":"ref8","first-page":"12449","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume-title":"Proc. 34th Int. Conf. Neural Inf. Process. Syst.","author":"Baevski","year":"2020"},{"key":"ref9","first-page":"321","article-title":"Morphtagger: Hmm-based arabic segmentation for statistical machine translation","volume-title":"Proc. 7th Int. Workshop Spoken Lang. Transl., Papers","author":"Mansour","year":"2010"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.3115\/1596324.1596348"},{"key":"ref11","first-page":"177","article-title":"Better punctuation prediction with dynamic conditional random fields","volume-title":"Proc. Conf. Empirical Methods Natural Lang. Process.","author":"Lu","year":"2010"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.3115\/1613984.1614022"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.3115\/1220175.1220176"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2007-644"},{"key":"ref15","first-page":"230","article-title":"Segmentation strategies for streaming speech translation","volume-title":"Proc. Conf. North Amer. Chapter Assoc. Comput. Linguistics, Hum. Lang. Technol.","author":"Sridhar","year":"2013"},{"key":"ref16","first-page":"55","article-title":"Beyond voice activity detection: Hybrid audio segmentation for direct speech translation","volume-title":"Proc. 4th Int. Conf. Natural Lang. Speech Process.","author":"Gaido","year":"2021"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.iwslt-1.10"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.iwslt-1.11"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054358"},{"key":"ref20","first-page":"173","article-title":"Punctuation insertion for real-time spoken language translation","volume-title":"Proc. 11th Int. Workshop Spoken Lang. Transl.","author":"Cho","year":"2015"},{"key":"ref21","first-page":"62","article-title":"The kit translation systems for IWSLT 2015","volume-title":"Proc. 11th Int. Workshop Spoken Lang. Transl.","author":"Ha","year":"2015"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1320"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICSLP.1996.607773"},{"key":"ref24","first-page":"139","article-title":"An efficient and effective online sentence segmenter for simultaneous interpretation","volume-title":"Proc. 3rd Workshop Asian Transl.","author":"Wang","year":"2016"},{"key":"ref25","first-page":"1","article-title":"Online sentence segmentation for simultaneous interpretation using multi-shifted recurrent neural network","volume-title":"Proc. Mach. Transl. Summit XVII: Volume 1 Res. Track","author":"Wang","year":"2019"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.206"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-11382"},{"key":"ref28","first-page":"2790","article-title":"Parameter-efficient transfer learning for NLP","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Houlsby","year":"2019"},{"key":"ref29","article-title":"Towards a unified view of parameter-efficient transfer learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"He","year":"2021"},{"key":"ref30","first-page":"12991","article-title":"LST: Ladder side-tuning for parameter and memory efficient transfer learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Sung","year":"2022"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-4012"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.2307\/2340140"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.iwslt-1.23"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054626"},{"key":"ref35","first-page":"138","article-title":"Evaluating machine translation output with automatic sentence segmentation","volume-title":"Proc. 2nd Int. Workshop Spoken Lang. Transl.","author":"Matusov","year":"2005"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-6319"},{"key":"ref38","article-title":"BERTscore: Evaluating text generation with BERT","volume-title":"Proc. 8th Int. Conf. Learn. Representations","author":"Zhang","year":"2020"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.704"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-143"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.328"},{"key":"ref42","first-page":"33","article-title":"Fairseq S2T: Fast speech-to-text modeling with fairseq","volume-title":"Proc. 1st Conf. Asia-Pacific Chapter Assoc. Comput. Linguistics 10th Int. Joint Conf. Natural Lang. Process.: Syst. Demonstrations","author":"Wang","year":"2020"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2582"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.4000\/books.aaccademia.8585"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2020-2860"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.200"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/10304349\/10361556.pdf?arnumber=10361556","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T22:36:03Z","timestamp":1705098963000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10361556\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":46,"URL":"https:\/\/doi.org\/10.1109\/taslp.2023.3343614","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]}}}