{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,9]],"date-time":"2025-02-09T01:40:10Z","timestamp":1739065210224,"version":"3.37.0"},"posted":{"date-parts":[[2025]]},"group-title":"SSRN","reference-count":61,"publisher":"Elsevier BV","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"DOI":"10.2139\/ssrn.5123047","type":"posted-content","created":{"date-parts":[[2025,2,9]],"date-time":"2025-02-09T01:00:42Z","timestamp":1739062842000},"source":"Crossref","is-referenced-by-count":0,"title":["A Review of Voicing Decision in Whispered Speech: From Rules to Machine Learning"],"prefix":"10.2139","author":[{"given":"Jo\u00e3o  Miguel Pinto Pereira","family":"da Silva","sequence":"first","affiliation":[]},{"given":"Gon\u00e7alo","family":"Duarte Nunes","sequence":"additional","affiliation":[]},{"given":"An\u00edbal","family":"Ferreira","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"7","key":"ref1","doi-asserted-by":"crossref","first-page":"1494","DOI":"10.1016\/S1364-6613(00)01494-7","article-title":"The evolution of speech: A comparative review","volume":"4","author":"W T Fitch","year":"2000","journal-title":"Trends in cognitive sciences"},{"key":"ref2","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511620539","author":"H H Clark","year":"1996","journal-title":"Using Language"},{"key":"ref3","article-title":"An introduction to the study of speech","volume":"1","author":"E Sapir","year":"1921","journal-title":"Language"},{"key":"ref4","author":"M J Pickering","year":"2021","journal-title":"Understanding Dialogue: Language Use and Social Interaction"},{"key":"ref5","author":"L Rabiner","year":"1993","journal-title":"Fundamentals of Speech Recognition"},{"issue":"6","key":"ref6","doi-asserted-by":"crossref","first-page":"4002","DOI":"10.1121\/10.0002952","article-title":"Acoustic differences between voiced and whispered speech in gender diverse speakers","volume":"148","author":"N Houle","year":"2020","journal-title":"The Journal of the Acoustical Society of America"},{"key":"ref7","author":"J Silva","journal-title":"Flexible parametric implantation of voicing in whispered speech under scarce training data"},{"key":"ref8","first-page":"416","year":"2021","journal-title":"European Signal Processing Conference (EUSIPCO)"},{"key":"ref9","author":"B P Lim","year":"2011","journal-title":"Computational Differences between Whispered and Non-Whispered Speech"},{"key":"ref10","author":"M H M Mateus","year":"1979","journal-title":"Fon\ufffdtica e Fonologia"},{"key":"ref11","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9781139166621","author":"J Laver","year":"1994","journal-title":"Principles of phonetics"},{"issue":"2","key":"ref12","doi-asserted-by":"crossref","first-page":"139","DOI":"10.1016\/j.specom.2003.10.005","article-title":"Analysis and recognition of whispered speech","volume":"45","author":"T Ito","year":"2005","journal-title":"Speech communication"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"701","DOI":"10.1109\/ICASSP40776.2020.9054496","article-title":"Impact of a shift-invariant harmonic phase model in fully parametric harmonic voice representation and time\/frequency synthesis","author":"A Ferreira","year":"2020","journal-title":"ICASSP 2020 -2020 IEEE International Conference on Acoustics, Speech and Signal Processing"},{"issue":"2","key":"ref14","doi-asserted-by":"crossref","first-page":"97","DOI":"10.1016\/S0892-1997(05)80339-X","article-title":"Vocal tract acoustics","volume":"7","author":"R D Kent","year":"1993","journal-title":"Journal of Voice"},{"key":"ref15","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-642-17478-0","author":"S A Fulop","year":"2011","journal-title":"Speech Spectrum Analysis"},{"key":"ref16","doi-asserted-by":"crossref","first-page":"186","DOI":"10.1109\/LSP.2019.2961213","article-title":"Voice conversion for whispered speech synthesis","volume":"27","author":"M Cotescu","year":"2020","journal-title":"IEEE Signal Processing Letters"},{"issue":"2","key":"ref17","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1159\/000265721","article-title":"Quantitative study of whisper","volume":"36","author":"P Monoson","year":"1984","journal-title":"Folia Phoniatrica et Logopaedica"},{"key":"ref18","doi-asserted-by":"crossref","DOI":"10.21437\/Eurospeech.1999-37","article-title":"Acoustic nature of the whisper","author":"M Matsuda","year":"1999","journal-title":"Sixth European Conference on Speech Communication and Technology"},{"key":"ref19","doi-asserted-by":"crossref","first-page":"889","DOI":"10.1109\/TASLP.2020.2971417","article-title":"Glottal flow synthesis for whisper-to-speech conversion","volume":"28","author":"O Perrotin","year":"2020","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"ref20","first-page":"251","article-title":"Whispered speech to normal speech conversion using bidirectional lstms with metanetwork","author":"W Yu","year":"2019","journal-title":"IEEE 2nd International Conference on Information Communication and Signal Processing"},{"key":"ref21","first-page":"7119","article-title":"On the importance of vocal tract constriction for speaker characterization: The whispered speech study","author":"R K Das","year":"2020","journal-title":"ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing"},{"issue":"4","key":"ref22","article-title":"Speaker identification of whispered speech with perceptible mood","volume":"9","author":"G Chenghui","year":"2014","journal-title":"Journal of Multimedia"},{"key":"ref23","first-page":"2061","author":"J Zhou","year":"2012","journal-title":"Whisper intelligibility enhancement using a supervised learning approach, Circuits, Systems, and Signal Processing 31"},{"issue":"2","key":"ref24","doi-asserted-by":"crossref","first-page":"445","DOI":"10.1044\/jshr.1302.445","article-title":"Power spectral density measurements of oral and whispered speech","volume":"13","author":"M F Schwartz","year":"1970","journal-title":"Journal of Speech and Hearing Research"},{"issue":"1","key":"ref25","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1016\/j.specom.2012.07.002","article-title":"Acoustic analysis and feature transformation from neutral to whisper for speaker identification within whispered speech audio streams","volume":"55","author":"X Fan","year":"2013","journal-title":"Speech communication"},{"key":"ref26","author":"D R Boone","year":"2005","journal-title":"The voice and voice therapy"},{"key":"ref27","doi-asserted-by":"crossref","DOI":"10.1016\/j.wocn.2023.101223","article-title":"Discriminative segmental cues to vowel height and consonantal place and voicing in whispered speech","volume":"97","author":"L M Jesus","year":"2023","journal-title":"Journal of Phonetics"},{"issue":"4","key":"ref28","article-title":"Voiced speech from whispers for post-laryngectomised patients","volume":"36","author":"H R Sharifzadeh","year":"2009","journal-title":"IAENG International Journal of Computer Science"},{"key":"ref29","first-page":"327","volume":"34","author":"H Liu","year":"2007","journal-title":"Electrolarynx in voice rehabilitation"},{"key":"ref30","doi-asserted-by":"crossref","first-page":"133","DOI":"10.2147\/MDER.S133225","article-title":"The electrolarynx: Voice restoration after total laryngectomy","author":"R Kaye","year":"2017","journal-title":"Medical devices: evidence and research"},{"issue":"1","key":"ref31","doi-asserted-by":"crossref","first-page":"118","DOI":"10.1044\/2018_PERS-SIG3-2018-0013","article-title":"Review of the electrolarynx: The past and present, Perspectives of the","volume":"4","author":"S R Cox","year":"2019","journal-title":"ASHA Special Interest Groups"},{"issue":"5","key":"ref32","doi-asserted-by":"crossref","first-page":"952","DOI":"10.1109\/JSTSP.2016.2535970","article-title":"The new bionic electro-larynx speech system","volume":"10","author":"A K Fuchs","year":"2016","journal-title":"IEEE Journal of Selected Topics in Signal Processing"},{"key":"ref33","author":"G S Meltzner","year":"2005","journal-title":"Impact of aberrant acoustic properties on the perception of sound quality in electrolarynx speech"},{"key":"ref34","first-page":"1","article-title":"Electrolarynx system using voice conversion based on WaveRNN","author":"E Urabe","year":"2020","journal-title":"2020 IEEE International Conference on Consumer Electronics (ICCE)"},{"issue":"5","key":"ref35","doi-asserted-by":"crossref","first-page":"865","DOI":"10.1109\/TBME.2006.872821","article-title":"Enhancement of electrolarynx speech based on auditory masking","volume":"53","author":"H Liu","year":"2006","journal-title":"IEEE Transactions on Biomedical Engineering"},{"key":"ref36","author":"G Duarte Nunes","year":"2023","journal-title":"Whispered speech segmentation based on deep learning, Master's thesis"},{"key":"ref37","author":"I T Union","year":"2004","journal-title":"Tolerable round-trip time delay for sound-programme and television broadcast programme inserts -context and rationale, Norm BT"},{"issue":"10","key":"ref38","doi-asserted-by":"crossref","first-page":"2448","DOI":"10.1109\/TBME.2010.2053369","article-title":"Reconstruction of normal sounding speech for laryngectomy patients through a modified CELP codec","volume":"57","author":"H R Sharifzadeh","year":"2010","journal-title":"IEEE Transactions on Biomedical Engineering"},{"key":"ref39","first-page":"351","volume":"9","author":"V Zue","year":"1990","journal-title":"Speech database development at MIT: TIMIT and beyond"},{"key":"ref40","doi-asserted-by":"crossref","first-page":"159","DOI":"10.1109\/ISIVC.2016.7893980","article-title":"Implantation of voicing on whispered speech using frequency-domain parametric modelling of source and filter information","author":"A Ferreira","year":"2016","journal-title":"2016 International Symposium on Signal, Image, Video and Communications (ISIVC)"},{"issue":"2","key":"ref41","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1016\/j.jvoice.2010.12.002","article-title":"A comprehensive vowel space for whispered speech","volume":"26","author":"H R Sharifzadeh","year":"2012","journal-title":"Journal of Voice"},{"key":"ref42","doi-asserted-by":"crossref","first-page":"107","DOI":"10.21437\/SpeechProsody.2008-25","author":"V.-A Tran","year":"2008","journal-title":"Speech Prosody 2008-4th International Conference on Speech Prosody"},{"issue":"5","key":"ref43","doi-asserted-by":"crossref","first-page":"359","DOI":"10.1016\/0893-6080(89)90020-8","article-title":"Multilayer feedforward networks are universal approximators","volume":"2","author":"K Hornik","year":"1989","journal-title":"Neural Networks"},{"issue":"24","key":"ref44","doi-asserted-by":"crossref","first-page":"1781","DOI":"10.1049\/el.2014.1645","article-title":"Whisper-to-speech conversion using restricted Boltzmann machine arrays","volume":"50","author":"J.-J Li","year":"2014","journal-title":"Electronics Letters"},{"issue":"4","key":"ref45","doi-asserted-by":"crossref","first-page":"18","DOI":"10.1109\/5254.708428","article-title":"Support vector machines","volume":"13","author":"M Hearst","year":"1998","journal-title":"IEEE Intelligent Systems and their Applications"},{"key":"ref46","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1007\/BF00994018","article-title":"Support-vector networks","volume":"20","author":"C Cortes","year":"2004","journal-title":"Machine Learning"},{"key":"ref47","first-page":"503","author":"N Meenakshi","year":"2017","journal-title":"A robust voiced\/unvoiced phoneme classification from whispered speech using the 'color' of whispered phonemes and deep neural network"},{"key":"ref48","first-page":"127","author":"N Lopes","year":"2015","journal-title":"Non-negative matrix factorization (NMF), in: Machine Learning for Adaptive Many-Core Machines -a Practical Approach"},{"key":"ref49","article-title":"A multichannel articulatory speech database and its application for automatic speech recognition","author":"A Wrench","year":"2000","journal-title":"Proc. 5th Seminar on Speech Production: Models and Data"},{"key":"ref50","first-page":"1957","author":"T Toda","year":"2005","journal-title":"NAM-to-speech conversion with Gaussian mixture models"},{"key":"ref51","doi-asserted-by":"crossref","first-page":"659","DOI":"10.1007\/978-0-387-73003-5_196","author":"D Reynolds","year":"2009","journal-title":"Encyclopedia of Biometrics"},{"key":"ref52","author":"J F T Costa","year":"2021","journal-title":"Adaptive phonetic segmentation in dysphonic voice"},{"issue":"3","key":"ref53","doi-asserted-by":"crossref","first-page":"268","DOI":"10.1109\/PROC.1973.9030","article-title":"The viterbi algorithm","volume":"61","author":"G Forney","year":"1973","journal-title":"Proceedings of the IEEE"},{"issue":"2","key":"ref54","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1109\/5.18626","article-title":"A tutorial on hidden Markov models and selected applications in speech recognition","volume":"77","author":"L Rabiner","year":"1989","journal-title":"Proceedings of the IEEE"},{"issue":"1","key":"ref55","doi-asserted-by":"crossref","first-page":"4","DOI":"10.1109\/MASSP.1986.1165342","article-title":"An introduction to hidden Markov models","volume":"3","author":"L Rabiner","year":"1986","journal-title":"IEEE ASSP Magazine"},{"key":"ref56","author":"L Rabiner","year":"1978","journal-title":"Digital Processing of Speech Signals"},{"key":"ref57","first-page":"2253","article-title":"Voicing decision based on phonemes classification and spectral moments for whisper-to-speech conversion","author":"L Ardaillon","year":"2022","journal-title":"Interspeech 2022-23rd Annual Conference of the International Speech Communication Association, ISCA; ISCA"},{"issue":"1","key":"ref58","doi-asserted-by":"crossref","first-page":"21","DOI":"10.1109\/TIT.1967.1053964","article-title":"Nearest neighbor pattern classification","volume":"13","author":"T Cover","year":"1967","journal-title":"IEEE Transactions on Information Theory"},{"issue":"7553","key":"ref59","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","article-title":"Deep learning","volume":"521","author":"Y Lecun","year":"2015","journal-title":"nature"},{"key":"ref60","author":"I T U R S Itu-R)","year":"2004","journal-title":"Tolerable round-trip time delay for soundprogramme and television broadcast programme inserts -Context and rationale"},{"key":"ref61","author":"A Vaswani","journal-title":"Attention is all you need"}],"container-title":[],"original-title":[],"deposited":{"date-parts":[[2025,2,9]],"date-time":"2025-02-09T01:05:16Z","timestamp":1739063116000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.ssrn.com\/abstract=5123047"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":61,"URL":"https:\/\/doi.org\/10.2139\/ssrn.5123047","relation":{},"subject":[],"published":{"date-parts":[[2025]]},"subtype":"preprint"}}