{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T06:14:25Z","timestamp":1759385665686,"version":"3.37.3"},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2020,7,21]],"date-time":"2020-07-21T00:00:00Z","timestamp":1595289600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,7,21]],"date-time":"2020-07-21T00:00:00Z","timestamp":1595289600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Circuits Syst Signal Process"],"published-print":{"date-parts":[[2021,2]]},"DOI":"10.1007\/s00034-020-01496-6","type":"journal-article","created":{"date-parts":[[2020,7,21]],"date-time":"2020-07-21T07:03:32Z","timestamp":1595315012000},"page":"772-797","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["SongF0: A Spectrum-Based Fundamental Frequency Estimation for Monophonic Songs"],"prefix":"10.1007","volume":"40","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0781-7542","authenticated-orcid":false,"given":"Pradeep","family":"Rengaswamy","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"K. Sreenivasa","family":"Rao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pallab","family":"Dasgupta","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,7,21]]},"reference":[{"issue":"3","key":"1496_CR1","doi-asserted-by":"publisher","first-page":"333","DOI":"10.1109\/89.759042","volume":"7","author":"S Ahmadi","year":"1999","unstructured":"S. Ahmadi et al., Cepstrum-based pitch detection using a new statistical v\/uv classification algorithm. IEEE Trans. Speech Audio Process. 7(3), 333\u2013338 (1999)","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"1496_CR2","doi-asserted-by":"crossref","unstructured":"H. Ba, N. Yang, et al., Bana: a hybrid approach for noise resilient pitch detection, in Statistical Signal Processing Workshop (SSP), IEEE (IEEE, 2012)","DOI":"10.1109\/SSP.2012.6319706"},{"key":"1496_CR3","first-page":"155","volume":"14","author":"RM Bittner","year":"2014","unstructured":"R.M. Bittner et al., Medleydb: a multitrack dataset for annotation-intensive mir research. Int. Soc. Music Inf. Retrieval (ISMIR) 14, 155\u2013160 (2014)","journal-title":"Int. Soc. Music Inf. Retrieval (ISMIR)"},{"issue":"1","key":"1496_CR4","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/A:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"L. Breiman, Random forests. Mach. Learn. 45(1), 5\u201332 (2001)","journal-title":"Mach. Learn."},{"key":"1496_CR5","doi-asserted-by":"publisher","first-page":"162","DOI":"10.1016\/j.jvoice.2017.04.008","volume":"32","author":"M Brockmann-Bauser","year":"2017","unstructured":"M. Brockmann-Bauser et al., Acoustic perturbation measures improve with increasing vocal intensity in individuals with and without voice disorders. J. Voice 32, 162\u2013168 (2017)","journal-title":"J. Voice"},{"issue":"3","key":"1496_CR6","doi-asserted-by":"publisher","first-page":"165","DOI":"10.1109\/TSA.2003.811538","volume":"11","author":"CJ Burges","year":"2003","unstructured":"C.J. Burges et al., Distortion discriminant analysis for audio fingerprinting. IEEE Trans. Speech Audio Process. 11(3), 165\u2013174 (2003)","journal-title":"IEEE Trans. Speech Audio Process."},{"issue":"3","key":"1496_CR7","doi-asserted-by":"publisher","first-page":"1638","DOI":"10.1121\/1.2951592","volume":"124","author":"A Camacho","year":"2008","unstructured":"A. Camacho et al., A sawtooth waveform inspired pitch estimator for speech and music. J. Acoust. Soc. Am. 124(3), 1638\u20131652 (2008)","journal-title":"J. Acoust. Soc. Am."},{"issue":"2","key":"1496_CR8","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1016\/j.specom.2003.12.001","volume":"42","author":"R Carr\u00e9","year":"2004","unstructured":"R. Carr\u00e9, From an acoustic tube to speech production. Speech Commun. 42(2), 227\u2013240 (2004)","journal-title":"Speech Commun."},{"key":"1496_CR9","volume-title":"The Analysis of Time Series: An Introduction","author":"C Chatfield","year":"2016","unstructured":"C. Chatfield, The Analysis of Time Series: An Introduction (CRC Press, Boca Raton, 2016)"},{"key":"1496_CR10","unstructured":"J.S.D. Dan Ellis, MIREX Evaluation metrics (2005), http:\/\/www.music-ir.org\/evaluation\/mirex-results\/audio-melody\/index.html. Accessed 10 May 2018"},{"issue":"4","key":"1496_CR11","doi-asserted-by":"publisher","first-page":"1917","DOI":"10.1121\/1.1458024","volume":"111","author":"A De Cheveign\u00e9","year":"2002","unstructured":"A. De Cheveign\u00e9 et al., YIN, a fundamental frequency estimator for speech and music. J. Acoust. Soc. Am. 111(4), 1917\u20131930 (2002)","journal-title":"J. Acoust. Soc. Am."},{"key":"1496_CR12","doi-asserted-by":"crossref","unstructured":"T. Drugman, et al., Glottal closure and opening instant detection from speech signals, in Tenth Annual Conference of the International Speech Communication Association (2009)","DOI":"10.21437\/Interspeech.2009-47"},{"key":"1496_CR13","doi-asserted-by":"crossref","unstructured":"T. Drugman, et al., Joint robust voicing detection and pitch estimation based on residual harmonics, in Twelfth Annual Conference of the International Speech Communication Association (2011)","DOI":"10.21437\/Interspeech.2011-519"},{"issue":"3","key":"1496_CR14","doi-asserted-by":"publisher","first-page":"994","DOI":"10.1109\/TASL.2011.2170835","volume":"20","author":"T Drugman","year":"2012","unstructured":"T. Drugman et al., Detection of glottal closure instants from speech signals: a quantitative review. IEEE Trans. Audio Speech Lang. Process. 20(3), 994\u20131006 (2012)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"issue":"6","key":"1496_CR15","doi-asserted-by":"publisher","first-page":"1568","DOI":"10.1121\/1.387811","volume":"71","author":"H Duifhuis","year":"1982","unstructured":"H. Duifhuis et al., Measurement of pitch in speech: an implementation of Goldstein\u2019s theory of pitch perception. J. Acoust. Soc. Am. 71(6), 1568\u20131580 (1982)","journal-title":"J. Acoust. Soc. Am."},{"key":"1496_CR16","doi-asserted-by":"crossref","unstructured":"A. Ghias, et al., Query by humming: musical information retrieval in an audio database, in Proceedings of the Third ACM International Conference on Multimedia (ACM, 1995)","DOI":"10.1145\/217279.215273"},{"key":"1496_CR17","unstructured":"S. Gonzalez, et al., A pitch estimation filter robust to high levels of noise (pefac), in 2011 19th European Signal Processing Conference (IEEE, 2011), pp. 451\u2013455"},{"key":"1496_CR18","unstructured":"N. Henrich, Study of the Glottal Source in Speech and Singing: Modeling and Estimation, Acoustic and Electroglottographic Measurements, Perception (Universit\u00e9 Pierre et Marie Curie-Paris VI, Theses, 2001)"},{"issue":"3","key":"1496_CR19","doi-asserted-by":"publisher","first-page":"1417","DOI":"10.1121\/1.1850031","volume":"117","author":"N Henrich","year":"2005","unstructured":"N. Henrich et al., Glottal open quotient in singing: measurements and correlation with laryngeal mechanisms, vocal intensity, and fundamental frequency. J. Acoust. Soc. Am. 117(3), 1417\u20131430 (2005)","journal-title":"J. Acoust. Soc. Am."},{"issue":"1","key":"1496_CR20","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1121\/1.396427","volume":"83","author":"DJ Hermes","year":"1988","unstructured":"D.J. Hermes, Measurement of pitch by subharmonic summation. J. Acoust. Soc. Am. 83(1), 257\u2013264 (1988)","journal-title":"J. Acoust. Soc. Am."},{"key":"1496_CR21","unstructured":"R. Jang, MIR Corpora (2005), http:\/\/mirlab.org\/dataSet\/public\/. Accessed 22 June 2017"},{"key":"1496_CR22","doi-asserted-by":"crossref","unstructured":"S.R. Kadiri, et al., Analysis of singing voice for epoch extraction using Zero Frequency Filtering method, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2015)","DOI":"10.1109\/ICASSP.2015.7178774"},{"issue":"3","key":"1496_CR23","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1016\/S0167-6393(98)00085-5","volume":"27","author":"H Kawahara","year":"1999","unstructured":"H. Kawahara et al., Restructuring speech representations using a pitch-adaptive time-frequency smoothing and an instantaneous-frequency-based F0 extraction: Possible role of a repetitive structure in sounds. Speech Commun. 27(3), 187\u2013207 (1999)","journal-title":"Speech Commun."},{"key":"1496_CR24","unstructured":"H. Kenmochi, et\u00a0al., VOCALOID-commercial singing synthesizer based on sample concatenation, in INTERSPEECH, vol. 2007 (2007)"},{"issue":"3","key":"1496_CR25","doi-asserted-by":"publisher","first-page":"362","DOI":"10.2174\/157489311796904709","volume":"6","author":"M Kob","year":"2011","unstructured":"M. Kob et al., Analysing and understanding the singing voice: recent progress and open questions. Curr. Bioinform. 6(3), 362\u2013374 (2011)","journal-title":"Curr. Bioinform."},{"key":"1496_CR26","doi-asserted-by":"crossref","unstructured":"A. Kumar et\u00a0al., Audio event detection from acoustic unit occurrence patterns, in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2012)","DOI":"10.1109\/ICASSP.2012.6287923"},{"key":"1496_CR27","doi-asserted-by":"publisher","first-page":"609","DOI":"10.1109\/89.943339","volume":"9","author":"DJ Liu","year":"2001","unstructured":"D.J. Liu et al., Fundamental frequency estimation based on the joint time-frequency analysis of harmonic spectral structure. IEEE Trans. Speech Audio Process. 9, 609\u2013621 (2001)","journal-title":"IEEE Trans. Speech Audio Process."},{"key":"1496_CR28","first-page":"83","volume":"2016","author":"A Lombardo","year":"2016","unstructured":"A. Lombardo, Analysis of vocal signals for the detection of vocal tract diseases. New Collect. 2016, 83 (2016)","journal-title":"New Collect."},{"key":"1496_CR29","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1016\/j.pmcj.2017.04.006","volume":"41","author":"Z Lv","year":"2017","unstructured":"Z. Lv et al., Serious game based personalized healthcare system for dysphonia rehabilitation. Pervasive Mobile Comput. 41, 504\u2013519 (2017)","journal-title":"Pervasive Mobile Comput."},{"key":"1496_CR30","doi-asserted-by":"crossref","unstructured":"M.W. Macon, et\u00a0al., A singing voice synthesis system based on sinusoidal modeling, in IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), vol. 1 (IEEE, 1997)","DOI":"10.1109\/ICASSP.1997.599668"},{"key":"1496_CR31","doi-asserted-by":"crossref","unstructured":"M. Makhmutov, et\u00a0al., MOMOS-MT: mobile monophonic system for music transcription (2016), arXiv preprint arXiv:1611.07351","DOI":"10.1145\/3019612.3019723"},{"key":"1496_CR32","unstructured":"MathWorks, Prominence (2012), https:\/\/in.mathworks.com\/help\/signal\/ug\/prominence.html. Accessed 2 Aug 2017"},{"key":"1496_CR33","doi-asserted-by":"crossref","unstructured":"M. Mauch et\u00a0al., pyin: a fundamental frequency estimator using probabilistic threshold distributions, in 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (IEEE, 2014), pp. 659\u2013663","DOI":"10.1109\/ICASSP.2014.6853678"},{"issue":"2","key":"1496_CR34","doi-asserted-by":"publisher","first-page":"519","DOI":"10.1109\/TASL.2006.876756","volume":"15","author":"TL Nwe","year":"2007","unstructured":"T.L. Nwe et al., Exploring vibrato-motivated acoustic features for singer identification. IEEE Trans. Audio Speech Lang. Process. 15(2), 519\u2013530 (2007)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"issue":"7","key":"1496_CR35","doi-asserted-by":"publisher","first-page":"459","DOI":"10.1016\/j.nurpra.2016.04.025","volume":"12","author":"A Pylypowich","year":"2016","unstructured":"A. Pylypowich et al., Differentiating the symptom of dysphonia. J. Nurse Pract. 12(7), 459\u2013466 (2016)","journal-title":"J. Nurse Pract."},{"issue":"1","key":"1496_CR36","doi-asserted-by":"publisher","first-page":"236","DOI":"10.1111\/j.1467-8624.2011.01700.x","volume":"83","author":"C Quam","year":"2012","unstructured":"C. Quam et al., Development in children\u2019s interpretation of pitch cues to emotions. Child Dev. 83(1), 236\u2013250 (2012)","journal-title":"Child Dev."},{"issue":"1","key":"1496_CR37","doi-asserted-by":"publisher","first-page":"24","DOI":"10.1109\/TASSP.1977.1162905","volume":"25","author":"L Rabiner","year":"1977","unstructured":"L. Rabiner, On the use of autocorrelation analysis for pitch detection. IEEE Trans. Acoust. Speech Signal Process. 25(1), 24\u201333 (1977)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"key":"1496_CR38","doi-asserted-by":"crossref","unstructured":"K. Saino et\u00a0al., An HMM-based singing voice synthesis system, in INTERSPEECH (2006)","DOI":"10.21437\/Interspeech.2006-584"},{"issue":"3","key":"1496_CR39","doi-asserted-by":"publisher","first-page":"405","DOI":"10.1016\/j.specom.2005.01.010","volume":"46","author":"T Saitou","year":"2005","unstructured":"T. Saitou et al., Development of an F0 control model based on F0 dynamic characteristics for singing-voice synthesis. Speech Commun. 46(3), 405\u2013417 (2005)","journal-title":"Speech Commun."},{"key":"1496_CR40","volume-title":"Vocal Health and Pedagogy, Volume II: Advanced Assessment and Practice","author":"RT Sataloff","year":"2006","unstructured":"R.T. Sataloff, Vocal Health and Pedagogy, Volume II: Advanced Assessment and Practice (Plural Publishing, San Diego, 2006)"},{"issue":"4","key":"1496_CR41","doi-asserted-by":"publisher","first-page":"829","DOI":"10.1121\/1.1910902","volume":"43","author":"MR Schroeder","year":"1968","unstructured":"M.R. Schroeder, Period histogram and product spectrum: new methods for fundamental-frequency measurement. J. Acoust. Soci. Am. 43(4), 829\u2013834 (1968)","journal-title":"J. Acoust. Soci. Am."},{"key":"1496_CR42","unstructured":"X. Serra et\u00a0al., Musical sound modeling with sinusoids plus noise, in Musical Signal Processing (1997), pp. 91\u2013122"},{"issue":"1","key":"1496_CR43","doi-asserted-by":"publisher","first-page":"223","DOI":"10.1121\/1.382239","volume":"65","author":"T Sreenivas","year":"1979","unstructured":"T. Sreenivas et al., Pitch extraction from corrupted harmonics of the power spectrum. J. Acoust. Soc. Am. 65(1), 223\u2013228 (1979)","journal-title":"J. Acoust. Soc. Am."},{"key":"1496_CR44","doi-asserted-by":"crossref","unstructured":"X. Sun, A pitch determination algorithm based on subharmonic-to-harmonic ratio, in Sixth International Conference on Spoken Language Processing (2000)","DOI":"10.21437\/ICSLP.2000-902"},{"key":"1496_CR45","first-page":"1","volume":"1","author":"J Sundberg","year":"1968","unstructured":"J. Sundberg, Formant frequencies of bass singers. STL-QPSR 1, 1\u20136 (1968)","journal-title":"Formant frequencies of bass singers. STL-QPSR"},{"key":"1496_CR46","first-page":"21","volume":"4","author":"J Sundberg","year":"1970","unstructured":"J. Sundberg, The level of the \u201csinging formant\u201d and the source spectra of professional bass singers\u201d. Speech Transm. Lab. Q. Prog. Status Rep. 4, 21\u201339 (1970)","journal-title":"Speech Transm. Lab. Q. Prog. Status Rep."},{"key":"1496_CR47","first-page":"518","volume":"495","author":"D Talkin","year":"1995","unstructured":"D. Talkin, A robust algorithm for pitch tracking (RAPT). Speech Coding Synth. 495, 518 (1995)","journal-title":"Speech Coding Synth."},{"issue":"7\u20138","key":"1496_CR48","doi-asserted-by":"publisher","first-page":"841","DOI":"10.1016\/j.specom.2013.03.001","volume":"55","author":"LN Tan","year":"2013","unstructured":"L.N. Tan et al., Multi-band summary correlogram-based pitch detection for noisy speech. Speech Commun. 55(7\u20138), 841\u2013856 (2013)","journal-title":"Speech Commun."},{"issue":"5","key":"1496_CR49","first-page":"575","volume":"65","author":"IR Titze","year":"2009","unstructured":"I.R. Titze, Voice research and technology: how are harmonics produced at the voice source? J. Sing. 65(5), 575\u2013576 (2009)","journal-title":"J. Sing."},{"issue":"6","key":"1496_CR50","doi-asserted-by":"publisher","first-page":"708","DOI":"10.1109\/89.876309","volume":"8","author":"T Tolonen","year":"2000","unstructured":"T. Tolonen et al., A computationally efficient multipitch analysis model. IEEE Trans. Speech Audio Process. 8(6), 708\u2013716 (2000)","journal-title":"IEEE Trans. Speech Audio Process."},{"issue":"6","key":"1496_CR51","doi-asserted-by":"publisher","first-page":"4559","DOI":"10.1121\/1.2916590","volume":"123","author":"SA Zahorian","year":"2008","unstructured":"S.A. Zahorian et al., A spectral\/temporal method for robust fundamental frequency tracking. J. Acoust. Soc. Am. 123(6), 4559\u20134571 (2008)","journal-title":"J. Acoust. Soc. Am."}],"container-title":["Circuits, Systems, and Signal Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-020-01496-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00034-020-01496-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00034-020-01496-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,10]],"date-time":"2024-08-10T12:40:55Z","timestamp":1723293655000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00034-020-01496-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,7,21]]},"references-count":51,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2021,2]]}},"alternative-id":["1496"],"URL":"https:\/\/doi.org\/10.1007\/s00034-020-01496-6","relation":{},"ISSN":["0278-081X","1531-5878"],"issn-type":[{"type":"print","value":"0278-081X"},{"type":"electronic","value":"1531-5878"}],"subject":[],"published":{"date-parts":[[2020,7,21]]},"assertion":[{"value":"25 June 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 June 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 June 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 July 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}