{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,19]],"date-time":"2025-12-19T09:33:28Z","timestamp":1766136808587},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2017,2,3]],"date-time":"2017-02-03T00:00:00Z","timestamp":1486080000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"name":"CompMusic","award":["ERC grant agreement 267583"],"award-info":[{"award-number":["ERC grant agreement 267583"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2017,3]]},"DOI":"10.1007\/s10772-017-9397-1","type":"journal-article","created":{"date-parts":[[2017,2,3]],"date-time":"2017-02-03T14:15:03Z","timestamp":1486131303000},"page":"185-204","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Melody extraction from music using modified group delay functions"],"prefix":"10.1007","volume":"20","author":[{"given":"Rajeev","family":"Rajan","sequence":"first","affiliation":[]},{"given":"Manaswi","family":"Misra","sequence":"additional","affiliation":[]},{"given":"Hema A.","family":"Murthy","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,2,3]]},"reference":[{"issue":"3","key":"9397_CR1","doi-asserted-by":"crossref","first-page":"520","DOI":"10.1109\/TASL.2012.2227731","volume":"21","author":"V. Arora","year":"2013","unstructured":"Arora, V., & Behera, L. (2013). On-line melody extraction from polyphonic audio using harmonic cluster tracking. IEEE Transactions on Audio Speech and Language Processing, 21(3), 520\u2013530.","journal-title":"IEEE Transactions on Audio Speech and Language Processing"},{"key":"9397_CR2","unstructured":"Bello, J. P. (2003). Towards the automated analysis of simple polyphonic music: A knowledge based approach. Ph.D. Diss., University of London, Queen Mary."},{"key":"9397_CR3","unstructured":"Bittner, R.\u00a0M.,\u00a0Salamon, J.,\u00a0Tierney, M.,\u00a0Mauch, M.,\u00a0Cannam, C., & Bello, J.\u00a0P. (2014). Medleydb: A multitrack dataset for annotation-intensive mir research. In Proceedings of the international society for music information retrieval (ISMIR), Taipei, Taiwan."},{"key":"9397_CR4","unstructured":"Brossier, P.\u00a0M. (2005, September). Fast melody extraction using aubio(brossier), mirex-2005. In 4th Music information retrieval evaluation eXchange (MIREX), extended abstract (pp. 325\u2013333)."},{"key":"9397_CR5","unstructured":"Cancela, P.\u00a0(2008). Tracking melody in polyphonic audio. In 4th music information retrieval evaluation eXchange (MIREX), extended abstract."},{"key":"9397_CR6","unstructured":"Cao, C.,\u00a0Li,\u00a0M.,\u00a0Liu, J., &\u00a0Yan, Y. (2007). Singing melody extraction in polyphonic music by harmonic tracking. In Proceedings of international society for music information retrieval (International Society for Music Information Retrieval conference) (pp. 373\u2013374)."},{"key":"9397_CR7","unstructured":"Dressler, K.\u00a0(2011, October). An auditory streaming approach for melody extraction from polyphonic music. In Proceedings of international society for music information retrieval conference (pp. 19\u201324)."},{"key":"9397_CR18","doi-asserted-by":"crossref","unstructured":"Durrieu, J. L.,\u00a0Richard,\u00a0G., &\u00a0Fvotte, C. (2010). Source\/filter model for unsupervised main melody extraction from polyphonic audio signals. IEEE transactions on audio, speech, and language processing (pp. 564\u2013575).","DOI":"10.1109\/TASL.2010.2041114"},{"key":"9397_CR8","unstructured":"Goto, M.,\u00a0&\u00a0Hayamizu, S. (1999, May) A real-time music scene description system: Detecting melody and bass lines in audio signals. In Working notes of the IJCAI-99 workshop on computational auditory scene analysis (pp. 31\u201340)."},{"key":"9397_CR10","unstructured":"Hsu, C.-L., Chen, L.-Y., Jang, J.-S.\u00a0R., & Li, H.-J. (2009). Singing pitch extraction fom monaural polyphonic songs by contextuual audio modeling and singing harmonic enhancement. In Proceedings of the 10th international society for music information retrieval conference (pp. 201\u2013206)."},{"key":"9397_CR9","unstructured":"Hsu, C.\u00a0L., & Jang, J.\u00a0S. (2010, May) Singing pitch extraction by voice vibrato\/tremolo estimation and instrument partial deletion. In Proceedings of international society for music information retrieval (International Society for Music Information Retrieval Conference) (pp. 525\u2013530)."},{"issue":"5","key":"9397_CR11","doi-asserted-by":"crossref","first-page":"1482","DOI":"10.1109\/TASL.2011.2182510","volume":"20","author":"C.-L. Hsu","year":"2012","unstructured":"Hsu, C.-L., Wang, D., Jang, J.-S. R., & Hu, K. (2012). A tandem algorithm for singing pitch extraction and voice separation from music accompaniment. IEEE Transactions on Audio, Speech and Langauge Processing, 20(5), 1482\u20131491.","journal-title":"IEEE Transactions on Audio, Speech and Langauge Processing"},{"issue":"8","key":"9397_CR12","doi-asserted-by":"crossref","first-page":"2067","DOI":"10.1109\/TASL.2010.2041110","volume":"18","author":"G. Hu","year":"2010","unstructured":"Hu, G., & Wang, D. L. (2010). A tandem algorithm for pitch estimation and voiced speech segregation. IEEE Transactions on Audio Speech Language Processing, 18(8), 2067\u20132079.","journal-title":"IEEE Transactions on Audio Speech Language Processing"},{"issue":"12","key":"9397_CR13","doi-asserted-by":"crossref","first-page":"2127","DOI":"10.1109\/29.61539","volume":"38","author":"D. Jones","year":"1990","unstructured":"Jones, D.,\u00a0&\u00a0Parks, T. (1990). A high-resolution data-adaptive time-frequency representation. IEEE Transactions on Acoustics, Speech, and Signal Processing, 38(12), 2127\u20132135, 1990.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"key":"9397_CR14","unstructured":"Joo, S.,\u00a0Jo,\u00a0S., & Yoo, C.\u00a0D. (2010). Melody extraction from polyphonic audio signal mirex-2010. In 6th Music information retrieval evaluation exchange (MIREX), 2010."},{"key":"9397_CR15","unstructured":"Joo, S.,\u00a0Park,\u00a0S.,\u00a0Jo, S., & Yo, C.\u00a0D. (2011). Melody extraction based on harmonic coded structures. In 12th international society for music information retrieval conference (ISMIR 2011) (pp. 227 \u2013232)."},{"key":"9397_CR16","unstructured":"Kitahara, T. (2006). Computational musical instrument recognition and its application to content-based music information retrieval. Ph.D. Diss., Kyoto University, Japan."},{"key":"9397_CR17","unstructured":"Kum, S.,\u00a0Oh,\u00a0C., &\u00a0Nam, J. (2016). Melody extraction on vocal segments using multi-column deep neural networks. In Proceedings of 17th international society for music information retrieval (ISMIR)."},{"key":"9397_CR19","doi-asserted-by":"crossref","unstructured":"Mauch, M.,\u00a0&\u00a0Dixon, S. (2014, April). Pyin: A fundamental frequency estimator using probabilistic threshold distributions. In Proceedings of IEEE international conference on acoustics, speech, and signal processing (pp. 659\u2013663).","DOI":"10.1109\/ICASSP.2014.6853678"},{"key":"9397_CR20","unstructured":"Murthy, H. A. (1991, December). Algorithms for processing fourier transform phase of signals. PhD Dissertation, Department of Computer Science and Engg, Indian Institute of Technology, Madras, India."},{"key":"9397_CR21","doi-asserted-by":"crossref","first-page":"209","DOI":"10.1016\/0167-6393(91)90011-H","volume":"10","author":"HA Murthy","year":"1991","unstructured":"Murthy, H. A., & Yegnanarayana, B. (1991a). Formant extraction from minimum phase group delay function. Speech Communications, 10, 209\u2013221.","journal-title":"Speech Communications"},{"key":"9397_CR22","doi-asserted-by":"crossref","first-page":"259","DOI":"10.1016\/0165-1684(91)90014-A","volume":"22","author":"HA Murthy","year":"1991","unstructured":"Murthy, H. A., & Yegnanarayana, B. (1991b). Speech processing using group delay functions. Signal Processing, 22, 259\u2013267.","journal-title":"Signal Processing"},{"issue":"5","key":"9397_CR23","doi-asserted-by":"crossref","first-page":"745","DOI":"10.1007\/s12046-011-0045-1","volume":"36","author":"HA Murthy","year":"2011","unstructured":"Murthy, H. A., & Yegnanarayana, B. (2011). Group delay functions and its application to speech processing. Sadhana, 36(5), 745\u2013782.","journal-title":"Sadhana"},{"key":"9397_CR24","doi-asserted-by":"crossref","first-page":"941","DOI":"10.1049\/el:20030616","volume":"39","author":"T Nagarajan","year":"2003","unstructured":"Nagarajan, T., Prasad, V. K., & Murthy, H. A. (2003). Minimum phase signal derived from the root cepstrum. IEEE Electronics Letters, 39, 941\u2013942.","journal-title":"IEEE Electronics Letters"},{"key":"9397_CR25","volume-title":"Discrete time signal processing","author":"AV Oppenheim","year":"1990","unstructured":"Oppenheim, A. V., & Schafer, R. W. (1990). Discrete time signal processing. New Jersey: Prentice Hall Inc."},{"key":"9397_CR26","doi-asserted-by":"crossref","unstructured":"Painter, T., &\u00a0Spanias, A. (2000, April). Perceptual coding of digital audio. In Proceedings of IEEE (Vol. 88, No.\u00a04, pp. 451\u2013513).","DOI":"10.1109\/5.842996"},{"key":"9397_CR27","doi-asserted-by":"crossref","unstructured":"Poliner, G.,\u00a0Ellis,\u00a0D.,\u00a0Ehmann, A.,\u00a0Gomez, E.,\u00a0Streich, S., &\u00a0Ong, B. (2007, May). Melody transcription from music audio:approaches and evaluation. In Proceedings of the IEEE international conference on audio, speech and language processing (Vol.\u00a015, No.\u00a04, pp. 1247\u20131256).","DOI":"10.1109\/TASL.2006.889797"},{"key":"9397_CR28","doi-asserted-by":"crossref","first-page":"429","DOI":"10.1016\/j.specom.2003.12.002","volume":"42","author":"VK Prasad","year":"2004","unstructured":"Prasad, V. K., Nagarajan, T., & Murthy, H. A. (2004). Automatic segmentation of continuous speech using minimum phase group delay functions. Speech Communications, 42, 429\u2013446.","journal-title":"Speech Communications"},{"key":"9397_CR29","doi-asserted-by":"crossref","unstructured":"Rabiner, L., Cheng,\u00a0M.\u00a0J., Rosenberg, A.\u00a0E., & McGonegal, C.\u00a0A. (1976, October). A comparative performance study of several pitch detection algorithms. IEEE transactions on acoustics, speech and signal processing (Vol. ASSP-24, No.\u00a05, pp. 399\u2013418).","DOI":"10.1109\/TASSP.1976.1162846"},{"key":"9397_CR30","doi-asserted-by":"crossref","unstructured":"Rajan, R.,\u00a0& Murthy, H.\u00a0A. (2013a, May). Group delay based melody monopitch extraction from music. In Proceedings of the IEEE international conference on audio, speech and signal processing (pp. 186\u2013190).","DOI":"10.1109\/ICASSP.2013.6637634"},{"key":"9397_CR31","doi-asserted-by":"crossref","unstructured":"Rajan, R.,\u00a0& Murthy, H.\u00a0A. (2013b, February). Melodic pitch extraction from music signals using modified group delay functions. In 2013 National conference on proceedings of the communications (NCC) (pp. 1\u20135).","DOI":"10.1109\/NCC.2013.6487986"},{"key":"9397_CR32","unstructured":"Rajan, R.,\u00a0& Murthy, H.\u00a0A. (2016). Modified group delay based multipitch estimation in co-channel speech. arXiv:1603.05435 ."},{"key":"9397_CR33","unstructured":"Ramakrishnan, S.,\u00a0Rao,\u00a0V., &\u00a0Rao, P. (2008, February). Singing voice detection in north indian classical music. In Proceedings of the national conference on communications (NCC)."},{"issue":"4","key":"9397_CR36","first-page":"378","volume":"52","author":"P Rao","year":"2004","unstructured":"Rao, P., & Shandilya, S. (2004). On the detection of melodic pitch in a percussive background. The Journal of the Audio Engineering Society, 52(4), 378\u2013391.","journal-title":"The Journal of the Audio Engineering Society"},{"issue":"1","key":"9397_CR34","doi-asserted-by":"crossref","first-page":"342","DOI":"10.1109\/TASL.2011.2162319","volume":"20","author":"V Rao","year":"2012","unstructured":"Rao, V., Gaddipati, P., & Rao, P. (2012). Signal-driven window length adaptation for sinusoid detection in polyphonic music. IEEE Transactions on Audio Speech and Language Processing, 20(1), 342\u2013348.","journal-title":"IEEE Transactions on Audio Speech and Language Processing"},{"issue":"8","key":"9397_CR35","doi-asserted-by":"crossref","first-page":"2145","DOI":"10.1109\/TASL.2010.2042124","volume":"18","author":"V Rao","year":"2010","unstructured":"Rao, V., & Rao, P. (2010). Vocal melody extraction in the presence of pitched accompaniment in polyphonic music. IEEE Transactions on Audio, Speech, and Language Processing, 18(8), 2145\u20132154.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"issue":"3","key":"9397_CR37","doi-asserted-by":"crossref","first-page":"72","DOI":"10.1162\/comj.2008.32.3.72","volume":"32","author":"M Ryynanen","year":"2008","unstructured":"Ryynanen, M., & Klapuri, A. (2008). Automatic transcription of melody, base line, and chords in polyphonic music. Computer Music Journal, 32(3), 72\u201386.","journal-title":"Computer Music Journal"},{"issue":"6","key":"9397_CR38","doi-asserted-by":"crossref","first-page":"1759","DOI":"10.1109\/TASL.2012.2188515","volume":"20","author":"J Salamon","year":"2012","unstructured":"Salamon, J., & Gomez, E. (2012). Melody extraction from polyphonic music signals using pitch contours characteristics. IEEE Transactions on Audio Speech and Language Processing, 20(6), 1759\u20131770.","journal-title":"IEEE Transactions on Audio Speech and Language Processing"},{"issue":"2","key":"9397_CR40","doi-asserted-by":"crossref","first-page":"114","DOI":"10.1109\/MSP.2013.2271648","volume":"31","author":"J Salamon","year":"2014","unstructured":"Salamon, J., Gomez, E., Ellis, D. P. W., & Richard, G. (2014). Melody extraction from polyphonic music signals: Approaches, applications and challenges. IEEE Signal Processing Magazine, 31(2), 114\u2013118.","journal-title":"IEEE Signal Processing Magazine"},{"key":"9397_CR39","unstructured":"Salamon, J.,\u00a0Gomez,\u00a0E.,\u00a0Ellis, D., &\u00a0Richard, G. (2015, April). Melody extraction from music recordings. In IEEE signal processing society."},{"key":"9397_CR41","doi-asserted-by":"crossref","unstructured":"Sebastian, J., Kumar,\u00a0P.\u00a0A.\u00a0M., & Murthy, H.\u00a0A. (2016). An analysis of the high resolution property of group delay function with applications to audio signal processing. Speech Communication.","DOI":"10.1016\/j.specom.2015.12.008"},{"key":"9397_CR42","doi-asserted-by":"crossref","unstructured":"Shanmugam, S.\u00a0A., &\u00a0Murthy, H. (2014, September). A hybrid approach to segmentation of speech using group delay processing and HMM based embedded reestimation. In Proceedings of fifteenth annual conference of the international speech communication association (INTERSPEECH 2014).","DOI":"10.21437\/Interspeech.2014-390"},{"key":"9397_CR43","doi-asserted-by":"crossref","unstructured":"Tachibana, H., Ono, T., Ono, N., & Sagayama, S. (2010, April). Melody line estimation in homophonic music audio signals based on temporal-variability of melodic source. In Proceedings of IEEE international conference acoustics, speech, signal processing (pp. 425\u2013428).","DOI":"10.1109\/ICASSP.2010.5495764"},{"key":"9397_CR44","unstructured":"Thornburg, H.\u00a0(2003, September). Detection and modeling of transient audio signals with prior information. Ph.D. Thesis, Standford University."},{"key":"9397_CR45","doi-asserted-by":"crossref","unstructured":"Veldhuis, R.\u00a0(2000, October). Consistent pitch marking. In Proceedings of sixth international conference on spoken language processing (Vol.\u00a03, pp. 207\u2013210).","DOI":"10.21437\/ICSLP.2000-514"},{"key":"9397_CR46","doi-asserted-by":"crossref","unstructured":"Vijayan, K.\u00a0Kumar,\u00a0V., & Murty, K.\u00a0S.\u00a0R. (2014, September). Feature extraction from analytic phase of speech signals for speaker verification. In Proceedings of fifteenth annual conference of the international speech communication association (INTERSPEECH 2014) (pp. 1658\u20131662).","DOI":"10.21437\/Interspeech.2014-392"},{"key":"9397_CR51","unstructured":"Wavesurfer-an open source speech tool. (2000) [Online]. http:\/\/citeseerx.ist.psu.edu\/viewdoc\/download?doi=10.1.1.38.1118"},{"key":"9397_CR52","unstructured":"http:\/\/www.music-ir.org\/mirex\/wiki\/2012:mirex2012-results ."},{"issue":"9","key":"9397_CR47","doi-asserted-by":"crossref","first-page":"2281","DOI":"10.1109\/78.157227","volume":"40","author":"B Yegnanarayana","year":"1992","unstructured":"Yegnanarayana, B., & Murthy, H. A. (1992). Significance of group delay functions in spectrum estimation. IEEE Transactions on Signal Processing, 40(9), 2281\u20132289.","journal-title":"IEEE Transactions on Signal Processing"},{"key":"9397_CR48","doi-asserted-by":"crossref","unstructured":"Yegnanarayana, B., Murthy, H. A., & Ramachandran, V. R. (1991, May). Processing of noisy speech using modified group delay functions. In Proceedings of the IEEE international conference on audio, speech and signal processing (pp. 945\u2013948).","DOI":"10.1109\/ICASSP.1991.150496"},{"key":"9397_CR49","doi-asserted-by":"crossref","unstructured":"Yeh, T. C., Wu, M. J., Jang, J. S. R., Chang, W. L., & Liao, I. B. (2012, March). A hybrid approach to singing pitch extraction based on trend estimation and hidden markov models. In Proceedings of IEEE international conference on acoustics speech and signal processing (ICASSP) Kyoto, Japan (pp. 457\u2013460).","DOI":"10.1109\/ICASSP.2012.6287915"},{"key":"9397_CR50","unstructured":"Yoon, J. -Y., Song, C.-J., Lee, S.-P., &\u00a0Park, H. (2011). Extracting predominant melody of polyphonic music based on harmonic structure. In 7th Music information retrieval evaluation eXchange (MIREX), extended abstract."}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-017-9397-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s10772-017-9397-1\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-017-9397-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,22]],"date-time":"2023-08-22T02:11:40Z","timestamp":1692670300000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s10772-017-9397-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,2,3]]},"references-count":52,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2017,3]]}},"alternative-id":["9397"],"URL":"https:\/\/doi.org\/10.1007\/s10772-017-9397-1","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"value":"1381-2416","type":"print"},{"value":"1572-8110","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,2,3]]}}}