{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,16]],"date-time":"2025-07-16T12:01:36Z","timestamp":1752667296681,"version":"3.37.3"},"reference-count":46,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2022,1,7]],"date-time":"2022-01-07T00:00:00Z","timestamp":1641513600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,1,7]],"date-time":"2022-01-07T00:00:00Z","timestamp":1641513600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Speech Technol"],"published-print":{"date-parts":[[2023,3]]},"DOI":"10.1007\/s10772-021-09946-5","type":"journal-article","created":{"date-parts":[[2022,1,7]],"date-time":"2022-01-07T11:03:33Z","timestamp":1641553413000},"page":"77-93","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Exploring human voice prosodic features and the interaction between the excitation signal and vocal tract for Assamese speech"],"prefix":"10.1007","volume":"26","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7808-7156","authenticated-orcid":false,"given":"Sippee","family":"Bharadwaj","sequence":"first","affiliation":[]},{"given":"Purnendu Bikash","family":"Acharjee","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,1,7]]},"reference":[{"issue":"6","key":"9946_CR1","doi-asserted-by":"publisher","first-page":"562","DOI":"10.1109\/TASSP.1975.1162745","volume":"23","author":"TV Ananthapadmanabha","year":"1975","unstructured":"Ananthapadmanabha, T. V., & Yegnanarayana, B. (1975). Epoch extraction of voiced speech. IEEE Transactions on Acoustics, Speech, and Signal Processing, 23(6), 562\u2013570.","journal-title":"IEEE Transactions on Acoustics, Speech, and Signal Processing"},{"key":"9946_CR3","first-page":"57","volume":"2","author":"A Balyan","year":"2013","unstructured":"Balyan, A., Agrawal, S. S., & Dev, A. (2013) Speech synthesis: Review, IJERT, 2: 57-75","journal-title":"IJERT"},{"key":"9946_CR4","doi-asserted-by":"publisher","first-page":"252","DOI":"10.1016\/j.specom.2005.02.016","volume":"46","author":"T Banziger","year":"2005","unstructured":"Banziger, T., & Scherer, K. R. (2005). The role of intonation in emotional expressions. Speech Communication, 46, 252\u2013267.","journal-title":"Speech Communication"},{"issue":"2","key":"9946_CR5","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1353\/lan.2007.0053","volume":"83","author":"BZC Beaver","year":"2008","unstructured":"Beaver, B. Z. C., Flemming, E., Jaeger, T. F., & Wolters, M. (2008). When semantics meets phonetics: Acoustical studies of second-occurrence focus. Journal of the Linguistic Society of America, 83(2), 245\u2013276.","journal-title":"Journal of the Linguistic Society of America"},{"doi-asserted-by":"crossref","unstructured":"Benesty, J., Sondhi, M. M., & Huang, Y. (2008). Springer handbook of speech processing. Springer.","key":"9946_CR7","DOI":"10.1007\/978-3-540-49127-9"},{"unstructured":"Cahn, J. E. (1990). The generation of effect in synthesized speech. In JAVIOS (pp. 1\u201319).","key":"9946_CR8"},{"key":"9946_CR10","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1016\/S0167-6393(02)00071-7","volume":"40","author":"R Cowie","year":"2003","unstructured":"Cowie, R., & Cornelius, R. R. (2003). Describing the emotional states that are expressed in speech. Speech Communication, 40, 5\u201332.","journal-title":"Speech Communication"},{"doi-asserted-by":"crossref","unstructured":"Dellaert, F., Polzin, T., & Waibel, A. (1996). Recognizing emotions in speech. In ICSLP 96.","key":"9946_CR11","DOI":"10.21437\/ICSLP.1996-462"},{"issue":"1","key":"9946_CR12","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1016\/j.csl.2011.03.003","volume":"26","author":"T Drugman","year":"2012","unstructured":"Drugman, T., Bozkurt, B., & Dutoit, T. (2012). A comparative study of glottal source estimation techniques. Computer Speech & Language, 26(1), 20\u201334.","journal-title":"Computer Speech & Language"},{"doi-asserted-by":"crossref","unstructured":"Gangamohan, P., Sudarsana R. K., & Yegnanarayana, B (2013) Analysis of emotional speech at subsegmental level. INTERSPEECH. Vol. 2013.","key":"9946_CR13","DOI":"10.21437\/Interspeech.2013-20"},{"unstructured":"Goswami, G. C. (1982). Structure of Assamese (1st ed.). Department of Publication Gauhati University.","key":"9946_CR14"},{"issue":"44","key":"9946_CR15","first-page":"173","volume":"2002","author":"P Hansson","year":"2002","unstructured":"Hansson, P. (2002). Prosodic phrasing and articulation rate variation. Proc. FONETIK, 2002(44), 173\u2013176.","journal-title":"Proc. FONETIK"},{"doi-asserted-by":"crossref","unstructured":"Hashizawa, Y., Takeda, S., Hamzah, M.D., & Ohyama, G. (2004). On the differences in prosodic features of emotional expressions in Japanese speech according to the degree of the emotion. In: Proceedings of 1993 international conference on acoustics, speech prosody, Nara, pp. 655\u2013658.","key":"9946_CR16","DOI":"10.21437\/SpeechProsody.2004-151"},{"doi-asserted-by":"crossref","unstructured":"Jankowski, C. R. Jr, Quatieri, T. F., & Reynolds, D. A. (1995) Measuring fine structure in speech: Application to speaker identification. In: IEEE international conference on acoustics, speech, and signal processing. Detroit pp. 325\u2013328.","key":"9946_CR17","DOI":"10.1109\/ICASSP.1995.479539"},{"unstructured":"Jia, Y., Huang, D., Liu, W., Dong, Y., Yu, S., & Wang, H. (2008). Text normalization in mandarin text-to-speech system. In Proceeding of the IEEE international conference on acoustics, speech and signal processing, pp. 4693\u20134696.","key":"9946_CR18"},{"unstructured":"Joseph, M. A., Guruprasad, S., Yegnanarayana, Y. (2006). Extracting formants from short segments using group delay functions. In: Interspeech. Pittsburgh, pp. 1009\u20131012.","key":"9946_CR19"},{"doi-asserted-by":"crossref","unstructured":"Kadiri, S. R. et al. (2015). Analysis of excitation source features of speech for emotion recognition In Sixteenth annual conference of the international speech communication association.","key":"9946_CR20","DOI":"10.21437\/Interspeech.2015-329"},{"unstructured":"Kakati, B. (2007). Assamese its formation and development. 5th edn. LBS Publication","key":"9946_CR21"},{"unstructured":"Kumar, K. S., Reddy, M. S. H., Murty, K. S. R., Yegnanarayana, B. (2009). Analysis of laugh signals for detecting in continuous speech. In: Interspeech. pp. 1591\u20131594.","key":"9946_CR22"},{"key":"9946_CR23","first-page":"246","volume":"5","author":"M Kuremastsu","year":"2008","unstructured":"Kuremastsu, M., et al. (2008). An extraction of emotion in human speech using speech synthesize and classifiers for each emotion. International Journal of Circuits Systems and Signal Processing., 5, 246\u2013251.","journal-title":"International Journal of Circuits Systems and Signal Processing."},{"doi-asserted-by":"crossref","unstructured":"Lee, C., Narayanan, S., & Pieraccini, R. (2001). Recognition of negative emotions from speech signal. In IEEE workshop on automatic speech and understanding, pp. 240\u2013243","key":"9946_CR24","DOI":"10.1109\/ASRU.2001.1034632"},{"key":"9946_CR25","doi-asserted-by":"publisher","first-page":"258","DOI":"10.1109\/89.279274","volume":"2","author":"YKC Ma","year":"1994","unstructured":"Ma, Y. K. C., & Willems, L. F. (1994). A Frobenius norm approach to glottal closure detection from the speech signal. IEEE Transactions on Speech and Audio Processing, 2, 258\u2013265.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"unstructured":"Mattingly, I. G. (1974). Speech synthesis for phonetic and phonological models. In T. A. Sebeok (Ed.), Current trends in linguistics (Vol. 12, pp. 2451\u20132487). Mouton.","key":"9946_CR26"},{"doi-asserted-by":"crossref","unstructured":"McLoughlin, I. (2009). Applied speech and audio processing, with MATLAB examples. Cambridge University Press.","key":"9946_CR27","DOI":"10.1017\/CBO9780511609640"},{"issue":"5-6","key":"9946_CR28","doi-asserted-by":"publisher","first-page":"453","DOI":"10.1016\/0167-6393(90)90021-Z","volume":"9","author":"E Moulines","year":"1990","unstructured":"Moulines, E., & Charpentier, F. (1990). Pitch-synchronous waveform processing techniques for text-to-speech synthesis using diphones. Speech Communication, 9(5\u20136), 453\u2013467.","journal-title":"Speech Communication"},{"key":"9946_CR29","doi-asserted-by":"publisher","first-page":"369","DOI":"10.1016\/0167-6393(95)00005-9","volume":"16","author":"IR Murray","year":"1995","unstructured":"Murray, I. R., & Arnott, J. L. (1995). Implementation and testing of a system for producing emotion by rule in synthetic speech. Speech Communication, 16, 369\u2013390.","journal-title":"Speech Communication"},{"issue":"6","key":"9946_CR30","doi-asserted-by":"publisher","first-page":"469","DOI":"10.1109\/LSP.2009.2016829","volume":"16","author":"KSR Murty","year":"2009","unstructured":"Murty, K. S. R., Yegnanarayana, B., & Joseph, M. A. (2009). Characterization of glottal activity from speech signals. IEEE Signal Processing Letters, 16(6), 469\u2013472.","journal-title":"IEEE Signal Processing Letters"},{"unstructured":"Murty, K. S. R. (2009). Significance of excitation source information for speech analysis. Ph.D. thesis, Department of Computer Science and Engineering, Indian Institute of Technology Madras.","key":"9946_CR31"},{"issue":"8","key":"9946_CR32","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1109\/97.935737","volume":"8","author":"JL Navarro-Mesa","year":"2001","unstructured":"Navarro-Mesa, J. L., Lleida-Solano, E., & Moreno-Bilbao, A. (2001). A new method for epoch detection based on the Cohen\u2019s class of time-frequency representations. IEEE Signal Processing Letters, 8(8), 225\u2013227.","journal-title":"IEEE Signal Processing Letters"},{"key":"9946_CR33","doi-asserted-by":"publisher","first-page":"603","DOI":"10.1016\/S0167-6393(03)00099-2","volume":"41","author":"TL Nwe","year":"2003","unstructured":"Nwe, T. L., Foo, S. W., & Silva, L. C. D. (2003). Speech emotion recognition using hidden Markov models. Speech Communication, 41, 603\u2013623.","journal-title":"Speech Communication"},{"key":"9946_CR34","doi-asserted-by":"publisher","first-page":"453","DOI":"10.1007\/s00530-020-00659-4","volume":"26","author":"SP Panda","year":"2020","unstructured":"Panda, S. P., Ajit, K. N., & Satyananda, C. R. (2020). A survey on speech synthesis techniques in Indian languages. Multimedia Systems, 26, 453\u2013478.","journal-title":"Multimedia Systems"},{"key":"9946_CR35","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1016\/j.specom.2019.06.001","volume":"111","author":"H Prafianto","year":"2019","unstructured":"Prafianto, H., Nose, T., Chiba, Y., & Ito, A. (2019). Improving human scoring of prosody using parametric speech synthesis. Speech Communication, 111, 14\u201321.","journal-title":"Speech Communication"},{"unstructured":"Quatieri, T. F. (2001). Discrete-time speech signal processing principles and practices. Prentice Hall PTR.","key":"9946_CR36"},{"doi-asserted-by":"crossref","unstructured":"Raitio, T. et al. (2011). Utilizing glottal source pulse library for generating improved excitation signal for HMM-based speech synthesis. In 2011 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE.","key":"9946_CR37","DOI":"10.1109\/ICASSP.2011.5947370"},{"doi-asserted-by":"crossref","unstructured":"Rao, K. S., & Shashidhar, G. K. (2013). Robust emotion recognition using spectral and prosodic features. Springer.","key":"9946_CR38","DOI":"10.1007\/978-1-4614-6360-3"},{"doi-asserted-by":"crossref","unstructured":"Rathina, X. A., Mehata, K. M., & Ponnavaikko, M. (2012b). A study of prosodic features of emotional speech: Advances in computer science engineering & applications (pp. 41\u201349). Springer.","key":"9946_CR39","DOI":"10.1007\/978-3-642-30157-5_5"},{"unstructured":"Rathina, X. A., Mehata, K. M., & Ponnavaikko, M. (2012a). A study of prosodic features of emotional speech. Advances in computer science engineering & applications. Springer.","key":"9946_CR40"},{"unstructured":"Schroder, M., & Cowie, R. (2006). Issues in emotion-oriented computing toward a shared understanding. In Workshop on emotion and computing (HUMAINE).","key":"9946_CR41"},{"doi-asserted-by":"crossref","unstructured":"Schroder, M. (2001). Emotional speech synthesis: A review. In Seventh European conference on speech communication and technology, Eurospeech Aalborg.","key":"9946_CR42","DOI":"10.21437\/Eurospeech.2001-150"},{"issue":"4","key":"9946_CR43","doi-asserted-by":"publisher","first-page":"2061","DOI":"10.1121\/1.3203668","volume":"126","author":"G Seshadri","year":"2009","unstructured":"Seshadri, G., & Yegnanarayana, B. (2009). Perceived loudness of speech based on the characteristics of excitation source. Journal of Acoustical Society of America, 126(4), 2061\u20132071.","journal-title":"Journal of Acoustical Society of America"},{"issue":"10","key":"9946_CR44","doi-asserted-by":"publisher","first-page":"45","DOI":"10.5755\/j01.eee.18.10.3059","volume":"18","author":"M Sigmund","year":"2012","unstructured":"Sigmund, M. (2012). Influence of psychological stress on formant structure of vowels. Elektronika Ir Elektrotechnika, 18(10), 45\u201348.","journal-title":"Elektronika Ir Elektrotechnika"},{"unstructured":"Tseng, C & Lee, Y. (2004). Intensity in relation to prosody organization. In International symposium on Chinese spoken language processing, pp. 217\u2013220, Hong-Kong.","key":"9946_CR45"},{"doi-asserted-by":"crossref","unstructured":"Tuan, V. N. & d\u2019Alessandro, C. (1999). Robust glottal closure detection using the wavelet transform. In European conference on speech communication and technology, Budapest, pp. 2805\u20132808.","key":"9946_CR46","DOI":"10.21437\/Eurospeech.1999-619"},{"issue":"5","key":"9946_CR47","doi-asserted-by":"publisher","first-page":"651","DOI":"10.1007\/s12046-011-0046-0","volume":"36","author":"B Yegnanarayana","year":"2011","unstructured":"Yegnanarayana, B., & Gangashetty, S. V. (2011). Epoch-based analysis of speech signals. Sadhana, 36(5), 651\u2013697.","journal-title":"Sadhana"},{"issue":"4","key":"9946_CR48","doi-asserted-by":"publisher","first-page":"614","DOI":"10.1109\/TASL.2008.2012194","volume":"17","author":"B Yegnanarayana","year":"2009","unstructured":"Yegnanarayana, B., & Murty, K. S. R. (2009). Event-based instantaneous fundamental frequency estimation from speech signals. IEEE Transactions on Audio, Speech, and Language Processing, 17(4), 614\u2013624.","journal-title":"IEEE Transactions on Audio, Speech, and Language Processing"},{"doi-asserted-by":"crossref","unstructured":"Zhou, J., Su, X., Ylianttila, M., & Riekki, J. (2012). Exploring pervasive service computing opportunities for pursuing successful ageing. In The gerontologist, pp. 73\u201382.","key":"9946_CR49","DOI":"10.1007\/978-3-642-27916-4_9"}],"container-title":["International Journal of Speech Technology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-021-09946-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10772-021-09946-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10772-021-09946-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,15]],"date-time":"2024-09-15T21:37:45Z","timestamp":1726436265000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10772-021-09946-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,7]]},"references-count":46,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,3]]}},"alternative-id":["9946"],"URL":"https:\/\/doi.org\/10.1007\/s10772-021-09946-5","relation":{},"ISSN":["1381-2416","1572-8110"],"issn-type":[{"type":"print","value":"1381-2416"},{"type":"electronic","value":"1572-8110"}],"subject":[],"published":{"date-parts":[[2022,1,7]]},"assertion":[{"value":"10 May 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 November 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 January 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}