{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T14:38:04Z","timestamp":1740148684685,"version":"3.37.3"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2018,2,8]],"date-time":"2018-02-08T00:00:00Z","timestamp":1518048000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100008343","name":"Beijing Language and Culture University","doi-asserted-by":"publisher","award":["16PT05","16YCX163"],"award-info":[{"award-number":["16PT05","16YCX163"]}],"id":[{"id":"10.13039\/501100008343","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004543","name":"China Scholarship Council","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004543","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Sign Process Syst"],"published-print":{"date-parts":[[2018,7]]},"DOI":"10.1007\/s11265-018-1334-2","type":"journal-article","created":{"date-parts":[[2018,2,8]],"date-time":"2018-02-08T15:49:20Z","timestamp":1518104960000},"page":"1077-1087","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["Improving Mandarin Tone Recognition Based on DNN by Combining Acoustic and Articulatory Features Using Extended Recognition Networks"],"prefix":"10.1007","volume":"90","author":[{"given":"Ju","family":"Lin","sequence":"first","affiliation":[]},{"given":"Wei","family":"Li","sequence":"additional","affiliation":[]},{"given":"Yingming","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Yanlu","family":"Xie","sequence":"additional","affiliation":[]},{"given":"Nancy F.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Sabato Marco","family":"Siniscalchi","sequence":"additional","affiliation":[]},{"given":"Jinsong","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Chin-Hui","family":"Lee","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,2,8]]},"reference":[{"issue":"7","key":"1334_CR1","doi-asserted-by":"publisher","first-page":"988","DOI":"10.1109\/29.1620","volume":"36","author":"WJ Yang","year":"1988","unstructured":"Yang, W. J., Lee, J. C., Chang, Y. C., & Wang, H. C. (1988). Hidden markov model for mandarin lexical tone recognition. IEEE Transactions on Acoustics Speech & Signal Processing, 36(7), 988\u2013992.","journal-title":"IEEE Transactions on Acoustics Speech & Signal Processing"},{"doi-asserted-by":"crossref","unstructured":"Chang, P. C., Sun, S. W., & Chen, S. H. (1990). Mandarin tone recognition by multi-layer perceptron. IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP) (pp. 517-520). IEEE.","key":"1334_CR2","DOI":"10.1109\/ICASSP.1990.115763"},{"key":"1334_CR3","volume-title":"A grammar of spoken Chinese","author":"YR Chao","year":"1965","unstructured":"Chao, Y. R. (1965). A grammar of spoken Chinese. Berkeley: Univ of California Press."},{"issue":"3","key":"1334_CR4","doi-asserted-by":"publisher","first-page":"287","DOI":"10.1109\/89.232612","volume":"1","author":"LS Lee","year":"1993","unstructured":"Lee, L. S., Tseng, C. Y., & Hsieh, C. J. (1993). Improved tone concatenation rules in a formant-based Chinese text-to-speech system. IEEE Transactions on Speech and Audio Processing, 1(3), 287\u2013294.","journal-title":"IEEE Transactions on Speech and Audio Processing"},{"unstructured":"Shih, C. (1987). The phonetics of the Chinese tonal system. Bell Laboratories Technical Memorandum.","key":"1334_CR5"},{"issue":"3","key":"1334_CR6","doi-asserted-by":"publisher","first-page":"S70","DOI":"10.1121\/1.2004881","volume":"68","author":"N Umeda","year":"1980","unstructured":"Umeda, N. (1980). F0 declination is situation dependent. Journal of the Acoustical Society of America, 68(3), S70\u2013S70.","journal-title":"Journal of the Acoustical Society of America"},{"issue":"1","key":"1334_CR7","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1006\/jpho.1999.0086","volume":"27","author":"Y Xu","year":"1999","unstructured":"Xu, Y. (1999). Effects of tone and focus on the formation and alignment of F0 contours. Journal of Phonetics, 27(1), 55\u2013105.","journal-title":"Journal of Phonetics"},{"unstructured":"Wang, Y. B., & Lee, L. S. (2010). Mandarin tone recognition using affine-invariant prosodic features and tone posteriorgram. Medicine, 2850\u20132853.","key":"1334_CR8"},{"issue":"1","key":"1334_CR9","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1145\/595576.595581","volume":"1","author":"T Lee","year":"2002","unstructured":"Lee, T., Lau, W., Wong, Y. W., & Ching, P. C. (2002). Using tone information in Cantonese continuous speech recognition. ACM Transactions on Asian Language Information Processing, 1(1), 83\u2013102.","journal-title":"ACM Transactions on Asian Language Information Processing"},{"issue":"3","key":"1334_CR10","doi-asserted-by":"publisher","first-page":"447","DOI":"10.1016\/j.specom.2004.01.001","volume":"42","author":"J Zhang","year":"2004","unstructured":"Zhang, J., & Hirose, K. (2004). Tone nucleus modeling for Chinese lexical tone recognition. Speech Communication, 42(3), 447\u2013466.","journal-title":"Speech Communication"},{"key":"1334_CR11","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1023\/B:IJST.0000017013.70486.51","volume":"7","author":"G Peng","year":"2004","unstructured":"Peng, G., & Wang, S. Y. (2004). An innovative prosody modeling method for Chinese speech recognition. International Journal of Speech Technology, 7, 129\u2013140.","journal-title":"International Journal of Speech Technology"},{"issue":"5","key":"1334_CR12","doi-asserted-by":"publisher","first-page":"2936","DOI":"10.1121\/1.2717413","volume":"121","author":"Y Qian","year":"2007","unstructured":"Qian, Y., Lee, T., & Soong, F. K. (2007). Tone recognition in continuous Cantonese speech using supratone models. The Journal of the Acoustical Society of America, 121(5), 2936\u20132945.","journal-title":"The Journal of the Acoustical Society of America"},{"doi-asserted-by":"crossref","unstructured":"Ryant, N., Yuan, J., & Liberman, M. (2014). Mandarin tone classification without pitch tracking. IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (pp. 4868-4872). IEEE.","key":"1334_CR13","DOI":"10.1109\/ICASSP.2014.6854527"},{"doi-asserted-by":"crossref","unstructured":"Ryant, N., Slaney, M., Liberman, M., Shriberg, E., & Yuan, J. (2014). Highly accurate mandarin tone classification in the absence of pitch information. In SPEECHPROSODY 7 -- 7th International conference on Speech Prosody, May 20-23, Dublin, Ireland, Proceedings, 2014, pp. 673-677.","key":"1334_CR14","DOI":"10.21437\/SpeechProsody.2014-122"},{"unstructured":"Zhang, J. (1987). The intrinsic fundamental frequency of vowels and the effect of speech modes on formants. Acta Acustica (pp. 390-393).","key":"1334_CR15"},{"issue":"4","key":"1334_CR16","doi-asserted-by":"publisher","first-page":"419","DOI":"10.1121\/1.1908681","volume":"33","author":"I Lehiste","year":"1961","unstructured":"Lehiste, I., & Peterson, G. E. (1961). Some basic considerations in the analysis of intonation. The Journal of the Acoustical Society of America, 33(4), 419\u2013425.","journal-title":"The Journal of the Acoustical Society of America"},{"issue":"4","key":"1334_CR17","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1016\/B978-0-08-051584-7.50010-3","volume":"28","author":"S Davis","year":"1990","unstructured":"Davis, S., & Mermelstein, P. (1990). Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. Readings in Speech Recognition, 28(4), 65\u201374.","journal-title":"Readings in Speech Recognition"},{"issue":"4","key":"1334_CR18","doi-asserted-by":"publisher","first-page":"1004","DOI":"10.1121\/1.382083","volume":"64","author":"CK Chuang","year":"1978","unstructured":"Chuang, C. K., & Wang, W. S. (1978). Psychophysical pitch biases related to vowel quality, intensity difference, and sequential order. Journal of the Acoustical Society of America, 64(4), 1004\u20131014.","journal-title":"Journal of the Acoustical Society of America"},{"doi-asserted-by":"crossref","unstructured":"Cao, C., Xie, Y., Lin, J., Li, Q., & Zhang, J. (2016). The preliminary study of influence on tone perception from segments. The 10th international symposium on chinese spoken language processing.","key":"1334_CR19","DOI":"10.1109\/ISCSLP.2016.7918475"},{"doi-asserted-by":"crossref","unstructured":"Chao, H., Yang, Z., & Liu, W. (2012). Improved tone modeling by exploiting articulatory features for Mandarin speech recognition. IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (pp. 4741-4744). IEEE.","key":"1334_CR20","DOI":"10.1109\/ICASSP.2012.6288978"},{"doi-asserted-by":"crossref","unstructured":"Li, W., Siniscalchi, S. M., Chen, N. F., & Lee, C. H. (2016). Using tone-based extended recognition network to detect non-native Mandarin tone mispronunciations. In Signal and Information Processing Association Annual Summit and Conference (APSIPA), 2016 Asia-Pacific (pp. 1-4). IEEE.","key":"1334_CR21","DOI":"10.1109\/APSIPA.2016.7820701"},{"key":"1334_CR22","first-page":"153","volume-title":"Experimental phonetics summary [M]","author":"ZJ Wu","year":"1989","unstructured":"Wu, Z. J., & Lin, M. C. (1989). Experimental phonetics summary [M] (pp. 153\u2013191). Beijing: Higher Education Press."},{"key":"1334_CR23","volume-title":"Fundamentals of Chinese Man-Machine communication","author":"JL Zhang","year":"2010","unstructured":"Zhang, J. L. (2010). Fundamentals of Chinese Man-Machine communication. Shanghai: Shanghai Scientific & Technical Publishers."},{"issue":"3","key":"1334_CR24","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1016\/S0167-6393(01)00020-6","volume":"37","author":"K Kirchhoff","year":"2002","unstructured":"Kirchhoff, K., Fink, G. A., & Sagerer, G. (2002). Combining acoustic and articulatory feature information for robust speech recognition. Speech Communication, 37(3), 303\u2013319.","journal-title":"Speech Communication"},{"doi-asserted-by":"crossref","unstructured":"Li, W., Siniscalchi, S. M., Chen, N. F., & Lee, C. H. (2016). Improving non-native mispronunciation detection and enriching diagnostic feedback with DNN-based speech attribute modeling. IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (pp. 6135-6139). IEEE.","key":"1334_CR25","DOI":"10.1109\/ICASSP.2016.7472856"},{"key":"1334_CR26","volume-title":"Pattern classification","author":"RO Duda","year":"2012","unstructured":"Duda, R. O., Hart, P. E., & Stork, D. G. (2012). Pattern classification. New York: John Wiley & Sons."},{"doi-asserted-by":"crossref","unstructured":"Gopinath, R. A. (1998). Maximum likelihood modeling with Gaussian distributions for classification. IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (pp. 661-664). IEEE.","key":"1334_CR27","DOI":"10.1109\/ICASSP.1998.675351"},{"issue":"2","key":"1334_CR28","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1006\/csla.1998.0043","volume":"12","author":"MJ Gales","year":"1998","unstructured":"Gales, M. J. (1998). Maximum likelihood linear transformations for HMM-based speech recognition. Computer Speech & Language, 12(2), 75\u201398.","journal-title":"Computer Speech & Language"},{"unstructured":"Matsoukas, S., Schwartz, R., Jin, H., & Long, N. (1997). Practical implementations of speaker-adaptive training. Darpa Speech Recognition Workshop, 21(6), 12\u201313.","key":"1334_CR29"},{"doi-asserted-by":"crossref","unstructured":"Liu, C., Ge, F., Pan, F., Dong, B., & Yan, Y. (2009). A one-step tone recognition approach using MSD-HMM for continuous speech. INTERSPEECH, Conference of the International Speech Communication Association, Brighton, United Kingdom, September (pp.3015\u20133018). DBLP.","key":"1334_CR30","DOI":"10.21437\/Interspeech.2009-763"},{"issue":"6","key":"1334_CR31","doi-asserted-by":"publisher","first-page":"1748","DOI":"10.1093\/ietisy\/e91-d.6.1748","volume":"E91-D","author":"XD Wang","year":"2008","unstructured":"Wang, X. D., Hirose, K., Zhang, J. S., & Minematsu, N. (2008). Tone recognition of continuous mandarin speech based on tone nucleus model and neural network. Ieice Transactions on Information & Systems, E91-D(6), 1748\u20131755.","journal-title":"Ieice Transactions on Information & Systems"},{"issue":"3","key":"1334_CR32","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1145\/1386869.1386872","volume":"7","author":"JC Chen","year":"2008","unstructured":"Chen, J. C., & Jang, J. S. R. (2008). Trues: tone recognition using extended segments. ACM Transactions on Asian Language Information Processing (TALIP), 7(3), 10.","journal-title":"ACM Transactions on Asian Language Information Processing (TALIP)"},{"issue":"2","key":"1334_CR33","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/5.18626","volume":"77","author":"LR Rabiner","year":"1989","unstructured":"Rabiner, L. R. (1989). A tutorial on hidden Markov models and selected applications in speech recognition. Proceedings of the IEEE, 77(2), 257\u2013286.","journal-title":"Proceedings of the IEEE"},{"unstructured":"Xu, B., Zhang, H., Gao, S., Zhao, B., Li, C., & Huang, T. (2000). Update progress of Sinohear: Advanced Mandarin LVCSR System At NLPR. In Proc. ICSLP, vol 3, 798\u2013801.","key":"1334_CR34"},{"unstructured":"Nair, V., & Hinton, G. E. (2010). Rectified linear units improve restricted boltzmann machines. In Proceedings of the 27th International Conference on Machine Learning (ICML-10) (pp. 807-814).","key":"1334_CR35"},{"unstructured":"Chollet, F., (2015). Keras. GitHub repository, https:\/\/github.com\/fchollet\/keras .","key":"1334_CR36"},{"issue":"7","key":"1334_CR37","doi-asserted-by":"publisher","first-page":"1527","DOI":"10.1162\/neco.2006.18.7.1527","volume":"18","author":"GE Hinton","year":"2006","unstructured":"Hinton, G. E., Osindero, S., & Teh, Y. W. (2006). A fast learning algorithm for deep belief nets. Neural Computation, 18(7), 1527\u20131554.","journal-title":"Neural Computation"},{"issue":"3","key":"1334_CR38","first-page":"1","volume":"5","author":"DE Rumelhart","year":"1988","unstructured":"Rumelhart, D. E., Hinton, G. E., & Williams, R. J. (1988). Learning representations by back-propagating errors. Cognitive Modeling, 5(3), 1.","journal-title":"Cognitive Modeling"},{"unstructured":"Povey, D., Ghoshal, A., Boulianne, G., Burget, L., Glembek, O., Goel, N., \u2026 & Silovsky, J. (2011). The Kaldi speech recognition toolkit. In IEEE 2011 workshop on automatic speech recognition and understanding (No. EPFL-CONF-192584). IEEE Signal Processing Society.","key":"1334_CR39"},{"doi-asserted-by":"crossref","unstructured":"Chang, E., Zhou, J. L., Di, S., Huang, C., & Kai-FuLee. (2000). Large vocabulary Mandarin speech recognition with different approaches in modeling tones. Proc Icslp, 983\u2013986.","key":"1334_CR40","DOI":"10.21437\/ICSLP.2000-436"},{"doi-asserted-by":"crossref","unstructured":"Hu, W., Qian, Y., & Soong, F. K. (2014). A DNN-based acoustic modeling of tonal language and its application to Mandarin pronunciation training. In 2014 I.E. International Conference on Acoustics, Speech and Signal Processing (ICASSP) (pp. 3206-3210). IEEE.","key":"1334_CR41","DOI":"10.1109\/ICASSP.2014.6854192"},{"doi-asserted-by":"crossref","unstructured":"Tong, R., Chen, N. F., Ma, B., & Li, H. (2015). Goodness of Tone (GOT) for Non-native Mandarin tone recognition. In INTERSPEECH, Dresden Germany, pp. 801-805.","key":"1334_CR42","DOI":"10.21437\/Interspeech.2015-254"},{"unstructured":"Xin, L., Siu, M. H., Hwang, M. Y., Ostendorf, M., & Tan, L. (2006). Improved tone modeling for Mandarin broadcast news speech recognition. In INTERSPEECH, Icslp, Ninth International Conference on Spoken Language Processing, Pittsburgh, Pa, USA, September. DBLP.","key":"1334_CR43"},{"doi-asserted-by":"crossref","unstructured":"Ghahremani, P., BabaAli, B., Povey, D., Riedhammer, K., Trmal, J., & Khudanpur, S. (2014). A pitch extraction algorithm tuned for automatic speech recognition. In 2014 I.E. International Conference on Acoustics, Speech and Signal Processing (ICASSP) (pp. 2494-2498). IEEE.","key":"1334_CR44","DOI":"10.1109\/ICASSP.2014.6854049"},{"key":"1334_CR45","first-page":"518","volume":"495","author":"D Talkin","year":"1995","unstructured":"Talkin, D. (1995). A robust algorithm for pitch tracking (RAPT). Speech Coding and Synthesis, 495, 518.","journal-title":"Speech Coding and Synthesis"},{"doi-asserted-by":"crossref","unstructured":"Olsberg, M., Xu, Y., & Green, J. (2007). Dependence of tone perception on syllable perception. In INTERSPEECH, Conference of the International Speech Communication Association, Antwerp, Belgium, August (pp.2649-2652). DBLP.","key":"1334_CR46","DOI":"10.21437\/Interspeech.2007-696"},{"key":"1334_CR47","volume-title":"Experimental phonetics summary","author":"Z Wu","year":"1987","unstructured":"Wu, Z., & Lin, M. (1987). Experimental phonetics summary. Beijing: China Higher Education Press."},{"issue":"1","key":"1334_CR48","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1159\/000091406","volume":"63","author":"Y Chen","year":"2006","unstructured":"Chen, Y., & Xu, Y. (2006). Production of weak elements in speech \u2013 evidence from f0 patterns of neutral tone in standard chinese. Phonetica, 63(1), 47\u201375.","journal-title":"Phonetica"},{"key":"1334_CR49","doi-asserted-by":"publisher","first-page":"46","DOI":"10.1016\/j.specom.2016.07.005","volume":"84","author":"NF Chen","year":"2016","unstructured":"Chen, N. F., Wee, D., Tong, R., Ma, B., & Li, H. (2016). Large-scale characterization of non-native Mandarin Chinese spoken by speakers of European origin: analysis on iCALL. Speech Communication, 84, 46\u201356.","journal-title":"Speech Communication"}],"container-title":["Journal of Signal Processing Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11265-018-1334-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11265-018-1334-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11265-018-1334-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,14]],"date-time":"2022-08-14T02:47:22Z","timestamp":1660445242000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11265-018-1334-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,2,8]]},"references-count":49,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2018,7]]}},"alternative-id":["1334"],"URL":"https:\/\/doi.org\/10.1007\/s11265-018-1334-2","relation":{},"ISSN":["1939-8018","1939-8115"],"issn-type":[{"type":"print","value":"1939-8018"},{"type":"electronic","value":"1939-8115"}],"subject":[],"published":{"date-parts":[[2018,2,8]]},"assertion":[{"value":"9 February 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 September 2017","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 January 2018","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 February 2018","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}