{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T00:51:28Z","timestamp":1740099088689,"version":"3.37.3"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319773827"},{"type":"electronic","value":"9783319773834"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-77383-4_54","type":"book-chapter","created":{"date-parts":[[2018,5,9]],"date-time":"2018-05-09T17:02:26Z","timestamp":1525885346000},"page":"550-559","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["A Combined Feature Approach for Speaker Segmentation Using Convolution Neural Network"],"prefix":"10.1007","author":[{"given":"Jiang","family":"Zhong","sequence":"first","affiliation":[]},{"given":"Pan","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Xue","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,5,10]]},"reference":[{"key":"54_CR1","doi-asserted-by":"crossref","unstructured":"Bonastre, J.F., Delacourt, P., Fredouille, C., Merlin, T., Wellekens, C.: A speaker tracking system based on speaker turn detection for NIST evaluation. In: Proceedings of 2000 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2000, vol. 2, pp. 1177\u20131180 (2000)","DOI":"10.1109\/ICASSP.2000.859175"},{"issue":"5","key":"54_CR2","doi-asserted-by":"publisher","first-page":"1505","DOI":"10.1109\/TASL.2006.878261","volume":"14","author":"C Barras","year":"2006","unstructured":"Barras, C., Zhu, X., Meignier, S., et al.: Multistage speaker diarization of broadcast news. IEEE Trans. Audio Speech Lang. Process. 14(5), 1505\u20131512 (2006)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"issue":"5","key":"54_CR3","doi-asserted-by":"publisher","first-page":"1557","DOI":"10.1109\/TASL.2006.878256","volume":"14","author":"SE Tranter","year":"2006","unstructured":"Tranter, S.E., Reynolds, D.A.: An overview of automatic speaker diarization systems. IEEE Trans. Audio Speech Lang. Process. 14(5), 1557\u20131565 (2006)","journal-title":"IEEE Trans. Audio Speech Lang. Process."},{"key":"54_CR4","doi-asserted-by":"crossref","unstructured":"Saeidi, R., Mohammadi, H.S., Rodman, R.D., Kinnunen, T.: A new segmentation algorithm combined with transient frames power for text independent speaker verification. In: IEEE International Conference on Acoustics, Speech and Signal, ICASSP 2007, vol. 4, p. 305 (2007)","DOI":"10.1109\/ICASSP.2007.366910"},{"key":"54_CR5","unstructured":"Chen, S., Gopalakrishnan, P. S.: Speaker, environment and channel change detection and clustering via the Bayesian information criterion. In: Proceedings of DARPA Broadcast News Transcription and Understanding Workshop, vol. 8, pp. 127\u2013132 (1998)"},{"issue":"1","key":"54_CR6","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1016\/S0167-6393(00)00027-3","volume":"32","author":"P Delacourt","year":"2000","unstructured":"Delacourt, P., Wellekens, C.: DISTBIC: A speaker-based segmentation for audio data indexing. Speech Commun. 32(1), 111\u2013126 (2000)","journal-title":"Speech Commun."},{"key":"54_CR7","unstructured":"Bakis, R., Chen, S., Gopalakrishnan, P., Gopinath, R., Maes, S., Polymenakos, L., Franz, M.: Transcription of broadcast news shows with the IBM large vocabulary speech recognition system. In: Proceedings of DARPA Speech Recognition Workshop, VA, pp. 67\u201372 (1997)"},{"issue":"2","key":"54_CR8","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1016\/j.csl.2004.05.008","volume":"19","author":"M Cettolo","year":"2005","unstructured":"Cettolo, M., Vescovi, M., Rizzi, R.: Evaluation of BIC-based algorithms for audio segmentation. J. Comput. Speech & Lang. 19(2), 147\u2013170 (2005)","journal-title":"J. Comput. Speech & Lang."},{"key":"54_CR9","unstructured":"Siegler, M.A., Jain, U., Raj, B., Stern, R.M.: Automatic segmentation, classification and clustering of broadcast news audio. In: Proceedings of DARPA Speech Recognition Workshop, VA, pp. 97\u201399 (1997)"},{"key":"54_CR10","doi-asserted-by":"crossref","unstructured":"Gish, H., Siu, M.H., Rohlicek, R.: Segregation of speakers for speech recognition and speaker identification. In: 1991 International Conference on Acoustics, Speech, and Signal Processing, ICASSP 1991, pp. 873\u2013876 (1991)","DOI":"10.1109\/ICASSP.1991.150477"},{"key":"54_CR11","unstructured":"Jin, H., Kubala, F., Schwartz, R.: Automatic speaker clustering. In: Proceedings of the DARPA Speech Recognition Workshop, pp. 108\u2013111 (1997)"},{"issue":"5","key":"54_CR12","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1145\/2629500","volume":"33","author":"J Tompson","year":"2014","unstructured":"Tompson, J., Stein, M., Lecun, Y., Perlin, K.: Real-time continuous pose recovery of human hands using convolutional networks. ACM Trans. Graph. 33(5), 169 (2014)","journal-title":"ACM Trans. Graph."},{"key":"54_CR13","doi-asserted-by":"crossref","unstructured":"Sell, G., Garcia-Romero, D., McCree, A.: Speaker diarization with I-Vectors from DNN senone posteriors. In: Proceedings of Interspeech, pp. 3096\u20133099 (2015)","DOI":"10.21437\/Interspeech.2015-109"},{"key":"54_CR14","doi-asserted-by":"crossref","unstructured":"Abdel-Hamid, O., Mohamed, A.R., Jiang, H., Penn, G.: Applying convolutional neural networks concepts to hybrid NN-HMM model for speech recognition. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 4277\u20134280 (2012)","DOI":"10.1109\/ICASSP.2012.6288864"},{"key":"54_CR15","volume-title":"Discrete-time Speech Signal Processing: Principles and Practice","author":"TF Quatieri","year":"2006","unstructured":"Quatieri, T.F.: Discrete-time Speech Signal Processing: Principles and Practice. Pearson Education, Delhi, India (2006)"},{"issue":"S1","key":"54_CR16","doi-asserted-by":"publisher","first-page":"S81","DOI":"10.1121\/1.2017457","volume":"65","author":"RA Cole","year":"1979","unstructured":"Cole, R.A., Rudnicky, A.I., Zue, V.M.: Performance of an expert spectrogram reader. J. Acoust. Soc. Am. 65(S1), S81\u2013S81 (1979)","journal-title":"J. Acoust. Soc. Am."},{"key":"54_CR17","doi-asserted-by":"crossref","unstructured":"Zue, V., Lamel, L.: An expert spectrogram reader: A knowledge-based approach to speech recognition. In: IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 1986, vol. 11, pp. 1197\u20131200 (1986)","DOI":"10.1109\/ICASSP.1986.1168798"},{"issue":"4","key":"54_CR18","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1109\/TASSP.1980.1163420","volume":"28","author":"S Davis","year":"1980","unstructured":"Davis, S., Mermelstein, P.: Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Trans. Acoust. Speech Sig. Process. 28(4), 357\u2013366 (1980)","journal-title":"IEEE Trans. Acoust. Speech Sig. Process."},{"issue":"3","key":"54_CR19","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1121\/1.1915893","volume":"8","author":"SS Stevens","year":"1937","unstructured":"Stevens, S.S., Volkmann, J., Newman, E.B.: A scale for the measurement of the psychological magnitude pitch. J. Acoust. Soc. Am. 8(3), 185\u2013190 (1937)","journal-title":"J. Acoust. Soc. Am."},{"key":"54_CR20","volume-title":"Discrete Time Processing of Speech Signals","author":"JR Deller Jr","year":"2000","unstructured":"Deller Jr., J.R., Proakis, J.G., Hansen, J.H.: Discrete Time Processing of Speech Signals, 2nd edn. IEEE Press, New York (2000)","edition":"2"},{"key":"54_CR21","unstructured":"Speer, S. R., Warren, P., Schafer, A.: Intonation and sentence processing. In: Proceedings of the 15th International Congress of Phonetic Sciences, pp. 95\u2013105 (2003)"},{"issue":"5","key":"54_CR22","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1109\/79.536825","volume":"13","author":"RJ Mammone","year":"1996","unstructured":"Mammone, R.J., Zhang, X., Ramachandran, R.P.: Robust speaker recognition: a feature-based approach. IEEE Sig. Process. Mag. 13(5), 58\u201371 (1996)","journal-title":"IEEE Sig. Process. Mag."},{"issue":"4","key":"54_CR23","doi-asserted-by":"publisher","first-page":"639","DOI":"10.1109\/89.326623","volume":"2","author":"DA Reynolds","year":"1994","unstructured":"Reynolds, D.A.: Experimental evaluation of features for robust speaker identification. IEEE Trans. Speech Audio Process. 2(4), 639\u2013643 (1994)","journal-title":"IEEE Trans. Speech Audio Process."},{"issue":"8","key":"54_CR24","doi-asserted-by":"publisher","first-page":"649","DOI":"10.1109\/LSP.2004.831666","volume":"11","author":"J Ajmera","year":"2004","unstructured":"Ajmera, J., McCowan, I., Bourlard, H.: Robust speaker change detection. IEEE Sig. Process. Lett. 11(8), 649\u2013651 (2004)","journal-title":"IEEE Sig. Process. Lett."},{"key":"54_CR25","unstructured":"Kadri, H., Davy, M., Rabaoui, A., Lachiri, Z., Ellouze, N.: Robust audio speaker segmentation using one class SVMs. In: 2008 16th European Conference on Signal Processing, pp. 1\u20135 (2008)"}],"container-title":["Lecture Notes in Computer Science","Advances in Multimedia Information Processing \u2013 PCM 2017"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-77383-4_54","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,23]],"date-time":"2022-08-23T00:31:50Z","timestamp":1661214710000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-77383-4_54"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319773827","9783319773834"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-77383-4_54","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]}}}