{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T00:49:36Z","timestamp":1740098976197,"version":"3.37.3"},"publisher-location":"Cham","reference-count":28,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319694559"},{"type":"electronic","value":"9783319694566"}],"license":[{"start":{"date-parts":[[2017,1,1]],"date-time":"2017-01-01T00:00:00Z","timestamp":1483228800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017]]},"DOI":"10.1007\/978-3-319-69456-6_13","type":"book-chapter","created":{"date-parts":[[2017,10,18]],"date-time":"2017-10-18T10:04:34Z","timestamp":1508321074000},"page":"150-161","source":"Crossref","is-referenced-by-count":0,"title":["Visual-Only Word Boundary Detection"],"prefix":"10.1007","author":[{"given":"Muhammad Rizki Aulia Rahman","family":"Maulana","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Retno","family":"Larasati","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mohamad Ivan","family":"Fanany","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2017,10,19]]},"reference":[{"issue":"2","key":"13_CR1","doi-asserted-by":"crossref","first-page":"297","DOI":"10.1002\/j.1538-7305.1975.tb02840.x","volume":"54","author":"LR Rabiner","year":"1975","unstructured":"Rabiner, L.R., Sambur, M.R.: An algorithm for determining the endpoints of isolated utterances. Bell Labs Tech. J. 54(2), 297\u2013315 (1975)","journal-title":"Bell Labs Tech. J."},{"key":"13_CR2","unstructured":"Junqua, J.-C.: Robustness and cooperative multimodal man-machine communication applications. In: Second VENACO Workshop the Structure of Multimodal Dialogue (1991)"},{"key":"13_CR3","doi-asserted-by":"crossref","unstructured":"Ying, G., Mitchell, C., Jamieson, L.: Endpoint detection of isolated utterances based on a modified teager energy measurement. In: 1993 IEEE International Conference on Acoustics, Speech, and Signal Processing ICASSP-1993, vol. 2, pp. 732\u2013735. IEEE (1993)","DOI":"10.1109\/ICASSP.1993.319416"},{"issue":"3","key":"13_CR4","doi-asserted-by":"crossref","first-page":"406","DOI":"10.1109\/89.294354","volume":"2","author":"J-C Junqua","year":"1994","unstructured":"Junqua, J.-C., Mak, B., Reaves, B.: A robust algorithm for word boundary detection in the presence of noise. IEEE Trans. Speech Audio Process. 2(3), 406\u2013412 (1994)","journal-title":"IEEE Trans. Speech Audio Process."},{"issue":"5","key":"13_CR5","doi-asserted-by":"crossref","first-page":"541","DOI":"10.1109\/89.861373","volume":"8","author":"G-D Wu","year":"2000","unstructured":"Wu, G.-D., Lin, C.-T.: Word boundary detection with mel-scale frequency bank in noisy environment. IEEE Trans. Speech Audio Process. 8(5), 541\u2013554 (2000)","journal-title":"IEEE Trans. Speech Audio Process."},{"issue":"1","key":"13_CR6","doi-asserted-by":"crossref","first-page":"84","DOI":"10.1109\/3477.907566","volume":"31","author":"G-D Wu","year":"2001","unstructured":"Wu, G.-D., Lin, C.-T.: A recurrent neural fuzzy network for word boundary detection in variable noise-level environments. IEEE Trans. Syst. Man Cybern. Part B (Cybern.) 31(1), 84\u201397 (2001)","journal-title":"IEEE Trans. Syst. Man Cybern. Part B (Cybern.)"},{"key":"13_CR7","unstructured":"Tan, C.K.-Y., Kim-Teng, L.: Learning of word boundaries in continuous speech using time delay neural networks (2003). http:\/\/bit.ly\/2xjbHvq"},{"issue":"9","key":"13_CR8","doi-asserted-by":"crossref","first-page":"1306","DOI":"10.1109\/JPROC.2003.817150","volume":"91","author":"G Potamianos","year":"2003","unstructured":"Potamianos, G., Neti, C., Gravier, G., Garg, A., Senior, A.W.: Recent advances in the automatic recognition of audiovisual speech. Proc. IEEE 91(9), 1306\u20131326 (2003)","journal-title":"Proc. IEEE"},{"key":"13_CR9","unstructured":"Assael, Y.M., Shillingford, B., Whiteson, S., de Freitas, N.: Lipnet: sentence-level lipreading. arXiv preprint arXiv:1611.01599 (2016)"},{"key":"13_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1007\/978-3-319-54184-6_6","volume-title":"Computer Vision \u2013 ACCV 2016","author":"JS Chung","year":"2017","unstructured":"Chung, J.S., Zisserman, A.: Lip reading in the wild. In: Lai, S.-H., Lepetit, V., Nishino, K., Sato, Y. (eds.) ACCV 2016. LNCS, vol. 10112, pp. 87\u2013103. Springer, Cham (2017). doi: 10.1007\/978-3-319-54184-6_6"},{"key":"13_CR11","doi-asserted-by":"crossref","unstructured":"Gergen, S., Zeiler, S., Abdelaziz, A.H., Nickel, R.M., Kolossa, D.: Dynamic stream weighting for turbo-decoding-based audiovisual ASR. In: INTERSPEECH, pp. 2135\u20132139 (2016)","DOI":"10.21437\/Interspeech.2016-166"},{"issue":"5","key":"13_CR12","doi-asserted-by":"crossref","first-page":"2421","DOI":"10.1121\/1.2229005","volume":"120","author":"M Cooke","year":"2006","unstructured":"Cooke, M., Barker, J., Cunningham, S., Shao, X.: An audio-visual corpus for speech perception and automatic speech recognition. J. Acoust. Soc. Am. 120(5), 2421\u20132424 (2006)","journal-title":"J. Acoust. Soc. Am."},{"key":"13_CR13","first-page":"1","volume":"2","author":"L Gu","year":"2002","unstructured":"Gu, L., Zahorian, S.A.: A new robust algorithm for isolated word endpoint detection. Energy 2, 1 (2002)","journal-title":"Energy"},{"key":"13_CR14","unstructured":"Garg, A., Noyola, J., Bagadia, S.: Lip reading using CNN and LSTM. Technical report (2016)"},{"issue":"2","key":"13_CR15","doi-asserted-by":"crossref","first-page":"157","DOI":"10.1109\/72.279181","volume":"5","author":"Y Bengio","year":"1994","unstructured":"Bengio, Y., Simard, P., Frasconi, P.: Learning long-term dependencies with gradient descent is difficult. IEEE Trans. Neural Netw. 5(2), 157\u2013166 (1994)","journal-title":"IEEE Trans. Neural Netw."},{"key":"13_CR16","doi-asserted-by":"crossref","unstructured":"Graves, A., Mohamed, A.-R., Hinton, G.: Speech recognition with deep recurrent neural networks. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 6645\u20136649. IEEE (2013)","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"13_CR17","doi-asserted-by":"crossref","unstructured":"Irsoy, O., Cardie, C.: Opinion mining with deep recurrent neural networks (2014)","DOI":"10.3115\/v1\/D14-1080"},{"issue":"11","key":"13_CR18","doi-asserted-by":"crossref","first-page":"2673","DOI":"10.1109\/78.650093","volume":"45","author":"M Schuster","year":"1997","unstructured":"Schuster, M., Paliwal, K.K.: Bidirectional recurrent neural networks. IEEE Trans. Signal Process. 45(11), 2673\u20132681 (1997)","journal-title":"IEEE Trans. Signal Process."},{"issue":"1","key":"13_CR19","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"S Ji","year":"2013","unstructured":"Ji, S., Xu, W., Yang, M., Yu, K.: 3D convolutional neural networks for human action recognition. IEEE Trans. Pattern Anal. Mach. Intell. 35(1), 221\u2013231 (2013)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"13_CR20","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3D convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4489\u20134497 (2015)","DOI":"10.1109\/ICCV.2015.510"},{"key":"13_CR21","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift. In: International Conference on Machine Learning, pp. 448\u2013456 (2015)"},{"key":"13_CR22","unstructured":"Nair, V., Hinton, G.E.: Rectified linear units improve restricted boltzmann machines. In: Proceedings of the 27th International Conference on Machine Learning (ICML-2010), pp. 807\u2013814 (2010)"},{"key":"13_CR23","doi-asserted-by":"crossref","unstructured":"Boureau, Y.-L., Le Roux, N., Bach, F., Ponce, J., LeCun, Y.: Ask the locals: multi-way local pooling for image recognition. In: 2011 IEEE International Conference on Computer Vision (ICCV), pp. 2651\u20132658. IEEE (2011)","DOI":"10.1109\/ICCV.2011.6126555"},{"key":"13_CR24","doi-asserted-by":"crossref","unstructured":"Kazemi, V., Sullivan, J.: One millisecond face alignment with an ensemble of regression trees. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1867\u20131874 (2014)","DOI":"10.1109\/CVPR.2014.241"},{"key":"13_CR25","unstructured":"dlib.net\/face_landmark_detection.py.html. http:\/\/dlib.net\/face_landmark_detection.py.html"},{"key":"13_CR26","unstructured":"eddersko\/wordboundary. https:\/\/github.com\/eddersko\/WordBoundary"},{"issue":"3\u20134","key":"13_CR27","doi-asserted-by":"crossref","first-page":"299","DOI":"10.1016\/0304-3800(89)90035-5","volume":"44","author":"D Wallach","year":"1989","unstructured":"Wallach, D., Goffinet, B.: Mean squared error of prediction as a criterion for evaluating and comparing system models. Ecol. Model. 44(3\u20134), 299\u2013306 (1989)","journal-title":"Ecol. Model."},{"issue":"2","key":"13_CR28","doi-asserted-by":"crossref","first-page":"111","DOI":"10.1007\/s10651-007-0043-y","volume":"15","author":"RG Pontius","year":"2008","unstructured":"Pontius, R.G., Thontteh, O., Chen, H.: Components of information for multiple resolution comparison between maps that share a real variable. Environmental and Ecological Statistics 15(2), 111\u2013142 (2008)","journal-title":"Environmental and Ecological Statistics"}],"container-title":["Lecture Notes in Computer Science","Multi-disciplinary Trends in Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-69456-6_13","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,10,4]],"date-time":"2019-10-04T16:58:22Z","timestamp":1570208302000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-69456-6_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017]]},"ISBN":["9783319694559","9783319694566"],"references-count":28,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-69456-6_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2017]]}}}