{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T13:44:20Z","timestamp":1748699060937,"version":"3.40.3"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030616557"},{"type":"electronic","value":"9783030616564"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-61656-4_4","type":"book-chapter","created":{"date-parts":[[2020,11,4]],"date-time":"2020-11-04T16:03:54Z","timestamp":1604505834000},"page":"62-78","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Hybrid Deep Convolutional Neural Network with Multimodal Fusion"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9414-2477","authenticated-orcid":false,"given":"Olena","family":"Vynokurova","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4881-6933","authenticated-orcid":false,"given":"Dmytro","family":"Peleshko","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9315-1590","authenticated-orcid":false,"given":"Marta","family":"Peleshko","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,11,5]]},"reference":[{"issue":"B","key":"4_CR1","doi-asserted-by":"publisher","first-page":"252","DOI":"10.1016\/j.asoc.2013.05.020","volume":"14","author":"Y Bodyanskiy","year":"2014","unstructured":"Bodyanskiy, Y., Dolotov, A., Vynokurova, O.: Evolving spiking wavelet-neuro-fuzzy self-learning system. Appl. Soft Comput. J. 14(B), 252\u2013258 (2014). https:\/\/doi.org\/10.1016\/j.asoc.2013.05.020","journal-title":"Appl. Soft Comput. J."},{"key":"4_CR2","doi-asserted-by":"publisher","unstructured":"Bodyanskiy, Y., Setlak, G., Peleshko, D., Vynokurova, O.: Hybrid generalized additive neuro-fuzzy system and its adaptive learning algorithms. In: Proceedings of the 2015 IEEE 8th International Conference on Intelligent Data Acquisition and Advanced Computing Systems: Technology and Applications, IDAACS 2015, vol. 1, pp. 328\u2013333 (2015). https:\/\/doi.org\/10.1109\/IDAACS.2015.7340753","DOI":"10.1109\/IDAACS.2015.7340753"},{"key":"4_CR3","series-title":"Advances in Intelligent Systems and Computing","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1007\/978-3-319-39639-2_5","volume-title":"Dependability Engineering and Complex Systems","author":"Y Bodyanskiy","year":"2016","unstructured":"Bodyanskiy, Y., Vynokurova, O., Pliss, I., Peleshko, D., Rashkevych, Y.: Hybrid generalized additive wavelet-neuro-fuzzy-system and its adaptive learning. In: Zamojski, W., Mazurkiewicz, J., Sugier, J., Walkowiak, T., Kacprzyk, J. (eds.) Dependability Engineering and Complex Systems. AISC, vol. 470, pp. 51\u201361. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-39639-2_5"},{"issue":"3","key":"4_CR4","doi-asserted-by":"publisher","first-page":"207","DOI":"10.1109\/TAMD.2011.2106782","volume":"3","author":"C Castellini","year":"2011","unstructured":"Castellini, C., Tommasi, T., Noceti, N., Odone, F., Caputo, B.: Using object affordances to improve object recognition. IEEE Trans. Auton. Ment. Dev. 3(3), 207\u2013215 (2011). https:\/\/doi.org\/10.1109\/TAMD.2011.2106782","journal-title":"IEEE Trans. Auton. Ment. Dev."},{"key":"4_CR5","doi-asserted-by":"publisher","first-page":"17098","DOI":"10.3390\/s131217098","volume":"13","author":"ST Cheng","year":"2013","unstructured":"Cheng, S.T., Hsu, C.W., Li, J.P.: Combined hand gesture-speech model for human action recognition. Sensors 13, 17098\u201317129 (2013). https:\/\/doi.org\/10.3390\/s131217098","journal-title":"Sensors"},{"key":"4_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1007\/978-3-319-54184-6_6","volume-title":"Computer Vision \u2013 ACCV 2016","author":"JS Chung","year":"2017","unstructured":"Chung, J.S., Zisserman, A.: Lip reading in the wild. In: Lai, S.-H., Lepetit, V., Nishino, K., Sato, Y. (eds.) ACCV 2016. LNCS, vol. 10112, pp. 87\u2013103. Springer, Cham (2017). https:\/\/doi.org\/10.1007\/978-3-319-54184-6_6. http:\/\/www.robots.ox.ac.uk\/~vgg\/data\/lipreading\/lrw1.html"},{"key":"4_CR7","unstructured":"Dibia, V.: Handtrack: A library for prototyping real-time hand tracking interfaces using convolutional neural networks (2017). https:\/\/github.com\/victordibia\/handtracking"},{"key":"4_CR8","doi-asserted-by":"publisher","unstructured":"Ding, R., Pang, C., Liu, H.: Audio-visual keyword spotting based on multidimensional convolution neural network. In: Proceedings of the 2018 IEEE International Conference on Image Processing, Athens, Greece, pp. 4138\u20134142 (2018). https:\/\/doi.org\/10.1109\/ICIP.2018.8451096","DOI":"10.1109\/ICIP.2018.8451096"},{"key":"4_CR9","unstructured":"Favorskaya, M., Nosov, A., Popov, A.: Localization and recognition of dynamic hand gesture based on hierarchy of manifold classifiers. Int. Arch. Photogram. Remote Sens. Spat. Inf. Sci. XL-5\/W6, 151\u2013161 (2015). https:\/\/www.int-arch-photogramm-remote-sens-spatial-inf-sci.net\/XL-5-W6\/1\/2015\/isprsarchives-XL-5-W6-1-2015.pdf"},{"key":"4_CR10","unstructured":"Feichtenhofer, C., Pinz, A., Zisserman, A.: Convolutional two-stream network fusion for video action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1933\u20131941 (2016). arXiv:1604.06573"},{"key":"4_CR11","volume-title":"The Theory of Affordances","author":"J Gibson","year":"1977","unstructured":"Gibson, J.: The Theory of Affordances. Erlbaum, Hillsdale (1977)"},{"key":"4_CR12","volume-title":"The Ecological Approach to Visual Perception","author":"J Gibson","year":"1986","unstructured":"Gibson, J.: The Ecological Approach to Visual Perception. Erlbaum, Hillsdale (1986)"},{"key":"4_CR13","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1016\/j.neucom.2018.08.042","volume":"318","author":"Z Hu","year":"2018","unstructured":"Hu, Z., Youmin, H., Liu, J., Wu, B., Han, D., Kurfess, T.: 3D separable convolutional neural network for dynamic hand gesture recognition. Neurocomputing 318, 151\u2013161 (2018). https:\/\/doi.org\/10.1016\/j.neucom.2018.08.042","journal-title":"Neurocomputing"},{"key":"4_CR14","unstructured":"Jackson, L.: Motion-detection-python (2017). https:\/\/github.com\/ic0n\/Motion-Detection-Python"},{"key":"4_CR15","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1007\/978-1-4615-0913-4_11","volume-title":"Video-Based Surveillance Systems","author":"P KaewTraKulPong","year":"2001","unstructured":"KaewTraKulPong, P., Bowden, R.: An improved adaptive background mixture model for real-time tracking with shadow detection. In: Remagnino, P., Jones, G.A., Paragios, N., Regazzoni, C.S. (eds.) Video-Based Surveillance Systems, pp. 135\u2013144. Springer, Boston (2001). https:\/\/doi.org\/10.1007\/978-1-4615-0913-4_11. http:\/\/personal.ee.surrey.ac.uk\/Personal\/R.Bowden\/publications\/avbs01\/avbs01.pdf"},{"key":"4_CR16","unstructured":"Kaiming, H., Xiangyu, Z., Shaoqing, R., Sun, J.: Delving deep into rectifiers: surpassing human-level performance on ImageNet classification. In: Computer Vision and Pattern Recognition (2015). arXiv:1502.01852"},{"key":"4_CR17","doi-asserted-by":"publisher","unstructured":"Kampman, O., Barezi, E., Bertero, D., Fung, P.: Investigating audio, video, and text fusion methods for end-to-end automatic personality prediction. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics, Melbourne, Australia, pp. 606\u2013611. Springer (2018). https:\/\/doi.org\/10.1007\/978-981-10-7560-5118","DOI":"10.1007\/978-981-10-7560-5118"},{"key":"4_CR18","unstructured":"Lyons, R.: FFT interpolation based on FFT samples: a detective story with a surprise ending (2018). https:\/\/www.dsprelated.com\/showarticle\/1156.php"},{"issue":"1","key":"4_CR19","first-page":"38","volume":"79","author":"S Majeed","year":"2015","unstructured":"Majeed, S., Husain, H., Samad, S., Idbeaa, T.: Mel frequency cepstral coefficients (MFCC) feature extraction enhancement in the application of speech recognition: a comparison study. J. Theoret. Appl. Inf. Technol. 79(1), 38\u201356 (2015)","journal-title":"J. Theoret. Appl. Inf. Technol."},{"key":"4_CR20","unstructured":"Nguyen, A.: Scene understanding for autonomous manipulation with deep learning (2019). arXiv:1903.09761"},{"key":"4_CR21","unstructured":"RMSProp (2020). http:\/\/ruder.io\/optimizing-gradient-descent\/index.html#rmsprop"},{"key":"4_CR22","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1007\/s10044-008-0121-2","volume":"12","author":"E Rua","year":"2009","unstructured":"Rua, E., Bredin, H., Mateo, C., Chollet, A., Jimenez, D.: Audio-visual speech asynchrony detection using co-inertia analysis and coupled hidden Markov models. Pattern Anal. Appl. 12, 271\u2013284 (2009). https:\/\/doi.org\/10.1007\/s10044-008-0121-2","journal-title":"Pattern Anal. Appl."},{"key":"4_CR23","doi-asserted-by":"publisher","first-page":"1396","DOI":"10.1109\/TMM.2007.906583","volume":"9","author":"M Sargin","year":"2007","unstructured":"Sargin, M., Yemez, Y., Erzin, E., Tekalp, A.: Audiovisual synchronization and fusion using canonical correlation analysis. IEEE Trans. Multimedia 9, 1396\u20131403 (2007). https:\/\/doi.org\/10.1109\/TMM.2007.906583","journal-title":"IEEE Trans. Multimedia"},{"key":"4_CR24","unstructured":"Simonyan, K., Zisserman, A.: Two-stream convolutional networks for action recognition in videos. In: Advances in Neural Information Processing Systems, vol. 1, pp. 568\u2013576 (2014). arXiv:1406.21998"},{"key":"4_CR25","doi-asserted-by":"crossref","unstructured":"Stafylakis, T., Tzimiropoulos, G.: Combining residual networks with LSTMs for lipreading. In: INTERSPEECH, pp. 3652\u20133656 (2017)","DOI":"10.21437\/Interspeech.2017-85"},{"key":"4_CR26","doi-asserted-by":"publisher","unstructured":"Stauffer, C., Grimson, W.: Adaptive background mixture models for real-time tracking. In: Proceedings of IEEE Computer Society Conference on Computer Vision and Pattern Recognition, pp. 246\u2013252 (1999). https:\/\/doi.org\/10.1109\/CVPR.1999.784637","DOI":"10.1109\/CVPR.1999.784637"},{"key":"4_CR27","doi-asserted-by":"publisher","first-page":"1396","DOI":"10.1109\/ACCESS.2017.2761539","volume":"PP","author":"A Torfi","year":"2017","unstructured":"Torfi, A., Iranmanesh, S., Nasrabadi, N., Dawson, J.: 3D convolutional neural networks for cross audio-visual matching recognition. Comput. Vis. Pattern Recogn. PP, 1396\u20131403 (2017). https:\/\/doi.org\/10.1109\/ACCESS.2017.2761539","journal-title":"Comput. Vis. Pattern Recogn."},{"key":"4_CR28","doi-asserted-by":"publisher","first-page":"76","DOI":"10.1016\/j.neucom.2016.12.088","volume":"268","author":"E Tsironi","year":"2017","unstructured":"Tsironi, E., Barros, P., Weber, C., Wermter, S.: An analysis of convolutional long short-term memory recurrent neural networks for gesture recognition. Neurocomputing 268, 76\u201386 (2017). https:\/\/doi.org\/10.1016\/j.neucom.2016.12.088","journal-title":"Neurocomputing"},{"issue":"8","key":"4_CR29","doi-asserted-by":"publisher","first-page":"1301","DOI":"10.1109\/JSTSP.2017.2764438","volume":"11","author":"P Tzirakis","year":"2017","unstructured":"Tzirakis, P., Trigeorgis, G., Nicolaou, M.A., Schuller, B., Zafeiriou, S.: End-to-end multimodal emotion recognition using deep neural networks. IEEE J. Sel. Top. Signal Process. 11(8), 1301\u20131309 (2017). https:\/\/doi.org\/10.1109\/JSTSP.2017.2764438","journal-title":"IEEE J. Sel. Top. Signal Process."},{"issue":"2","key":"4_CR30","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1109\/CVPR.1999.784637","volume":"57","author":"P Viola","year":"2004","unstructured":"Viola, P., Jones, M.: Robust real-time face detection. Int. J. Comput. Vis. 57(2), 137\u2013154 (2004). https:\/\/doi.org\/10.1109\/CVPR.1999.784637","journal-title":"Int. J. Comput. Vis."},{"issue":"3","key":"4_CR31","doi-asserted-by":"publisher","first-page":"326","DOI":"10.1109\/TMM.2016.2520091","volume":"18","author":"P Wu","year":"2016","unstructured":"Wu, P., Liu, H., Li, X., Fan, T., Zhang, X.: A novel lip descriptor for audio-visual keyword spotting based on adaptive decision fusion. IEEE Trans. Multimedia 18(3), 326\u2013338 (2016). https:\/\/doi.org\/10.1109\/TMM.2016.2520091","journal-title":"IEEE Trans. Multimedia"},{"key":"4_CR32","doi-asserted-by":"publisher","unstructured":"Yue-Hei Ng, J., Hausknecht, M., Vijayanarasimhan, S., et al.: Beyond short snippets: deep networks for video classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4694\u20134702 (2015). https:\/\/doi.org\/10.1109\/CVPR.2015.7299101","DOI":"10.1109\/CVPR.2015.7299101"}],"container-title":["Communications in Computer and Information Science","Data Stream Mining &amp; Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-61656-4_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,24]],"date-time":"2021-04-24T09:09:14Z","timestamp":1619255354000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-61656-4_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030616557","9783030616564"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-61656-4_4","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"5 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DSMP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Data Stream Mining and Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ukraine","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dsmp2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/dsmp.in.ua\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"easyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"134","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"36","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}