{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:33:48Z","timestamp":1750221228588,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","license":[{"start":{"date-parts":[[2018,10,2]],"date-time":"2018-10-02T00:00:00Z","timestamp":1538438400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100010663","name":"H2020 European Research Council","doi-asserted-by":"publisher","award":["338164"],"award-info":[{"award-number":["338164"]}],"id":[{"id":"10.13039\/100010663","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2018,10,2]]},"DOI":"10.1145\/3242969.3243683","type":"proceedings-article","created":{"date-parts":[[2018,10,2]],"date-time":"2018-10-02T12:09:29Z","timestamp":1538482169000},"page":"574-578","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Deep End-to-End Representation Learning for Food Type Recognition from Speech"],"prefix":"10.1145","author":[{"given":"Benjamin","family":"Sertolli","sequence":"first","affiliation":[{"name":"University of Augsburg, Augsburg, Germany"}]},{"given":"Nicholas","family":"Cummins","sequence":"additional","affiliation":[{"name":"University of Augsburg, Augsburg, Germany"}]},{"given":"Abdulkadir","family":"Sengur","sequence":"additional","affiliation":[{"name":"Firat University, Elazig, Turkey"}]},{"given":"Bjoern W.","family":"Schuller","sequence":"additional","affiliation":[{"name":"Imperial College London, London, United Kingdom"}]}],"member":"320","published-online":{"date-parts":[[2018,10,2]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3136755.3136760"},{"volume-title":"2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE","author":"Aldeneh Z.","key":"e_1_3_2_1_2_1"},{"volume-title":"2017 IEEE International Conference on Pervasive Computing and Communications Workshops (PerCom Workshops). IEEE","author":"Alharbi R.","key":"e_1_3_2_1_3_1"},{"volume-title":"Proceedings of INTERSPEECH 2017, 18th Annual Conference of the International Speech Communication Association. ISCA","author":"Amiriparian S.","key":"e_1_3_2_1_4_1"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3130902"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/72.279181"},{"volume-title":"2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE","author":"Brueckner R.","key":"e_1_3_2_1_7_1"},{"key":"e_1_3_2_1_8_1","unstructured":"R. Collobert C. Puhrsch and G. Synnaeve. 2016. Wav2Letter: an End-to-End ConvNet-based Speech Recognition System. CoRR abs\/1609.03193 (2016).  R. Collobert C. Puhrsch and G. Synnaeve. 2016. Wav2Letter: an End-to-End ConvNet-based Speech Recognition System. CoRR abs\/1609.03193 (2016)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123371"},{"key":"e_1_3_2_1_10_1","unstructured":"J.-B. Delbrouck and S. Dupont. 2017. Multimodal Compact Bilinear Pooling for Multimodal Neural Machine Translation. CoRR abs\/1703.08084 (2017).  J.-B. Delbrouck and S. Dupont. 2017. Multimodal Compact Bilinear Pooling for Multimodal Neural Machine Translation. CoRR abs\/1703.08084 (2017)."},{"key":"e_1_3_2_1_11_1","unstructured":"R. Dobbs C. Sawers F. Thompson J. Manyika J. R. Woetzel P. Child S. McKenna and A. Spatharou. 2014. Overcoming obesity: an initial economic analysis. https:\/\/goo.gl\/6R7kz2. Accessed: 31-05--2018.  R. Dobbs C. Sawers F. Thompson J. Manyika J. R. Woetzel P. Child S. McKenna and A. Spatharou. 2014. Overcoming obesity: an initial economic analysis. https:\/\/goo.gl\/6R7kz2. Accessed: 31-05--2018."},{"volume-title":"Proceedings of INTERSPEECH 2017, 18th Annual Conference of the International Speech Communication Association. ISCA","author":"Fernando S.","key":"e_1_3_2_1_12_1"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBME.2014.2306773"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"A. Fukui D. H. Park D. Yang A. Rohrbach T. Darrell and M. Rohrbach. 2016. Multimodal Compact Bilinear Pooling for Visual Question Answering and Visual Grounding. CoRR abs\/1606.01847 (2016).  A. Fukui D. H. Park D. Yang A. Rohrbach T. Darrell and M. Rohrbach. 2016. Multimodal Compact Bilinear Pooling for Visual Question Answering and Visual Grounding. CoRR abs\/1606.01847 (2016).","DOI":"10.18653\/v1\/D16-1044"},{"volume-title":"Compact Bilinear Pooling. In 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). IEEE","author":"Gao Y.","key":"e_1_3_2_1_15_1"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2017.10.013"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"J. Han Z. Zhang N. Cummins F. Ringeval and B. Schuller. 2016. Strength Modelling for Real-World Automatic Continuous Affect Recognition from Audiovisual Signals. Image and Vision Computing Special Issue on Multimodal Sentiment Analysis and Mining in the Wild 65 (Sep. 2016) 76--86.   J. Han Z. Zhang N. Cummins F. Ringeval and B. Schuller. 2016. Strength Modelling for Real-World Automatic Continuous Affect Recognition from Audiovisual Signals. Image and Vision Computing Special Issue on Multimodal Sentiment Analysis and Mining in the Wild 65 (Sep. 2016) 76--86.","DOI":"10.1016\/j.imavis.2016.11.020"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3242969.3243681"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0154486"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"volume-title":"2017 IEEE International Conference on Multimedia and Expo (ICME). IEEE","author":"Huang C. W.","key":"e_1_3_2_1_21_1"},{"volume-title":"Proceedings INTERSPEECH 2015, 16th Annual Conference of the International Speech Communication Association. ISCA","author":"Kaya H.","key":"e_1_3_2_1_22_1"},{"key":"e_1_3_2_1_23_1","unstructured":"J.-H. Kim K. W. On W. Lim J. Kim J. Ha and B.-T. Zhang. 2016. Hadamard Product for Low-rank Bilinear Pooling. CoRR abs\/1610.04325 (2016).  J.-H. Kim K. W. On W. Lim J. Kim J. Ha and B.-T. Zhang. 2016. Hadamard Product for Low-rank Bilinear Pooling. CoRR abs\/1610.04325 (2016)."},{"volume-title":"Low-Rank Bilinear Pooling for Fine-Grained Classification. In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR). IEEE","author":"Kong S.","key":"e_1_3_2_1_24_1"},{"volume-title":"Proceedings of INTERSPEECH 2017, 18th Annual Conference of the International Speech Communication Association. ISCA","author":"Le D.","key":"e_1_3_2_1_25_1"},{"volume-title":"2016 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA). IEEE","author":"Lim W.","key":"e_1_3_2_1_26_1"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"E. A. Lin G. M. Barlow and R. Mathur. 2015. The Health Burden of Obesity. Springer New York New York NY 19--42.  E. A. Lin G. M. Barlow and R. Mathur. 2015. The Health Burden of Obesity. Springer New York New York NY 19--42.","DOI":"10.1007\/978-1-4939-2146-1_2"},{"key":"e_1_3_2_1_28_1","unstructured":"V. Liptchinsky G. Synnaeve and R. Collobert. 2017. Letter-Based Speech Recognition with Gated ConvNets. CoRR abs\/1712.09444 (2017).  V. Liptchinsky G. Synnaeve and R. Collobert. 2017. Letter-Based Speech Recognition with Gated ConvNets. CoRR abs\/1712.09444 (2017)."},{"key":"e_1_3_2_1_29_1","unstructured":"H. Liu H. Ning Q. Mu Y. Zheng J. Zeng L. T. Yang R. Huang and J. Ma. 2017. A review of the smart world. Future Generation Computer Systems (2017). 14 pages in press.  H. Liu H. Ning Q. Mu Y. Zheng J. Zeng L. T. Yang R. Huang and J. Ma. 2017. A review of the smart world. Future Generation Computer Systems (2017). 14 pages in press."},{"volume-title":"Proceedings INTERSPEECH 2015, 16th Annual Conference of the International Speech Communication Association. ISCA","author":"Milde B.","key":"e_1_3_2_1_30_1"},{"volume-title":"2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","author":"Panayotov V.","key":"e_1_3_2_1_31_1"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.5555\/1953048.2078195"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/2487575.2487591"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"B. Schuller S. Steidl A. Batliner S. Hantke F. H\u00f6nig J. R. Orozco-Arroyave E. N\u00f6th Y. Zhang and F. Weninger. 2015. The INTERSPEECH 2015 Computational Paralinguistics Challenge: Degree of Nativeness Parkinson's & Eating Condition. In Proceedings INTERSPEECH 2015 16th Annual Conference of the International Speech Communication Association. ISCA Dresden Germany 478--482.  B. Schuller S. Steidl A. Batliner S. Hantke F. H\u00f6nig J. R. Orozco-Arroyave E. N\u00f6th Y. Zhang and F. Weninger. 2015. The INTERSPEECH 2015 Computational Paralinguistics Challenge: Degree of Nativeness Parkinson's & Eating Condition. In Proceedings INTERSPEECH 2015 16th Annual Conference of the International Speech Communication Association. ISCA Dresden Germany 478--482.","DOI":"10.21437\/Interspeech.2015-179"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1162\/089976600300015349"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.3390\/ijerph14040435"},{"volume-title":"Proceedings 41st IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2016. IEEE, Shanghai, P. R. China, 5200--5204","author":"Trigeorgis G.","key":"e_1_3_2_1_37_1"},{"key":"e_1_3_2_1_38_1","unstructured":"World Health Organization (WHO). 2018. Obesity and Overweight. http:\/\/www.who.int\/mediacentre\/factsheets\/fs311\/en\/. Accessed: 26-03--2018.  World Health Organization (WHO). 2018. Obesity and Overweight. http:\/\/www.who.int\/mediacentre\/factsheets\/fs311\/en\/. Accessed: 26-03--2018."},{"volume-title":"Multi-Modal Factorized Bilinear Pooling With Co-Attention Learning for Visual Question Answering. In The IEEE International Conference on Computer Vision (ICCV). IEEE","year":"1821","author":"Yu Z.","key":"e_1_3_2_1_39_1"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2017.2766843"}],"event":{"name":"ICMI '18: INTERNATIONAL CONFERENCE ON MULTIMODAL INTERACTION","sponsor":["SIGCHI Specialist Interest Group in Computer-Human Interaction of the ACM"],"location":"Boulder CO USA","acronym":"ICMI '18"},"container-title":["Proceedings of the 20th ACM International Conference on Multimodal Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3242969.3243683","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3242969.3243683","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T02:06:58Z","timestamp":1750212418000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3242969.3243683"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,10,2]]},"references-count":40,"alternative-id":["10.1145\/3242969.3243683","10.1145\/3242969"],"URL":"https:\/\/doi.org\/10.1145\/3242969.3243683","relation":{},"subject":[],"published":{"date-parts":[[2018,10,2]]},"assertion":[{"value":"2018-10-02","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}