{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,26]],"date-time":"2026-01-26T20:00:25Z","timestamp":1769457625040,"version":"3.49.0"},"reference-count":67,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2019,8,5]],"date-time":"2019-08-05T00:00:00Z","timestamp":1564963200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2019,8,5]],"date-time":"2019-08-05T00:00:00Z","timestamp":1564963200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/100015599","name":"Toyota Research Institute","doi-asserted-by":"crossref","id":[{"id":"10.13039\/100015599","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2020,3]]},"DOI":"10.1007\/s11263-019-01205-0","type":"journal-article","created":{"date-parts":[[2019,8,5]],"date-time":"2019-08-05T14:02:53Z","timestamp":1565013773000},"page":"620-641","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["Jointly Discovering Visual Objects and Spoken Words from Raw Sensory Input"],"prefix":"10.1007","volume":"128","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0206-0253","authenticated-orcid":false,"given":"David","family":"Harwath","sequence":"first","affiliation":[]},{"given":"Adri\u00e0","family":"Recasens","sequence":"additional","affiliation":[]},{"given":"D\u00eddac","family":"Sur\u00eds","sequence":"additional","affiliation":[]},{"given":"Galen","family":"Chuang","sequence":"additional","affiliation":[]},{"given":"Antonio","family":"Torralba","sequence":"additional","affiliation":[]},{"given":"James","family":"Glass","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,8,5]]},"reference":[{"key":"1205_CR1","doi-asserted-by":"crossref","unstructured":"Alishahi, A., Barking, M., & Chrupala, G. (2017). Encoding of phonology in a recurrent neural model of grounded speech. In Proceedings of the ACL conference on natural language learning (CoNLL).","DOI":"10.18653\/v1\/K17-1037"},{"key":"1205_CR2","doi-asserted-by":"crossref","unstructured":"Antol, S., Agrawal, A., Lu, J., Mitchell, M., Batra, D., Lawrence, Z., et al. (2015). VQA: Visual question answering. In Proceedings of the IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2015.279"},{"key":"1205_CR3","doi-asserted-by":"crossref","unstructured":"Arandjelovic, R., & Zisserman, A. (2017). Look, listen, and learn. In Proceedings of the IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.73"},{"key":"1205_CR4","unstructured":"Aytar, Y., Vondrick, C., & Torralba, A. (2016). Soundnet: Learning sound representations from unlabeled video. In Proceedings of the neural information processing systems (NeurIPS)."},{"key":"1205_CR5","unstructured":"Bergamo, A., Bazzani, L., Anguelov, D., & Torresani, L. (2014). Self-taught object localization with deep networks. CoRR. arXiv:1409.3964 ."},{"key":"1205_CR6","first-page":"737","volume-title":"Advances in neural information processing systems","author":"J Bromley","year":"1994","unstructured":"Bromley, J., Guyon, I., LeCun, Y., S\u00e4ckinger, E., & Shah, R. (1994). Signature verification using a \u201csiamese\u201d time delay neural network. In J. D. Cowan, G. Tesauro, & J. Alspector (Eds.), Advances in neural information processing systems (Vol. 6, pp. 737\u2013744). Burlington: Morgan-Kaufmann."},{"key":"1205_CR7","doi-asserted-by":"crossref","unstructured":"Cho, M., Kwak, S., Schmid, C., & Ponce, J. (2015). Unsupervised object discovery and localization in the wild: Part-based matching with bottom-up region proposals. In Proceedings of IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2015.7298724"},{"key":"1205_CR8","doi-asserted-by":"crossref","unstructured":"Chrupala, G., Gelderloos, L., & Alishahi, A. (2017). Representations of language in a model of visually grounded speech signal. In Proceedings of the annual meeting of the association for computational linguistics (ACL).","DOI":"10.18653\/v1\/P17-1057"},{"issue":"1","key":"1205_CR9","doi-asserted-by":"publisher","first-page":"189","DOI":"10.1109\/TPAMI.2016.2535231","volume":"39","author":"R Cinbis","year":"2016","unstructured":"Cinbis, R., Verbeek, J., & Schmid, C. (2016). Weakly supervised object localization with multi-fold multiple instance learning. IEEE Transactions on Pattern Analysis and Machine Intelligence, 39(1), 189\u2013203.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1205_CR10","doi-asserted-by":"crossref","unstructured":"de Vries, H., Strub, F., Chandar, S., Pietquin, O., Larochelle, H., & Courville, A. C. (2017). Guesswhat?! Visual object discovery through multi-modal dialogue. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.475"},{"key":"1205_CR11","unstructured":"Doersch, C., Gupta, A., & Efros, A. A. (2015). Unsupervised visual representation learning by context prediction. CoRR. arXiv:1505.05192 ."},{"key":"1205_CR12","doi-asserted-by":"crossref","unstructured":"Drexler, J., & Glass, J. (2017). Analysis of audio-visual features for unsupervised speech recognition. In Proceedings of the grounded language understanding workshop.","DOI":"10.21437\/GLU.2017-12"},{"key":"1205_CR13","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1016\/j.cognition.2017.11.008","volume":"173","author":"E Dupoux","year":"2018","unstructured":"Dupoux, E. (2018). Cognitive science in the era of artificial intelligence: A roadmap for reverse-engineering the infant language-learner. Cognition, 173, 43\u201359.","journal-title":"Cognition"},{"key":"1205_CR14","unstructured":"Faghri, F., Fleet, D. J., Kiros, J. R., & Fidler, S. (2018). Vse++: Improving visual-semantic embeddings with hard negatives. In Proceedings of the British machine vision conference (BMVC)."},{"key":"1205_CR15","doi-asserted-by":"crossref","unstructured":"Fang, H., Gupta, S., Iandola, F., Rupesh, S., Deng, L., Dollar, P., et al. (2015). From captions to visual concepts and back. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2015.7298754"},{"key":"1205_CR16","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/7287.001.0001","volume-title":"WordNet: An electronic lexical database","author":"C Fellbaum","year":"1998","unstructured":"Fellbaum, C. (1998). WordNet: An electronic lexical database. Bradford: Bradford Books."},{"key":"1205_CR17","unstructured":"Gao, H., Mao, J., Zhou, J., Huang, Z., & Yuille, A. (2015). Are you talking to a machine? Dataset and methods for multilingual image question answering. In Proceedings of the neural information processing systems (NeurIPS)."},{"key":"1205_CR18","unstructured":"Gelderloos, L., & Chrupala, G. (2016). From phonemes to images: Levels of representation in a recurrent neural model of visually-grounded language learning. arXiv:1610.03342 ."},{"key":"1205_CR19","doi-asserted-by":"crossref","unstructured":"Gemmeke, J. F., Ellis, D. P. W., Freedman, D., Jansen, A., Lawrence, W., Moore, R. C., et al. (2017). Audio set: An ontology and human-labeled dataset for audio events. In Proceedings of the international conference on acoustics, speech and signal processing (ICASSP).","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"1205_CR20","unstructured":"Girshick, R., Donahue, J., Darrell, T., & Malik, J. (2013). Rich feature hierarchies for accurate object detection and semantic segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)."},{"key":"1205_CR21","unstructured":"Gu\u00e9rin, J., Gibaru, O., Thiery, S., & Nyiri, E. (2017). CNN features are also great at unsupervised classification. CoRR. arXiv:1707.01700 ."},{"key":"1205_CR22","doi-asserted-by":"crossref","unstructured":"Harwath, D., & Glass, J. (2017). Learning word-like units from joint audio-visual analysis. In Proceedings of the annual meeting of the association for computational linguistics (ACL).","DOI":"10.18653\/v1\/P17-1047"},{"key":"1205_CR23","doi-asserted-by":"crossref","unstructured":"Harwath, D., Recasens, A., Sur\u00eds, D., Chuang, G., Torralba, A., & Glass, J. (2018). Jointly discovering visual objects and spoken words from raw sensory input. In Proceedings of the IEEE European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-030-01231-1_40"},{"key":"1205_CR24","unstructured":"Harwath, D., Torralba, A., & Glass, J. R. (2016). Unsupervised learning of spoken language with visual context. In Proceeding of the neural information processing systems (NeurIPS)."},{"key":"1205_CR25","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2015). Deep residual learning for image recognition. CoRR. arXiv:1512.03385 ."},{"key":"1205_CR26","unstructured":"Ioffe, S., & Szegedy, C. (2015). Batch normalization: Accelerating deep network training by reducing internal covariate shift. In Proceedings of the international conference on machine learning (ICML)."},{"key":"1205_CR27","doi-asserted-by":"crossref","unstructured":"Jansen, A., Church, K., & Hermansky, H. (2010). Toward spoken term discovery at scale with zero resources. In Proceedings of the annual conference of international speech communication association (INTERSPEECH).","DOI":"10.21437\/Interspeech.2010-483"},{"key":"1205_CR28","doi-asserted-by":"crossref","unstructured":"Jansen, A., Plakal, M., Pandya, R., Ellis, D. P., Hershey, S., Liu, J., et al. (2018). Unsupervised learning of semantic audio representations. In Proceedings of the international conference on acoustics, speech and signal processing (ICASSP).","DOI":"10.1109\/ICASSP.2018.8461684"},{"key":"1205_CR29","doi-asserted-by":"crossref","unstructured":"Jansen, A., & Van Durme, B. (2011). Efficient spoken term discovery using randomized algorithms. In Proceedings of the IEEE workshop on automfatic speech recognition and understanding (ASRU).","DOI":"10.1109\/ASRU.2011.6163965"},{"key":"1205_CR30","doi-asserted-by":"crossref","unstructured":"Johnson, J., Karpathy, A., & Fei-Fei, L. (2016). Densecap: Fully convolutional localization networks for dense captioning. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.494"},{"key":"1205_CR31","doi-asserted-by":"crossref","unstructured":"Kamper, H., Elsner, M., Jansen, A., & Goldwater, S. (2015). Unsupervised neural network based feature extraction using weak top-down constraints. In Proceedings of the international conference on acoustics, speech and signal processing (ICASSP).","DOI":"10.1109\/ICASSP.2015.7179087"},{"issue":"4","key":"1205_CR32","doi-asserted-by":"publisher","first-page":"669","DOI":"10.1109\/TASLP.2016.2517567","volume":"24","author":"H Kamper","year":"2016","unstructured":"Kamper, H., Jansen, A., & Goldwater, S. (2016). Unsupervised word segmentation and lexicon discovery using acoustic word embeddings. IEEE Transactions on Audio, Speech and Language Processing, 24(4), 669\u2013679.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"},{"key":"1205_CR33","doi-asserted-by":"crossref","unstructured":"Kamper, H., Settle, S., Shakhnarovich, G., & Livescu, K. (2017). Visually grounded learning of keyword prediction from untranscribed speech. In Proceedings of the annual conference of international speech communication association (INTERSPEECH).","DOI":"10.21437\/Interspeech.2017-502"},{"key":"1205_CR34","doi-asserted-by":"crossref","unstructured":"Karpathy, A., & Fei-Fei, L. (2015). Deep visual-semantic alignments for generating image descriptions. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"1205_CR35","unstructured":"Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. In Proceedings of the neural information processing systems (NeurIPS)."},{"issue":"11","key":"1205_CR36","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun, Y., Bottou, L., Bengio, Y., & Haffner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86(11), 2278\u20132324.","journal-title":"Proceedings of the IEEE"},{"key":"1205_CR37","unstructured":"Lee, C., & Glass, J. (2012). A nonparametric Bayesian approach to acoustic model discovery. In Proceedings of the annual meeting of the association for computational linguistics (ACL)."},{"key":"1205_CR38","unstructured":"Lewis, M. P., Simon, G. F., & Fennig, C. D. (2016). Ethnologue: Languages of the World (19th ed.). SIL International. Online version: http:\/\/www.ethnologue.com ."},{"key":"1205_CR39","unstructured":"Lin, T., Marie, M., Belongie, S., Bourdev, L., Girshick, R., Perona, P., et al. (2015). Microsoft COCO: Common objects in context. In arXiv:1405.0312 ."},{"key":"1205_CR40","unstructured":"Malinowski, M., & Fritz, M. (2014). A multi-world approach to question answering about real-world scenes based on uncertain input. In Proceedings of the neural information processing systems (NeurIPS)."},{"key":"1205_CR41","doi-asserted-by":"crossref","unstructured":"Malinowski, M., Rohrbach, M., & Fritz, M. (2015). Ask your neurons: A neural-based approach to answering questions about images. In Proceedings of the IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2015.9"},{"key":"1205_CR42","doi-asserted-by":"crossref","unstructured":"Ondel, L., Burget, L., & Cernocky, J. (2016) Variational inference for acoustic unit discovery. In 5th Workshop on spoken language technology for under-resourced language.","DOI":"10.1016\/j.procs.2016.04.033"},{"key":"1205_CR43","doi-asserted-by":"crossref","unstructured":"Owens, A., Isola, P., McDermott, J. H., Torralba, A., Adelson, E. H., & Freeman, W. T. (2016a) Visually indicated sounds. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.264"},{"key":"1205_CR44","doi-asserted-by":"crossref","unstructured":"Owens, A., Wu, J., McDermott, J. H., Freeman, W. T., & Torralba, A. (2016b). Ambient sound provides supervision for visual learning. In Proceedings of the IEEE European conference on computer vision (ECCV).","DOI":"10.1007\/978-3-319-46448-0_48"},{"issue":"1","key":"1205_CR45","doi-asserted-by":"publisher","first-page":"186","DOI":"10.1109\/TASL.2007.909282","volume":"16","author":"A Park","year":"2008","unstructured":"Park, A., & Glass, J. (2008). Unsupervised pattern discovery in speech. IEEE Transactions on Audio, Speech and Language Processing, 16(1), 186\u2013197.","journal-title":"IEEE Transactions on Audio, Speech and Language Processing"},{"key":"1205_CR46","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., & Farhadi, A. (2016) You only look once: Unified, real-time object detection. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.91"},{"key":"1205_CR47","unstructured":"Reed, S. E., Akata, Z., Yan, X., Logeswaran, L., Schiele, B., & Lee, H. (2016). Generative adversarial text to image synthesis. CoRR. arXiv:1605.05396 ."},{"key":"1205_CR48","unstructured":"Ren, M., Kiros, R., & Zemel, R. (2015). Exploring models and data for image question answering. In Proceedings of the neural information processing systems (NeurIPS)."},{"key":"1205_CR49","doi-asserted-by":"crossref","unstructured":"Renshaw, D., Kamper, H., Jansen, A., & Goldwater, S. (2015). A comparison of neural network methods for unsupervised representation learning on the zero resource speech challenge. In Proceedings of the annual conference of international speech communication association (INTERSPEECH).","DOI":"10.21437\/Interspeech.2015-644"},{"issue":"2","key":"1205_CR50","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1109\/TMM.2003.811618","volume":"5","author":"D Roy","year":"2003","unstructured":"Roy, D. (2003). Grounded spoken language acquisition: Experiments in word learning. IEEE Transactions on Multimedia, 5(2), 197\u2013209.","journal-title":"IEEE Transactions on Multimedia"},{"key":"1205_CR51","doi-asserted-by":"publisher","first-page":"113","DOI":"10.1207\/s15516709cog2601_4","volume":"26","author":"D Roy","year":"2002","unstructured":"Roy, D., & Pentland, A. (2002). Learning words from sights and sounds: A computational model. Cognitive Science, 26, 113\u2013146.","journal-title":"Cognitive Science"},{"issue":"3","key":"1205_CR52","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., Deng, J., Su, H., Krause, J., Satheesh, S., Ma, S., et al. (2015). ImageNet large scale visual recognition challenge. International Journal of Computer Vision, 115(3), 211\u2013252. https:\/\/doi.org\/10.1007\/s11263-015-0816-y .","journal-title":"International Journal of Computer Vision"},{"key":"1205_CR53","doi-asserted-by":"crossref","unstructured":"Russell, B., Efros, A., Sivic, J., Freeman, W., & Zisserman, A. (2006). Using multiple segmentations to discover objects and their extent in image collections. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2006.326"},{"key":"1205_CR54","unstructured":"Shih, K. J., Singh, S., & Hoiem, D. (2015). Where to look: Focus regions for visual question answering. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)."},{"key":"1205_CR55","unstructured":"Simonyan, K., & Zisserman, A. (2014). Very deep convolutional networks for large-scale image recognition. CoRR. arXiv:1409.1556 ."},{"issue":"1","key":"1205_CR56","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1016\/0364-0213(90)90025-R","volume":"14","author":"ES Spelke","year":"1990","unstructured":"Spelke, E. S. (1990). Principles of object perception. Cognitive Science, 14(1), 29\u201356. https:\/\/doi.org\/10.1016\/0364-0213(90)90025-R .","journal-title":"Cognitive Science"},{"key":"1205_CR57","doi-asserted-by":"crossref","unstructured":"Thiolliere, R., Dunbar, E., Synnaeve, G., Versteegh, M., & Dupoux, E. (2015). A hybrid dynamic time warping-deep neural network architecture for unsupervised acoustic modeling. In Proceedings of the annual conference of international speech communication association (INTERSPEECH).","DOI":"10.21437\/Interspeech.2015-640"},{"key":"1205_CR58","unstructured":"Thomee, B., Shamma, D. A., Friedland, G., Elizalde, B., Ni, K., Poland, D., et al. (2015). The new data and new challenges in multimedia research. CoRR. arXiv:1503.01817 ."},{"key":"1205_CR59","doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., & Erhan, D. (2015). Show and tell: A neural image caption generator. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"1205_CR60","unstructured":"Weber, M., Welling, M., & Perona, P. (2010). Towards automatic discovery of object categories. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)."},{"key":"1205_CR61","unstructured":"Xu, K., Ba, J., Kiros, R., Cho, K., Courville, A., Salakhudinov, R., et al. (2015). Show, attend and tell: Neural image caption generation with visual attention. In Proceedings of the international conference on machine learning (ICML)."},{"key":"1205_CR62","doi-asserted-by":"crossref","unstructured":"Zhang, T., Ramakrishnan, R., & Livny, M. (1996). Birch: an efficient data clustering method for very large databases. In ACM SIGMOD international conference on management of data (pp. 103\u2013114).","DOI":"10.1145\/235968.233324"},{"key":"1205_CR63","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Salakhutdinov, R., Chang, H. A., & Glass, J. (2012). Resource configurable spoken query detection using deep Boltzmann machines. In Proceedings of the international conference on acoustics, speech and signal processing (ICASSP).","DOI":"10.1109\/ICASSP.2012.6289082"},{"key":"1205_CR64","unstructured":"Zhou, B., Khosla, A., Lapedriza, A., Oliva, A., & Torralba, A. (2015). Object detectors emerge in deep scene CNNs. In Proceedings of the international conference on learning representations (ICLR)."},{"key":"1205_CR65","doi-asserted-by":"crossref","unstructured":"Zhou, B., Khosla, A., Lapedriza, A., Oliva, A., & Torralba, A. (2016). Learning deep features for discriminative localization. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.319"},{"key":"1205_CR66","unstructured":"Zhou, B., Lapedriza, A., Xiao, J., Torralba, A., & Oliva, A. (2014). Learning deep features for scene recognition using places database. In Proceedings of the neural information processing systems (NeurIPS)."},{"key":"1205_CR67","doi-asserted-by":"crossref","unstructured":"Zhou, B., Zhao, H., Puig, X., Fidler, S., Barriuso, A., & Torralba, A. (2017). Scene parsing through ADE20K dataset. In Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.544"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-019-01205-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-019-01205-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-019-01205-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,25]],"date-time":"2022-09-25T05:17:22Z","timestamp":1664083042000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-019-01205-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,8,5]]},"references-count":67,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2020,3]]}},"alternative-id":["1205"],"URL":"https:\/\/doi.org\/10.1007\/s11263-019-01205-0","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,8,5]]},"assertion":[{"value":"1 February 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 July 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 August 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}