{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T14:04:29Z","timestamp":1762956269140,"version":"3.44.0"},"reference-count":33,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1109\/ivcnz48456.2019.8960960","type":"proceedings-article","created":{"date-parts":[[2020,1,16]],"date-time":"2020-01-16T21:21:23Z","timestamp":1579209683000},"page":"1-5","source":"Crossref","is-referenced-by-count":20,"title":["Are These Birds Similar: Learning Branched Networks for Fine-grained Representations"],"prefix":"10.1109","author":[{"given":"Shah","family":"Nawaz","sequence":"first","affiliation":[{"name":"University of Insubria,Department of Theoretical and Applied Science,Varese,Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alessandro","family":"Calefati","sequence":"additional","affiliation":[{"name":"University of Insubria,Department of Theoretical and Applied Science,Varese,Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Moreno","family":"Caraffini","sequence":"additional","affiliation":[{"name":"University of Insubria,Department of Theoretical and Applied Science,Varese,Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nicola","family":"Landro","sequence":"additional","affiliation":[{"name":"University of Insubria,Department of Theoretical and Applied Science,Varese,Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ignazio","family":"Gallo","sequence":"additional","affiliation":[{"name":"University of Insubria,Department of Theoretical and Applied Science,Varese,Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.136"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.170"},{"key":"ref31","first-page":"3856","article-title":"Dynamic routing between capsules","author":"sabour","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2017.8019371"},{"article-title":"A bert baseline for the natural questions","year":"2019","author":"alberti","key":"ref10"},{"article-title":"The Caltech-UCSD Birds-200-2011 Dataset","year":"2011","author":"wah","key":"ref11"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.96"},{"key":"ref13","first-page":"3466","article-title":"A codebook-free and annotation-free approach for fine-grained image categorization","author":"yao","year":"2012","journal-title":"2012 IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299194"},{"key":"ref15","first-page":"842","article-title":"The application of two-level attention models in deep convolutional neural network for fine-grained image classification","author":"xiao","year":"2015","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.129"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10590-1_54"},{"article-title":"Gated multimodal units for information fusion","year":"2017","author":"arevalo","key":"ref18"},{"key":"ref19","first-page":"5198","article-title":"Efficient large-scale multi-modal classification","author":"kiela","year":"2018","journal-title":"Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_5"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2013.77"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00879"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.110"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.128"},{"article-title":"Resnet in resnet: Generalizing residual architectures","year":"2016","author":"targ","key":"ref29"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.132"},{"key":"ref8","article-title":"Multimodal machine learning: A survey and taxonomy","author":"baltru\u0161aitis","year":"2018","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.775"},{"key":"ref2","article-title":"The caltech-ucsd birds-200-2011 dataset","author":"wah","year":"2011","journal-title":"California Institute of Technology"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_26"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.259"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2017.326"},{"key":"ref22","first-page":"6","article-title":"Bottom-up and top-down attention for image captioning and visual question answering","volume":"3","author":"anderson","year":"2018","journal-title":"CVPR"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1044"},{"key":"ref24","article-title":"Aiding intra-text representations with visual context for multimodal named entity recognition","author":"arshad","year":"2019","journal-title":"International Conference on Document Analysis and Recognition"},{"key":"ref23","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.11962","article-title":"Adaptive co-attention network for named entity recognition in tweets","author":"zhang","year":"2018","journal-title":"Thirty-Second AAAI Conference on Artificial Intelligence"},{"article-title":"Vse++: Improving visual-semantic embeddings with hard negatives","year":"2017","author":"faghri","key":"ref26"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.541"}],"event":{"name":"2019 International Conference on Image and Vision Computing New Zealand (IVCNZ)","start":{"date-parts":[[2019,12,2]]},"location":"Dunedin, New Zealand","end":{"date-parts":[[2019,12,4]]}},"container-title":["2019 International Conference on Image and Vision Computing New Zealand (IVCNZ)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8955481\/8960955\/08960960.pdf?arnumber=8960960","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,1]],"date-time":"2025-09-01T19:22:01Z","timestamp":1756754521000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8960960\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12]]},"references-count":33,"URL":"https:\/\/doi.org\/10.1109\/ivcnz48456.2019.8960960","relation":{},"subject":[],"published":{"date-parts":[[2019,12]]}}}