{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T05:23:00Z","timestamp":1669872180633},"reference-count":49,"publisher":"Institute of Electronics, Information and Communications Engineers (IEICE)","issue":"6","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEICE Trans. Fundamentals"],"published-print":{"date-parts":[[2021,6,1]]},"DOI":"10.1587\/transfun.2020imp0011","type":"journal-article","created":{"date-parts":[[2020,11,29]],"date-time":"2020-11-29T22:06:45Z","timestamp":1606687605000},"page":"866-875","source":"Crossref","is-referenced-by-count":0,"title":["Domain Adaptive Cross-Modal Image Retrieval via Modality and Domain Translations"],"prefix":"10.1587","volume":"E104.A","author":[{"given":"Rintaro","family":"YANAGI","sequence":"first","affiliation":[{"name":"Graduate School of Information Science and Technology, Hokkaido University"}]},{"given":"Ren","family":"TOGO","sequence":"additional","affiliation":[{"name":"Education and Research Center for Mathematical and Data Science, Hokkaido University"}]},{"given":"Takahiro","family":"OGAWA","sequence":"additional","affiliation":[{"name":"Faculty of Information Science and Technology, Division of Media and Network Technologies, Hokkaido University"}]},{"given":"Miki","family":"HASEYAMA","sequence":"additional","affiliation":[{"name":"Faculty of Information Science and Technology, Division of Media and Network Technologies, Hokkaido University"}]}],"member":"532","reference":[{"key":"1","doi-asserted-by":"crossref","unstructured":"[1] M. Chen, S. Mao, and Y. Liu, \u201cBig data: A survey,\u201d Mobile Netw. Appl., vol.19, no.2, pp.171-209, 2014. 10.1007\/s11036-013-0489-0","DOI":"10.1007\/s11036-013-0489-0"},{"key":"2","doi-asserted-by":"crossref","unstructured":"[2] K. Juneja, A. Verma, S. Goel, and S. Goel, \u201cA survey on recent image indexing and retrieval techniques for low-level feature extraction in CBIR systems,\u201d Proc. IEEE International Conference on Computational Intelligence and Communication Technology, pp.67-72, 2015. 10.1109\/cict.2015.92","DOI":"10.1109\/CICT.2015.92"},{"key":"3","unstructured":"[3] W. Zhou, H. Li, and Q. Tian, \u201cRecent advance in content-based image retrieval: A literature survey,\u201d arXiv preprint arXiv:1706.06064, 2017."},{"key":"4","unstructured":"[4] K. Wang, Q. Yin, W. Wang, S. Wu, and L. Wang, \u201cA comprehensive survey on cross-modal retrieval,\u201d arXiv:1607.06215, 2016."},{"key":"5","doi-asserted-by":"publisher","unstructured":"[5] L. Wu, R. Jin, and A.K. Jain, \u201cTag completion for image retrieval,\u201d IEEE Trans. Pattern Anal. Mach. Intell., vol.35, no.3, pp.716-727, 2013. 10.1109\/tpami.2012.124","DOI":"10.1109\/TPAMI.2012.124"},{"key":"6","doi-asserted-by":"publisher","unstructured":"[6] L. Zheng, Y. Yang, and Q. Tian, \u201cSIFT meets CNN: A decade survey of instance retrieval,\u201d IEEE Trans. Pattern Anal. Mach. Intell., vol.40, no.5, pp.1224-1244, 2018. 10.1109\/tpami.2017.2709749","DOI":"10.1109\/TPAMI.2017.2709749"},{"key":"7","doi-asserted-by":"crossref","unstructured":"[7] H. Venkateswara, J. Eusebio, S. Chakraborty, and S. Panchanathan, \u201cDeep hashing network for unsupervised domain adaptation,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition, pp.5018-5027, 2017. 10.1109\/cvpr.2017.572","DOI":"10.1109\/CVPR.2017.572"},{"key":"8","unstructured":"[8] G. 
Csurka, \u201cDomain adaptation for visual applications: A comprehensive survey,\u201d arXiv preprint arXiv:1702.05374, 2017."},{"key":"9","doi-asserted-by":"crossref","unstructured":"[9] Y. Jing, Y. Yang, Z. Feng, J. Ye, Y. Yu, and M. Song, \u201cNeural style transfer: A review,\u201d IEEE Trans. Vis. Comput. Graphics, vol.26, no.11, pp.3365-3385, 2020. 10.1109\/tvcg.2019.2921336","DOI":"10.1109\/TVCG.2019.2921336"},{"key":"10","doi-asserted-by":"crossref","unstructured":"[10] J. Agnese, J. Herrera, H. Tao, and X. Zhu, \u201cA survey and taxonomy of adversarial neural networks for text-to-image synthesis,\u201d arXiv preprint arXiv:1910.09399, 2019.","DOI":"10.1002\/widm.1345"},{"key":"11","doi-asserted-by":"crossref","unstructured":"[11] R. Yanagi, R. Togo, T. Ogawa, and M. Haseyama, \u201cScene retrieval from multiple resolution generated images based on text-to-image gan,\u201d 2019 IEEE International Symposium on Circuits and Systems, pp.1-5, 2019. 10.1109\/iscas.2019.8702155","DOI":"10.1109\/ISCAS.2019.8702155"},{"key":"12","doi-asserted-by":"crossref","unstructured":"[12] R. Yanagi, R. Togo, T. Ogawa, and M. Haseyama, \u201cScene retrieval for video summarization based on text-to-image gan,\u201d 2019 IEEE International Conference on Image Processing, pp.1825-1829, 2019. 10.1109\/icip.2019.8803177","DOI":"10.1109\/ICIP.2019.8803177"},{"key":"13","doi-asserted-by":"publisher","unstructured":"[13] R. Yanagi, R. Togo, T. Ogawa, and M. Haseyama, \u201cQuery is GAN: Scene retrieval with attentional text-to-image generative adversarial network,\u201d IEEE Access, vol.7, pp.153183-153193, 2019. 10.1109\/access.2019.2947409","DOI":"10.1109\/ACCESS.2019.2947409"},{"key":"14","doi-asserted-by":"publisher","unstructured":"[14] R. Yanagi, R. Togo, T. Ogawa, and M. Haseyama, \u201cEnhancing cross-modal retrieval based on modality-specific and embedding spaces,\u201d IEEE Access, vol.8, no.1, pp.96777-96786, 2020. 10.1109\/access.2020.2995815","DOI":"10.1109\/ACCESS.2020.2995815"},{"key":"15","doi-asserted-by":"crossref","unstructured":"[15] H. Hotelling, \u201cRelations between two sets of variates,\u201d Breakthroughs in Statistics, pp.162-190, Springer, New York, NY, 1992. 10.1007\/978-1-4612-4380-9_14","DOI":"10.1007\/978-1-4612-4380-9_14"},{"key":"16","unstructured":"[16] R. Kiros, R. Salakhutdinov, and R.S. Zemel, \u201cUnifying visual-semantic embeddings with multimodal neural language models,\u201d arXiv:1411.2539, 2014."},{"key":"17","unstructured":"[17] F. Faghri, D.J. Fleet, J.R. Kiros, G.B. Toronto, and S. Fidler, \u201cVSE++: Improving visual-semantic embeddings with hard negatives,\u201d arXiv:1707.05612, 2017."},{"key":"18","doi-asserted-by":"crossref","unstructured":"[18] Y. Zhang and H. Lu, \u201cDeep cross-modal projection learning for image-text matching,\u201d Proc. IEEE European Conference on Computer Vision, pp.686-701, 2018.","DOI":"10.1007\/978-3-030-01246-5_42"},{"key":"19","doi-asserted-by":"crossref","unstructured":"[19] J. Gu, J. Cai, S.R. Joty, L. Niu, and G. Wang, \u201cLook, imagine and match: Improving textual-visual cross-modal retrieval with generative models,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition, pp.7181-7189, 2018. 10.1109\/cvpr.2018.00750","DOI":"10.1109\/CVPR.2018.00750"},{"key":"20","doi-asserted-by":"crossref","unstructured":"[20] Z. Ji, H. Wang, J. Han, and Y. Pang, \u201cSaliency-guided attention network for image-sentence matching,\u201d Proc. IEEE International Conference on Computer Vision, pp.5754-5763, 2019. 
10.1109\/iccv.2019.00585","DOI":"10.1109\/ICCV.2019.00585"},{"key":"21","doi-asserted-by":"crossref","unstructured":"[21] S. Kullback and R.A. Leibler, \u201cOn information and sufficiency,\u201d Ann. Math. Statist., vol.22, no.1, pp.79-86, 1951. 10.1214\/aoms\/1177729694","DOI":"10.1214\/aoms\/1177729694"},{"key":"22","doi-asserted-by":"crossref","unstructured":"[22] J. Song, Y.Z. Song, T. Xiang, and T.M. Hospedales, \u201cFine-grained image retrieval: The text\/sketch input dilemma,\u201d Proc. British Machine Vision Conference, pp.1-12, 2017. 10.5244\/c.31.45","DOI":"10.5244\/C.31.45"},{"key":"23","doi-asserted-by":"publisher","unstructured":"[23] X. Gu, Y. Wong, L. Shou, P. Peng, G. Chen, and M.S. Kankanhalli, \u201cMulti-modal and multi-domain embedding learning for fashion retrieval and analysis,\u201d IEEE Trans. Multimedia, vol.21, no.6, pp.1524-1537, 2018. 10.1109\/tmm.2018.2876822","DOI":"10.1109\/TMM.2018.2876822"},{"key":"24","unstructured":"[24] I.J. Goodfellow, J. Pouget-abadie, M. Mirza, B. Xu, D. Warde-farley, S. Ozair, A. Courville, and Y. Bengio, \u201cGenerative adversarial nets,\u201d Proc. Advances in Neural Information Processing Systems, pp.2672-2680, 2014."},{"key":"25","unstructured":"[25] S. Reed, Z. Akata, S. Mohan, S. Tenka, B. Schiele, and H. Lee, \u201cLearning what and where to draw,\u201d Proc. Advances in Neural Information Processing Systems, pp.217-225, 2016."},{"key":"26","unstructured":"[26] S. Reed, Z. Akata, X. Yan, L. Logeswaran, B. Schiele, and H. Lee, \u201cGenerative adversarial text to image synthesis,\u201d arXiv:1605.05396, 2016."},{"key":"27","unstructured":"[27] A. Dash, J.C.B. Gamboa, S. Ahmed, M. Liwicki, and M.Z. Afzal, \u201cTAC-GAN-Text conditioned auxiliary classifier generative adversarial network,\u201d arXiv:1703.06412, 2017."},{"key":"28","doi-asserted-by":"crossref","unstructured":"[28] H. Zhang, T. Xu, H. Li, S. Zhang, X. Wang, X. Huang, and D. Metaxas, \u201cStackGAN: Text to photo-realistic image synthesis with stacked generative adversarial networks,\u201d Proc. IEEE Conference on Computer Vision, pp.5907-5915, 2017. 10.1109\/iccv.2017.629","DOI":"10.1109\/ICCV.2017.629"},{"key":"29","doi-asserted-by":"crossref","unstructured":"[29] Z. Zhang, Y. Xie, and L. Yang, \u201cPhotographic text-to-image synthesis with a hierarchically-nested adversarial network,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition, pp.6199-6208, 2018. 10.1109\/cvpr.2018.00649","DOI":"10.1109\/CVPR.2018.00649"},{"key":"30","doi-asserted-by":"crossref","unstructured":"[30] T. Xu, P. Zhang, Q. Huang, H. Zhang, Z. Gan, X. Huang, and X. He, \u201cAttnGAN: Fine-grained text to image generation with attentional generative adversarial networks,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition, pp.1316-1324, 2018. 10.1109\/cvpr.2018.00143","DOI":"10.1109\/CVPR.2018.00143"},{"key":"31","doi-asserted-by":"crossref","unstructured":"[31] N. Bodla, G. Hua, and R. Chellappa, \u201cSemi-supervised FusedGAN for conditional image generation,\u201d arXiv:1801.05551, 2018.","DOI":"10.1007\/978-3-030-01228-1_41"},{"key":"32","doi-asserted-by":"crossref","unstructured":"[32] M. Zhu, P. Pan, W. Chen, and Y. Yang, \u201cDM-GAN: Dynamic memory generative adversarial networks for text-to-image synthesis,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition, pp.5802-5810, 2019. 10.1109\/cvpr.2019.00595","DOI":"10.1109\/CVPR.2019.00595"},{"key":"33","doi-asserted-by":"crossref","unstructured":"[33] T. Qiao, J. Zhang, D. 
Xu, and D. Tao, \u201cMirrorGAN: Learning text-to-image generation by redescription,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition, pp.1505-1514, 2019. 10.1109\/cvpr.2019.00160","DOI":"10.1109\/CVPR.2019.00160"},{"key":"34","doi-asserted-by":"crossref","unstructured":"[34] B. Gooch and A. Gooch, Non-Photorealistic Rendering, CRC Press, 2001. 10.1201\/9781439864173","DOI":"10.1201\/9781439864173"},{"key":"35","unstructured":"[35] T. Strothotte and S. Schlechtweg, Non-Photorealistic Computer Graphics: Modeling, Rendering, and Animation, Morgan Kaufmann, 2002. 10.1016\/B978-1-55860-787-3.X5000-2"},{"key":"36","doi-asserted-by":"crossref","unstructured":"[36] P. Isola, J.Y. Zhu, T. Zhou, and A.A. Efros, \u201cImage-to-image translation with conditional adversarial networks,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition, pp.1125-1134, 2017. 10.1109\/cvpr.2017.632","DOI":"10.1109\/CVPR.2017.632"},{"key":"37","doi-asserted-by":"crossref","unstructured":"[37] J.Y. Zhu, T. Park, P. Isola, and A.A. Efros, \u201cUnpaired image-to-image translation using cycle-consistent adversarial networks,\u201d Proc. IEEE International Conference on Computer Vision, pp.2223-2232, 2017. 10.1109\/iccv.2017.244","DOI":"10.1109\/ICCV.2017.244"},{"key":"38","unstructured":"[38] M.Y. Liu, T. Breuel, and J. Kautz, \u201cUnsupervised image-to-image translation networks,\u201d Proc. Advances in Neural Information Processing Systems, pp.700-708, 2017."},{"key":"39","unstructured":"[39] T. Kim, M. Cha, H. Kim, J.K. Lee, and J. Kim, \u201cLearning to discover cross-domain relations with generative adversarial networks,\u201d Proc. International Conference on Machine Learning, pp.1857-1865, 2017."},{"key":"40","unstructured":"[40] I. Vendrov, R. Kiros, S. Fidler, and R. Urtasun, \u201cOrder-embeddings of images and language,\u201d Proc. International Conference on Learning Representations, pp.1-12, 2016."},{"key":"41","doi-asserted-by":"publisher","unstructured":"[41] Z. Zheng, L. Zheng, M. Garrett, Y. Yang, and Y.D. Shen, \u201cDual-path convolutional image-text embedding with instance loss,\u201d ACM Trans. Multimedia Comput. Commun. Appl., vol.16, no.2, 2020. 10.1145\/3383184","DOI":"10.1145\/3383184"},{"key":"42","doi-asserted-by":"crossref","unstructured":"[42] Y. Liu, Y. Guo, E.M. Bakker, and M.S. Lew, \u201cLearning a recurrent residual fusion network for multimodal matching,\u201d Proc. IEEE International Conference on Computer Vision, pp.4107-4116, 2017. 10.1109\/iccv.2017.442","DOI":"10.1109\/ICCV.2017.442"},{"key":"43","doi-asserted-by":"crossref","unstructured":"[43] Y. Huang, Q. Wu, C. Song, and L. Wang, \u201cLearning semantic concepts and order for image and sentence matching,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition, pp.6163-6171, 2018. 10.1109\/cvpr.2018.00645","DOI":"10.1109\/CVPR.2018.00645"},{"key":"44","doi-asserted-by":"crossref","unstructured":"[44] Y. Song and M. Soleymani, \u201cPolysemous visual-semantic embedding for cross-modal retrieval,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition, pp.1979-1978, 2019. 10.1109\/cvpr.2019.00208","DOI":"10.1109\/CVPR.2019.00208"},{"key":"45","doi-asserted-by":"crossref","unstructured":"[45] G. Huang, Z. Liu, L. Van Der Maaten, and K.Q. Weinberger, \u201cDensely connected convolutional networks,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition, pp.4700-4708, 2017. 
10.1109\/cvpr.2017.243","DOI":"10.1109\/CVPR.2017.243"},{"key":"46","doi-asserted-by":"crossref","unstructured":"[46] T.Y. Lin, M. Maire, S. Belongie, J. Hays, P. Perona, D. Ramanan, P. Doll\u00e1r, and C.L. Zitnick, \u201cMicrosoft COCO: Common objects in context,\u201d Proc. IEEE European Conference on Computer Vision, pp.740-755, 2014. 10.1007\/978-3-319-10602-1_48","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"47","doi-asserted-by":"crossref","unstructured":"[47] C.L. Zitnick and D. Parikh, \u201cBringing semantics into focus using visual abstraction,\u201d Proc. IEEE Conference on Computer Vision and Pattern Recognition, pp.3009-3016, 2013. 10.1109\/cvpr.2013.387","DOI":"10.1109\/CVPR.2013.387"},{"key":"48","doi-asserted-by":"crossref","unstructured":"[48] M. Stefanini, M. Cornia, L. Baraldi, M. Corsini, and R. Cucchiara, \u201cArtpedia: A new visual-semantic dataset with visual and contextual sentences in the artistic domain,\u201d International Conference on Image Analysis and Processing, pp.729-740, 2019. 10.1007\/978-3-030-30645-8_66","DOI":"10.1007\/978-3-030-30645-8_66"},{"key":"49","unstructured":"[49] R. Geirhos, P. Rubisch, C. Michaelis, M. Bethge, F.A. Wichmann, and W. Brendel, \u201cImageNet-trained CNNs are biased towards texture; increasing shape bias improves accuracy and robustness,\u201d Proc. IEEE International Conference on Learning Representations, pp.1-22, 2019."}],"container-title":["IEICE Transactions on Fundamentals of Electronics, Communications and Computer Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transfun\/E104.A\/6\/E104.A_2020IMP0011\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,30]],"date-time":"2022-11-30T16:35:34Z","timestamp":1669826134000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/transfun\/E104.A\/6\/E104.A_2020IMP0011\/_article"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,1]]},"references-count":49,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2021]]}},"URL":"https:\/\/doi.org\/10.1587\/transfun.2020imp0011","relation":{},"ISSN":["0916-8508","1745-1337"],"issn-type":[{"value":"0916-8508","type":"print"},{"value":"1745-1337","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,6,1]]}}}
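
The record above is a standard Crossref "work" message for DOI 10.1587/transfun.2020imp0011. As a minimal sketch of how such a record can be retrieved and read back, the Python below assumes the public Crossref REST API endpoint https://api.crossref.org/works/{DOI}, which returns a payload shaped like this record ({"status":"ok", ..., "message":{...}}); every field name used in the sketch (title, author, container-title, volume, journal-issue, page, references-count) appears in the record itself.

import json
import urllib.request

# Assumption: the public Crossref REST API serves this record at
# https://api.crossref.org/works/{DOI}; the payload mirrors the record
# above, with the bibliographic fields under the "message" object.
DOI = "10.1587/transfun.2020imp0011"
url = "https://api.crossref.org/works/" + DOI

with urllib.request.urlopen(url) as resp:
    work = json.load(resp)["message"]

# "title" and "container-title" are lists; "author" holds given/family pairs.
title = work["title"][0]
authors = ", ".join(a["given"] + " " + a["family"] for a in work["author"])
journal = work["container-title"][0]

print(title)
print(authors)
print("%s, vol.%s, no.%s, pp.%s (%d deposited references)"
      % (journal, work["volume"], work["journal-issue"]["issue"],
         work["page"], work["references-count"]))

Note that Crossref asks API users to identify themselves (for example via a mailto query parameter or a User-Agent header) to be routed to its polite pool; the sketch omits this for brevity.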