{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,8]],"date-time":"2025-06-08T04:01:05Z","timestamp":1749355265659,"version":"3.41.0"},"reference-count":90,"publisher":"Springer Science and Business Media LLC","issue":"5-6","license":[{"start":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T00:00:00Z","timestamp":1732752000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T00:00:00Z","timestamp":1732752000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1007\/s13042-024-02477-w","type":"journal-article","created":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T03:46:11Z","timestamp":1732765571000},"page":"3729-3746","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Dual variational network for unsupervised cross-modal hashing"],"prefix":"10.1007","volume":"16","author":[{"given":"Xuran","family":"Deng","sequence":"first","affiliation":[]},{"given":"Zhihang","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Pandeng","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,28]]},"reference":[{"issue":"10","key":"2477_CR1","doi-asserted-by":"publisher","first-page":"2730","DOI":"10.1109\/TCSVT.2017.2715227","volume":"28","author":"J Tang","year":"2017","unstructured":"Tang J, Li Z (2017) Weakly supervised multimodal hashing for scalable social image retrieval. IEEE Trans Circuits Syst Video Technol 28(10):2730\u20132741","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"2477_CR2","doi-asserted-by":"crossref","unstructured":"Hidayati SC, Hsu C-C, Chang Y-T, Hua K-L, Fu J, Cheng W-H (2018) What dress fits me best? fashion recommendation on the clothing style for personal body shape. In: Proceedings of the 26th ACM international conference on multimedia, pp 438\u2013446","DOI":"10.1145\/3240508.3240546"},{"key":"2477_CR3","doi-asserted-by":"publisher","first-page":"986","DOI":"10.1109\/TIP.2020.3038365","volume":"30","author":"M Meng","year":"2020","unstructured":"Meng M, Wang H, Yu J, Chen H, Wu J (2020) Asymmetric supervised consistent and specific hashing for cross-modal retrieval. IEEE Trans Image Process 30:986\u20131000","journal-title":"IEEE Trans Image Process"},{"issue":"4","key":"2477_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3447239","volume":"54","author":"W-H Cheng","year":"2021","unstructured":"Cheng W-H, Song S, Chen C-Y, Hidayati SC, Liu J (2021) Fashion meets computer vision: a survey. ACM Comput Surv (CSUR) 54(4):1\u201341","journal-title":"ACM Comput Surv (CSUR)"},{"issue":"3","key":"2477_CR5","doi-asserted-by":"publisher","first-page":"1857","DOI":"10.1109\/TAFFC.2022.3143100","volume":"14","author":"H-X Xie","year":"2022","unstructured":"Xie H-X, Lo L, Shuai H-H, Cheng W-H (2022) An overview of facial micro-expression analysis: data, methodology and challenge. IEEE Trans Affect Comput 14(3):1857\u20131875","journal-title":"IEEE Trans Affect Comput"},{"key":"2477_CR6","doi-asserted-by":"crossref","unstructured":"Jiang-Lin J-Y, Huang K-Y, Lo L, Huang Y-N, Lin T, Wu J-C, Shuai H-H, Cheng W-H (2024) Record: Reasoning and correcting diffusion for hoi generation. arXiv:2407.17911","DOI":"10.1145\/3664647.3680936"},{"key":"2477_CR7","doi-asserted-by":"crossref","unstructured":"Zhang B, Xie H, Wang Y, Xu J, Zhang Y (2023) Linguistic more: taking a further step toward efficient and accurate scene text recognition. arXiv:2305.05140","DOI":"10.24963\/ijcai.2023\/189"},{"key":"2477_CR8","doi-asserted-by":"crossref","unstructured":"Gao Z, Wang Y, Qu Y, Zhang B, Wang Z, Xu J, Xie H (2024) Self-supervised pre-training with symmetric superimposition modeling for scene text recognition","DOI":"10.24963\/ijcai.2024\/85"},{"issue":"6","key":"2477_CR9","doi-asserted-by":"publisher","first-page":"1899","DOI":"10.1007\/s11263-023-01917-4","volume":"132","author":"X Wang","year":"2024","unstructured":"Wang X, Zhang S, Cen J, Gao C, Zhang Y, Zhao D, Sang N (2024) Clip-guided prototype modulating for few-shot action recognition. Int J Comput Vis 132(6):1899\u20131912","journal-title":"Int J Comput Vis"},{"key":"2477_CR10","doi-asserted-by":"crossref","unstructured":"Zhang B, Xie H, Gao Z, Wang Y (2024) Choose what you need: disentangled representation learning for scene text recognition removal and editing. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 28358\u201328368","DOI":"10.1109\/CVPR52733.2024.02679"},{"key":"2477_CR11","doi-asserted-by":"crossref","unstructured":"Liu Z, Li J, Xie H, Li P, Ge J, Liu S-A, Jin G (2024) Towards balanced alignment: modal-enhanced semantic modeling for video moment retrieval. In: Proceedings of the AAAI conference on artificial intelligence, vol 38, pp 3855\u20133863","DOI":"10.1609\/aaai.v38i4.28177"},{"key":"2477_CR12","unstructured":"Li P, Xie C-W, Xie H, Zhao L, Zhang L, Zheng Y, Zhao D, Zhang Y (2024) Momentdiff: generative video moment retrieval from random to real. Advances in neural information processing systems, vol 36"},{"issue":"7","key":"2477_CR13","doi-asserted-by":"publisher","first-page":"3157","DOI":"10.1109\/TIP.2016.2564638","volume":"25","author":"J Tang","year":"2016","unstructured":"Tang J, Wang K, Shao L (2016) Supervised matrix factorization hashing for cross-modal retrieval. IEEE Trans Image Process 25(7):3157\u20133166","journal-title":"IEEE Trans Image Process"},{"key":"2477_CR14","doi-asserted-by":"publisher","first-page":"1344","DOI":"10.1109\/TIP.2019.2941858","volume":"29","author":"J Guo","year":"2019","unstructured":"Guo J, Zhu W (2019) Collective affinity learning for partial cross-modal hashing. IEEE Trans Image Process 29:1344\u20131355","journal-title":"IEEE Trans Image Process"},{"key":"2477_CR15","doi-asserted-by":"crossref","unstructured":"Chang C-H, Hu M-C, Cheng W-H, Chuang Y-Y (2013) Rectangling stereographic projection for wide-angle image visualization. In: Proceedings of the IEEE international conference on computer vision, pp 2824\u20132831","DOI":"10.1109\/ICCV.2013.351"},{"key":"2477_CR16","doi-asserted-by":"crossref","unstructured":"Yang X, Tang K, Zhang H, Cai J (2019) Auto-encoding scene graphs for image captioning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10685\u201310694","DOI":"10.1109\/CVPR.2019.01094"},{"issue":"9","key":"2477_CR17","doi-asserted-by":"publisher","first-page":"2251","DOI":"10.1109\/TPAMI.2018.2857768","volume":"41","author":"Y Xian","year":"2018","unstructured":"Xian Y, Lampert CH, Schiele B, Akata Z (2018) Zero-shot learning-a comprehensive evaluation of the good, the bad and the ugly. IEEE Trans Pattern Anal Mach Intell 41(9):2251\u20132265","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"2477_CR18","doi-asserted-by":"publisher","first-page":"8892","DOI":"10.1109\/TIP.2020.3020383","volume":"29","author":"C Deng","year":"2020","unstructured":"Deng C, Xu X, Wang H, Yang M, Tao D (2020) Progressive cross-modal semantic network for zero-shot sketch-based image retrieval. IEEE Trans Image Process 29:8892\u20138902","journal-title":"IEEE Trans Image Process"},{"key":"2477_CR19","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.110110","volume":"147","author":"X Wang","year":"2024","unstructured":"Wang X, Zhang S, Qing Z, Zuo Z, Gao C, Jin R, Sang N (2024) Hyrsm++: hybrid relation guided temporal set matching for few-shot action recognition. Pattern Recogn 147:110110","journal-title":"Pattern Recogn"},{"key":"2477_CR20","unstructured":"Zhang B, Gao Z, Qu Y, Xie H (2024) How control information influences multilingual text image generation and editing? arXiv:2407.11502"},{"key":"2477_CR21","doi-asserted-by":"crossref","unstructured":"Mithun NC, Li J, Metze F, Roy-Chowdhury AK (2018) Learning joint embedding with multimodal cues for cross-modal video-text retrieval. In: Proceedings of the 2018 ACM on international conference on multimedia retrieval, pp 19\u201327","DOI":"10.1145\/3206025.3206064"},{"key":"2477_CR22","doi-asserted-by":"publisher","first-page":"3626","DOI":"10.1109\/TIP.2020.2963957","volume":"29","author":"D Xie","year":"2020","unstructured":"Xie D, Deng C, Li C, Liu X, Tao D (2020) Multi-task consistency-preserving adversarial hashing for cross-modal retrieval. IEEE Trans Image Process 29:3626\u20133637","journal-title":"IEEE Trans Image Process"},{"key":"2477_CR23","doi-asserted-by":"crossref","unstructured":"Li P, Xie C-W, Zhao L, Xie H, Ge J, Zheng Y, Zhao D, Zhang Y (2023) Progressive spatio-temporal prototype matching for text-video retrieval. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 4100\u20134110","DOI":"10.1109\/ICCV51070.2023.00379"},{"issue":"8","key":"2477_CR24","doi-asserted-by":"publisher","first-page":"2265","DOI":"10.1007\/s11263-020-01331-0","volume":"128","author":"Z Li","year":"2020","unstructured":"Li Z, Tang J, Zhang L, Yang J (2020) Weakly-supervised semantic guided hashing for social image retrieval. Int J Comput Vis 128(8):2265\u20132278","journal-title":"Int J Comput Vis"},{"issue":"8","key":"2477_CR25","doi-asserted-by":"publisher","first-page":"3893","DOI":"10.1109\/TIP.2018.2821921","volume":"27","author":"C Deng","year":"2018","unstructured":"Deng C, Chen Z, Liu X, Gao X, Tao D (2018) Triplet-based deep hashing network for cross-modal retrieval. IEEE Trans Image Process 27(8):3893\u20133903","journal-title":"IEEE Trans Image Process"},{"issue":"6","key":"2477_CR26","doi-asserted-by":"publisher","first-page":"2189","DOI":"10.1109\/TNNLS.2019.2929068","volume":"31","author":"C Deng","year":"2019","unstructured":"Deng C, Yang E, Liu T, Tao D (2019) Two-stream deep hashing with class-specific centers for supervised image search. IEEE Trans Neural Netw Learn Syst 31(6):2189\u20132201","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"2477_CR27","doi-asserted-by":"crossref","unstructured":"Cao Y, Liu B, Long M, Wang J (2018) Cross-modal hamming hashing. In: Proceedings of the European conference on computer vision (ECCV), pp 202\u2013218","DOI":"10.1007\/978-3-030-01246-5_13"},{"issue":"4","key":"2477_CR28","doi-asserted-by":"publisher","first-page":"1838","DOI":"10.1109\/TNNLS.2020.2997020","volume":"34","author":"L Jin","year":"2020","unstructured":"Jin L, Li Z, Tang J (2020) Deep semantic multimodal hashing network for scalable image-text and video-text retrievals. IEEE Trans Neural Netw Learn Syst 34(4):1838\u20131851","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"2477_CR29","doi-asserted-by":"crossref","unstructured":"Schroff F, Kalenichenko D, Philbin J (2015) Facenet: A unified embedding for face recognition and clustering. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 815\u2013823","DOI":"10.1109\/CVPR.2015.7298682"},{"issue":"3","key":"2477_CR30","doi-asserted-by":"publisher","first-page":"1405","DOI":"10.1109\/TIP.2017.2776745","volume":"27","author":"L Jin","year":"2017","unstructured":"Jin L, Li K, Hu H, Qi G-J, Tang J (2017) Semantic neighbor graph hashing for multimodal retrieval. IEEE Trans Image Process 27(3):1405\u20131417","journal-title":"IEEE Trans Image Process"},{"key":"2477_CR31","doi-asserted-by":"crossref","unstructured":"Cao Y, Long M, Wang J, Liu S (2017) Collective deep quantization for efficient cross-modal retrieval. In: Thirty-first AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v31i1.11218"},{"issue":"3","key":"2477_CR32","doi-asserted-by":"publisher","first-page":"580","DOI":"10.1109\/TPAMI.2018.2882816","volume":"42","author":"VE Liong","year":"2018","unstructured":"Liong VE, Lu J, Duan L-Y, Tan Y-P (2018) Deep variational and structural hashing. IEEE Trans Pattern Anal Mach Intell 42(3):580\u2013595","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"5","key":"2477_CR33","doi-asserted-by":"publisher","first-page":"2494","DOI":"10.1109\/TIP.2017.2676345","volume":"26","author":"X Xu","year":"2017","unstructured":"Xu X, Shen F, Yang Y, Shen HT, Li X (2017) Learning discriminative binary codes for large-scale cross-modal retrieval. IEEE Trans Image Process 26(5):2494\u20132507","journal-title":"IEEE Trans Image Process"},{"key":"2477_CR34","doi-asserted-by":"crossref","unstructured":"Hsieh C-W, Chen C-Y, Chou C-L, Shuai H-H, Liu J, Cheng W-H (2019) Fashionon: semantic-guided image-based virtual try-on with detailed human and clothing information. In: Proceedings of the 27th ACM international conference on multimedia, pp 275\u2013283","DOI":"10.1145\/3343031.3351075"},{"issue":"1","key":"2477_CR35","doi-asserted-by":"publisher","first-page":"174","DOI":"10.1109\/TMM.2019.2922128","volume":"22","author":"J Zhang","year":"2019","unstructured":"Zhang J, Peng Y (2019) Multi-pathway generative adversarial hashing for unsupervised cross-modal retrieval. IEEE Trans Multimed 22(1):174\u2013187","journal-title":"IEEE Trans Multimed"},{"key":"2477_CR36","doi-asserted-by":"crossref","unstructured":"Zhang J, Peng Y, Yuan M (2018) Unsupervised generative adversarial cross-modal hashing. In: Proceedings of the AAAI conference on artificial intelligence, vol 32","DOI":"10.1609\/aaai.v32i1.11263"},{"issue":"11","key":"2477_CR37","doi-asserted-by":"publisher","first-page":"6306","DOI":"10.1109\/TNNLS.2021.3076684","volume":"33","author":"X Liu","year":"2021","unstructured":"Liu X, Wang X, Cheung Y-m (2021) Fddh: fast discriminative discrete hashing for large-scale cross-modal retrieval. IEEE Trans Neural Netw Learn Syst 33(11):6306\u20136320","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"2477_CR38","doi-asserted-by":"publisher","first-page":"8946","DOI":"10.1109\/TMM.2023.3243608","volume":"25","author":"R-C Tu","year":"2023","unstructured":"Tu R-C, Mao X-L, Lin Q, Ji W, Qin W, Wei W, Huang H (2023) Unsupervised cross-modal hashing via semantic text mining. IEEE Trans Multimed 25:8946\u20138957","journal-title":"IEEE Trans Multimed"},{"key":"2477_CR39","doi-asserted-by":"crossref","unstructured":"Hu H, Xie L, Hong R, Tian Q (2020) Creating something from nothing: unsupervised knowledge distillation for cross-modal hashing. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 3123\u20133132","DOI":"10.1109\/CVPR42600.2020.00319"},{"issue":"4","key":"2477_CR40","doi-asserted-by":"publisher","first-page":"973","DOI":"10.1109\/TMM.2018.2866771","volume":"21","author":"D Hu","year":"2018","unstructured":"Hu D, Nie F, Li X (2018) Deep binary reconstruction for cross-modal hashing. IEEE Trans Multimed 21(4):973\u2013985","journal-title":"IEEE Trans Multimed"},{"issue":"10","key":"2477_CR41","doi-asserted-by":"publisher","first-page":"2703","DOI":"10.1109\/TCSVT.2017.2723302","volume":"28","author":"D Wang","year":"2017","unstructured":"Wang D, Wang Q, Gao X (2017) Robust and flexible discrete hashing for cross-modal similarity search. IEEE Trans Circuits Syst Video Technol 28(10):2703\u20132715","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"2477_CR42","unstructured":"Kumar S, Udupa R (2011) Learning hash functions for cross-view similarity search. In: Twenty-second international joint conference on artificial intelligence"},{"key":"2477_CR43","doi-asserted-by":"crossref","unstructured":"Song J, Yang Y, Yang Y, Huang Z, Shen HT (2013) Inter-media hashing for large-scale retrieval from heterogeneous data sources. In: Proceedings of the 2013 ACM SIGMOD international conference on management of data, pp 785\u2013796","DOI":"10.1145\/2463676.2465274"},{"key":"2477_CR44","unstructured":"Weiss Y, Torralba A, Fergus R (2009) Spectral hashing. In: Advances in neural information processing systems, pp 1753\u20131760"},{"key":"2477_CR45","doi-asserted-by":"crossref","unstructured":"Su S, Zhong Z, Zhang C (2019) Deep joint-semantics reconstructing hashing for large-scale unsupervised cross-modal retrieval. In: Proceedings of the IEEE international conference on computer vision, pp 3027\u20133035","DOI":"10.1109\/ICCV.2019.00312"},{"key":"2477_CR46","doi-asserted-by":"crossref","unstructured":"Zhu X, Huang Z, Shen HT, Zhao X (2013) Linear cross-modal hashing for efficient multimedia search. In: Proceedings of the 21st ACM international conference on multimedia, pp 143\u2013152","DOI":"10.1145\/2502081.2502107"},{"key":"2477_CR47","doi-asserted-by":"crossref","unstructured":"Ding G, Guo Y, Zhou J (2014) Collective matrix factorization hashing for multimodal data. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2075\u20132082","DOI":"10.1109\/CVPR.2014.267"},{"key":"2477_CR48","doi-asserted-by":"crossref","unstructured":"Zhou J, Ding G, Guo Y (2014) Latent semantic sparse hashing for cross-modal similarity search. In: Proceedings of the 37th international ACM SIGIR conference on research & development in information retrieval, pp 415\u2013424","DOI":"10.1145\/2600428.2609610"},{"key":"2477_CR49","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. In: Advances in neural information processing systems, pp 1097\u20131105"},{"key":"2477_CR50","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"2477_CR51","unstructured":"Srivastava N, Salakhutdinov RR (2012) Multimodal learning with deep boltzmann machines. In: Advances in neural information processing systems, pp 2222\u20132230"},{"key":"2477_CR52","doi-asserted-by":"crossref","unstructured":"Wu G, Lin Z, Han J, Liu L, Ding G, Zhang B, Shen J (2018) Unsupervised deep hashing via binary latent factor models for large-scale cross-modal retrieval. In: IJCAI, pp 2854\u20132860","DOI":"10.24963\/ijcai.2018\/396"},{"key":"2477_CR53","doi-asserted-by":"crossref","unstructured":"Yu J, Zhou H, Zhan Y, Tao D (2021) Deep graph-neighbor coherence preserving network for unsupervised cross-modal hashing. In: Proceedings of the AAAI conference on artificial intelligence. AAAI, pp 4626\u20134634","DOI":"10.1609\/aaai.v35i5.16592"},{"key":"2477_CR54","doi-asserted-by":"crossref","unstructured":"Li C, Deng C, Wang L, Xie D, Liu X (2019) Coupled cyclegan: unsupervised hashing network for cross-modal retrieval. In: Proceedings of the AAAI conference on artificial intelligence, vol 33, pp 176\u2013183","DOI":"10.1609\/aaai.v33i01.3301176"},{"key":"2477_CR55","doi-asserted-by":"crossref","unstructured":"Zhang P-F, Duan J, Huang Z, Yin H (2021) Joint-teaching: learning to refine knowledge for resource-constrained unsupervised cross-modal retrieval. In: Proceedings of the 29th ACM international conference on multimedia, pp 1517\u20131525","DOI":"10.1145\/3474085.3475286"},{"issue":"9","key":"2477_CR56","doi-asserted-by":"publisher","first-page":"6289","DOI":"10.1109\/TNNLS.2021.3135420","volume":"34","author":"T Hoang","year":"2022","unstructured":"Hoang T, Do T-T, Nguyen TV, Cheung N-M (2022) Multimodal mutual information maximization: a novel approach for unsupervised deep cross-modal hashing. IEEE Trans Neural Netw Learn Syst 34(9):6289\u20136302","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"2477_CR57","doi-asserted-by":"crossref","unstructured":"Zhuo Y, Li Y, Hsiao J, Ho C, Li B (2022) Clip4hashing: unsupervised deep hashing for cross-modal video-text retrieval. In: Proceedings of the 2022 international conference on multimedia retrieval, pp 158\u2013166","DOI":"10.1145\/3512527.3531381"},{"key":"2477_CR58","doi-asserted-by":"crossref","unstructured":"Xu M, Lai H, Yin J (2023) Clip-hash: a lightweight hashing network for cross-modal retrieval. In: 2023 26th international conference on computer supported cooperative work in design (CSCWD). IEEE, pp 1086\u20131091","DOI":"10.1109\/CSCWD57460.2023.10152621"},{"issue":"9","key":"2477_CR59","doi-asserted-by":"publisher","first-page":"5296","DOI":"10.1109\/TCSVT.2023.3251395","volume":"33","author":"R-C Tu","year":"2023","unstructured":"Tu R-C, Jiang J, Lin Q, Cai C, Tian S, Wang H, Liu W (2023) Unsupervised cross-modal hashing with modality-interaction. IEEE Trans Circuits Syst Video Technol 33(9):5296\u20135308","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"issue":"5786","key":"2477_CR60","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1126\/science.1127647","volume":"313","author":"GE Hinton","year":"2006","unstructured":"Hinton GE, Salakhutdinov RR (2006) Reducing the dimensionality of data with neural networks. Science 313(5786):504\u2013507","journal-title":"Science"},{"key":"2477_CR61","unstructured":"Kingma DP, Welling M (2013) Auto-encoding variational bayes. arXiv:1312.6114"},{"key":"2477_CR62","unstructured":"Rezende DJ, Mohamed S, Wierstra D (2014) Stochastic backpropagation and approximate inference in deep generative models. In: International conference on machine learning. PMLR, pp 1278\u20131286"},{"key":"2477_CR63","unstructured":"Sohn K, Lee H, Yan X (2015) Learning structured output representation using deep conditional generative models. In: Advances in neural information processing systems, pp 3483\u20133491"},{"key":"2477_CR64","unstructured":"Reed S, Akata Z, Yan X, Logeswaran L, Schiele B, Lee H (2016) Generative adversarial text to image synthesis. In: International conference on machine learning. PMLR, pp 1060\u20131069"},{"key":"2477_CR65","doi-asserted-by":"crossref","unstructured":"Bao J, Chen D, Wen F, Li H, Hua G (2017) Cvae-gan: fine-grained image generation through asymmetric training. In: Proceedings of the IEEE international conference on computer vision, pp 2745\u20132754","DOI":"10.1109\/ICCV.2017.299"},{"key":"2477_CR66","doi-asserted-by":"crossref","unstructured":"Verma VK, Arora G, Mishra A, Rai P (2018) Generalized zero-shot learning via synthesized examples. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4281\u20134289","DOI":"10.1109\/CVPR.2018.00450"},{"key":"2477_CR67","doi-asserted-by":"crossref","unstructured":"Lin C-C, Hung Y, Feris R, He L (2020) Video instance segmentation tracking with a modified vae architecture. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 13147\u201313157","DOI":"10.1109\/CVPR42600.2020.01316"},{"key":"2477_CR68","doi-asserted-by":"crossref","unstructured":"Zhao T, Zhao R, Eskenazi M (2017) Learning discourse-level diversity for neural dialog models using conditional variational autoencoders. arXiv:1703.10960","DOI":"10.18653\/v1\/P17-1061"},{"key":"2477_CR69","doi-asserted-by":"crossref","unstructured":"Jing M, Li J, Zhu L, Lu K, Yang Y, Huang Z (2020) Incomplete cross-modal retrieval with dual-aligned variational autoencoders. In: Proceedings of the 28th ACM international conference on multimedia, pp 3283\u20133291","DOI":"10.1145\/3394171.3413676"},{"key":"2477_CR70","unstructured":"He D, Xia Y, Qin T, Wang L, Yu N, Liu T-Y, Ma W-Y (2016) Dual learning for machine translation. In: Advances in neural information processing systems, pp 820\u2013828"},{"key":"2477_CR71","doi-asserted-by":"crossref","unstructured":"Yi Z, Zhang H, Tan P, Gong M (2017) Dualgan: unsupervised dual learning for image-to-image translation. In: Proceedings of the IEEE international conference on computer vision, pp 2849\u20132857","DOI":"10.1109\/ICCV.2017.310"},{"key":"2477_CR72","doi-asserted-by":"crossref","unstructured":"Luo P, Wang G, Lin L, Wang X (2017) Deep dual learning for semantic image segmentation. In: Proceedings of the IEEE international conference on computer vision, pp 2718\u20132726","DOI":"10.1109\/ICCV.2017.296"},{"key":"2477_CR73","doi-asserted-by":"crossref","unstructured":"Chen Z-D, Yu W-J, Li C-X, Nie L, Xu X-S (2018) Dual deep neural networks cross-modal hashing. In: Proceedings of the AAAI conference on artificial intelligence, vol 32","DOI":"10.1609\/aaai.v32i1.11249"},{"key":"2477_CR74","unstructured":"Goodfellow I, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y (2014) Generative adversarial nets. In: Advances in neural information processing systems, pp 2672\u20132680"},{"key":"2477_CR75","doi-asserted-by":"crossref","unstructured":"Cao Z, Long M, Wang J, Yu PS (2017) Hashnet: deep learning to hash by continuation. In: Proceedings of the IEEE international conference on computer vision, pp 5608\u20135617","DOI":"10.1109\/ICCV.2017.598"},{"key":"2477_CR76","unstructured":"Oord A, Vinyals O et\u00a0al (2017) Neural discrete representation learning. In: Advances in neural information processing systems, pp 6306\u20136315"},{"key":"2477_CR77","doi-asserted-by":"crossref","unstructured":"Tu R-C, Mao X, Wei W (2020) Mls3rduh: deep unsupervised hashing via manifold based local semantic similarity structure reconstructing. In: IJCAI, pp 3466\u20133472","DOI":"10.24963\/ijcai.2020\/479"},{"key":"2477_CR78","doi-asserted-by":"crossref","unstructured":"Yang E, Deng C, Liu T, Liu W, Tao D (2018) Semantic structure-based unsupervised deep hashing. In: Proceedings of the 27th international joint conference on artificial intelligence, pp 1064\u20131070","DOI":"10.24963\/ijcai.2018\/148"},{"key":"2477_CR79","doi-asserted-by":"crossref","unstructured":"Li S, Li X, Lu J, Zhou J (2021) Self-supervised video hashing via bidirectional transformers. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 13549\u201313558","DOI":"10.1109\/CVPR46437.2021.01334"},{"key":"2477_CR80","unstructured":"Li W-J, Wang S, Kang W-C (2015) Feature learning based deep supervised hashing with pairwise labels. arXiv:1511.03855"},{"key":"2477_CR81","doi-asserted-by":"crossref","unstructured":"Rasiwasia N, Costa\u00a0Pereira J, Coviello E, Doyle G, Lanckriet GR, Levy R, Vasconcelos N (2010) A new approach to cross-modal multimedia retrieval. In: Proceedings of the 18th ACM international conference on multimedia, pp 251\u2013260","DOI":"10.1145\/1873951.1873987"},{"key":"2477_CR82","doi-asserted-by":"crossref","unstructured":"Chua T-S, Tang J, Hong R, Li H, Luo Z, Zheng Y (2009) Nus-wide: a real-world web image database from national university of Singapore. In: Proceedings of the ACM international conference on image and video retrieval, pp 1\u20139","DOI":"10.1145\/1646396.1646452"},{"key":"2477_CR83","doi-asserted-by":"crossref","unstructured":"Huiskes MJ, Lew MS (2008) The mir flickr retrieval evaluation. In: Proceedings of the 1st ACM international conference on multimedia information retrieval, pp 39\u201343","DOI":"10.1145\/1460096.1460104"},{"key":"2477_CR84","unstructured":"Paszke A, Gross S, Massa F, Lerer A, Bradbury J, Chanan G, Killeen T, Lin Z, Gimelshein N, Antiga L, et al (2019) Pytorch: an imperative style, high-performance deep learning library. Advances in neural information processing systems, vol 32"},{"key":"2477_CR85","doi-asserted-by":"crossref","unstructured":"Chatfield K, Simonyan K, Vedaldi A, Zisserman A (2014) Return of the devil in the details: delving deep into convolutional nets. arXiv:1405.3531","DOI":"10.5244\/C.28.6"},{"issue":"1\u20133","key":"2477_CR86","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1016\/0169-7439(87)80084-9","volume":"2","author":"S Wold","year":"1987","unstructured":"Wold S, Esbensen K, Geladi P (1987) Principal component analysis. Chemom Intell Lab Syst 2(1\u20133):37\u201352","journal-title":"Chemom Intell Lab Syst"},{"key":"2477_CR87","unstructured":"Simonyan K, Zisserman A (2015) Very deep convolutional networks for large-scale image recognition. In: Bengio Y, LeCun Y (eds) 3rd international conference on learning representations, ICLR 2015, San Diego, CA, USA, May 7\u20139, 2015, Conference track proceedings"},{"key":"2477_CR88","doi-asserted-by":"crossref","unstructured":"Szegedy C, Vanhoucke V, Ioffe S, Shlens J, Wojna Z (2016) Rethinking the inception architecture for computer vision. In: 2016 IEEE conference on computer vision and pattern recognition, CVPR 2016, Las Vegas, NV, USA, June 27\u201330, 2016, pp 2818\u20132826","DOI":"10.1109\/CVPR.2016.308"},{"issue":"11","key":"2477_CR89","first-page":"2579","volume":"9","author":"L Maaten","year":"2008","unstructured":"Maaten L, Hinton G (2008) Visualizing data using t-sne. J Mach Learn Res 9(11):2579\u20132605","journal-title":"J Mach Learn Res"},{"key":"2477_CR90","unstructured":"Chen T, Kornblith S, Norouzi M, Hinton G (2020) A simple framework for contrastive learning of visual representations. In: International conference on machine learning. PMLR, pp 1597\u20131607"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02477-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-024-02477-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02477-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,7]],"date-time":"2025-06-07T04:31:09Z","timestamp":1749270669000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-024-02477-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,28]]},"references-count":90,"journal-issue":{"issue":"5-6","published-print":{"date-parts":[[2025,6]]}},"alternative-id":["2477"],"URL":"https:\/\/doi.org\/10.1007\/s13042-024-02477-w","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"type":"print","value":"1868-8071"},{"type":"electronic","value":"1868-808X"}],"subject":[],"published":{"date-parts":[[2024,11,28]]},"assertion":[{"value":"26 August 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 November 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 November 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}