{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T08:47:00Z","timestamp":1776761220337,"version":"3.51.2"},"reference-count":67,"publisher":"Elsevier BV","issue":"7","license":[{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Information Processing &amp; Management"],"published-print":{"date-parts":[[2026,11]]},"DOI":"10.1016\/j.ipm.2026.104813","type":"journal-article","created":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T03:48:51Z","timestamp":1776743331000},"page":"104813","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PA","title":["VTFAformer: A visual\u2013tactile fusion network for object attribute recognition"],"prefix":"10.1016","volume":"63","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-7051-2384","authenticated-orcid":false,"given":"Zizhen","family":"Yi","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7175-1221","authenticated-orcid":false,"given":"Guochu","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Haiyan","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Liang","family":"Li","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7952-6691","authenticated-orcid":false,"given":"Baojiang","family":"Li","sequence":"additional","affiliation":[]},{"given":"Feng","family":"Qi","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.ipm.2026.104813_b1","doi-asserted-by":"crossref","DOI":"10.3389\/fnbot.2024.1427786","article-title":"Multi-modal remote perception learning for object sensory data","volume":"18","author":"Almujally","year":"2024","journal-title":"Frontiers in Neurorobotics"},{"key":"10.1016\/j.ipm.2026.104813_b2","series-title":"Efficientvit: Multi-scale linear attention for high-resolution dense prediction","author":"Cai","year":"2022"},{"key":"10.1016\/j.ipm.2026.104813_b3","doi-asserted-by":"crossref","DOI":"10.1016\/j.jvcir.2024.104210","article-title":"Transferable adversarial attack on image tampering localization","volume":"102","author":"Cao","year":"2024","journal-title":"Journal of Visual Communication and Image Representation"},{"key":"10.1016\/j.ipm.2026.104813_b4","doi-asserted-by":"crossref","DOI":"10.1016\/j.asoc.2025.113478","article-title":"Advances in EEG-based emotion recognition: Challenges, methodologies, and future directions","volume":"180","author":"Chen","year":"2025","journal-title":"Applied Soft Computing"},{"key":"10.1016\/j.ipm.2026.104813_b5","article-title":"Driver fatigue detection using EEG-based graph attention convolutional neural networks: An end-to-end learning approach with mutual information-driven connectivity","author":"Chen","year":"2025","journal-title":"Applied Soft Computing"},{"key":"10.1016\/j.ipm.2026.104813_b6","doi-asserted-by":"crossref","DOI":"10.1016\/j.chaos.2025.116751","article-title":"Decoding driving states based on normalized mutual information features and hyperparameter self-optimized Gaussian kernel-based radial basis function extreme learning machine","volume":"199","author":"Chen","year":"2025","journal-title":"Chaos, Solitons & Fractals"},{"key":"10.1016\/j.ipm.2026.104813_b7","doi-asserted-by":"crossref","unstructured":"Chen, K., Jiang, X., Hu, Y., Tang, X., Gao, Y., Chen, J., & Xie, W. (2023). Ovarnet: Towards open-vocabulary object attribute recognition. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 23518\u201323527).","DOI":"10.1109\/CVPR52729.2023.02252"},{"key":"10.1016\/j.ipm.2026.104813_b8","series-title":"Visuo-tactile transformers for manipulation","author":"Chen","year":"2022"},{"issue":"10","key":"10.1016\/j.ipm.2026.104813_b9","doi-asserted-by":"crossref","first-page":"6994","DOI":"10.1109\/TCSVT.2022.3178144","article-title":"A simple visual-textual baseline for pedestrian attribute recognition","volume":"32","author":"Cheng","year":"2022","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.ipm.2026.104813_b10","series-title":"SToLa: Self-adaptive touch-language framework with tactile commonsense reasoning in open-ended scenarios","author":"Cheng","year":"2025"},{"key":"10.1016\/j.ipm.2026.104813_b11","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2025.103305","article-title":"Touch100k: A large-scale touch-language-vision dataset for touch-centric multimodal representation","volume":"124","author":"Cheng","year":"2025","journal-title":"Information Fusion"},{"key":"10.1016\/j.ipm.2026.104813_b12","series-title":"Gaussian process-based active exploration strategies in vision and touch","author":"Choi","year":"2025"},{"key":"10.1016\/j.ipm.2026.104813_b13","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2023.126763","article-title":"Object and attribute recognition for product image with self-supervised learning","volume":"558","author":"Dai","year":"2023","journal-title":"Neurocomputing"},{"issue":"4","key":"10.1016\/j.ipm.2026.104813_b14","doi-asserted-by":"crossref","DOI":"10.1016\/j.ipm.2025.104127","article-title":"Neurosymbolic graph enrichment for grounded world models","volume":"62","author":"De Giorgis","year":"2025","journal-title":"Information Processing & Management"},{"key":"10.1016\/j.ipm.2026.104813_b15","doi-asserted-by":"crossref","unstructured":"Devlin, J., Chang, M.-W., Lee, K., & Toutanova, K. (2019). Bert: Pre-training of deep bidirectional transformers for language understanding. In Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers) (pp. 4171\u20134186).","DOI":"10.18653\/v1\/N19-1423"},{"key":"10.1016\/j.ipm.2026.104813_b16","doi-asserted-by":"crossref","unstructured":"Ding, X., Zhang, Y., Ge, Y., Zhao, S., Song, L., Yue, X., & Shan, Y. (2024). Unireplknet: A universal perception large-kernel convnet for audio video point cloud time-series and image recognition. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 5513\u20135524).","DOI":"10.1109\/CVPR52733.2024.00527"},{"key":"10.1016\/j.ipm.2026.104813_b17","doi-asserted-by":"crossref","unstructured":"Feng, C., Chen, Z., & Owens, A. (2023). Self-supervised video forensics by audio-visual anomaly detection. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 10491\u201310503).","DOI":"10.1109\/CVPR52729.2023.01011"},{"key":"10.1016\/j.ipm.2026.104813_b18","series-title":"Anytouch: Learning unified static-dynamic representation across multiple visuo-tactile sensors","author":"Feng","year":"2025"},{"key":"10.1016\/j.ipm.2026.104813_b19","series-title":"A touch, vision, and language dataset for multimodal alignment","author":"Fu","year":"2024"},{"key":"10.1016\/j.ipm.2026.104813_b20","series-title":"Objectfolder: A dataset of objects with implicit visual, auditory, and tactile representations","author":"Gao","year":"2021"},{"key":"10.1016\/j.ipm.2026.104813_b21","doi-asserted-by":"crossref","unstructured":"Gao, R., Dou, Y., Li, H., Agarwal, T., Bohg, J., Li, Y., Fei-Fei, L., & Wu, J. (2023). The objectfolder benchmark: Multisensory learning with neural and real objects. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 17276\u201317286).","DOI":"10.1109\/CVPR52729.2023.01657"},{"key":"10.1016\/j.ipm.2026.104813_b22","doi-asserted-by":"crossref","unstructured":"Gao, R., Si, Z., Chang, Y.-Y., Clarke, S., Bohg, J., Fei-Fei, L., Yuan, W., & Wu, J. (2022). Objectfolder 2.0: A multisensory object dataset for sim2real transfer. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 10598\u201310608).","DOI":"10.1109\/CVPR52688.2022.01034"},{"issue":"1","key":"10.1016\/j.ipm.2026.104813_b23","doi-asserted-by":"crossref","first-page":"554","DOI":"10.1109\/TMECH.2024.3400789","article-title":"Learning generalizable vision-tactile robotic grasping strategy for deformable objects via transformer","volume":"30","author":"Han","year":"2024","journal-title":"IEEE\/ASME Transactions on Mechatronics"},{"key":"10.1016\/j.ipm.2026.104813_b24","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 770\u2013778).","DOI":"10.1109\/CVPR.2016.90"},{"key":"10.1016\/j.ipm.2026.104813_b25","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2024.128645","article-title":"A comprehensive survey on contrastive learning","volume":"610","author":"Hu","year":"2024","journal-title":"Neurocomputing"},{"key":"10.1016\/j.ipm.2026.104813_b26","series-title":"3D-vitac: Learning fine-grained manipulation with visuo-tactile sensing","author":"Huang","year":"2024"},{"issue":"3","key":"10.1016\/j.ipm.2026.104813_b27","doi-asserted-by":"crossref","first-page":"3838","DOI":"10.1109\/LRA.2020.2977257","article-title":"Digit: A novel design for a low-cost compact high-resolution tactile sensor with application to in-hand manipulation","volume":"5","author":"Lambeta","year":"2020","journal-title":"IEEE Robotics and Automation Letters"},{"key":"10.1016\/j.ipm.2026.104813_b28","first-page":"1","article-title":"VITO-transformer: A visual-tactile fusion network for object recognition","volume":"72","author":"Li","year":"2023","journal-title":"IEEE Transactions on Instrumentation and Measurement"},{"key":"10.1016\/j.ipm.2026.104813_b29","series-title":"2018 IEEE international conference on robotics and automation","first-page":"7772","article-title":"Slip detection with combined tactile and visual information","author":"Li","year":"2018"},{"key":"10.1016\/j.ipm.2026.104813_b30","series-title":"Rediscovering bce loss for uniform classification","author":"Li","year":"2024"},{"key":"10.1016\/j.ipm.2026.104813_b31","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2025.102943","article-title":"TVT-Transformer: A Tactile-visual-textual fusion network for object recognition","volume":"118","author":"Li","year":"2025","journal-title":"Information Fusion"},{"key":"10.1016\/j.ipm.2026.104813_b32","first-page":"9694","article-title":"Align before fuse: Vision and language representation learning with momentum distillation","volume":"34","author":"Li","year":"2021","journal-title":"Advances in Neural Information Processing Systems"},{"key":"10.1016\/j.ipm.2026.104813_b33","doi-asserted-by":"crossref","unstructured":"Liu, Z., Hu, H., Lin, Y., Yao, Z., Xie, Z., Wei, Y., Ning, J., Cao, Y., Zhang, Z., Dong, L., et al. (2022). Swin transformer v2: Scaling up capacity and resolution. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 12009\u201312019).","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"10.1016\/j.ipm.2026.104813_b34","article-title":"A 2 tformer: addressing Temporal bias and non-stationarity in transformer-based IoT time series classification","author":"Luo","year":"2025","journal-title":"IEEE Internet of Things Journal"},{"key":"10.1016\/j.ipm.2026.104813_b35","series-title":"Cltp: Contrastive language-tactile pre-training for 3d contact geometry understanding","author":"Ma","year":"2025"},{"key":"10.1016\/j.ipm.2026.104813_b36","series-title":"International conference on machine learning","first-page":"23803","article-title":"Cross-entropy loss functions: Theoretical analysis and applications","author":"Mao","year":"2023"},{"key":"10.1016\/j.ipm.2026.104813_b37","doi-asserted-by":"crossref","DOI":"10.1016\/j.measurement.2024.115332","article-title":"Tactile sensors: A review","volume":"238","author":"Meribout","year":"2024","journal-title":"Measurement"},{"key":"10.1016\/j.ipm.2026.104813_b38","series-title":"Representation learning with contrastive predictive coding","author":"Oord","year":"2018"},{"key":"10.1016\/j.ipm.2026.104813_b39","doi-asserted-by":"crossref","unstructured":"Pham, K., Kafle, K., Lin, Z., Ding, Z., Cohen, S., Tran, Q., & Shrivastava, A. (2021). Learning to predict visual attributes in the wild. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 13018\u201313028).","DOI":"10.1109\/CVPR46437.2021.01282"},{"issue":"4","key":"10.1016\/j.ipm.2026.104813_b40","doi-asserted-by":"crossref","first-page":"6631","DOI":"10.1109\/TII.2024.3352232","article-title":"Glass makes blurs: Learning the visual blurriness for glass surface detection","volume":"20","author":"Qi","year":"2024","journal-title":"IEEE Transactions on Industrial Informatics"},{"key":"10.1016\/j.ipm.2026.104813_b41","series-title":"International conference on machine learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"10.1016\/j.ipm.2026.104813_b42","series-title":"2009 IEEE\/RSJ international conference on intelligent robots and systems","first-page":"243","article-title":"Object identification with tactile sensors using bag-of-features","author":"Schneider","year":"2009"},{"key":"10.1016\/j.ipm.2026.104813_b43","doi-asserted-by":"crossref","DOI":"10.1109\/TIM.2025.3583364","article-title":"VLCIM: A vision-language cyclic interaction model for industrial defect detection","author":"Shen","year":"2025","journal-title":"IEEE Transactions on Instrumentation and Measurement"},{"issue":"5","key":"10.1016\/j.ipm.2026.104813_b44","doi-asserted-by":"crossref","first-page":"3510","DOI":"10.1109\/TCSVT.2023.3326375","article-title":"Discriminative feature learning with co-occurrence attention network for vehicle ReID","volume":"34","author":"Sheng","year":"2023","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.ipm.2026.104813_b45","doi-asserted-by":"crossref","first-page":"178","DOI":"10.1109\/TNSRE.2023.3346955","article-title":"B2-ViT Net: Broad vision transformer network with broad attention for seizure prediction","volume":"32","author":"Shi","year":"2023","journal-title":"IEEE Transactions on Neural Systems and Rehabilitation Engineering"},{"issue":"2","key":"10.1016\/j.ipm.2026.104813_b46","doi-asserted-by":"crossref","DOI":"10.1016\/j.ipm.2024.103989","article-title":"Adaptive CLIP for open-domain 3D model retrieval","volume":"62","author":"Song","year":"2025","journal-title":"Information Processing & Management"},{"key":"10.1016\/j.ipm.2026.104813_b47","series-title":"2019 international conference on robotics and automation","first-page":"8951","article-title":"Deep visuo-tactile learning: Estimation of tactile properties from images","author":"Takahashi","year":"2019"},{"key":"10.1016\/j.ipm.2026.104813_b48","article-title":"Can vision feel touch? Tactile-aware visual grasping for transparent objects","author":"Tong","year":"2025","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"issue":"11","key":"10.1016\/j.ipm.2026.104813_b49","article-title":"Visualizing data using t-SNE","volume":"9","author":"Van der Maaten","year":"2008","journal-title":"Journal of Machine Learning Research"},{"issue":"1","key":"10.1016\/j.ipm.2026.104813_b50","doi-asserted-by":"crossref","first-page":"61","DOI":"10.1023\/B:VISI.0000046589.39864.ee","article-title":"A statistical approach to texture classification from single images","volume":"62","author":"Varma","year":"2005","journal-title":"International Journal of Computer Vision"},{"key":"10.1016\/j.ipm.2026.104813_b51","unstructured":"Vasu, P. K. A., Gabriel, J., Zhu, J., Tuzel, O., & Ranjan, A. (2023). Fastvit: A fast hybrid vision transformer using structural reparameterization. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 5785\u20135795)."},{"key":"10.1016\/j.ipm.2026.104813_b52","doi-asserted-by":"crossref","unstructured":"Wang, A., Chen, H., Lin, Z., Han, J., & Ding, G. (2024). Repvit: Revisiting mobile cnn from vit perspective. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 15909\u201315920).","DOI":"10.1109\/CVPR52733.2024.01506"},{"issue":"6","key":"10.1016\/j.ipm.2026.104813_b53","doi-asserted-by":"crossref","first-page":"1068","DOI":"10.7307\/ptt.v36i6.667","article-title":"Fusing visual quantified features for heterogeneous traffic flow prediction","volume":"36","author":"Wang","year":"2024","journal-title":"Promet-Traffic&Transportation"},{"issue":"1","key":"10.1016\/j.ipm.2026.104813_b54","doi-asserted-by":"crossref","DOI":"10.1016\/j.ipm.2024.103898","article-title":"Multi-granularity contrastive zero-shot learning model based on attribute decomposition","volume":"62","author":"Wang","year":"2025","journal-title":"Information Processing & Management"},{"key":"10.1016\/j.ipm.2026.104813_b55","article-title":"Nonlinear feature decomposition and deep temporal-spatial learning for single-channel sEMG-based lower limb motion recognition","author":"Wei","year":"2025","journal-title":"IEEE Sensors Journal"},{"key":"10.1016\/j.ipm.2026.104813_b56","series-title":"2022 international joint conference on neural networks","first-page":"1","article-title":"Alignment and multi-scale fusion for visual-tactile object recognition","author":"Wei","year":"2022"},{"key":"10.1016\/j.ipm.2026.104813_b57","doi-asserted-by":"crossref","unstructured":"Wu, Z., Xiong, Y., Yu, S. X., & Lin, D. (2018). Unsupervised feature learning via non-parametric instance discrimination. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 3733\u20133742).","DOI":"10.1109\/CVPR.2018.00393"},{"key":"10.1016\/j.ipm.2026.104813_b58","first-page":"1","article-title":"A vision-based tactile sensing system for multimodal contact information perception via neural network","volume":"73","author":"Xu","year":"2024","journal-title":"IEEE Transactions on Instrumentation and Measurement"},{"key":"10.1016\/j.ipm.2026.104813_b59","doi-asserted-by":"crossref","unstructured":"Yang, F., Feng, C., Chen, Z., Park, H., Wang, D., Dou, Y., Zeng, Z., Chen, X., Gangopadhyay, R., Owens, A., et al. (2024). Binding touch to everything: Learning unified multimodal tactile representations. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 26340\u201326353).","DOI":"10.1109\/CVPR52733.2024.02488"},{"key":"10.1016\/j.ipm.2026.104813_b60","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2025.103096","article-title":"AOGN-CZSL: An attribute-and object-guided network for compositional zero-shot learning","volume":"120","author":"Yang","year":"2025","journal-title":"Information Fusion"},{"key":"10.1016\/j.ipm.2026.104813_b61","series-title":"Touch and go: Learning from human-collected vision and touch","author":"Yang","year":"2022"},{"issue":"12","key":"10.1016\/j.ipm.2026.104813_b62","doi-asserted-by":"crossref","first-page":"2762","DOI":"10.3390\/s17122762","article-title":"Gelsight: High-resolution robot tactile sensors for estimating geometry and force","volume":"17","author":"Yuan","year":"2017","journal-title":"Sensors"},{"key":"10.1016\/j.ipm.2026.104813_b63","article-title":"Online adaptive keypoint extraction for visual odometry across different scenes","author":"Zhang","year":"2025","journal-title":"IEEE Robotics and Automation Letters"},{"issue":"1","key":"10.1016\/j.ipm.2026.104813_b64","doi-asserted-by":"crossref","first-page":"1294","DOI":"10.1109\/TIV.2023.3288810","article-title":"Target-driven visual navigation by using causal intervention","volume":"9","author":"Zhao","year":"2023","journal-title":"IEEE Transactions on Intelligent Vehicles"},{"issue":"9","key":"10.1016\/j.ipm.2026.104813_b65","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3649447","article-title":"Deep multimodal data fusion","volume":"56","author":"Zhao","year":"2024","journal-title":"ACM Computing Surveys"},{"issue":"3","key":"10.1016\/j.ipm.2026.104813_b66","doi-asserted-by":"crossref","first-page":"1192","DOI":"10.1109\/TNNLS.2020.2980892","article-title":"Lifelong visual-tactile cross-modal learning for robotic material perception","volume":"32","author":"Zheng","year":"2020","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"10.1016\/j.ipm.2026.104813_b67","series-title":"International conference on artificial neural networks","first-page":"233","article-title":"Visual-haptic-kinesthetic object recognition with multimodal transformer","author":"Zhou","year":"2023"}],"container-title":["Information Processing &amp; Management"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0306457326002049?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0306457326002049?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T07:58:33Z","timestamp":1776758313000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0306457326002049"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,11]]},"references-count":67,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2026,11]]}},"alternative-id":["S0306457326002049"],"URL":"https:\/\/doi.org\/10.1016\/j.ipm.2026.104813","relation":{},"ISSN":["0306-4573"],"issn-type":[{"value":"0306-4573","type":"print"}],"subject":[],"published":{"date-parts":[[2026,11]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"VTFAformer: A visual\u2013tactile fusion network for object attribute recognition","name":"articletitle","label":"Article Title"},{"value":"Information Processing & Management","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.ipm.2026.104813","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"104813"}}