{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T19:02:26Z","timestamp":1775502146374,"version":"3.50.1"},"reference-count":55,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Advanced Engineering Informatics"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1016\/j.aei.2026.104610","type":"journal-article","created":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T17:18:34Z","timestamp":1774286314000},"page":"104610","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PA","title":["Few-shot assembly action recognition in smart manufacturing: A cross-domain metric framework"],"prefix":"10.1016","volume":"74","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-0196-6184","authenticated-orcid":false,"given":"Yan","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Xinyuan","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Xinlong","family":"Qi","sequence":"additional","affiliation":[]},{"given":"Enze","family":"Cui","sequence":"additional","affiliation":[]},{"given":"Hongyong","family":"Fu","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.aei.2026.104610_b0005","doi-asserted-by":"crossref","first-page":"1629","DOI":"10.1186\/s40064-016-3279-x","article-title":"Human motion segmentation and recognition using machine vision for mechanical assembly operation","volume":"5","author":"Jiang","year":"2016","journal-title":"Springerplus"},{"key":"10.1016\/j.aei.2026.104610_b0010","doi-asserted-by":"crossref","first-page":"785","DOI":"10.1109\/TII.2023.3264284","article-title":"Unsupervised human activity recognition learning for disassembly tasks","volume":"20","author":"Zhang","year":"2023","journal-title":"IEEE Trans. Ind. Inf."},{"key":"10.1016\/j.aei.2026.104610_b0015","doi-asserted-by":"crossref","unstructured":"J. Huang, X. Liu, H. Hu, S. Tang, C. Li, S. Zhao, S. Lian, Spatial-Temporal Transformer Network for Continuous Action Recognition in Industrial Assembly, in: International Conference on Intelligent Computing, Springer Nature Singapore, Singapore, 2024, pp. 114\u2013130.","DOI":"10.1007\/978-981-97-5609-4_9"},{"key":"10.1016\/j.aei.2026.104610_b0020","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3502287","article-title":"A systematic review on data scarcity problem in deep learning: solution and applications","volume":"54","author":"Bansal","year":"2022","journal-title":"ACM Comput. Surv."},{"key":"10.1016\/j.aei.2026.104610_b0025","series-title":"In: 2016 IEEE International Conference on Image Processing (ICIP)","first-page":"3693","article-title":"Fine tuning CNNs with scarce training data\u2014Adapting ImageNet to art epoch classification","author":"Hentschel","year":"2016"},{"key":"10.1016\/j.aei.2026.104610_b0030","unstructured":"W. Y. Chen, Y. C. Liu, Z. Kira, Y. C. F. Wang, J. B. Huang, A closer look at few-shot classification, arXiv preprint arXiv:1904.04232 (2019)."},{"key":"10.1016\/j.aei.2026.104610_b0035","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3659943","article-title":"Meta-learning approaches for few-shot learning: a survey of recent advances","volume":"56","author":"Gharoun","year":"2024","journal-title":"ACM Comput. Surv."},{"key":"10.1016\/j.aei.2026.104610_b0040","unstructured":"C. Careaga, B. Hutchinson, N. Hodas, L. Phillips, Metric-based few-shot learning for video action recognition, arXiv preprint arXiv:1909.09602 (2019)."},{"key":"10.1016\/j.aei.2026.104610_b0200","article-title":"Prototypical networks for few-shot learning","volume":"30","author":"Snell","year":"2017","journal-title":"Adv. Neural Inf. Proces. Syst."},{"key":"10.1016\/j.aei.2026.104610_b0045","doi-asserted-by":"crossref","first-page":"745","DOI":"10.1038\/s41597-022-01843-z","article-title":"The HA4M dataset: multi-modal monitoring of an assembly task for human action recognition in manufacturing","volume":"9","author":"Cicirelli","year":"2022","journal-title":"Sci. Data"},{"key":"10.1016\/j.aei.2026.104610_b0050","series-title":"In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision","first-page":"1569","article-title":"The meccano dataset: Understanding human-object interactions from egocentric videos in an industrial-like domain","author":"Ragusa","year":"2021"},{"key":"10.1016\/j.aei.2026.104610_b0055","unstructured":"K. Soomro, A. R. Zamir, M. Shah, UCF101: A dataset of 101 human actions classes from videos in the wild, arXiv preprint arXiv:1212.0402 (2012)."},{"key":"10.1016\/j.aei.2026.104610_b0060","series-title":"In: Proceedings of the IEEE International Conference on Computer Vision","first-page":"5842","article-title":"The \u201csomething something\u201d video database for learning and evaluating visual common sense","author":"Goyal","year":"2017"},{"key":"10.1016\/j.aei.2026.104610_b0065","series-title":"In: 2011 International Conference on Computer Vision","first-page":"2556","article-title":"HMDB: a large video database for human motion recognition","author":"Kuehne","year":"2011"},{"key":"10.1016\/j.aei.2026.104610_b0070","doi-asserted-by":"crossref","first-page":"3048","DOI":"10.1109\/TPAMI.2021.3055564","article-title":"Knowledge distillation and student-teacher learning for visual intelligence: a review and new outlooks","volume":"44","author":"Wang","year":"2021","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.aei.2026.104610_b0075","series-title":"In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"7161","article-title":"Cross-domain few-shot learning with task-specific adapters","author":"Li","year":"2022"},{"key":"10.1016\/j.aei.2026.104610_b0080","doi-asserted-by":"crossref","first-page":"1300","DOI":"10.1080\/0951192X.2020.1815852","article-title":"Generation of disassembly plans and quality assessment based on CAD data","volume":"33","author":"Belhadj","year":"2020","journal-title":"Int. J. Comput. Integr. Manuf."},{"key":"10.1016\/j.aei.2026.104610_b0085","doi-asserted-by":"crossref","first-page":"1565","DOI":"10.1007\/s40684-023-00522-7","article-title":"Disassembly process planning and its lines balancing prediction","volume":"10","author":"Aicha","year":"2023","journal-title":"Int. J. Precision Eng. Manuf.-Green Technol."},{"key":"10.1016\/j.aei.2026.104610_b0090","doi-asserted-by":"crossref","unstructured":"A. Bedeoui, R. Ben Hadj, M. Hammadi, N. Aifaoui, Tool workspace consideration for assembly plan generation, Assembly Automation41(2021) 612\u2013625.","DOI":"10.1108\/AA-05-2020-0063"},{"key":"10.1016\/j.aei.2026.104610_b0095","doi-asserted-by":"crossref","first-page":"1731","DOI":"10.1007\/s00170-016-9128-9","article-title":"Modelling and implementation of geometric and technological information for disassembly simulation in CAD environment","volume":"89","author":"Issaoui","year":"2017","journal-title":"Int. J. Adv. Manuf. Technol."},{"key":"10.1016\/j.aei.2026.104610_b0100","series-title":"In: 2017 13th International Conference on Natural Computation, Fuzzy Systems and Knowledge Discovery (ICNC-FSKD)","first-page":"548","article-title":"Human activity recognition based on random forests","author":"Xu","year":"2017"},{"key":"10.1016\/j.aei.2026.104610_b0105","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"5533","article-title":"Learning spatio-temporal representation with pseudo-3D residual networks, in","author":"Qiu","year":"2017"},{"key":"10.1016\/j.aei.2026.104610_b0110","article-title":"Skeleton-based action recognition via spatial and temporal transformer networks","volume":"208","author":"Plizzari","year":"2021","journal-title":"Comput. Vis. Image Underst."},{"key":"10.1016\/j.aei.2026.104610_b0115","doi-asserted-by":"crossref","unstructured":"Y. Wang, Y. Ao, F. Ong, W. Jiang, Z. Cao, J. T. Zhou, J. Yuan, 3DV: 3D dynamic voxel for action recognition in depth video, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2020, pp. 511\u2013520.","DOI":"10.1109\/CVPR42600.2020.00059"},{"key":"10.1016\/j.aei.2026.104610_b0120","doi-asserted-by":"crossref","first-page":"4028","DOI":"10.3390\/s25134028","article-title":"A comprehensive methodological survey of human activity recognition across diverse data modalities","volume":"25","author":"Shin","year":"2025","journal-title":"Sensors"},{"key":"10.1016\/j.aei.2026.104610_b0125","doi-asserted-by":"crossref","first-page":"1569","DOI":"10.1109\/TIP.2014.2302677","article-title":"Evaluation of color spatio-temporal interest points for human action recognition","volume":"23","author":"Everts","year":"2014","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.aei.2026.104610_b0130","article-title":"Two-stream convolutional networks for action recognition in videos","volume":"27","author":"Simonyan","year":"2014","journal-title":"Adv. Neural Inf. Proces. Syst."},{"key":"10.1016\/j.aei.2026.104610_b0135","doi-asserted-by":"crossref","first-page":"1510","DOI":"10.1109\/TMM.2017.2666540","article-title":"Sequential deep trajectory descriptor for action recognition with three-stream CNN","volume":"19","author":"Shi","year":"2017","journal-title":"IEEE Trans. Multimedia"},{"key":"10.1016\/j.aei.2026.104610_b0140","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"6299","article-title":"Quo vadis, action recognition? A new model and the kinetics dataset, in","author":"Carreira","year":"2017"},{"key":"10.1016\/j.aei.2026.104610_b0145","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"7794","article-title":"Non-local neural networks, in","author":"Wang","year":"2018"},{"key":"10.1016\/j.aei.2026.104610_b0150","series-title":"In: Proceedings of the IEEE International Conference on Computer Vision","first-page":"4489","article-title":"Learning spatiotemporal features with 3D convolutional networks","author":"Tran","year":"2015"},{"key":"10.1016\/j.aei.2026.104610_b0155","series-title":"Proceedings of the European Conference on Computer Vision (ECCV)","first-page":"803","article-title":"Temporal relational reasoning in videos, in","author":"Zhou","year":"2018"},{"key":"10.1016\/j.aei.2026.104610_b0160","doi-asserted-by":"crossref","first-page":"273","DOI":"10.1023\/A:1022627411411","article-title":"Support-vector networks","volume":"20","author":"Cortes","year":"1995","journal-title":"Mach. Learn."},{"key":"10.1016\/j.aei.2026.104610_b0165","doi-asserted-by":"crossref","first-page":"257","DOI":"10.1109\/5.18626","article-title":"A tutorial on hidden Markov models and selected applications in speech recognition","volume":"77","author":"Rabiner","year":"2002","journal-title":"Proc. IEEE"},{"key":"10.1016\/j.aei.2026.104610_b0170","doi-asserted-by":"crossref","first-page":"553","DOI":"10.1016\/j.jmsy.2024.08.019","article-title":"A skeleton-based assembly action recognition method with feature fusion for human-robot collaborative assembly","volume":"76","author":"Liu","year":"2024","journal-title":"J. Manuf. Syst."},{"key":"10.1016\/j.aei.2026.104610_b0175","doi-asserted-by":"crossref","first-page":"8579","DOI":"10.1109\/TIE.2021.3105977","article-title":"Toward proactive human-robot collaborative assembly: a multimodal transfer-learning-enabled action prediction approach","volume":"69","author":"Li","year":"2021","journal-title":"IEEE Trans. Ind. Electron."},{"key":"10.1016\/j.aei.2026.104610_b0180","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2022.101792","article-title":"Human-object integrated assembly intention recognition for context-aware human-robot collaborative assembly","volume":"54","author":"Zhang","year":"2022","journal-title":"Adv. Eng. Inf."},{"key":"10.1016\/j.aei.2026.104610_b0185","first-page":"193","article-title":"Recognition of assembly tasks based on the actions associated to the manipulated objects, in, IEEE\/SICE International Symposium on System Integration (SII)","volume":"2019","author":"Fukuda","year":"2019","journal-title":"IEEE"},{"key":"10.1016\/j.aei.2026.104610_b0190","series-title":"In: Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"9435","article-title":"Meta navigator: Search for a good adaptation policy for few-shot learning","author":"Zhang","year":"2021"},{"key":"10.1016\/j.aei.2026.104610_b0195","article-title":"Matching networks for one shot learning","volume":"29","author":"Vinyals","year":"2016","journal-title":"Adv. Neural Inf. Proces. Syst."},{"key":"10.1016\/j.aei.2026.104610_b0205","series-title":"In: International Conference on Machine Learning","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","author":"Finn","year":"2017"},{"key":"10.1016\/j.aei.2026.104610_b0210","series-title":"In: 2019 IEEE\/CVF International Conference on Computer Vision Workshop (ICCVW)","first-page":"1308","article-title":"Protogan: Towards few shot learning for action recognition","author":"Dwivedi","year":"2019"},{"key":"10.1016\/j.aei.2026.104610_b0215","series-title":"Proceedings of the European Conference on Computer Vision (ECCV)","first-page":"751","article-title":"Compound memory networks for few-shot video classification, in","author":"Zhu","year":"2018"},{"key":"10.1016\/j.aei.2026.104610_b0220","unstructured":"M. Bishay, G. Zoumpourlis, I. Patras, TARN: Temporal attentive relation network for few-shot and zero-shot action recognition, arXiv preprint arXiv:1907.09021 (2019)."},{"key":"10.1016\/j.aei.2026.104610_b0225","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"10618","article-title":"Few-shot video classification via temporal alignment, in","author":"Cao","year":"2020"},{"key":"10.1016\/j.aei.2026.104610_b0230","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"475","article-title":"Temporal-relational cross-transformers for few-shot action recognition, in","author":"Perrett","year":"2021"},{"key":"10.1016\/j.aei.2026.104610_b0235","unstructured":"N. Tishby, F. C. Pereira, W. Bialek, The information bottleneck method, arXiv preprint physics\/0004057 (2000)."},{"key":"10.1016\/j.aei.2026.104610_b0245","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"6202","article-title":"SlowFast networks for video recognition, in","author":"Feichtenhofer","year":"2019"},{"key":"10.1016\/j.aei.2026.104610_b0240","article-title":"Learning a distance metric from relative comparisons","volume":"16","author":"Schultz","year":"2003","journal-title":"Adv. Neural Inf. Proces. Syst."},{"key":"10.1016\/j.aei.2026.104610_b0250","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"770","article-title":"Deep residual learning for image recognition, in","author":"He","year":"2016"},{"key":"10.1016\/j.aei.2026.104610_b0255","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"7229","article-title":"Few-shot image recognition by predicting parameters from activations, in","author":"Qiao","year":"2018"},{"key":"10.1016\/j.aei.2026.104610_b0260","unstructured":"A. A. Rusu, D. Rao, J. Sygnowski, O. Vinyals, R. Pascanu, S. Osindero, R. Hadsell, Meta-learning with latent embedding optimization, arXiv preprint arXiv:1807.05960 (2018)."},{"key":"10.1016\/j.aei.2026.104610_b0265","first-page":"5632","article-title":"DeepEMD: differentiable earth mover's distance for few-shot learning","volume":"45","author":"Zhang","year":"2022","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.aei.2026.104610_b0270","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","article-title":"Long short-term memory","volume":"9","author":"Hochreiter","year":"1997","journal-title":"Neural Comput."},{"key":"10.1016\/j.aei.2026.104610_b0275","doi-asserted-by":"crossref","DOI":"10.1016\/j.cie.2024.110527","article-title":"Deep learning based assembly process action recognition and progress prediction facing human-centric intelligent manufacturing","volume":"196","author":"Wang","year":"2024","journal-title":"Comput. Ind. Eng."}],"container-title":["Advanced Engineering Informatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1474034626003022?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1474034626003022?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,6]],"date-time":"2026-04-06T18:00:40Z","timestamp":1775498440000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1474034626003022"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,9]]},"references-count":55,"alternative-id":["S1474034626003022"],"URL":"https:\/\/doi.org\/10.1016\/j.aei.2026.104610","relation":{},"ISSN":["1474-0346"],"issn-type":[{"value":"1474-0346","type":"print"}],"subject":[],"published":{"date-parts":[[2026,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Few-shot assembly action recognition in smart manufacturing: A cross-domain metric framework","name":"articletitle","label":"Article Title"},{"value":"Advanced Engineering Informatics","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.aei.2026.104610","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"104610"}}