{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,23]],"date-time":"2026-02-23T15:55:21Z","timestamp":1771862121844,"version":"3.50.1"},"reference-count":63,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2025,9,1]],"date-time":"2025-09-01T00:00:00Z","timestamp":1756684800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2025,9,1]],"date-time":"2025-09-01T00:00:00Z","timestamp":1756684800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Knowledge-Based Systems"],"published-print":{"date-parts":[[2025,9]]},"DOI":"10.1016\/j.knosys.2025.114001","type":"journal-article","created":{"date-parts":[[2025,7,2]],"date-time":"2025-07-02T03:00:09Z","timestamp":1751425209000},"page":"114001","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":3,"special_numbering":"C","title":["Optimal strategies to perform multilingual analysis of social content for a novel dataset in the tourism domain"],"prefix":"10.1016","volume":"326","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5254-7583","authenticated-orcid":false,"given":"Maxime","family":"Masson","sequence":"first","affiliation":[]},{"given":"Rodrigo","family":"Agerri","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3605-3927","authenticated-orcid":false,"given":"Christian","family":"Sallaberry","sequence":"additional","affiliation":[]},{"given":"Marie-Noelle","family":"Bessagnet","sequence":"additional","affiliation":[]},{"given":"Annig","family":"Le Parc Lacayrelle","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2227-3283","authenticated-orcid":false,"given":"Philippe","family":"Roose","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.knosys.2025.114001_b1","first-page":"27","article-title":"What do we know about social media in tourism? A review","volume":"10","author":"Zeng","year":"2014","journal-title":"Tour. Manag. Perspect."},{"key":"10.1016\/j.knosys.2025.114001_b2","unstructured":"D. Maynard, K. Bontcheva, D. Rout, Challenges in developing opinion mining tools for social media, in: Workshop Programme, p. 15."},{"key":"10.1016\/j.knosys.2025.114001_b3","series-title":"Proceedings of the 9th International Workshop on Semantic Evaluation (SemEVal 2015)","first-page":"451","article-title":"SemEval-2015 task 10: Sentiment analysis in Twitter","author":"Rosenthal","year":"2015"},{"issue":"5","key":"10.1016\/j.knosys.2025.114001_b4","doi-asserted-by":"crossref","first-page":"65","DOI":"10.26689\/jcer.v6i5.3958","article-title":"Overview of named entity recognition","volume":"6","author":"Liu","year":"2022","journal-title":"J. Contemp. Educ. Res."},{"key":"10.1016\/j.knosys.2025.114001_b5","doi-asserted-by":"crossref","DOI":"10.1016\/j.jbi.2020.103526","article-title":"Clinical concept extraction: a methodology review","volume":"109","author":"Fu","year":"2020","journal-title":"J. Biomed. Inform."},{"issue":"2","key":"10.1016\/j.knosys.2025.114001_b6","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3605943","article-title":"Recent advances in natural language processing via large pre-trained language models: A survey","volume":"56","author":"Min","year":"2023","journal-title":"ACM Comput. Surv."},{"key":"10.1016\/j.knosys.2025.114001_b7","series-title":"Thesaurus on Tourism and Leisure Activities 2001","author":"World Tourism Organization","year":"2002"},{"key":"10.1016\/j.knosys.2025.114001_b8","series-title":"Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","first-page":"5721","article-title":"Template-free prompt tuning for few-shot NER","author":"Ma","year":"2022"},{"key":"10.1016\/j.knosys.2025.114001_b9","series-title":"Efficient few-shot learning without prompts","author":"Tunstall","year":"2022"},{"key":"10.1016\/j.knosys.2025.114001_b10","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.knosys.2025.114001_b11","doi-asserted-by":"crossref","first-page":"30046","DOI":"10.1073\/pnas.1907367117","article-title":"Emergent linguistic structure in artificial neural networks trained by self-supervision","volume":"117","author":"Manning","year":"2020","journal-title":"Proc. Natl. Acad. Sci."},{"key":"10.1016\/j.knosys.2025.114001_b12","first-page":"1","article-title":"On the role of morphological information for contextual lemmatization","author":"Toporkov","year":"2024","journal-title":"Comput. Linguist."},{"key":"10.1016\/j.knosys.2025.114001_b13","series-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018"},{"key":"10.1016\/j.knosys.2025.114001_b14","doi-asserted-by":"crossref","unstructured":"A. Conneau, K. Khandelwal, N. Goyal, V. Chaudhary, G. Wenzek, F. Guzm\u00e1n, \u00c9. Grave, M. Ott, L. Zettlemoyer, V. Stoyanov, Unsupervised Cross-lingual Representation Learning at Scale, in: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, 2020, pp. 8440\u20138451.","DOI":"10.18653\/v1\/2020.acl-main.747"},{"issue":"70","key":"10.1016\/j.knosys.2025.114001_b15","first-page":"1","article-title":"Scaling instruction-finetuned language models","volume":"25","author":"Chung","year":"2024","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.knosys.2025.114001_b16","series-title":"Medical mT5: An open-source multilingual Text-to-Text LLM for the medical domain","author":"Garc\u00eda-Ferrero","year":"2024"},{"key":"10.1016\/j.knosys.2025.114001_b17","series-title":"The Twelfth International Conference on Learning Representations","article-title":"GoLLIE: Annotation guidelines improve zero-shot information-extraction","author":"Sainz","year":"2024"},{"key":"10.1016\/j.knosys.2025.114001_b18","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.knosys.2025.114001_b19","series-title":"Mistral 7B","author":"Jiang","year":"2023"},{"key":"10.1016\/j.knosys.2025.114001_b20","series-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023"},{"key":"10.1016\/j.knosys.2025.114001_b21","series-title":"2021 IEEE 22nd International Conference on Information Reuse and Integration for Data Science","first-page":"248","article-title":"Using inductive transfer learning to improve hotel review spam detection","author":"Crawford","year":"2021"},{"key":"10.1016\/j.knosys.2025.114001_b22","series-title":"Transformers approach for sentiment analysis: Classification of Mexican tourists reviews from TripAdvisor","author":"Enr\u00edquez","year":"2022"},{"key":"10.1016\/j.knosys.2025.114001_b23","unstructured":"J. V\u00e1squez, H. G\u00f3mez-Adorno, G. Bel-Enguix, Bert-based Approach for Sentiment Analysis of Spanish Reviews from TripAdvisor, in: IberLEF@ SEPLN, 2021, pp. 165\u2013170."},{"issue":"4","key":"10.1016\/j.knosys.2025.114001_b24","doi-asserted-by":"crossref","DOI":"10.3390\/su13042397","article-title":"Applying deep learning techniques for sentiment analysis to assess sustainable transport","volume":"13","author":"Serna","year":"2021","journal-title":"Sustainability"},{"key":"10.1016\/j.knosys.2025.114001_b25","doi-asserted-by":"crossref","first-page":"373","DOI":"10.1016\/j.procs.2021.12.256","article-title":"Named entity recognition applied on moroccan tourism corpus","volume":"198","author":"Bouabdallaoui","year":"2022","journal-title":"Procedia Comput. Sci."},{"key":"10.1016\/j.knosys.2025.114001_b26","series-title":"Machine Translation: 16th China Conference, CCMT 2020, Hohhot, China, October 10-12, 2020, Revised Selected Papers 16","first-page":"11","article-title":"MTNER: A corpus for mongolian tourism named entity recognition","author":"Cheng","year":"2020"},{"issue":"1","key":"10.1016\/j.knosys.2025.114001_b27","first-page":"108","article-title":"Information extraction on tourism domain using SpaCy and BERT","volume":"15","author":"Chantrapornchai","year":"2021","journal-title":"ECTI Trans. Comput. Inf. Technol."},{"key":"10.1016\/j.knosys.2025.114001_b28","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1080\/01434632.2021.1962331","article-title":"Social analysis of young Basque-speaking communities in twitter","author":"de Landa","year":"2021","journal-title":"J. Multiling. Multicult. Dev."},{"key":"10.1016\/j.knosys.2025.114001_b29","series-title":"Intelligent Systems Design and Applications: 18th International Conference on Intelligent Systems Design and Applications (ISDA 2018) Held in Vellore, India, December 6-8, 2018, Volume 1","first-page":"100","article-title":"Review and analysis of zero, one and few shot learning approaches","author":"Kadam","year":"2020"},{"key":"10.1016\/j.knosys.2025.114001_b30","series-title":"Gpt-3 models are poor few-shot learners in the biomedical domain","author":"Moradi","year":"2021"},{"key":"10.1016\/j.knosys.2025.114001_b31","doi-asserted-by":"crossref","unstructured":"T. Schick, H. Sch\u00fctze, Exploiting Cloze-Questions for Few-Shot Text Classification and Natural Language Inference, in: Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume, 2021, pp. 255\u2013269.","DOI":"10.18653\/v1\/2021.eacl-main.20"},{"key":"10.1016\/j.knosys.2025.114001_b32","series-title":"RoBERTa: A robustly optimized BERT pretraining approach","author":"Liu","year":"2019"},{"key":"10.1016\/j.knosys.2025.114001_b33","series-title":"InstructionNER: A multi-task instruction-based generative framework for few-shot NER","author":"Wang","year":"2022"},{"key":"10.1016\/j.knosys.2025.114001_b34","series-title":"Findings of the Association for Computational Linguistics: EMNLP 2022","first-page":"6403","article-title":"Model and data transfer for cross-lingual sequence labelling in zero-resource settings","author":"Garc\u00eda-Ferrero","year":"2022"},{"key":"10.1016\/j.knosys.2025.114001_b35","doi-asserted-by":"crossref","unstructured":"I. Garc\u00eda-Ferrero, R. Agerri, G. Rigau, T-Projection: High Quality Annotation Projection for Sequence Labeling Tasks, in: Findings of the Association for Computational Linguistics: EMNLP 2023, 2023, pp. 15203\u201315217.","DOI":"10.18653\/v1\/2023.findings-emnlp.1015"},{"key":"10.1016\/j.knosys.2025.114001_b36","series-title":"Cross-lingual argument mining in the medical domain","author":"Yeginbergen","year":"2024"},{"key":"10.1016\/j.knosys.2025.114001_b37","doi-asserted-by":"crossref","unstructured":"M. Artetxe, G. Labaka, E. Agirre, Translation Artifacts in Cross-lingual Transfer Learning, in: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing, EMNLP, 2020, pp. 7674\u20137684.","DOI":"10.18653\/v1\/2020.emnlp-main.618"},{"key":"10.1016\/j.knosys.2025.114001_b38","doi-asserted-by":"crossref","unstructured":"M. Artetxe, V. Goswami, S. Bhosale, A. Fan, L. Zettlemoyer, Revisiting Machine Translation for Cross-lingual Classification, in: Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, 2023, pp. 6489\u20136499.","DOI":"10.18653\/v1\/2023.emnlp-main.399"},{"key":"10.1016\/j.knosys.2025.114001_b39","series-title":"Proceedings of the 28th International Conference on Computational Linguistics","first-page":"266","article-title":"Improving sentiment analysis over non-english tweets using multilingual transformers and automatic translation for data-augmentation","author":"Barriere","year":"2020"},{"key":"10.1016\/j.knosys.2025.114001_b40","series-title":"LREC","first-page":"139","article-title":"Corpus description of the ESTER evaluation campaign for the rich transcription of french broadcast news","author":"Galliano","year":"2006"},{"key":"10.1016\/j.knosys.2025.114001_b41","series-title":"Lrec","article-title":"Ancora: Multilevel annotated corpora for Catalan and Spanish","author":"Taul\u00e9","year":"2008"},{"key":"10.1016\/j.knosys.2025.114001_b42","unstructured":"L. Derczynski, K. Bontcheva, I. Roberts, Broad Twitter Corpus: A Diverse Named Entity Recognition Resource, in: Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers, Osaka, Japan, 2016, pp. 1169\u20131179."},{"issue":"12","key":"10.1016\/j.knosys.2025.114001_b43","first-page":"2009","article-title":"Twitter sentiment classification using distant supervision","volume":"1","author":"Go","year":"2009","journal-title":"CS224N Proj. Rep. Stanf."},{"key":"10.1016\/j.knosys.2025.114001_b44","series-title":"Evaluation datasets for Twitter sentiment analysis: a survey and a new dataset, the STS-Gold","author":"Saif","year":"2013"},{"key":"10.1016\/j.knosys.2025.114001_b45","series-title":"Second Joint Conference on Lexical and Computational Semantics (*SEM), Volume 2: Proceedings of the Seventh International Workshop on Semantic Evaluation (SemEVal 2013)","first-page":"312","article-title":"SemEval-2013 task 2: Sentiment analysis in Twitter","author":"Nakov","year":"2013"},{"key":"10.1016\/j.knosys.2025.114001_b46","series-title":"Proceedings of the 8th International Workshop on Semantic Evaluation (SemEVal 2014)","first-page":"73","article-title":"SemEval-2014 task 9: Sentiment analysis in Twitter","author":"Rosenthal","year":"2014"},{"key":"10.1016\/j.knosys.2025.114001_b47","series-title":"Proceedings of the 10th International Workshop on Semantic Evaluation (SemEVal-2016)","first-page":"1","article-title":"SemEval-2016 task 4: Sentiment analysis in Twitter","author":"Nakov","year":"2016"},{"key":"10.1016\/j.knosys.2025.114001_b48","series-title":"Proceedings of the 11th International Workshop on Semantic Evaluation (SemEVal-2017)","first-page":"502","article-title":"SemEval-2017 task 4: Sentiment analysis in Twitter","author":"Rosenthal","year":"2017"},{"key":"10.1016\/j.knosys.2025.114001_b49","series-title":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing","first-page":"5016","article-title":"MultiWOZ - A large-scale multi-domain wizard-of-oz dataset for task-oriented dialogue modelling","author":"Budzianowski","year":"2018"},{"key":"10.1016\/j.knosys.2025.114001_b50","doi-asserted-by":"crossref","unstructured":"S. Bowman, G. Angeli, C. Potts, C.D. Manning, A large annotated corpus for learning natural language inference, in: Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing, 2015, pp. 632\u2013642.","DOI":"10.18653\/v1\/D15-1075"},{"key":"10.1016\/j.knosys.2025.114001_b51","series-title":"Web Information Systems Engineering\u2013WISE 2022: 23rd International Conference, Biarritz, France, November 1\u20133, 2022, Proceedings","first-page":"11","article-title":"A domain-independent method for thematic dataset building from social media: The case of tourism on Twitter","author":"Masson","year":"2022"},{"key":"10.1016\/j.knosys.2025.114001_b52","series-title":"Proceedings of the Thirteenth Language Resources and Evaluation Conference","first-page":"258","article-title":"XLM-T: Multilingual language models in Twitter for sentiment analysis and beyond","author":"Barbieri","year":"2022"},{"key":"10.1016\/j.knosys.2025.114001_b53","doi-asserted-by":"crossref","unstructured":"F. Barbieri, J. Camacho-Collados, L.E. Anke, L. Neves, TweetEval: Unified Benchmark and Comparative Evaluation for Tweet Classification, in: Findings of the Association for Computational Linguistics: EMNLP 2020, 2020, pp. 1644\u20131650.","DOI":"10.18653\/v1\/2020.findings-emnlp.148"},{"key":"10.1016\/j.knosys.2025.114001_b54","series-title":"Pysentimiento: A python toolkit for sentiment analysis and SocialNLP tasks","author":"P\u00e9rez","year":"2021"},{"key":"10.1016\/j.knosys.2025.114001_b55","series-title":"Sentiment analysis generic dataset","author":"Seethal","year":"2023"},{"issue":"1","key":"10.1016\/j.knosys.2025.114001_b56","doi-asserted-by":"crossref","first-page":"75","DOI":"10.1016\/j.ijresmar.2022.05.005","article-title":"More than a feeling: Accuracy and application of sentiment analysis","volume":"40","author":"Hartmann","year":"2023","journal-title":"Int. J. Res. Mark."},{"key":"10.1016\/j.knosys.2025.114001_b57","series-title":"Fine-tune Mistral v0.2 for sentiment analysis \u2014 kaggle.com","author":"Massaron","year":"2024"},{"key":"10.1016\/j.knosys.2025.114001_b58","series-title":"Fine-tune Llama 2 for sentiment analysis \u2014 kaggle.com","author":"Massaron","year":"2024"},{"key":"10.1016\/j.knosys.2025.114001_b59","series-title":"Measuring massive multitask language understanding","author":"Hendrycks","year":"2020"},{"key":"10.1016\/j.knosys.2025.114001_b60","series-title":"Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations","first-page":"38","article-title":"Transformers: State-of-the-Art natural language processing","author":"Wolf","year":"2020"},{"key":"10.1016\/j.knosys.2025.114001_b61","doi-asserted-by":"crossref","unstructured":"E.F. Tjong Kim Sang, Introduction to the CoNLL-2002 Shared Task: Language-Independent Named Entity Recognition, in: COLING-02: The 6th Conference on Natural Language Learning 2002 (CoNLL-2002), 2002.","DOI":"10.3115\/1118853.1118877"},{"key":"10.1016\/j.knosys.2025.114001_b62","doi-asserted-by":"crossref","unstructured":"M. Masson, C. Sallaberry, M.-N. Bessagnet, A.L.P. Lacayrelle, P. Roose, R. Agerri, TextBI: An Interactive Dashboard for Visualizing Multidimensional NLP Annotations in Social Media Data, in: Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations, 2024, pp. 1\u20139.","DOI":"10.18653\/v1\/2024.eacl-demo.1"},{"key":"10.1016\/j.knosys.2025.114001_b63","series-title":"The Hidden Dimension","author":"Hall","year":"1966"}],"container-title":["Knowledge-Based Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705125010469?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705125010469?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,11,5]],"date-time":"2025-11-05T09:48:02Z","timestamp":1762336082000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0950705125010469"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9]]},"references-count":63,"alternative-id":["S0950705125010469"],"URL":"https:\/\/doi.org\/10.1016\/j.knosys.2025.114001","relation":{},"ISSN":["0950-7051"],"issn-type":[{"value":"0950-7051","type":"print"}],"subject":[],"published":{"date-parts":[[2025,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Optimal strategies to perform multilingual analysis of social content for a novel dataset in the tourism domain","name":"articletitle","label":"Article Title"},{"value":"Knowledge-Based Systems","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.knosys.2025.114001","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 The Author(s). Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"114001"}}