{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T04:54:37Z","timestamp":1743051277345,"version":"3.40.3"},"publisher-location":"Cham","reference-count":43,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031647475"},{"type":"electronic","value":"9783031647482"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-64748-2_3","type":"book-chapter","created":{"date-parts":[[2024,7,25]],"date-time":"2024-07-25T07:03:08Z","timestamp":1721890988000},"page":"42-65","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["BigQA: A Software Reference Architecture for\u00a0Big Data Question Answering Systems"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9553-9978","authenticated-orcid":false,"given":"Leonardo Mauro","family":"Pereira Moraes","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9475-2526","authenticated-orcid":false,"given":"Pedro Calciolari","family":"Jardim","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7618-1405","authenticated-orcid":false,"given":"Cristina Dutra","family":"Aguiar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,7,26]]},"reference":[{"key":"3_CR1","unstructured":"Armbrust, M., Ghodsi, A., Xin, R., Zaharia, M.: Lakehouse: a new generation of open platforms that unify data warehousing and advanced analytics. In: Proceedings of the Conference on Innovative Data Systems Research, vol. 8 (2021)"},{"key":"3_CR2","doi-asserted-by":"publisher","unstructured":"Ataei, P., Litchfield, A.: NeoMycelia: a software reference architecture for big data systems. In: Proceedings of the 28th Asia-Pacific Software Engineering Conference, pp. 452\u2013462 (2021). https:\/\/doi.org\/10.1109\/APSEC53868.2021.00052","DOI":"10.1109\/APSEC53868.2021.00052"},{"key":"3_CR3","doi-asserted-by":"publisher","unstructured":"Athira, P., Sreeja, M., Reghuraj, P.: Architecture of an ontology-based domain-specific natural language question answering system. Int. J. Web Semant. Technol. 4(4), article number 31 (2013). https:\/\/doi.org\/10.48550\/ARXIV.1311.3175","DOI":"10.48550\/ARXIV.1311.3175"},{"key":"3_CR4","doi-asserted-by":"publisher","first-page":"662","DOI":"10.1162\/tacl_a_00338","volume":"8","author":"M Bartolo","year":"2020","unstructured":"Bartolo, M., Roberts, A., Welbl, J., Riedel, S., Stenetorp, P.: Beat the AI: investigating adversarial human annotation for reading comprehension. Trans. Assoc. Comput. Linguist. 8, 662\u2013678 (2020). https:\/\/doi.org\/10.1162\/tacl_a_00338","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"3_CR5","doi-asserted-by":"publisher","unstructured":"Cassavia, N., Masciari, E.: Sigma: a scalable high performance big data architecture. In: Proceedings of the 29th Euromicro International Conference on Parallel, Distributed and Network-Based Processing, pp. 236\u2013239 (2021). https:\/\/doi.org\/10.1109\/PDP52278.2021.00044","DOI":"10.1109\/PDP52278.2021.00044"},{"key":"3_CR6","doi-asserted-by":"publisher","unstructured":"Derras, M., et al.: Reference architecture design: a practical approach. In: Proceedings of the 13th International Conference on Software Technologies, pp. 633\u2013640 (2018). https:\/\/doi.org\/10.5220\/0006865006330640","DOI":"10.5220\/0006865006330640"},{"key":"3_CR7","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 4171\u20134186. Association for Computational Linguistics, Minneapolis (2019). https:\/\/doi.org\/10.18653\/v1\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"3_CR8","doi-asserted-by":"publisher","unstructured":"Galster, M., Avgeriou, P.: Empirically-grounded reference architectures: a proposal. In: Proceedings of the Joint ACM SIGSOFT Conference - QoSA and Architecting Critical Systems, pp. 153\u2013158 (2011). https:\/\/doi.org\/10.1145\/2000259.2000285","DOI":"10.1145\/2000259.2000285"},{"key":"3_CR9","doi-asserted-by":"crossref","unstructured":"Jardim, P., Moraes, L.M.P., Aguiar, C.D.: QASports: a question answering dataset about sports. In: Proceedings of the Brazilian Symposium on Databases: Dataset Showcase Workshop. SBC, Belo Horizonte (2023)","DOI":"10.5753\/dsw.2023.233602"},{"key":"3_CR10","doi-asserted-by":"publisher","unstructured":"Ji, Z., et al.: Survey of hallucination in natural language generation. ACM Comput. Surv. (2022). https:\/\/doi.org\/10.1145\/3571730","DOI":"10.1145\/3571730"},{"key":"3_CR11","unstructured":"John, T., Misra, P.: Data Lake for Enterprises. Packt Publishing Ltd. (2017)"},{"issue":"3","key":"3_CR12","doi-asserted-by":"publisher","first-page":"535","DOI":"10.1109\/TBDATA.2019.2921572","volume":"7","author":"J Johnson","year":"2019","unstructured":"Johnson, J., Douze, M., J\u00e9gou, H.: Billion-scale similarity search with GPUs. IEEE Trans. Big Data 7(3), 535\u2013547 (2019)","journal-title":"IEEE Trans. Big Data"},{"key":"3_CR13","doi-asserted-by":"publisher","unstructured":"Karpukhin, V., et al.: Dense passage retrieval for open-domain question answering. arXiv preprint arXiv:2004.04906 (2020). https:\/\/doi.org\/10.48550\/ARXIV.2004.04906","DOI":"10.48550\/ARXIV.2004.04906"},{"key":"3_CR14","doi-asserted-by":"publisher","unstructured":"Klein, J., Buglak, R., Blockow, D., Wuttke, T., Cooper, B.: A reference architecture for big data systems in the national security domain. In: Proceedings of the IEEE\/ACM 2nd International Workshop on Big Data Software Engineering, pp. 51\u201357 (2016). https:\/\/doi.org\/10.1145\/2896825.2896834","DOI":"10.1145\/2896825.2896834"},{"key":"3_CR15","doi-asserted-by":"publisher","unstructured":"Kononenko, O., Baysal, O., Holmes, R., Godfrey, M.W.: Mining modern repositories with elasticsearch. In: Proceedings of the 11th Working Conference on Mining Software Repositories, pp. 328\u2013331 (2014).https:\/\/doi.org\/10.1145\/2597073.2597091","DOI":"10.1145\/2597073.2597091"},{"issue":"70","key":"3_CR16","first-page":"1","volume":"6","author":"D Laney","year":"2001","unstructured":"Laney, D., et al.: 3D data management: controlling data volume, velocity and variety. META Group Res. Note 6(70), 1 (2001)","journal-title":"META Group Res. Note"},{"key":"3_CR17","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1016\/j.ijinfomgt.2019.04.003","volume":"50","author":"K Lepenioti","year":"2020","unstructured":"Lepenioti, K., Bousdekis, A., Apostolou, D., Mentzas, G.: Prescriptive analytics: literature review and research challenges. Int. J. Inf. Manage. 50, 57\u201370 (2020)","journal-title":"Int. J. Inf. Manage."},{"key":"3_CR18","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1007\/978-3-030-40907-4_4","volume-title":"On the Move to Meaningful Internet Systems: OTM 2019 Workshops","author":"Q Li","year":"2020","unstructured":"Li, Q., Xu, Z., Wei, H., Yu, C., Wang, S.: General big data architecture and methodology: an analysis focused framework. In: Debruyne, C., et al. (eds.) OTM 2019. LNCS, vol. 11878, pp. 33\u201343. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-40907-4_4"},{"key":"3_CR19","unstructured":"Liu, Y., et al.: RoBERTa: a robustly optimized BERT pretraining approach. arXiv preprint arXiv:1907.11692 (2019). https:\/\/arxiv.org\/abs\/1907.11692"},{"issue":"1\u20133","key":"3_CR20","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1504\/IJIE.2020.104656","volume":"7","author":"EL Lydia","year":"2020","unstructured":"Lydia, E.L., Satyanarayan, S., Kumar, K.V., Ramya, D.: Indexing documents with reliable indexing techniques using Apache Lucene in Hadoop. Int. J. Intell. Enterp. 7(1\u20133), 203\u2013214 (2020). https:\/\/doi.org\/10.1504\/IJIE.2020.104656","journal-title":"Int. J. Intell. Enterp."},{"issue":"9","key":"3_CR21","doi-asserted-by":"publisher","first-page":"972","DOI":"10.1108\/02656711211272863","volume":"29","author":"S Misra","year":"2012","unstructured":"Misra, S., Kumar, V., Kumar, U., Fantazy, K., Akhter, M.: Agile software development practices: evolution, principles, and criticisms. Int. J. Qual. Reliab. Manage. 29(9), 972\u2013980 (2012)","journal-title":"Int. J. Qual. Reliab. Manage."},{"key":"3_CR22","unstructured":"M\u00f6ller, T., Reina, A., Jayakumar, R., Pietsch, M.: COVID-QA: a question answering dataset for Covid-19. In: Proceedings of the 1st Workshop on NLP for COVID-19 at Association for Computational Linguistics, p.\u00a01 (2020)"},{"key":"3_CR23","doi-asserted-by":"publisher","unstructured":"Moraes, L.M.P., Jardim, P., Aguiar, C.D.: Design principles and a software reference architecture for big data question answering systems. In: Proceedings of the 25th International Conference on Enterprise Information Systems, pp. 57\u201367. INSTICC, SciTePress (2023). https:\/\/doi.org\/10.5220\/0011842700003467","DOI":"10.5220\/0011842700003467"},{"issue":"6","key":"3_CR24","doi-asserted-by":"publisher","first-page":"1","DOI":"10.3390\/joitmc5010006","volume":"5","author":"M M\u00fcller","year":"2019","unstructured":"M\u00fcller, M., Vorraber, W., Slany, W.: Open principles in new business models for information systems. J. Open Innov.: Technol. Mark. Complexity 5(6), 1\u201313 (2019). https:\/\/doi.org\/10.3390\/joitmc5010006","journal-title":"J. Open Innov.: Technol. Mark. Complexity"},{"key":"3_CR25","doi-asserted-by":"publisher","unstructured":"Nielsen, R.D., et al.: An architecture for complex clinical question answering. In: Proceedings of the 1st ACM International Health Informatics Symposium, pp. 395-399 (2010). https:\/\/doi.org\/10.1145\/1882992.1883050","DOI":"10.1145\/1882992.1883050"},{"key":"3_CR26","doi-asserted-by":"crossref","unstructured":"Novo-Loures, M., Pavon, R., Laza, R., Ruano-Ordas, D., Mendez, J.R.: Using natural language preprocessing architecture (NLPA) for big data text sources. Hindawi Sci. Program. 1\u201313, article id 2390941 (2020)","DOI":"10.1155\/2020\/2390941"},{"key":"3_CR27","doi-asserted-by":"publisher","unstructured":"Petroni, F., et al.: Language models as knowledge bases? In: Proceedings of the Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing, pp. 2463\u20132473 (2019). https:\/\/doi.org\/10.18653\/v1\/D19-1250","DOI":"10.18653\/v1\/D19-1250"},{"key":"3_CR28","doi-asserted-by":"publisher","unstructured":"Poniszewska-Mara\u0144da, A., Czechowska, E.: Kubernetes cluster for automating software production environment. Sens. J. 21(5), article number 1910 (2021). https:\/\/doi.org\/10.3390\/s21051910","DOI":"10.3390\/s21051910"},{"key":"3_CR29","unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I., et\u00a0al.: Improving language understanding by generative pre-training (2018)"},{"key":"3_CR30","doi-asserted-by":"crossref","unstructured":"Rajpurkar, P., Zhang, J., Lopyrev, K., Liang, P.: SQuAD: 100,000+ questions for machine comprehension of text. arXiv e-prints arXiv:1606.05250 (2016)","DOI":"10.18653\/v1\/D16-1264"},{"issue":"3","key":"3_CR31","doi-asserted-by":"publisher","first-page":"129","DOI":"10.1002\/asi.4630270302","volume":"27","author":"SE Robertson","year":"1976","unstructured":"Robertson, S.E., Jones, K.S.: Relevance weighting of search terms. J. Am. Soc. Inf. Sci. 27(3), 129\u2013146 (1976). https:\/\/doi.org\/10.1002\/asi.4630270302","journal-title":"J. Am. Soc. Inf. Sci."},{"key":"3_CR32","doi-asserted-by":"publisher","unstructured":"Romualdo, A., Real, L., Caseli, H.: Measuring Brazilian Portuguese product titles similarity using embeddings. In: Proceedings of the 13th Brazilian Symposium on Information Technology and Human Language, pp. 121\u2013132. SBC (2021). https:\/\/doi.org\/10.5753\/stil.2021.17791","DOI":"10.5753\/stil.2021.17791"},{"key":"3_CR33","doi-asserted-by":"crossref","unstructured":"Saha, A., Aralikatte, R., Khapra, M.M., Sankaranarayanan, K.: DuoRC: towards complex language understanding with paraphrased reading comprehension. CoRR abs\/1804.07927 (2018). http:\/\/arxiv.org\/abs\/1804.07927","DOI":"10.18653\/v1\/P18-1156"},{"key":"3_CR34","doi-asserted-by":"publisher","unstructured":"Sammut, C., Webb, G.I. (eds.): TF-IDF, pp. 986\u2013987. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-0-387-30164-8_832","DOI":"10.1007\/978-0-387-30164-8_832"},{"key":"3_CR35","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1007\/s10844-020-00608-7","volume":"56","author":"P Sawadogo","year":"2021","unstructured":"Sawadogo, P., Darmont, J.: On data lake architectures and metadata management. J. Intell. Inf. Syst. 56, 97\u2013120 (2021). https:\/\/doi.org\/10.1007\/s10844-020-00608-7","journal-title":"J. Intell. Inf. Syst."},{"key":"3_CR36","unstructured":"Schaffer, N., Weking, J., St\u00e4hler, O.: Requirements and design principles for business model tools. In: Proceedings of the Americas Conference on Information Systems Proceedings, pp. 1\u201310 (2020)"},{"key":"3_CR37","doi-asserted-by":"crossref","unstructured":"Shvachko, K., Kuang, H., Radia, S., Chansler, R.: The Hadoop distributed file system. In: Proceedings of the IEEE 26th Symposium on Mass Storage Systems and Technologies, pp. 1\u201310 (2010)","DOI":"10.1109\/MSST.2010.5496972"},{"key":"3_CR38","series-title":"Advances in Intelligent Systems and Computing","doi-asserted-by":"publisher","first-page":"335","DOI":"10.1007\/978-981-13-1498-8_30","volume-title":"Emerging Technologies in Data Mining and Information Security","author":"KN Singh","year":"2019","unstructured":"Singh, K.N., Behera, R.K., Mantri, J.K.: Big data ecosystem: review on architectural evolution. In: Abraham, A., Dutta, P., Mandal, J.K., Bhattacharya, A., Dutta, S. (eds.) Emerging Technologies in Data Mining and Information Security. Advances in Intelligent Systems and Computing, vol. 813, pp. 335\u2013345. Springer, Singapore (2019)"},{"key":"3_CR39","doi-asserted-by":"publisher","unstructured":"Sucunuta, M.E., Riofrio, G.E.: Architecture of a question-answering system for a specific repository of documents. In: Proceedings of the 2nd International Conference on Software Technology and Engineering, pp. V2-12\u2013V2-16 (2010). https:\/\/doi.org\/10.1109\/ICSTE.2010.5608753","DOI":"10.1109\/ICSTE.2010.5608753"},{"key":"3_CR40","unstructured":"Yousfi, S., Rhanoui, M., Chiadmi, D.: Towards a generic multimodal architecture for batch and streaming big data integration. arXiv preprint arXiv:2108.04343 (2021)"},{"key":"3_CR41","unstructured":"Zaharia, M., Chowdhury, M., Franklin, M.J., Shenker, S., Stoica, I.: Spark: cluster computing with working sets. In: Proceedings of the 2nd USENIX Workshop on Hot Topics in Cloud Computing, pp. 1\u20137 (2010)"},{"key":"3_CR42","doi-asserted-by":"publisher","unstructured":"Zhang, G., Jiang, T., Bie, R., Liu, X., Wang, Z., Rao, J.: The architecture of ProMe instant question answering system. In: Proceedings of the International Conference on Cyber-Enabled Distributed Computing and Knowledge Discovery, pp. 237\u2013242 (2013). https:\/\/doi.org\/10.1109\/CyberC.2013.46","DOI":"10.1109\/CyberC.2013.46"},{"issue":"1","key":"3_CR43","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1504\/IJBDI.2019.097399","volume":"6","author":"JY Zhu","year":"2019","unstructured":"Zhu, J.Y., Tang, B., Li, V.O.: A five-layer architecture for big data processing and analytics. Int. J. Big Data Intell. 6(1), 38\u201349 (2019). https:\/\/doi.org\/10.1504\/IJBDI.2019.097399","journal-title":"Int. J. Big Data Intell."}],"container-title":["Lecture Notes in Business Information Processing","Enterprise Information Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-64748-2_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,25]],"date-time":"2024-07-25T07:10:45Z","timestamp":1721891445000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-64748-2_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031647475","9783031647482"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-64748-2_3","relation":{},"ISSN":["1865-1348","1865-1356"],"issn-type":[{"type":"print","value":"1865-1348"},{"type":"electronic","value":"1865-1356"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"26 July 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICEIS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Enterprise Information Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Prague","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Czech Republic","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 April 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 April 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iceis2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iceis.scitevents.org\/?y=2023","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}