{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,21]],"date-time":"2025-11-21T11:24:19Z","timestamp":1763724259401},"publisher-location":"Berlin, Heidelberg","reference-count":17,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642288845"},{"type":"electronic","value":"9783642288852"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-28885-2_13","type":"book-chapter","created":{"date-parts":[[2012,3,9]],"date-time":"2012-03-09T13:24:16Z","timestamp":1331299456000},"page":"113-120","source":"Crossref","is-referenced-by-count":3,"title":["A Large Portuguese Corpus On-Line: Cleaning and Preprocessing"],"prefix":"10.1007","author":[{"given":"Michel","family":"G\u00e9n\u00e9reux","sequence":"first","affiliation":[]},{"given":"Iris","family":"Hendrickx","sequence":"additional","affiliation":[]},{"given":"Am\u00e1lia","family":"Mendes","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"13_CR1","unstructured":"Alu\u00edsio, S., Pinheiro, G.M., Manfrin, A.M.P., de Oliveira, L.H. M., Genoves Jr., L.C., Tagnin, S.E.O.: The lacio-web: Corpora and tools to advance brazilian portuguese language investigations and computational linguistic tools. In: Proceedings of 4th Conference on International LREC, pp. 1779\u20131782 (2004)"},{"key":"13_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"110","DOI":"10.1007\/3-540-45011-4_17","volume-title":"Computational Processing of the Portuguese Language","author":"S.M. Alu\u00edsio","year":"2003","unstructured":"Alu\u00edsio, S.M., Pelizzoni, J.M., Marchi, A.R., de Oliveira, L., Manenti, R., Marquiaf\u00e1vel, V.: An Account of the Challenge of Tagging a Reference Corpus for Brazilian Portuguese. In: Mamede, N.J., Baptista, J., Trancoso, I., Nunes, M.d.G.V. (eds.) PROPOR 2003. LNCS, vol.\u00a02721, pp. 110\u2013117. Springer, Heidelberg (2003)"},{"key":"13_CR3","unstructured":"Bacelar do Nascimento, M.F., Pereira, L., Saramago, J.: Portuguese Corpora at CLUL. In: Second International Conference on Language Resources and Evaluation (LREC 2000), Athens, vol.\u00a0II, pp. 1603\u20131607 (2000)"},{"key":"13_CR4","doi-asserted-by":"crossref","unstructured":"Barreto, F., Branco, A., Ferreira, E., Mendes, A., Bacelar do Nascimento, M.F.P., Nunes, F., Silva, J.: Open resources and tools for the shallow processing of portuguese. In: Proceedings of the 5th International Conference on Language Resources and Evaluation (LREC 2006), Genoa, Italy (2006)","DOI":"10.3115\/1608974.1609003"},{"key":"13_CR5","doi-asserted-by":"crossref","unstructured":"van\u00a0den Bosch, A., Daelemans, W.: Memory-based morphological analysis. In: Proceedings of the 37th Annual Meeting of the Association for Computational Linguistics, ACL 1999. pp. 285\u2013292 (1999)","DOI":"10.3115\/1034678.1034726"},{"key":"13_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"167","DOI":"10.1007\/3-540-45011-4_24","volume-title":"Computational Processing of the Portuguese Language","author":"A. Branco","year":"2003","unstructured":"Branco, A., Silva, J.: Contractions: Breaking the Tokenization-Tagging Circularity. In: Mamede, N.J., Baptista, J., Trancoso, I., Nunes, M.d.G.V. (eds.) PROPOR 2003. LNCS, vol.\u00a02721, pp. 167\u2013170. Springer, Heidelberg (2003)"},{"key":"13_CR7","unstructured":"Branco, A., Silva, J.: Evaluating solutions for the rapid development of state-of-the-art pos taggers for portuguese. In: Proc. of LREC 2004, pp. 507\u2013510 (2004)"},{"key":"13_CR8","unstructured":"Cavnar, B., Trenkle, J.M.: N-gram based text categorization. In: Proceedings of the Third Annual Symposium on Document Analysis and Information Retrieval, pp. 161\u2013175 (1994); UNLV Publications\/Reprographics"},{"key":"13_CR9","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511486579","volume-title":"Memory-Based Language Processing","author":"W. Daelemans","year":"2005","unstructured":"Daelemans, W., Van den Bosch, A.: Memory-Based Language Processing. Cambridge University Press, Cambridge (2005)"},{"key":"13_CR10","unstructured":"Daelemans, W., Zavrel, J., Berck, P., Gillis, S.: Mbt: A memory-based part of speech tagger generator. In: Proceedings of the 4th ACL\/SIGDAT Workshop on Very Large Corpora, pp. 14\u201327 (1996)"},{"key":"13_CR11","unstructured":"Evert, S.: A lightweight and efficient tool for cleaning web pages. In: 6th International Conference on Language Resources and Evaluation (LREC 2008), Marrakech, Morocco (2008)"},{"key":"13_CR12","unstructured":"G\u00e9n\u00e9reux, M., Mendes, A., Pereira, L.A.S., do Nascimento, M.F.B.: Lexical analysis of pre and post revolution discourse in portugal. In: Proceedings of the 3rd Workshop on Building and Using Comparable Corpora, LREC 2010 (2010)"},{"key":"13_CR13","doi-asserted-by":"crossref","unstructured":"Joachims, T.: Learning to Classify Text Using Support Vector Machines. Ph.D. thesis, Cornell University, USA. Kluwer Academic Publishers \/ Springer (2002)","DOI":"10.1007\/978-1-4615-0907-3"},{"key":"13_CR14","unstructured":"do Nascimento, M.F.B., Estrela, A., Mendes, A., Pereira, L.: On the use of comparable corpora of african varieties of portuguese for linguistic description and teaching\/learning applications. In: Proceedings of the Workshop on Building and Using Comparable Corpora, LREC 2008 (2008)"},{"key":"13_CR15","doi-asserted-by":"crossref","unstructured":"Santos, D.: Linguateca\u2019s infrastructure for portuguese and how it allows the detailed study of language varieties. Oslo Studies in Language\u00a03(2) (2011)","DOI":"10.5617\/osla.100"},{"key":"13_CR16","doi-asserted-by":"publisher","first-page":"450","DOI":"10.3115\/1073012.1073070","volume-title":"Proceedings of 39th Annual Meeting of the Association for Computational Linguistics (ACL)","author":"D. Santos","year":"2001","unstructured":"Santos, D., Rocha, P.: Evaluating CETEMPublico, a Free Resource for Portuguese. In: Proceedings of 39th Annual Meeting of the Association for Computational Linguistics (ACL), pp. 450\u2013457. Association for Computational Linguistics, Toulouse (2001)"},{"issue":"2","key":"13_CR17","first-page":"211","volume":"28","author":"T.B. Sardinha","year":"2007","unstructured":"Sardinha, T.B.: History and compilation of a large register-diversied corpus of Portuguese at CEPRIL. The Especialist\u00a028(2), 211\u2013226 (2007)","journal-title":"The Especialist"}],"container-title":["Lecture Notes in Computer Science","Computational Processing of the Portuguese Language"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-28885-2_13.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,2]],"date-time":"2022-01-02T09:12:20Z","timestamp":1641114740000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-28885-2_13"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642288845","9783642288852"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-28885-2_13","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}