{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,16]],"date-time":"2026-03-16T10:19:17Z","timestamp":1773656357486,"version":"3.50.1"},"publisher-location":"Berlin, Heidelberg","reference-count":47,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"value":"9783642333071","type":"print"},{"value":"9783642333088","type":"electronic"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-33308-8_5","type":"book-chapter","created":{"date-parts":[[2012,9,1]],"date-time":"2012-09-01T21:33:52Z","timestamp":1346535232000},"page":"48-61","source":"Crossref","is-referenced-by-count":5,"title":["News Media Analysis Using Focused Crawl and Natural Language Processing: Case of Lithuanian News Websites"],"prefix":"10.1007","author":[{"given":"Tomas","family":"Krilavi\u010dius","sequence":"first","affiliation":[]},{"given":"\u017dygimantas","family":"Medelis","sequence":"additional","affiliation":[]},{"given":"Jurgita","family":"Kapo\u010di\u016bt\u0117-Dzikien\u0117","sequence":"additional","affiliation":[]},{"given":"Tomas","family":"\u017dalandauskas","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"5_CR1","unstructured":"Plana, A.: Text\/content analytics 2011: User perspectives on solutions and providers. Technical report, Alta Plana (September 2011)"},{"key":"5_CR2","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511809071","volume-title":"Introduction to Information Retrieval.","author":"C. Manning","year":"2008","unstructured":"Manning, C., Raghavan, P., Sch\u00fctze, H.: Introduction to Information Retrieval. Cambridge Univ. Press, New York (2008)"},{"key":"5_CR3","unstructured":"Baeza-Yates, R., Ribeiro-Neto, B.: Modern Information Retrieval. Addison Wesley (1999)"},{"key":"5_CR4","unstructured":"Natural Language Access to Structured Text. In: Coling 1982: Proceedings of the Ninth International Conference on Computational Linguistics (1982)"},{"key":"5_CR5","unstructured":"Tan, P.N., Steinbach, M., Kumar, V.: Introduction to Data Mining. Addison-Wesley (2005)"},{"issue":"8","key":"5_CR6","first-page":"955","volume":"3","author":"D. R\u00f6sner","year":"1997","unstructured":"R\u00f6sner, D., Grote, B., Hartmann, K., H\u00f6fling, B.: From natural language documents to sharable product knowledge: A knowledge engineering approach. Journal of Universal Computer Science\u00a03(8), 955\u2013987 (1997)","journal-title":"Journal of Universal Computer Science"},{"key":"5_CR7","unstructured":"Apache Foundation: Apache Tika. Web page (2011), http:\/\/tika.apache.org (last visited: December 10, 2011)"},{"key":"5_CR8","unstructured":"LingPipe: Lingpipe. Web page (2011), http:\/\/alias-i.com\/lingpipe\/ (last visited: December 10, 2011)"},{"key":"5_CR9","unstructured":"Cunningham, H., Maynard, D., Bontcheva, K., Tablan, V., Aswani, N., Roberts, I., Gorrell, G., Funk, A., Roberts, A., Damljanovic, D., Heitz, T., Greenwood, M.A., Saggion, H., Petrak, J., Li, Y., Peters, W.: Text Processing with GATE (Version 6) (2011)"},{"issue":"4","key":"5_CR10","doi-asserted-by":"crossref","first-page":"565","DOI":"10.15388\/Informatica.2004.079","volume":"15","author":"A. Vai\u010di\u016bnas","year":"2004","unstructured":"Vai\u010di\u016bnas, A., Kaminskas, V., Ra\u0161kinis, G.: Statistical language models of lithuanian based on word clustering and morphological decomposition. Informatica\u00a015(4), 565\u2013580 (2004)","journal-title":"Informatica"},{"key":"5_CR11","first-page":"245","volume":"34","author":"D. \u0160veikauskien\u0117","year":"2005","unstructured":"\u0160veikauskien\u0117, D.: Formal description of the syntax of the lithuanian language. Information Technologies and Control\u00a034, 245\u2013256 (2005)","journal-title":"Information Technologies and Control"},{"issue":"3","key":"5_CR12","first-page":"236","volume":"9","author":"A. Bevainyt\u0117","year":"2010","unstructured":"Bevainyt\u0117, A., But\u0117nas, L.: Document classification using weighted ontology. Materials Physics and Mechanics\u00a09(3), 236\u2013245 (2010)","journal-title":"Materials Physics and Mechanics"},{"key":"5_CR13","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"410","DOI":"10.1007\/978-3-540-74782-6_36","volume-title":"AI*IA 2007: Artificial Intelligence and Human-Oriented Computing","author":"A. Tomovi\u0107","year":"2007","unstructured":"Tomovi\u0107, A., Jani\u010di\u0107, P.: A Variant of N-Gram Based Language Classification. In: Basili, R., Pazienza, M.T. (eds.) AI*IA 2007. LNCS (LNAI), vol.\u00a04733, pp. 410\u2013421. Springer, Heidelberg (2007)"},{"key":"5_CR14","first-page":"245","volume":"24","author":"Z. Zinkevi\u010dius","year":"2000","unstructured":"Zinkevi\u010dius, Z.: Lemuoklis - tool for morphological analysis. Darbai ir Dienos\u00a0(24), 245\u2013274 (2000)","journal-title":"Darbai ir Dienos"},{"key":"5_CR15","first-page":"3","volume-title":"Proceedings of the 2010 Conference on Human Language Technologies \u2013 The Baltic Perspective: Proceedings of the Fourth International Conference Baltic HLT 2010","author":"R. Marcinkevi\u010dien\u0117","year":"2010","unstructured":"Marcinkevi\u010dien\u0117, R., Vitkut\u0117-Ad\u017egauskien\u0117, D.: Developing the human language technology infrastructure in lithuania. In: Proceedings of the 2010 Conference on Human Language Technologies \u2013 The Baltic Perspective: Proceedings of the Fourth International Conference Baltic HLT 2010, pp. 3\u201310. IOS Press, Amsterdam (2010)"},{"key":"5_CR16","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1109\/ICMLC.2010.61","volume-title":"Proceedings of the 2010 Second International Conference on Machine Learning and Computing, ICMLC 2010","author":"U. Pandey","year":"2010","unstructured":"Pandey, U., Chakravarty, S.: A survey on text classification techniques for e-mail filtering. In: Proceedings of the 2010 Second International Conference on Machine Learning and Computing, ICMLC 2010, pp. 32\u201336. IEEE Computer Society, Washington, DC (2010)"},{"issue":"1","key":"5_CR17","doi-asserted-by":"publisher","first-page":"4","DOI":"10.4304\/jait.1.1.4-20","volume":"1","author":"B. Baharudin","year":"2010","unstructured":"Baharudin, B., Lee, L.H., Khan, K.: A review of machine learning algorithms for text-documents classification. Journal of Advances in Information Technology\u00a01(1), 4\u201320 (2010)","journal-title":"Journal of Advances in Information Technology"},{"key":"5_CR18","unstructured":"Harish, B.S., Guru, D.S., Manjunath, S.: Representation and classification of text documents: A brief review. IJCA, Special Issue on RTIPPR (2), 110\u2013119 (2010)"},{"key":"5_CR19","series-title":"Lecture Notes in Artificial Intelligence","volume-title":"Charting the Topic Maps Research and Applications Landscape","year":"2006","unstructured":"Maicher, L., Park, J. (eds.): TMRA 2005. LNCS (LNAI), vol.\u00a03873. Springer, Heidelberg (2006)"},{"issue":"7","key":"5_CR20","doi-asserted-by":"publisher","first-page":"5381","DOI":"10.1016\/j.eswa.2010.01.018","volume":"37","author":"S.Y. Yang","year":"2010","unstructured":"Yang, S.Y.: Ontocrawler: A focused crawler with ontology-supported website models for information agents. Expert Systems with Applications\u00a037(7), 5381\u20135389 (2010)","journal-title":"Expert Systems with Applications"},{"key":"5_CR21","unstructured":"Porter, M.F.: Snowball: A language for stemming algorithms. Published online (October 2001), http:\/\/snowball.tartarus.org\/texts\/introduction.html (accessed March 11, 2008)"},{"key":"5_CR22","unstructured":"The National Archives: The soundex indexing system. Web page (May 2007), http:\/\/www.archives.gov\/research\/census\/soundex.html"},{"key":"5_CR23","unstructured":"Centre of Computational Linguistics: Lithuanian digital resources. Web page (2011), http:\/\/sruoga.vdu.lt\/lituanistiniai-skaitmeniai-istekliai"},{"key":"5_CR24","unstructured":"TokenMill: Lt language pack. Web page (2012), https:\/\/github.com\/tokenmill\/ltlangpack"},{"key":"5_CR25","unstructured":"N\u00e9meth, L.: Hunspell. Web page (2012), http:\/\/hunspell.sourceforge.net"},{"key":"5_CR26","unstructured":"Luka\u0161evi\u010dius, R., Agejevas, A.: ispell-lt. Web page, ftp:\/\/ftp.akl.lt\/ispell-lt\/"},{"key":"5_CR27","unstructured":"Wikipedia: Language identification \u2014 wikipedia, the free encyclopedia (2012) (Online; accessed April 30, 2012)"},{"key":"5_CR28","unstructured":"Wikipedia: Stop words \u2014 wikipedia, the free encyclopedia (2012) (Online; accessed April 30, 2012)"},{"key":"5_CR29","unstructured":"Krilavi\u010dius, T., Kulie\u0161ien\u0117, D.: Soundex for lithuanian language. Internal report, UAB TokenMill (2010)"},{"key":"5_CR30","unstructured":"Krilavi\u010dius, T., Baltr\u016bnas, M.: Soundex for lithuanian language. Internal report and bachelor thesis, UAB TokenMill and Vytautas Magnus University (2012)"},{"key":"5_CR31","doi-asserted-by":"crossref","unstructured":"Paliulionis, V.: Lietuvi\u0161k\u0173 adres\u0173 geokodavimo problemos ir j\u0173 sprendimo b\u016bdai. Informacijos Mokslai, 217\u2013222 (2009)","DOI":"10.15388\/Im.2009.0.3235"},{"key":"5_CR32","unstructured":"Krilavi\u010dius, T., Medelis, V.: Porter stemmer for lithuanian language. Internal report and bachelor thesis, UAB TokenMill and Vytautas Magnus University (2010)"},{"key":"5_CR33","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1007\/3-540-28349-8_3","volume-title":"Grouping Multidimensional Data","author":"J. Ghosh","year":"2006","unstructured":"Ghosh, J., Strehl, A.: Similarity-Based Text Clustering: A Comparative Study. In: Kogan, J., Nicholas, C., Teboulle, M. (eds.) Grouping Multidimensional Data, pp. 73\u201397. Springer, Heidelberg (2006)"},{"key":"5_CR34","doi-asserted-by":"publisher","first-page":"374","DOI":"10.1007\/s10115-004-0194-1","volume":"8","author":"S. Zhong","year":"2005","unstructured":"Zhong, S., Ghosh, J.: Generative model-based document clustering: a comparative study. Knowledge and Information Systems\u00a08, 374\u2013384 (2005), doi:10.1007\/s10115-004-0194-1","journal-title":"Knowledge and Information Systems"},{"key":"5_CR35","unstructured":"Steinbach, M., Karypis, G., Kumar, V.: A comparison of document clustering techniques. In: KDD Workshop on Text Mining, vol.\u00a0400(X), pp. 1\u201320 (2000)"},{"key":"5_CR36","unstructured":"Andrews, N.O., Fox, E.A.: Recent developments in document clustering. Technical report (2007)"},{"issue":"1","key":"5_CR37","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1075\/li.30.1.01for","volume":"30","author":"D. Nadeau","year":"2007","unstructured":"Nadeau, D., Sekine, S.: A survey of named entity recognition and classification. Journal of Linguisticae Investigationes\u00a030(1), 1\u201320 (2007)","journal-title":"Journal of Linguisticae Investigationes"},{"issue":"6","key":"5_CR38","first-page":"239","volume":"7","author":"D. Kaur","year":"2010","unstructured":"Kaur, D., Gupta, V.: A survey of named entity recognition in english and other indian languages. IJCSI International Journal of Computer Science Issues\u00a07(6), 239\u2013245 (2010)","journal-title":"IJCSI International Journal of Computer Science Issues"},{"issue":"4","key":"5_CR39","first-page":"27","volume":"7","author":"S. AbdelRahman","year":"2010","unstructured":"AbdelRahman, S., Elarnaoty, M., Magdy, M., Fahmy, A.: Integrated machine learning techniques for arabic named entity recognition. IJCSI International Journal of Computer Science Issues\u00a07(4), 27\u201336 (2010)","journal-title":"IJCSI International Journal of Computer Science Issues"},{"key":"5_CR40","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"205","DOI":"10.1007\/978-3-642-12101-2_22","volume-title":"Intelligent Information and Database Systems","author":"D.B. Nguyen","year":"2010","unstructured":"Nguyen, D.B., Hoang, S.H., Pham, S.B., Nguyen, T.P.: Named Entity Recognition for Vietnamese. In: Nguyen, N.T., Le, M.T., \u015awi\u0105tek, J. (eds.) ACIIDS 2011, Part II. LNCS, vol.\u00a05991, pp. 205\u2013214. Springer, Heidelberg (2010)"},{"key":"5_CR41","first-page":"290","volume":"34","author":"J. Kapo\u010di\u016bt\u0117-Dzikien\u0117","year":"2005","unstructured":"Kapo\u010di\u016bt\u0117-Dzikien\u0117, J., Ra\u0161kinis, G.: Rule-based annotation of lithuanian text corpora. Information technology and control. Technologija\u00a034, 290\u2013296 (2005)","journal-title":"Technologija"},{"key":"5_CR42","unstructured":"Bal\u010das, J., Krilavi\u010dius, T., Medelis, V.: Lithuanian date and time identification using GATE and Jape. Internal report and bachelor thesis, UAB TokenMill and Vytautas Magnus Unviersity (2012)"},{"key":"5_CR43","unstructured":"\u0160irviskas, R., Krilavi\u010dius, T., Medelis, V.: Lithuanian citations identification using GATE and Jape. Internal report and bachelor thesis, UAB TokenMill and Vytautas Magnus University (2012)"},{"key":"5_CR44","unstructured":"Apache Foundation: Apache Nutch. Web page (2011), http:\/\/nutch.apache.org (last visited: December 10, 2011)"},{"key":"5_CR45","unstructured":"Apache Foundation: Apache Mahout. Web page (2011), http:\/\/mahout.apache.org (last visited: December 10, 2011)"},{"key":"5_CR46","unstructured":"Apache Foundation: Apache Solr. Web page (2011), http:\/\/lucene.apache.org\/solr (last visited: December 10, 2011)"},{"key":"5_CR47","unstructured":"Apache Foundation: Apache Lucene. Web page (2011), http:\/\/lucene.apache.org (last visited: December 10, 2011)"}],"container-title":["Communications in Computer and Information Science","Information and Software Technologies"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-33308-8_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,7,13]],"date-time":"2020-07-13T03:34:03Z","timestamp":1594611243000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-33308-8_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642333071","9783642333088"],"references-count":47,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-33308-8_5","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"value":"1865-0929","type":"print"},{"value":"1865-0937","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012]]}}}