@comment{
  Conference paper in the LREC 2016 proceedings (Nazar and Renau).
  The opaque hash-style citation key is kept unchanged so existing citations still resolve.
  Braces in title/booktitle protect a proper noun and an acronym from style recasing.
  The note field carries the funding acknowledgement and the conference dates.
}
@inproceedings{2f50d5bcf60c432ca6e5b253b57c5dd1,
  author    = {Nazar, Rogelio and Renau, Irene},
  title     = {A taxonomy of {Spanish} nouns, a statistical algorithm to generate it and its implementation in open source code},
  booktitle = {Proceedings of the 10th International Conference on Language Resources and Evaluation, {LREC} 2016},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Mazo, Helene and Moreno, Asuncion and Declerck, Thierry and Goggi, Sara and Grobelnik, Marko and Odijk, Jan and Piperidis, Stelios and Maegaard, Bente and Mariani, Joseph},
  publisher = {European Language Resources Association (ELRA)},
  year      = {2016},
  pages     = {1485--1492},
  language  = {English},
  keywords  = {Corpus statistics, Distributional semantics, Spanish, Taxonomy induction},
  abstract  = {In this paper we describe our work in progress in the automatic development of a taxonomy of Spanish nouns, we offer the Perl implementation we have so far, and we discuss the different problems that still need to be addressed. We designed a statistically-based taxonomy induction algorithm consisting of a combination of different strategies not involving explicit linguistic knowledge. Being all quantitative, the strategies we present are however of different nature. Some of them are based on the computation of distributional similarity coefficients which identify pairs of sibling words or co-hyponyms, while others are based on asymmetric co-occurrence and identify pairs of parent-child words or hypernym-hyponym relations. A decision making process is then applied to combine the results of the previous steps, and finally connect lexical units to a basic structure containing the most general categories of the language. We evaluate the quality of the taxonomy both manually and also using Spanish Wordnet as a gold-standard. We estimate an average of 89.07\% precision and 25.49\% recall considering only the results which the algorithm presents with high degree of certainty, or 77.86\% precision and 33.72\% recall considering all results.},
  note      = {Funding Information: This research is supported by two grants from the Chilean Government: Conicyt-Fondecyt 11140686, ``Inducci{\'o}n autom{\'a}tica de taxonom{\'i}as de sustantivos generales y especializados a partir de corpus textuales desde el enfoque de la ling{\"u}{\'i}stica cuantitativa'' (lead researcher: Rogelio Nazar) and Conicyt-Fondecyt 11140704, ``Detecci{\'o}n autom{\'a}tica del significado de los verbos del castellano por medio de patrones sint{\'a}ctico-sem{\'a}nticos extra{\'i}dos con estad{\'i}stica de corpus'' (lead researcher: Irene Renau). We would like to thank the anonymous reviewers of the paper for their useful comments and the students who helped us with the evaluation. Conference date: 23-05-2016 through 28-05-2016},
}