2 * Copyright (c) 1995, the EUROPAGATE consortium (see below).
4 * The EUROPAGATE consortium members are:
6 * University College Dublin
7 * Danmarks Teknologiske Videnscenter
8 * An Chomhairle Leabharlanna
9 * Consejo Superior de Investigaciones Cientificas
11 * Permission to use, copy, modify, distribute, and sell this software and
12 * its documentation, in whole or in part, for any purpose, is hereby granted,
15 * 1. This copyright and permission notice appear in all copies of the
16 * software and its documentation. Notices of copyright or attribution
17 * which appear at the beginning of any file must remain unchanged.
19 * 2. The names of EUROPAGATE or the project partners may not be used to
20 * endorse or promote products derived from this software without specific
21 * prior written permission.
23 * 3. Users of this software (implementors and gateway operators) agree to
24 * inform the EUROPAGATE consortium of their use of the software. This
25 * information will be used to evaluate the EUROPAGATE project and the
26 * software, and to plan further developments. The consortium may use
27 * the information in later publications.
29 * 4. Users of this software agree to make their best efforts, when
30 * documenting their use of the software, to acknowledge the EUROPAGATE
31 * consortium, and the role played by the software in their work.
33 * THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,
34 * EXPRESS, IMPLIED, OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
35 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
36 * IN NO EVENT SHALL THE EUROPAGATE CONSORTIUM OR ITS MEMBERS BE LIABLE
37 * FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF
38 * ANY KIND, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
39 * OR PROFITS, WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND
40 * ON ANY THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE
41 * USE OR PERFORMANCE OF THIS SOFTWARE.
46 * \file ccl_stop_words.c
47 * \brief Removes stop words from terms in RPN tree
/*
 * One entry in the singly linked list of removed items.
 * append_removed_item() fills in the qualname and term members; their
 * declarations are not visible in this excerpt.
 */
56 struct ccl_stop_info {
/* link to the following entry; traversal code tests for a null pointer to find the end */
59 struct ccl_stop_info *next;
/*
 * State behind a ccl_stop_words_t handle.
 * NOTE(review): a blank_chars member is used elsewhere in this file, but
 * its declaration is not visible in this excerpt.
 */
62 struct ccl_stop_words {
64 NMEM nmem; /* memory for removed items */
/* head of the removed-items list; ccl_stop_words_tree() sets it to 0 before each run */
65 struct ccl_stop_info *removed_items;
/*
 * Record one removed term in csw's removed-items list.
 *
 * csw   - stop-words handle; all allocations come from csw->nmem.
 * qname - qualifier name, duplicated with nmem_strdup (its parameter
 *         declaration line is not visible in this excerpt).
 * t     - start of the term text; need not be NUL-terminated, since
 *         exactly len bytes are copied.
 * len   - number of bytes of t to copy.
 */
68 static void append_removed_item(ccl_stop_words_t csw,
70 const char *t, size_t len)
/* new list entry, allocated from the handle's NMEM */
72 struct ccl_stop_info *csi = (struct ccl_stop_info *)
73 nmem_malloc(csw->nmem, sizeof(*csi));
/* csip starts at the list head pointer and is advanced along the next links below */
74 struct ccl_stop_info **csip = &csw->removed_items;
76 csi->qualname = nmem_strdup(csw->nmem, qname);
/* len+1 bytes: the copied term plus the terminating NUL written below */
80 csi->term = (char *) nmem_malloc(csw->nmem, len+1);
81 memcpy(csi->term, t, len);
82 csi->term[len] = '\0';
/* step to the next link; NOTE(review): the enclosing loop and the final
   store through csip are not visible here - presumably the new entry is
   linked at the tail of the list */
86 csip = &(*csip)->next;
/*
 * Create a stop-words handle.
 * Creates an NMEM, allocates the handle with xmalloc, starts with an
 * empty removed-items list and installs the default blank characters.
 * NOTE(review): the store of nmem into the handle and the return
 * statement are not visible in this excerpt.
 */
91 ccl_stop_words_t ccl_stop_words_create(void)
93 NMEM nmem = nmem_create();
94 ccl_stop_words_t csw = (ccl_stop_words_t) xmalloc(sizeof(*csw));
/* no items removed yet */
96 csw->removed_items = 0;
/* default word separators: space, CR, LF and TAB (heap copy, released with xfree in ccl_stop_words_destroy) */
97 csw->blank_chars = xstrdup(" \r\n\t");
/*
 * Release the resources held by a stop-words handle.
 * NOTE(review): any null check on csw and the release of the handle
 * itself are not visible in this excerpt.
 */
101 void ccl_stop_words_destroy(ccl_stop_words_t csw)
/* destroys the NMEM that backs the removed-items list */
105 nmem_destroy(csw->nmem);
/* releases the blank-character string that ccl_stop_words_create made with xstrdup */
106 xfree(csw->blank_chars);
/*
 * Recursive worker: removes stop words from the RPN subtree rooted at p
 * and records each removed word through append_removed_item().
 * NOTE(review): this excerpt omits lines (the bibset parameter
 * declaration, braces, the node-kind dispatch, loop bodies and return
 * statements), so the notes below cover only the visible statements.
 */
111 struct ccl_rpn_node *ccl_remove_stop_r(ccl_stop_words_t csw,
113 struct ccl_rpn_node *p)
115 struct ccl_rpn_node *left, *right;
/* process both operands of the node first */
122 left = ccl_remove_stop_r(csw, bibset, p->u.p[0]);
123 right = ccl_remove_stop_r(csw, bibset, p->u.p[1]);
126 /* we must delete our binary node and return child (if any) */
/* term case: cp scans the term string */
144 char *cp = p->u.t.term;
/* runs while cp is on a blank character (loop body not visible; presumably advances cp) */
148 while (*cp && strchr(csw->blank_chars, *cp))
/* runs while cp is on a non-blank character, i.e. over one word (loop body not visible) */
155 while (*cp && !strchr(csw->blank_chars, *cp))
/* length of the span from cp0 to cp; cp0 is declared on a line not shown, presumably the word start */
159 size_t len = cp - cp0;
/* look the word up for this term's qualifiers (remaining arguments not visible) */
160 if (ccl_search_stop(bibset, p->u.t.qual,
/* remember the word that is about to be removed (remaining arguments not visible) */
163 append_removed_item(csw, p->u.t.qual,
/* runs while cp is on a blank following the word (loop body not visible) */
165 while (*cp && strchr(csw->blank_chars, *cp))
/* shift the rest of the string, including its NUL (hence strlen+1), down to cp0;
   memmove is used because the regions overlap */
167 memmove(cp0, cp, strlen(cp)+1);
176 /* chop right blanks .. and see if the term becomes empty */
/* only done when there is a term and at least one item is on the removed list */
177 if (p->u.t.term && csw->removed_items)
/* cp starts at the terminating NUL of the term */
179 char *cp = p->u.t.term + strlen(p->u.t.term);
/* cp is at the very start of the term */
182 if (cp == p->u.t.term)
184 /* term is empty / blank */
/* cp[-1] is the character just before cp; this tests whether it is a non-blank */
188 if (!strchr(csw->blank_chars, cp[-1]))
/*
 * Run stop-word removal on the RPN tree *t.
 * Information about removed items from any earlier call is discarded
 * first.
 * NOTE(review): what is done with r (presumably stored back through t)
 * and the returned value are not visible in this excerpt.
 */
200 int ccl_stop_words_tree(ccl_stop_words_t csw,
201 CCL_bibset bibset, struct ccl_rpn_node **t)
203 struct ccl_rpn_node *r;

205 /* remove list items */
/* the old list entries were allocated from csw->nmem, so resetting it releases them together */
206 nmem_reset(csw->nmem);
207 csw->removed_items = 0;

/* the worker adds to csw->removed_items as it goes */
209 r = ccl_remove_stop_r(csw, bibset, *t);

/* non-null list head: at least one item was removed during this run */
211 if (csw->removed_items)
/*
 * Look up an entry of the removed-items list by position.
 *
 * csw      - stop-words handle whose list is examined.
 * idx      - position of the wanted entry, counted from the list head.
 * qualname - receives the entry's qualifier name.
 * term     - output for the entry's term; NOTE(review): the store to
 *            *term, the declaration of i and the return value are not
 *            visible in this excerpt.
 */
216 int ccl_stop_words_info(ccl_stop_words_t csw, int idx,
217 const char **qualname, const char **term)
219 struct ccl_stop_info *csi = csw->removed_items;
/* walk forward until position idx is reached or the list ends (loop body not visible) */
221 while (csi && i < idx)
/* hands out the list's own string, not a copy */
228 *qualname = csi->qualname;
238 * indent-tabs-mode: nil
240 * vim: shiftwidth=4 tabstop=8 expandtab