/* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2010 Index Data
+ * Copyright (C) 1995-2011 Index Data
* See the file LICENSE for details.
*/
/**
#include <stdlib.h>
#include <string.h>
+#include <assert.h>
#include "cclp.h"
}
+#define REGEX_CHARS "^[]{}()|.*+?!$" /* characters that get a backslash prefix in the term when regex truncation is allowed */
+#define CCL_CHARS "#?\\" /* characters that get a backslash prefix in the term when Z39.58 truncation is allowed */
/**
* search_term: Parse CCL search term.
* cclp: CCL Parser
int len = 0;
int left_trunc = 0;
int right_trunc = 0;
+ int regex_trunc = 0;
+ int z3958_trunc = 0;
size_t max = 200;
if (and_list || or_list || !multi)
max = 1;
if (lookahead->name[i] == ' ')
no_spaces++;
len += 1+lookahead->len+lookahead->ws_prefix_len;
- left_trunc = lookahead->left_trunc;
- right_trunc = lookahead->right_trunc;
lookahead = lookahead->next;
}
ccl_add_attr_numeric(p, attset, CCL_BIB1_STR, 1);
}
+ if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_REGEX,
+ &attset))
+ {
+ regex_trunc = 1; /* regex trunc (102) allowed */
+ }
+ else if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_Z3958,
+ &attset))
+ {
+            z3958_trunc = 1; /* Z39.58 (CCL) trunc allowed */
+ }
+
/* make the RPN token */
- p->u.t.term = (char *)xmalloc(len);
+ p->u.t.term = (char *)xmalloc(len * 2 + 2);
ccl_assert(p->u.t.term);
p->u.t.term[0] = '\0';
for (i = 0; i<no; i++)
{
const char *src_str = cclp->look_token->name;
size_t src_len = cclp->look_token->len;
-
+ int j;
+ int quote_mode = 0;
+
if (p->u.t.term[0] && cclp->look_token->ws_prefix_len)
{
- size_t len = strlen(p->u.t.term);
- memcpy(p->u.t.term + len, cclp->look_token->ws_prefix_buf,
- cclp->look_token->ws_prefix_len);
- p->u.t.term[len + cclp->look_token->ws_prefix_len] = '\0';
+ strxcat(p->u.t.term, cclp->look_token->ws_prefix_buf,
+ cclp->look_token->ws_prefix_len);
+ }
+ for (j = 0; j < src_len; j++)
+ {
+ if (j > 0 && src_str[j-1] == '\\')
+ {
+ if (regex_trunc && strchr(REGEX_CHARS "\\", src_str[j]))
+ {
+ regex_trunc = 2;
+ strcat(p->u.t.term, "\\");
+ }
+ else if (z3958_trunc && strchr(CCL_CHARS "\\", src_str[j]))
+ {
+ z3958_trunc = 2;
+ strcat(p->u.t.term, "\\");
+ }
+ strxcat(p->u.t.term, src_str + j, 1);
+ }
+ else if (src_str[j] == '"')
+ quote_mode = !quote_mode;
+ else if (!quote_mode && src_str[j] == '?')
+ {
+ if (regex_trunc)
+ {
+ strcat(p->u.t.term, ".*");
+ regex_trunc = 2; /* regex trunc is really needed */
+ }
+ else if (z3958_trunc)
+ {
+ strcat(p->u.t.term, "?");
+ z3958_trunc = 2;
+ }
+ else if (i == 0 && j == 0)
+ left_trunc = 1;
+ else if (i == no - 1 && j == src_len - 1)
+ right_trunc = 1;
+ else
+ {
+ cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
+ ccl_rpn_delete(p);
+ return NULL;
+ }
+ }
+ else if (!quote_mode && src_str[j] == '#')
+ {
+ if (regex_trunc)
+ {
+ strcat(p->u.t.term, ".");
+ regex_trunc = 2; /* regex trunc is really needed */
+ }
+ else if (z3958_trunc)
+ {
+ strcat(p->u.t.term, "#");
+ z3958_trunc = 2;
+ }
+ else
+ {
+ cclp->error_code = CCL_ERR_TRUNC_NOT_BOTH;
+ ccl_rpn_delete(p);
+ return NULL;
+ }
+ }
+ else if (src_str[j] != '\\')
+ {
+ if (regex_trunc && strchr(REGEX_CHARS, src_str[j]))
+ {
+ regex_trunc = 2;
+ strcat(p->u.t.term, "\\");
+ }
+ else if (z3958_trunc && strchr(CCL_CHARS, src_str[j]))
+ {
+ z3958_trunc = 2;
+ strcat(p->u.t.term, "\\");
+ }
+ strxcat(p->u.t.term, src_str + j, 1);
+ }
}
- strxcat(p->u.t.term, src_str, src_len);
ADVANCE;
}
}
ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 2);
}
+ else if (regex_trunc == 2)
+ {
+ ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 102);
+ }
+ else if (z3958_trunc == 2)
+ {
+ ccl_add_attr_numeric(p, attset, CCL_BIB1_TRU, 104);
+ }
else
{
if (qual_val_type(qa, CCL_BIB1_TRU, CCL_BIB1_TRU_CAN_NONE,