X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Fnormalize7bit.c;h=83d52ec05271c16e57bc1d2e5dacdc9e56909b56;hb=refs%2Fheads%2Fbranch-1.4.4;hp=68fe68944b0f436718321251f1785d7e4c65e91d;hpb=b660a23f733b863332748bb2705f0050f58566e4;p=pazpar2-moved-to-github.git diff --git a/src/normalize7bit.c b/src/normalize7bit.c index 68fe689..83d52ec 100644 --- a/src/normalize7bit.c +++ b/src/normalize7bit.c @@ -1,7 +1,5 @@ -/* $Id: normalize7bit.c,v 1.2 2007-04-27 12:17:04 marc Exp $ - Copyright (c) 2006-2007, Index Data. - -This file is part of Pazpar2. +/* This file is part of Pazpar2. + Copyright (C) 2006-2010 Index Data Pazpar2 is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -14,69 +12,50 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Pazpar2; see the file LICENSE. If not, write to the -Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. - */ +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ /** \file normalize7bit.c \brief char and string normalization for 7bit ascii only */ +#if HAVE_CONFIG_H +#include +#endif + #include #include #include -#if HAVE_CONFIG_H -#include "cconfig.h" -#endif - #include "normalize7bit.h" +/** \brief removes leading whitespace.. 
Removes suffix chars in rm_chars */ char * normalize7bit_generic(char * str, const char * rm_chars) { char *p, *pe; - for (p = str; *p && isspace(*p); p++) - ; - for (pe = p + strlen(p) - 1; - pe > p && strchr(rm_chars, *pe); pe--) - *pe = '\0'; - return p; + for (p = str; *p && isspace(*(unsigned char *)p); p++) + ; + for (pe = p + strlen(p) - 1; + pe > p && strchr(rm_chars, *pe); pe--) + *pe = '\0'; + return p; } - - -char * normalize7bit_mergekey(char *buf, int skiparticle) +char *normalize7bit_mergekey(char *buf) { char *p = buf, *pout = buf; - - if (skiparticle) - { - char firstword[64]; - char articles[] = "the den der die des an a "; // must end in space - - while (*p && !isalnum(*p)) - p++; - pout = firstword; - while (*p && *p != ' ' && pout - firstword < 62) - *(pout++) = tolower(*(p++)); - *(pout++) = ' '; - *(pout++) = '\0'; - if (!strstr(articles, firstword)) - p = buf; - pout = buf; - } - while (*p) { - while (*p && !isalnum(*p)) + while (*p && !isalnum(*(unsigned char *)p)) p++; - while (isalnum(*p)) - *(pout++) = tolower(*(p++)); + while (isalnum(*(unsigned char *)p)) + *(pout++) = tolower(*(unsigned char *)(p++)); if (*p) *(pout++) = ' '; - while (*p && !isalnum(*p)) + while (*p && !isalnum(*(unsigned char *)p)) p++; } if (buf != pout) @@ -84,13 +63,14 @@ char * normalize7bit_mergekey(char *buf, int skiparticle) *(pout--) = '\0'; } while (pout > buf && *pout == ' '); - + return buf; } // Extract what appears to be years from buf, storing highest and // lowest values. 
-int extract7bit_years(const char *buf, int *first, int *last) +// longdate==1: look for YYYYMMDD; longdate==0: look only for YYYY +int extract7bit_dates(const char *buf, int *first, int *last, int longdate) { *first = -1; *last = -1; @@ -99,14 +79,16 @@ int extract7bit_years(const char *buf, int *first, int *last) const char *e; int len; - while (*buf && !isdigit(*buf)) + while (*buf && !isdigit(*(unsigned char *)buf)) buf++; len = 0; - for (e = buf; *e && isdigit(*e); e++) + for (e = buf; *e && isdigit(*(unsigned char *)e); e++) len++; - if (len == 4) + if ((len == 4 && !longdate) || (longdate && len >= 4 && len <= 8)) { int value = atoi(buf); + if (longdate && len == 4) + value *= 10000; // should really suffix 0101? if (*first < 0 || value < *first) *first = value; if (*last < 0 || value > *last) @@ -122,7 +104,9 @@ int extract7bit_years(const char *buf, int *first, int *last) /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab */ +