X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Fnormalize7bit.c;h=29ee8f18d1ab45f817c1d440bc7092378f43c5e0;hb=817e3ec506c4095bc4fcc1923cee36153ef4ee43;hp=2bbfc2c93fa44bfdda605e8fef80c75412996b0b;hpb=c7e3db74117d43e0e26c6bb127960c9421045875;p=pazpar2-moved-to-github.git diff --git a/src/normalize7bit.c b/src/normalize7bit.c index 2bbfc2c..29ee8f1 100644 --- a/src/normalize7bit.c +++ b/src/normalize7bit.c @@ -1,43 +1,42 @@ -/* $Id: normalize7bit.c,v 1.3 2007-09-07 10:27:14 adam Exp $ - Copyright (c) 2006-2007, Index Data. +/* This file is part of Pazpar2. + Copyright (C) Index Data - This file is part of Pazpar2. +Pazpar2 is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. - Pazpar2 is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2, or (at your option) any later - version. +Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. - Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - You should have received a copy of the GNU General Public License - along with Pazpar2; see the file LICENSE. If not, write to the - Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ /** \file normalize7bit.c \brief char and string normalization for 7bit ascii only */ +#if HAVE_CONFIG_H +#include +#endif + #include #include #include -#if HAVE_CONFIG_H -#include "cconfig.h" -#endif - #include "normalize7bit.h" +/** \brief removes leading whitespace.. Removes suffix cahrs in rm_chars */ char * normalize7bit_generic(char * str, const char * rm_chars) { char *p, *pe; - for (p = str; *p && isspace(*p); p++) + for (p = str; *p && isspace(*(unsigned char *)p); p++) ; for (pe = p + strlen(p) - 1; pe > p && strchr(rm_chars, *pe); pe--) @@ -45,38 +44,18 @@ char * normalize7bit_generic(char * str, const char * rm_chars) return p; } - - -char * normalize7bit_mergekey(char *buf, int skiparticle) +char *normalize7bit_mergekey(char *buf) { char *p = buf, *pout = buf; - - if (skiparticle) - { - char firstword[64]; - char articles[] = "the den der die des an a "; // must end in space - - while (*p && !isalnum(*p)) - p++; - pout = firstword; - while (*p && *p != ' ' && pout - firstword < 62) - *(pout++) = tolower(*(p++)); - *(pout++) = ' '; - *(pout++) = '\0'; - if (!strstr(articles, firstword)) - p = buf; - pout = buf; - } - while (*p) { - while (*p && !isalnum(*p)) + while (*p && !isalnum(*(unsigned char *)p)) p++; - while (isalnum(*p)) - *(pout++) = tolower(*(p++)); + while (isalnum(*(unsigned char *)p)) + *(pout++) = tolower(*(unsigned char *)(p++)); if (*p) *(pout++) = ' '; - while (*p && !isalnum(*p)) + while (*p && !isalnum(*(unsigned char *)p)) p++; } if (buf != pout) @@ -84,13 +63,14 @@ char * normalize7bit_mergekey(char *buf, int skiparticle) *(pout--) = '\0'; } while (pout > buf && *pout == ' '); - + return buf; } // Extract what appears to be years from buf, storing highest and // lowest values. -int extract7bit_years(const char *buf, int *first, int *last) +// longdate==1, look for YYYYMMDD, longdate=0 look only for YYYY +int extract7bit_dates(const char *buf, int *first, int *last, int longdate) { *first = -1; *last = -1; @@ -99,14 +79,16 @@ int extract7bit_years(const char *buf, int *first, int *last) const char *e; int len; - while (*buf && !isdigit(*buf)) + while (*buf && !isdigit(*(unsigned char *)buf)) buf++; len = 0; - for (e = buf; *e && isdigit(*e); e++) + for (e = buf; *e && isdigit(*(unsigned char *)e); e++) len++; - if (len == 4) + if ((len == 4 && !longdate) || (longdate && len >= 4 && len <= 8)) { int value = atoi(buf); + if (longdate && len == 4) + value *= 10000; // should really suffix 0101? if (*first < 0 || value < *first) *first = value; if (*last < 0 || value > *last) @@ -122,7 +104,9 @@ int extract7bit_years(const char *buf, int *first, int *last) /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab */ +