X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Fnormalize7bit.c;h=6e1562abce1499656efe3e26c1b56173cc1d714a;hb=66a4b43079535dd50f228d136478646b8ef7c52e;hp=2bbfc2c93fa44bfdda605e8fef80c75412996b0b;hpb=c7e3db74117d43e0e26c6bb127960c9421045875;p=pazpar2-moved-to-github.git diff --git a/src/normalize7bit.c b/src/normalize7bit.c index 2bbfc2c..6e1562a 100644 --- a/src/normalize7bit.c +++ b/src/normalize7bit.c @@ -1,43 +1,42 @@ -/* $Id: normalize7bit.c,v 1.3 2007-09-07 10:27:14 adam Exp $ - Copyright (c) 2006-2007, Index Data. +/* This file is part of Pazpar2. + Copyright (C) 2006-2008 Index Data - This file is part of Pazpar2. +Pazpar2 is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. - Pazpar2 is free software; you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation; either version 2, or (at your option) any later - version. +Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. - Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - for more details. +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - You should have received a copy of the GNU General Public License - along with Pazpar2; see the file LICENSE. If not, write to the - Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. 
*/ /** \file normalize7bit.c \brief char and string normalization for 7bit ascii only */ +#if HAVE_CONFIG_H +#include +#endif + #include #include #include -#if HAVE_CONFIG_H -#include "cconfig.h" -#endif - #include "normalize7bit.h" +/** \brief removes leading whitespace. Removes suffix chars in rm_chars */ char * normalize7bit_generic(char * str, const char * rm_chars) { char *p, *pe; - for (p = str; *p && isspace(*p); p++) + for (p = str; *p && isspace(*(unsigned char *)p); p++) ; for (pe = p + strlen(p) - 1; pe > p && strchr(rm_chars, *pe); pe--) @@ -56,11 +55,11 @@ char * normalize7bit_mergekey(char *buf, int skiparticle) char firstword[64]; char articles[] = "the den der die des an a "; // must end in space - while (*p && !isalnum(*p)) + while (*p && !isalnum(*(unsigned char *)p)) p++; pout = firstword; while (*p && *p != ' ' && pout - firstword < 62) - *(pout++) = tolower(*(p++)); + *(pout++) = tolower(*(unsigned char *)(p++)); *(pout++) = ' '; *(pout++) = '\0'; if (!strstr(articles, firstword)) @@ -70,13 +69,13 @@ char * normalize7bit_mergekey(char *buf, int skiparticle) while (*p) { - while (*p && !isalnum(*p)) + while (*p && !isalnum(*(unsigned char *)p)) p++; - while (isalnum(*p)) - *(pout++) = tolower(*(p++)); + while (isalnum(*(unsigned char *)p)) + *(pout++) = tolower(*(unsigned char *)(p++)); if (*p) *(pout++) = ' '; - while (*p && !isalnum(*p)) + while (*p && !isalnum(*(unsigned char *)p)) p++; } if (buf != pout) @@ -90,7 +89,8 @@ char * normalize7bit_mergekey(char *buf, int skiparticle) // Extract what appears to be years from buf, storing highest and // lowest values. 
-int extract7bit_years(const char *buf, int *first, int *last) +// longdate==1: look for YYYYMMDD (runs of 4 to 8 digits accepted); longdate==0: look only for YYYY +int extract7bit_dates(const char *buf, int *first, int *last, int longdate) { *first = -1; *last = -1; @@ -99,14 +99,16 @@ int extract7bit_years(const char *buf, int *first, int *last) const char *e; int len; - while (*buf && !isdigit(*buf)) + while (*buf && !isdigit(*(unsigned char *)buf)) buf++; len = 0; - for (e = buf; *e && isdigit(*e); e++) + for (e = buf; *e && isdigit(*(unsigned char *)e); e++) len++; - if (len == 4) + if ((len == 4 && !longdate) || (longdate && len >= 4 && len <= 8)) { int value = atoi(buf); + if (longdate && len == 4) + value *= 10000; // should really suffix 0101? if (*first < 0 || value < *first) *first = value; if (*last < 0 || value > *last)