X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Fnormalize7bit.c;h=1f993c9af5a5c4251c8e97221308d051db8798a2;hb=b4ee319ba0560f08e43e891ac5492563c7737fd7;hp=d79026b41d1c3ce6e1524a8377892873ad181222;hpb=a8f5308820cd689f5b92601038cb5d24118d5f2b;p=pazpar2-moved-to-github.git diff --git a/src/normalize7bit.c b/src/normalize7bit.c index d79026b..1f993c9 100644 --- a/src/normalize7bit.c +++ b/src/normalize7bit.c @@ -1,7 +1,5 @@ -/* $Id: normalize7bit.c,v 1.1 2007-04-26 21:33:32 marc Exp $ - Copyright (c) 2006-2007, Index Data. - -This file is part of Pazpar2. +/* This file is part of Pazpar2. + Copyright (C) 2006-2009 Index Data Pazpar2 is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -14,35 +12,36 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Pazpar2; see the file LICENSE. If not, write to the -Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. - */ +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ /** \file normalize7bit.c \brief char and string normalization for 7bit ascii only */ +#if HAVE_CONFIG_H +#include +#endif + #include #include #include -#if HAVE_CONFIG_H -#include "cconfig.h" -#endif - #include "normalize7bit.h" -char * normalize7bit_generic(char* str, char* rm_chars) +/** \brief removes leading whitespace.. 
Removes suffix chars in rm_chars */ +char * normalize7bit_generic(char * str, const char * rm_chars) { char *p, *pe; - for (p = str; *p && isspace(*p); p++) - ; - for (pe = p + strlen(p) - 1; - pe > p && strchr(rm_chars, *pe); pe--) - *pe = '\0'; - return p; + for (p = str; *p && isspace(*(unsigned char *)p); p++) + ; + for (pe = p + strlen(p) - 1; + pe > p && strchr(rm_chars, *pe); pe--) + *pe = '\0'; + return p; } @@ -56,11 +55,11 @@ char * normalize7bit_mergekey(char *buf, int skiparticle) char firstword[64]; char articles[] = "the den der die des an a "; // must end in space - while (*p && !isalnum(*p)) + while (*p && !isalnum(*(unsigned char *)p)) p++; pout = firstword; while (*p && *p != ' ' && pout - firstword < 62) - *(pout++) = tolower(*(p++)); + *(pout++) = tolower(*(unsigned char *)(p++)); *(pout++) = ' '; *(pout++) = '\0'; if (!strstr(articles, firstword)) @@ -70,13 +69,13 @@ char * normalize7bit_mergekey(char *buf, int skiparticle) while (*p) { - while (*p && !isalnum(*p)) + while (*p && !isalnum(*(unsigned char *)p)) p++; - while (isalnum(*p)) - *(pout++) = tolower(*(p++)); + while (isalnum(*(unsigned char *)p)) + *(pout++) = tolower(*(unsigned char *)(p++)); if (*p) *(pout++) = ' '; - while (*p && !isalnum(*p)) + while (*p && !isalnum(*(unsigned char *)p)) p++; } if (buf != pout) @@ -84,13 +83,14 @@ char * normalize7bit_mergekey(char *buf, int skiparticle) *(pout--) = '\0'; } while (pout > buf && *pout == ' '); - + return buf; } // Extract what appears to be years from buf, storing highest and // lowest values. 
-int extract_years(const char *buf, int *first, int *last) +// longdate==1: look for YYYYMMDD; longdate==0: look only for YYYY +int extract7bit_dates(const char *buf, int *first, int *last, int longdate) { *first = -1; *last = -1; @@ -99,14 +99,16 @@ int extract_years(const char *buf, int *first, int *last) const char *e; int len; - while (*buf && !isdigit(*buf)) + while (*buf && !isdigit(*(unsigned char *)buf)) buf++; len = 0; - for (e = buf; *e && isdigit(*e); e++) + for (e = buf; *e && isdigit(*(unsigned char *)e); e++) len++; - if (len == 4) + if ((len == 4 && !longdate) || (longdate && len >= 4 && len <= 8)) { int value = atoi(buf); + if (longdate && len == 4) + value *= 10000; // should really suffix 0101? if (*first < 0 || value < *first) *first = value; if (*last < 0 || value > *last) @@ -122,7 +124,9 @@ int extract_years(const char *buf, int *first, int *last) /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab */ +