-/* $Id: normalize7bit.c,v 1.1 2007-04-26 21:33:32 marc Exp $
- Copyright (c) 2006-2007, Index Data.
-
-This file is part of Pazpar2.
+/* This file is part of Pazpar2.
+ Copyright (C) Index Data
Pazpar2 is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
for more details.
You should have received a copy of the GNU General Public License
-along with Pazpar2; see the file LICENSE. If not, write to the
-Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.
- */
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
/** \file normalize7bit.c
\brief char and string normalization for 7bit ascii only
*/
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
-#if HAVE_CONFIG_H
-#include "cconfig.h"
-#endif
-
#include "normalize7bit.h"
-char * normalize7bit_generic(char* str, char* rm_chars)
+/** \brief Skips leading whitespace and strips trailing characters in place.
+
+    Advances past leading characters of str for which isspace() is true,
+    then overwrites trailing characters that occur in rm_chars with '\0'.
+    The first remaining character is never removed (the loop stops at
+    pe > p), so a string consisting of a single rm_chars character is kept.
+
+    \param str      NUL-terminated buffer; its tail is modified in place
+    \param rm_chars NUL-terminated set of characters to strip from the end
+    \returns pointer into str at the first non-whitespace character
+
+    NOTE(review): if nothing remains after the whitespace skip, pe starts
+    at p - 1 before the pe > p test ends the loop - confirm this is
+    acceptable for the callers' buffers.
+*/
+char * normalize7bit_generic(char * str, const char * rm_chars)
{
char *p, *pe;
- for (p = str; *p && isspace(*p); p++)
- ;
- for (pe = p + strlen(p) - 1;
- pe > p && strchr(rm_chars, *pe); pe--)
- *pe = '\0';
- return p;
+ for (p = str; *p && isspace(*(unsigned char *)p); p++)
+ ;
+ for (pe = p + strlen(p) - 1;
+ pe > p && strchr(rm_chars, *pe); pe--)
+ *pe = '\0';
+ return p;
}
-
-
-char * normalize7bit_mergekey(char *buf, int skiparticle)
+char *normalize7bit_mergekey(char *buf)
{
char *p = buf, *pout = buf;
-
- if (skiparticle)
- {
- char firstword[64];
- char articles[] = "the den der die des an a "; // must end in space
-
- while (*p && !isalnum(*p))
- p++;
- pout = firstword;
- while (*p && *p != ' ' && pout - firstword < 62)
- *(pout++) = tolower(*(p++));
- *(pout++) = ' ';
- *(pout++) = '\0';
- if (!strstr(articles, firstword))
- p = buf;
- pout = buf;
- }
-
while (*p)
{
- while (*p && !isalnum(*p))
+ while (*p && !isalnum(*(unsigned char *)p))
p++;
- while (isalnum(*p))
- *(pout++) = tolower(*(p++));
+ while (isalnum(*(unsigned char *)p))
+ *(pout++) = tolower(*(unsigned char *)(p++));
if (*p)
*(pout++) = ' ';
- while (*p && !isalnum(*p))
+ while (*p && !isalnum(*(unsigned char *)p))
p++;
}
if (buf != pout)
// Extract what appears to be years from buf, storing highest and
// lowest values.
-int extract_years(const char *buf, int *first, int *last)
+// longdate != 0: look for YYYYMMDD (runs of 4 to 8 digits are accepted);
+// longdate == 0: look only for YYYY (runs of exactly 4 digits)
+int extract7bit_dates(const char *buf, int *first, int *last, int longdate)
{
*first = -1;
*last = -1;
const char *e;
int len;
- while (*buf && !isdigit(*buf))
+ while (*buf && !isdigit(*(unsigned char *)buf))
buf++;
len = 0;
- for (e = buf; *e && isdigit(*e); e++)
+ for (e = buf; *e && isdigit(*(unsigned char *)e); e++)
len++;
- if (len == 4)
+ if ((len == 4 && !longdate) || (longdate && len >= 4 && len <= 8))
{
int value = atoi(buf);
+ if (longdate && len == 4)
+ value *= 10000; // should really suffix 0101?
if (*first < 0 || value < *first)
*first = value;
if (*last < 0 || value > *last)
/*
* Local variables:
* c-basic-offset: 4
+ * c-file-style: "Stroustrup"
* indent-tabs-mode: nil
* End:
* vim: shiftwidth=4 tabstop=8 expandtab
*/
+