From 404739981f08f588c670e3fc272c547bde5ac5ae Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Fri, 31 Jul 2009 12:06:52 +0200 Subject: [PATCH 1/1] Mysql indexing plugin Patch from Daine Mamacos to use Mysql storage for indexing. --- configure.ac | 4 + include/idzebra/api.h | 2 +- index/Makefile.am | 16 +++- index/index.h | 2 + index/indexplugin.h | 36 +++++++++ index/mod_indexplugin_mysql.c | 106 ++++++++++++++++++++++++++ index/update_driver.c | 166 +++++++++++++++++++++++++++++++++++++++++ index/zebraapi.c | 24 ++++-- index/zebraidx.c | 13 +++- 9 files changed, 354 insertions(+), 15 deletions(-) create mode 100644 index/indexplugin.h create mode 100644 index/mod_indexplugin_mysql.c create mode 100644 index/update_driver.c diff --git a/configure.ac b/configure.ac index e5171a9..9d706fa 100644 --- a/configure.ac +++ b/configure.ac @@ -290,6 +290,10 @@ ZEBRA_MODULE(dom,[$def], [ --enable-mod-dom XML/XSLT filter (Requires ZEBRA_MODULE(alvis,[$def], [ --enable-mod-alvis ALVIS filter (Requires libxslt)]) ZEBRA_MODULE(safari,shared,[ --enable-mod-safari Safari filter (DBC)]) + +AC_CHECK_HEADERS([mysql/mysql.h], [def="shared"], [def="disabled"], []) +ZEBRA_MODULE(indexplugin_mysql,[$def],[ --enable-mod-indexplugin-mysql indexing plugin]) + dnl ------ ANSI C Header files AC_STDC_HEADERS if test "$ac_cv_header_stdc" = "no"; then diff --git a/include/idzebra/api.h b/include/idzebra/api.h index 81a1ef4..a42c677 100644 --- a/include/idzebra/api.h +++ b/include/idzebra/api.h @@ -396,7 +396,7 @@ ZEBRA_RES zebra_compact(ZebraHandle zh); YAZ_EXPORT ZEBRA_RES zebra_repository_index(ZebraHandle zh, const char *path, - enum zebra_recctrl_action_t action); + enum zebra_recctrl_action_t action, char *useIndexDriver); YAZ_EXPORT ZEBRA_RES zebra_repository_update(ZebraHandle zh, const char *path); diff --git a/index/Makefile.am b/index/Makefile.am index 688e112..c8ab19f 100644 --- a/index/Makefile.am +++ b/index/Makefile.am @@ -55,6 +55,11 @@ mod_text_la_LDFLAGS = -rpath 
$(modlibdir) -module -avoid-version mod_text_la_LADD = mod_text_la_LIBADD = $(zebralib) $(mod_text_la_LADD) +mod_indexplugin_mysql_la_SOURCES = mod_indexplugin_mysql.c +mod_indexplugin_mysql_la_LDFLAGS = -rpath $(modlibdir) -module -avoid-version +mod_indexplugin_mysql_la_LADD = +mod_indexplugin_mysql_la_LIBADD = $(zebralib) $(mod_indexplugin_mysql_la_LADD) -lmysqlclient + modlib_LTLIBRARIES = $(SHARED_MODULE_LA) EXTRA_LTLIBRARIES = \ mod-grs-regx.la \ @@ -63,7 +68,8 @@ EXTRA_LTLIBRARIES = \ mod-safari.la \ mod-alvis.la \ mod-dom.la \ - mod-text.la + mod-text.la \ + mod-indexplugin_mysql.la EXTRA_libidzebra_2_0_la_SOURCES = \ $(mod_grs_regx_la_SOURCES) \ @@ -72,7 +78,8 @@ EXTRA_libidzebra_2_0_la_SOURCES = \ $(mod_safari_la_SOURCES) \ $(mod_alvis_la_SOURCES) \ $(mod_dom_la_SOURCES) \ - $(mod_text_la_SOURCES) + $(mod_text_la_SOURCES) \ + $(mod_indexplugin_mysql_la_SOURCES) lib_LTLIBRARIES = $(zebralib) @@ -97,7 +104,8 @@ libidzebra_2_0_la_SOURCES = \ rpnscan.c rpnsearch.c sortidx.c stream.c \ update_path.c update_file.c trunc.c untrans.c isam_methods.c \ zaptterm.c zebraapi.c zinfo.c zinfo.h zsets.c key_block.c key_block.h \ - check_res.c rset_isam.c + check_res.c rset_isam.c \ + update_driver.c bin_PROGRAMS = zebraidx zebrasrv @@ -111,7 +119,7 @@ AM_CPPFLAGS = -I$(srcdir)/../include $(YAZINC) \ -DDEFAULT_MODULE_PATH=\"$(modlibdir)\" \ $(TCL_INCLUDE) -LDADD = $(zebralib) $(YAZLALIB) +LDADD = $(zebralib) $(YAZLALIB) -ldl zebrash_LDADD= $(LDADD) $(READLINE_LIBS) diff --git a/index/index.h b/index/index.h index 606a7d8..c4a98aa 100644 --- a/index/index.h +++ b/index/index.h @@ -393,6 +393,8 @@ ZEBRA_RES zebra_sort_get_ord(ZebraHandle zh, ZEBRA_RES zebra_update_file_match(ZebraHandle zh, const char *path); ZEBRA_RES zebra_update_from_path(ZebraHandle zh, const char *path, enum zebra_recctrl_action_t action); +ZEBRA_RES zebra_update_from_driver(ZebraHandle zh, const char *path, + enum zebra_recctrl_action_t action, char *useIndexDriver); ZEBRA_RES 
zebra_remove_file_match(ZebraHandle zh);
 
 struct rpn_char_map_info
diff --git a/index/indexplugin.h b/index/indexplugin.h
new file mode 100644
index 0000000..f3de2cb
--- /dev/null
+++ b/index/indexplugin.h
@@ -0,0 +1,25 @@
+#ifndef INDEXPLUGINH
+#define INDEXPLUGINH
+
+#include "../index/index.h"
+/* NOTE(review): the operand of the #include below is missing from this
+   patch text; restore the header name from the original commit. */
+#include
+
+
+/* Signature of the indexing entry point that a plugin registers:
+   receives the Zebra handle, the driver argument and the record action */
+typedef int (*indexList)(ZebraHandle zh, const char *driverArg, enum zebra_recctrl_action_t action);
+
+/* Holds the entry point registered by the loaded plugin */
+typedef struct
+{
+    indexList idxList;
+} zebra_index_plugin_object;
+
+/* Store a plugin's entry point; plugins call this from indexPluginRegister() */
+void addDriverFunction(indexList);
+/* Pass one in-memory record (data, dataLength) to Zebra under the given name */
+void zebraIndexBuffer(ZebraHandle zh, char *data, int dataLength, enum zebra_recctrl_action_t action, char *name);
+
+#endif
diff --git a/index/mod_indexplugin_mysql.c b/index/mod_indexplugin_mysql.c
new file mode 100644
index 0000000..734fac0
--- /dev/null
+++ b/index/mod_indexplugin_mysql.c
@@ -0,0 +1,139 @@
+/* This file is part of the Zebra server.
+   Copyright (C) 1994-2009 Index Data
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+
+#include "indexplugin.h"
+#include <mysql/mysql.h>
+
+/* Connection handle shared by mysqlConnect() and repositoryExtract() */
+MYSQL mCon;
+
+/* Initialise mCon and connect to the MySQL server.
+   Returns ZEBRA_OK on success, ZEBRA_FAIL if the connection fails. */
+static int mysqlConnect(void)
+{
+    mysql_init(&mCon);
+
+    /* Set the default encoding to utf-8 so that zebra
+       doesn't gripe that the XML conflicts with its encoding */
+    mysql_options(&mCon, MYSQL_SET_CHARSET_NAME, "utf8");
+
+    /* NOTE(review): host, user, password and database are hard-coded in
+       the mysql_real_connect() call; presumably they take precedence over
+       the "indexplugin_mysql" option group -- confirm. */
+    mysql_options(&mCon, MYSQL_READ_DEFAULT_GROUP, "indexplugin_mysql");
+    if (!mysql_real_connect(&mCon, "127.0.0.1", "test", "test", "newDatabase", 0, NULL, 0))
+    {
+        yaz_log(YLOG_FATAL, "Failed to connect to database: %s\n", mysql_error(&mCon));
+        return ZEBRA_FAIL;
+    }
+    else
+    {
+        yaz_log(YLOG_LOG, "Connected to Mysql Database");
+    }
+
+    return ZEBRA_OK;
+}
+
+
+/* Plugin entry point: run driverCommand as an SQL query and index every
+   row of the result.  Column 0 is used as the record name and column 1
+   as the record data.
+   Returns ZEBRA_OK on success; ZEBRA_FAIL if connecting or querying fails
+   or if the result set has fewer than two columns. */
+static int repositoryExtract(ZebraHandle zh, const char *driverCommand, enum zebra_recctrl_action_t action)
+{
+    /* this doesn't really need to be initialised */
+    int ret = ZEBRA_FAIL;
+
+    assert(driverCommand);
+
+    yaz_log(YLOG_LOG, "Driver command: %s", driverCommand);
+
+    if ((ret = mysqlConnect()) == ZEBRA_OK)
+    {
+        const char *mQuery = driverCommand;
+        if (mysql_real_query(&mCon, mQuery, strlen(mQuery)) == 0)
+        {
+            MYSQL_RES *result;
+            if ((result = mysql_store_result(&mCon)))
+            {
+                MYSQL_ROW row;
+                unsigned int num_fields;
+
+                num_fields = mysql_num_fields(result);
+                if (num_fields < 2)
+                {
+                    /* row[1] and lengths[1] below would be out of bounds */
+                    yaz_log(YLOG_FATAL, "Driver query must return two columns (name, data)");
+                    ret = ZEBRA_FAIL;
+                }
+                else
+                {
+                    while ((row = mysql_fetch_row(result)))
+                    {
+                        unsigned long *lengths;
+                        lengths = mysql_fetch_lengths(result);
+
+                        /* SQL NULL values are returned as NULL pointers */
+                        if (!lengths || !row[0] || !row[1])
+                        {
+                            yaz_log(YLOG_WARN, "Skipping row with NULL name or data");
+                            continue;
+                        }
+                        zebraIndexBuffer(zh, row[1], lengths[1], action, row[0]);
+                    }
+                }
+                mysql_free_result(result);
+            }
+            else if (mysql_errno(&mCon))
+            {
+                /* a NULL result with an error set means the fetch failed */
+                yaz_log(YLOG_FATAL, "Failed to fetch result: %s", mysql_error(&mCon));
+                ret = ZEBRA_FAIL;
+            }
+        }
+        else
+        {
+            yaz_log(YLOG_FATAL, "Failed to run query: %s\n", mysql_error(&mCon));
+            ret = ZEBRA_FAIL;
+        }
+    }
+
+    /* Drop our MYSQL connection as we don't need it anymore
+       and deallocate anything allocated */
+    mysql_close(&mCon);
+
+    return ret;
+}
+
+/* Registration hook that Zebra looks up by name after loading this module */
+void indexPluginRegister(void)
+{
+    /* register our function that gets called while 
indexing a document */
+    addDriverFunction(repositoryExtract);
+}
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
diff --git a/index/update_driver.c b/index/update_driver.c
new file mode 100644
index 0000000..072112f
--- /dev/null
+++ b/index/update_driver.c
@@ -0,0 +1,218 @@
+/* This file is part of the Zebra server.
+   Copyright (C) 1994-2009 Index Data
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+
+/* NOTE(review): the header names of the bare #include lines in this file
+   are missing from this patch text; restore them from the original commit. */
+#include
+#include
+#include
+#ifdef WIN32
+#include
+#define S_ISREG(x) (x & _S_IFREG)
+#define S_ISDIR(x) (x & _S_IFDIR)
+#endif
+#if HAVE_UNISTD_H
+#include
+#endif
+#include
+#include
+#include
+
+#include "index.h"
+
+/* plugin includes */
+#include
+#include "indexplugin.h"
+#include
+#include
+
+/* Plugin object for the driver currently being run; NULL when none is loaded */
+zebra_index_plugin_object *pluginObj = NULL;
+
+/* Allocate a plugin object with no entry point registered.
+   Returns NULL if the allocation fails. */
+static zebra_index_plugin_object *newZebraPlugin(void)
+{
+    zebra_index_plugin_object *newPlugin = malloc(sizeof(*newPlugin));
+    if (newPlugin)
+        newPlugin->idxList = NULL;
+    return newPlugin;
+}
+
+/* Release a plugin object obtained from newZebraPlugin() */
+static void destroyZebraPlugin(zebra_index_plugin_object *zebraIdxPlugin)
+{
+    free(zebraIdxPlugin);
+}
+
+/* Called by a plugin's indexPluginRegister() to register its entry point.
+   Ignored (with a warning) when no plugin object is currently allocated. */
+void addDriverFunction(indexList function)
+{
+    if (!pluginObj)
+    {
+        yaz_log(YLOG_WARN, "addDriverFunction called with no plugin loaded");
+        return;
+    }
+    /* Assign the function to the object */
+    pluginObj->idxList = function;
+}
+
+
+/* Index one in-memory record using the handle's current record type;
+   'name' is passed to zebra_buffer_extract_record() as its last argument. */
+void zebraIndexBuffer(ZebraHandle zh, char *data, int dataLength, enum zebra_recctrl_action_t action, char *name)
+{
+    zebra_buffer_extract_record(zh, data, dataLength, action, zh->m_record_type, NULL, NULL, name);
+}
+
+
+/* Log every path that dirs_read() returns for 'path' from the FMATCH_DICT
+   dictionary (a '/' is appended when a non-empty path lacks one). */
+void repositoryShowDriver(ZebraHandle zh, const char *path)
+{
+    char src[1024];
+    int src_len;
+    struct dirs_entry *dst;
+    Dict dict;
+    struct dirs_info *di;
+
+    if (!(dict = dict_open_res(zh->reg->bfs, FMATCH_DICT, 50, 0, 0, zh->res)))
+    {
+        yaz_log(YLOG_FATAL, "dict_open fail of %s", FMATCH_DICT);
+        return;
+    }
+
+    /* copy at most sizeof(src)-2 bytes so that the '/' appended below
+       and the terminating NUL always fit inside src */
+    strncpy(src, path, sizeof(src)-2);
+    src[sizeof(src)-2]='\0';
+    src_len = strlen(src);
+
+    if (src_len && src[src_len-1] != '/')
+    {
+        src[src_len] = '/';
+        src[++src_len] = '\0';
+    }
+
+    di = dirs_open(dict, src, zh->m_flag_rw);
+
+    while ((dst = dirs_read(di)))
+        yaz_log(YLOG_LOG, "%s", dst->path);
+    dirs_free(&di);
+    dict_close(dict);
+}
+
+
+/* Load the shared object "mod-NAME.so" (NAME being useIndexDriver), let it
+   register its entry point via indexPluginRegister(), and run that entry
+   point with 'path' as the driver argument.
+   Returns the entry point's result, or ZEBRA_FAIL if the module cannot be
+   loaded or does not register an entry point. */
+ZEBRA_RES zebra_update_from_driver(ZebraHandle zh, const char *path,
+        enum zebra_recctrl_action_t action, char *useIndexDriver)
+{
+    /* declare something to hold our remote call */
+    void (*idxPluginRegister)(void);
+    char *dlError;
+    void *libHandle;
+    int pluginReturn;
+    int nameLength;
+
+    char driverName[100];
+
+    if (!useIndexDriver)
+    {
+        yaz_log(YLOG_FATAL, "No index driver given");
+        return ZEBRA_FAIL;
+    }
+    /* bounded formatting: the driver name is caller-supplied text */
+    nameLength = snprintf(driverName, sizeof(driverName), "mod-%s.so", useIndexDriver);
+    if (nameLength < 0 || (size_t) nameLength >= sizeof(driverName))
+    {
+        yaz_log(YLOG_FATAL, "Index driver name too long: %s", useIndexDriver);
+        return ZEBRA_FAIL;
+    }
+
+    yaz_log(YLOG_LOG, "Loading driver %s", useIndexDriver);
+
+    libHandle = dlopen(driverName, RTLD_LAZY);
+    if (!libHandle)
+    {
+        yaz_log(YLOG_FATAL, "Unable to load index plugin %s", dlerror());
+        return ZEBRA_FAIL;
+    }
+    /* clear the error buffer */
+    dlerror();
+
+    idxPluginRegister = dlsym(libHandle, "indexPluginRegister");
+
+    if ((dlError = dlerror()) != NULL)
+    {
+        yaz_log(YLOG_FATAL, "Index plugin error: %s", dlError);
+
+        /* Although the documentation says this dlclose isn't needed
+           it seems better to put it in, in case there were memory
+           allocations */
+        dlclose(libHandle);
+        return ZEBRA_FAIL;
+    }
+
+    pluginObj = newZebraPlugin();
+    if (!pluginObj)
+    {
+        yaz_log(YLOG_FATAL, "Out of memory allocating index plugin");
+        dlclose(libHandle);
+        return ZEBRA_FAIL;
+    }
+
+    /* invoke the plugin starter */
+    idxPluginRegister();
+
+    if (!pluginObj->idxList)
+    {
+        /* the plugin never called addDriverFunction() */
+        yaz_log(YLOG_FATAL, "Index plugin %s registered no driver function", useIndexDriver);
+        destroyZebraPlugin(pluginObj);
+        pluginObj = NULL;
+        dlclose(libHandle);
+        return ZEBRA_FAIL;
+    }
+
+    pluginReturn = pluginObj->idxList(zh, path, action);
+    destroyZebraPlugin(pluginObj);
+    /* do not leave a dangling pointer behind for addDriverFunction() */
+    pluginObj = NULL;
+
+    /* 
close the plugin handle */
+    dlclose(libHandle);
+
+    /* repositoryExtract(zh, path, action);*/
+    return pluginReturn;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
diff --git a/index/zebraapi.c b/index/zebraapi.c
index d23f8ca..ec6a660 100644
--- a/index/zebraapi.c
+++ b/index/zebraapi.c
@@ -1999,16 +1999,18 @@ ZEBRA_RES zebra_end_transaction(ZebraHandle zh, ZebraTransactionStatus *status)
 
 ZEBRA_RES zebra_repository_update(ZebraHandle zh, const char *path)
 {
-    return zebra_repository_index(zh, path, action_update);
+    /* NULL driver: index from the file system rather than through a plugin */
+    return zebra_repository_index(zh, path, action_update, NULL);
 }
 
 ZEBRA_RES zebra_repository_delete(ZebraHandle zh, const char *path)
 {
-    return zebra_repository_index(zh, path, action_delete);
+    /* NULL driver: index from the file system rather than through a plugin */
+    return zebra_repository_index(zh, path, action_delete, NULL);
 }
 
 ZEBRA_RES zebra_repository_index(ZebraHandle zh, const char *path,
-                                 enum zebra_recctrl_action_t action)
+                                 enum zebra_recctrl_action_t action, char *useIndexDriver)
 {
     ASSERTZH;
     assert(path);
@@ -2022,10 +2024,20 @@ ZEBRA_RES zebra_repository_index(ZebraHandle zh, const char *path,
     else
         yaz_log(log_level, "update action=%d", (int) action);
 
-    if (zh->m_record_id && !strcmp(zh->m_record_id, "file"))
-        return zebra_update_file_match(zh, path);
+    if (!useIndexDriver)
+    {
+        if (zh->m_record_id && !strcmp(zh->m_record_id, "file"))
+            return zebra_update_file_match(zh, path);
+        else
+            return zebra_update_from_path(zh, path, action);
+    }
     else
-        return zebra_update_from_path(zh, path, action);
+    {
+        /* An index driver was requested: index through the plugin rather
+           than the file input systems, and pass its result to the caller */
+        return zebra_update_from_driver(zh, path, action, useIndexDriver);
+    }
+
 }
 
 ZEBRA_RES zebra_repository_show(ZebraHandle zh, const char *path)
diff --git a/index/zebraidx.c 
b/index/zebraidx.c
index 4ca8b5f..6048aea 100644
--- a/index/zebraidx.c
+++ b/index/zebraidx.c
@@ -72,6 +72,8 @@ int main(int argc, char **argv)
     sprintf(nbuf, "%.40s(%ld)", *argv, (long) getpid());
     yaz_log_init_prefix(nbuf);
 #endif
+    /* Index driver selected with -i; NULL means index from the file system */
+    char *useIndexDriver = NULL;
     prog = *argv;
     if (argc < 2)
     {
@@ -96,13 +98,14 @@ int main(int argc, char **argv)
                 " -l Write log to .\n"
                 " -L Don't follow symbolic links.\n"
                 " -f Display information for the first records.\n"
+                " -i Select which index driver to use.\n"
                 " -V Show version.\n", *argv
             );
         exit(1);
     }
     res_set(default_res, "profilePath", DEFAULT_PROFILE_PATH);
     res_set(default_res, "modulePath", DEFAULT_MODULE_PATH);
-    while ((ret = options("sVt:c:g:d:m:v:nf:l:L", argv, argc, &arg)) != -2)
+    while ((ret = options("sVt:c:g:d:m:v:nf:l:Li:", argv, argc, &arg)) != -2)
     {
         if (ret == 0)
         {
@@ -195,13 +198,13 @@ int main(int argc, char **argv)
                 switch (cmd)
                 {
                 case 'u':
-                    res = zebra_repository_index(zh, arg, action_update);
+                    res = zebra_repository_index(zh, arg, action_update, useIndexDriver);
                     break;
                 case 'd':
-                    res = zebra_repository_index(zh, arg, action_delete);
+                    res = zebra_repository_index(zh, arg, action_delete, useIndexDriver);
                     break;
                 case 'a':
-                    res = zebra_repository_index(zh, arg, action_a_delete);
+                    res = zebra_repository_index(zh, arg, action_a_delete, useIndexDriver);
                     break;
                 case 's':
                     res = zebra_repository_show(zh, arg);
@@ -253,6 +256,8 @@ int main(int argc, char **argv)
         }
         else if (ret == 'v')
             yaz_log_init_level(yaz_log_mask_str(arg));
+        else if (ret == 'i')
+            useIndexDriver = arg;
         else if (ret == 'l')
             yaz_log_init_file(arg);
         else if (ret == 'm')
-- 
1.7.10.4