More examples and explanations on the man page

[mp-sparql-moved-to-github.git] / doc / sparql.xml
diff --git a/doc/sparql.xml b/doc/sparql.xml

index e23b149..2859458 100644 (file)
--- a/doc/sparql.xml
+++ b/doc/sparql.xml
@@ -98,19 +98,18 @@
     </literallayout>
   </refsect1>
  
- <refsect1><title>EXAMPLES</title>
+ <refsect1><title>EXAMPLE</title>
    <para>
     Configuration for database "Default" that allows searching works. Only
-   the field (use attribute) "bf.wtitle" is supported.
+   the field (use attribute) "bf.wtitle" is supported. 
     <screen><![CDATA[
    <filter type="sparql">
      <db path="Default"
          uri="http://bibframe.indexdata.com/sparql/"
-        schema="sparql-results"
-    >
+        schema="sparql-results">
        <prefix>rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns</prefix>
        <prefix>bf: http://bibframe.org/vocab/</prefix>
-      <field>SELECT ?work ?wtitle</field>
+      <form>SELECT ?work ?wtitle</form>
        <criteria>?work a bf:Work</criteria>
        <criteria>?work bf:workTitle ?wt</criteria>
        <criteria>?wt bf:titleValue ?wtitle</criteria>
@@ -119,9 +118,123 @@
    </filter>
  ]]>
     </screen>
+   The matching is done by a simple case-sensitive substring match. There is
+   no deduplication, so if a work has two titles, we get two rows.
    </para>
   </refsect1>
  
+ <refsect1><title>EXAMPLE</title>
+  <para>
+   A more complex configuration for database "work". This could be included in
+   the same filter section as the "Default" db above.
+   <screen><![CDATA[
+    <db path="work" schema="sparql-results">
+      <prefix>rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns</prefix>
+      <prefix>bf: http://bibframe.org/vocab/</prefix>
+      <form>SELECT
+              ?work
+              (sql:GROUP_DIGEST (?wtitle, ' ; ', 1000, 1)) AS ?title
+              (sql:GROUP_DIGEST (?creatorlabel, ' ; ', 1000, 1))AS ?creator
+              (sql:GROUP_DIGEST (?subjectlabel, ' ; ', 1000, 1))AS ?subject
+      </form>
+      <criteria>?work a bf:Work</criteria>
+
+      <criteria> OPTIONAL {
+          ?work bf:workTitle ?wt .
+          ?wt bf:titleValue ?wtitle }
+      </criteria>
+      <criteria> OPTIONAL {
+          ?work bf:creator ?creator .
+          ?creator bf:label ?creatorlabel }
+      </criteria>
+      <criteria>OPTIONAL {
+          ?work bf:subject ?subject .
+          ?subject bf:label ?subjectlabel }
+      </criteria>
+      <index type="4">?wt bf:titleValue %v FILTER(contains(%v, %s))</index>
+      <index type="1003">?creator bf:label %v FILTER(contains(%v, %s))</index>
+      <index type="21">?subject bf:label %v FILTER(contains(%v, %s))</index>
+      <index type="1016"> {
+            ?work ?op1 ?child .
+            ?child ?op2 %v FILTER(contains(STR(%v), %s))
+          }
+      </index>
+      <modifier>GROUP BY $work</modifier>
+    </db>
+]]>
+   </screen>
+   </para>
+   <para>
+    This returns one row for each work. Titles, authors, and subjects
+    are all optional. If they repeat, the repeated values are concatenated into
+    a single field, separated by semicolons. This is done by the GROUP_DIGEST
+    function, which is specific to the Virtuoso back end.
+   </para>
+   <para>
+    This example supports use attributes 4 (title), 1003 (author), 21 (subject),
+    and 1016 (keyword). The keyword index matches any literal in a triple that
+    refers to the work, so it works for the titleValue in the workTitle, as well
+    as the label in the subject, and whatever else there may be. Like the preceding
+    example, the matching is a simple case-sensitive substring match. More realistic
+    term matching could be done with regular expressions, at the cost of some
+    readability, portability, and performance.
+   </para>
+ </refsect1>
+
+ <refsect1><title>EXAMPLE</title>
+   <para>
+    Configuration for database "works". This uses CONSTRUCT to produce RDF.
+   <screen><![CDATA[
+    <db path="works" schema="rdf">
+      <prefix>rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns</prefix>
+      <prefix>bf: http://bibframe.org/vocab/</prefix>
+      <form>CONSTRUCT { 
+          ?work bf:title ?wtitle . 
+          ?work bf:instanceTitle ?title .
+          ?work bf:author ?creator . 
+          ?work bf:subject ?subjectlabel }
+      </form>
+      <criteria>?work a bf:Work</criteria>
+
+      <criteria>?work bf:workTitle ?wt</criteria>
+      <criteria>?wt bf:titleValue ?wtitle</criteria>
+      <index type="4">?wt bf:titleValue %v FILTER(contains(%v, %s))</index>
+      <criteria>?work bf:creator ?creator</criteria>
+      <criteria>?creator bf:label ?creatorlabel</criteria>
+      <index type="1003">?creator bf:label %v FILTER(contains(%v, %s))</index>
+      <criteria>?work bf:subject ?subject</criteria>
+      <criteria>?subject bf:label ?subjectlabel</criteria>
+      <index type="21">?subject bf:label %v FILTER(contains(%v, %s))</index>
+    </db>
+ ]]>
+   </screen>
+  </para>
+ </refsect1>
+
+ <refsect1><title>EXAMPLE</title>
+   <para>
+    Configuration for database "instance". Like "work" above, this uses SELECT
+    to return row-based data, this time from the instances. This is not deduplicated,
+    so if an instance has two titles, we get two rows, and if it also has two
+    formats, we get four rows. The DISTINCT in the SELECT only removes identical rows.
+   <screen><![CDATA[
+    <db path="instance" schema="sparql-results">
+      <prefix>rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns</prefix>
+      <prefix>bf: http://bibframe.org/vocab/</prefix>
+      <form>SELECT DISTINCT ?instance ?title ?format</form>
+      <criteria>?instance a bf:Instance</criteria>
+      <criteria>?instance bf:title ?title</criteria>
+      <index type="4">?instance bf:title %v FILTER(contains(%v, %s))</index>
+      <criteria>?instance bf:format ?format</criteria>
+      <index type="1013">?instance bf:format %s</index>
+    </db>
+ ]]>
+   </screen>
+  </para>
+
+  
+ </refsect1>
+ 
   <refsect1><title>SEE ALSO</title>
    <para>
     <citerefentry>