1   package eu.fbk.knowledgestore.data;
2   
3   import java.io.Serializable;
4   import java.util.Arrays;
5   import java.util.Collection;
6   import java.util.Collections;
7   import java.util.Iterator;
8   import java.util.List;
9   import java.util.Map;
10  import java.util.Set;
11  import java.util.UUID;
12  
13  import javax.annotation.Nullable;
14  
15  import com.google.common.base.Charsets;
16  import com.google.common.base.Function;
17  import com.google.common.base.Objects;
18  import com.google.common.base.Preconditions;
19  import com.google.common.base.Strings;
20  import com.google.common.base.Throwables;
21  import com.google.common.collect.ImmutableList;
22  import com.google.common.collect.ImmutableSet;
23  import com.google.common.collect.Iterables;
24  import com.google.common.collect.Lists;
25  import com.google.common.collect.Maps;
26  import com.google.common.collect.Ordering;
27  import com.google.common.collect.Sets;
28  import com.google.common.hash.Hasher;
29  import com.google.common.hash.Hashing;
30  
31  import org.openrdf.model.BNode;
32  import org.openrdf.model.Literal;
33  import org.openrdf.model.Resource;
34  import org.openrdf.model.Statement;
35  import org.openrdf.model.URI;
36  import org.openrdf.model.Value;
37  import org.openrdf.model.vocabulary.RDF;
38  import org.openrdf.rio.RDFHandlerException;
39  
40  import eu.fbk.knowledgestore.vocabulary.KS;
41  
42  /**
43   * A record structure characterized by an ID and a generic set of properties.
44   * <p>
 * A record is a structure identified by an {@link URI} ID and having a number of key-value
46   * properties, where the key is a {@code URI} and the value is a non-empty list of objects, which
47   * can be other {@code Record}s, {@link URI}s, {@link BNode}s, {@link Literal}s or
48   * {@link Statement}s. Records are used to carry the data of resources, representations, mentions,
49   * entities, axioms, contexts and of any structured property value.
50   * </p>
51   * <p>
52   * Records are created via factory methods {@code create()}:
53   * </p>
54   * <ul>
55   * <li>method {@link #create()} creates an empty record, without ID and properties;</li>
 * <li>method {@link #create(URI, URI...)} creates a new record with the ID and the types (values
 * of property {@code rdf:type}) supplied;</li>
 * <li>method {@link #create(Record, boolean)} creates a copy of a supplied record, possibly
 * performing deep-cloning of its properties.</li>
60   * </ul>
61   * <p>
62   * Record equality ({@link #equals(Object)}, {@link #hashCode()}) is defined in terms of the
63   * record ID only, while {@link #toString()} emits only the record type and ID. Beware that the ID
64   * can change during a {@code Record} lifetime (via method {@link #setID(URI)}): this provides for
65   * increased flexibility, but pay attention not to change the ID when storing records in indexed
66   * data structure such as {@code Set}s and {@code Map}s, which rely on {@code hashCode()} and
67   * {@code equals()} to produce constant outcomes. Additional method {@link #toString(boolean)}
68   * allows for emitting a complete record representation including its properties, while equality
69   * of (selected) properties in different records can be checked by comparing the respective
 * hashes, computed via {@link #hash(URI...)}; the same {@code hash()} method can help in creating
 * synthetic IDs based on the values of some properties (e.g., following a pattern
 * {@code PREFIX + record.hash(p1, p2, ...)}).
73   * </p>
74   * <p>
75   * Access to and manipulation of properties is performed as follows:
76   * <ul>
77   * <li><b>Listing available properties</b>. Method {@link #getProperties()} returns the list of
78   * properties having some value for a record instance.</li>
79   * <li><b>Reading properties</b>. The main method is {@link #get(URI)}, which is complemented by a
80   * number of auxiliary methods for ease of use. They are described below:
81   * <ul>
82   * <li>{@link #get(URI)}, {@link #get(URI, Class)} and {@link #get(URI, Class, List)} allow
83   * retrieving all the values of a specific property, either as a list of objects or converted to a
 * specific target class; the last method also supports the specification of a default value,
 * which is returned if the property has no values or if conversion fails.</li>
86   * <li>{@link #getUnique(URI)}, {@link #getUnique(URI, Class)},
87   * {@link #getUnique(URI, Class, Object)} allow retrieving the unique value of a property, either
88   * as an object or converted to a specific class; unless a default value is specified, the methods
89   * fail in case multiple values are associated to the property, thus helping enforcing the
90   * uniqueness expectation.</li>
91   * <li>{@link #isTrue(URI)}, {@link #isFalse(URI)} are convenience methods that can be used for
92   * boolean properties; they fail if used on properties having multiple or non-boolean values.</li>
 * <li>{@link #isNull(URI)}, {@link #isUnique(URI)} are convenience methods that can be used to
 * test whether a property has, respectively, no value or at most one value.</li>
95   * <li>{@link #count(URI)} is a convenience method for counting the values of a property; it may
96   * be faster than using {@code get()}.</li>
97   * </ul>
98   * </li>
99   * <li><b>Modifying properties</b>. Two types of methods are offered:
100  * <ul>
101  * <li>modification of individual properties is done via {@link #set(URI, Object, Object...)},
102  * {@link #add(URI, Object, Object...)} and {@link #remove(URI, Object, Object...)}, that allow,
103  * respectively, to set all the values of a property, to add some new values to a property or to
104  * remove existing values from the values of a property. For ease of use, these methods accept (at
105  * least) an argument object which can be a {@code Record}, {@code URI}, {@code BNode},
106  * {@code Statement}, {@code Literal}, an object convertible to {@code Literal} or any array or
107  * iterable of the former types. A list of values is extracted from the supplied objects, and used
108  * for modifying the values of the property.</li>
109  * <li>modification of multiple properties at once is done via {@link #clear(URI...)} and
110  * {@link #retain(URI...)}, which remove all the properties respectively matching or not matching
111  * a supplied list, allowing as a special case (no properties specified) to remove all the
112  * properties of a record instance.</li>
113  * </ul>
114  * </ul>
115  * </p>
116  * <p>
 * Instances of this class are thread safe. Cloning of record instances (via
 * {@link #create(Record, boolean)}) is supported and is a relatively inexpensive operation; a
 * copy-on-write approach is adopted to reduce the memory usage of cloned objects, which share
 * their state with the source object until one of the two is changed.
121  * </p>
122  */
123 public final class Record implements Serializable, Comparable<Record> {
124 
125     private static final long serialVersionUID = 1L;
126 
127     private static final int LENGTH_INCREMENT = 8;
128 
129     private static final int OFFSET_OF_ID = 0;
130 
131     private static final int OFFSET_OF_SHARED = 1;
132 
133     private static final int OFFSET_OF_PROPERTIES = 2;
134 
135     private static final ThreadLocal<Integer> INDENT_LEVEL = new ThreadLocal<Integer>();
136 
137     private static final String INDENT_STRING = "  ";
138 
139     private Object[] state;
140 
141     private Record(final URI id) {
142         this.state = new Object[OFFSET_OF_PROPERTIES + LENGTH_INCREMENT];
143         this.state[OFFSET_OF_ID] = id;
144         this.state[OFFSET_OF_SHARED] = Boolean.FALSE;
145     }
146 
147     private Record(final Record record, final boolean deepClone) {
148         synchronized (record) {
149             Object[] state = record.state;
150             if (deepClone) {
151                 state = cloneRecursively(state);
152             }
153             if (state != record.state) {
154                 state[OFFSET_OF_SHARED] = Boolean.FALSE;
155             } else if (state[OFFSET_OF_SHARED] == Boolean.FALSE) {
156                 state[OFFSET_OF_SHARED] = Boolean.TRUE;
157             }
158             this.state = state;
159         }
160     }
161 
162     private static Object[] cloneRecursively(final Object[] array) {
163         Object[] result = array;
164         for (int i = 0; i < array.length; ++i) {
165             final Object element = array[i];
166             Object newElement = element;
167             if (element instanceof Record) {
168                 newElement = new Record((Record) element, true);
169             } else if (element instanceof Object[]) {
170                 newElement = cloneRecursively((Object[]) element);
171             }
172             if (newElement != element) {
173                 if (result == array) {
174                     result = array.clone();
175                 }
176                 result[i] = newElement;
177             }
178         }
179         return result;
180     }
181 
182     private static Object encode(final Object object) {
183         // the node unchanged is stored; this may change in order to save some memory
184         return object;
185     }
186 
187     private static <T> T decode(final Object object, final Class<T> clazz) {
188         return Data.convert(object, clazz);
189     }
190 
191     @Nullable
192     private URI doGetID() {
193         return (URI) this.state[OFFSET_OF_ID];
194     }
195 
196     private void doSetID(@Nullable final URI id) {
197         if (!Objects.equal(id, this.state[OFFSET_OF_ID])) {
198             if ((Boolean) this.state[OFFSET_OF_SHARED]) {
199                 this.state = this.state.clone();
200             }
201             this.state[OFFSET_OF_ID] = id;
202         }
203     }
204 
205     private List<URI> doGetProperties() {
206         final int capacity = this.state.length / 2;
207         final List<URI> properties = Lists.newArrayListWithCapacity(capacity);
208         for (int i = OFFSET_OF_PROPERTIES; i < this.state.length; i += 2) {
209             final URI property = (URI) this.state[i];
210             if (property != null) {
211                 properties.add(property);
212             }
213         }
214         return properties;
215     }
216 
217     private int doCount(final URI property) {
218         final int length = this.state.length;
219         for (int i = OFFSET_OF_PROPERTIES; i < length; i += 2) {
220             if (property.equals(this.state[i])) {
221                 final Object object = this.state[i + 1];
222                 if (object instanceof Object[]) {
223                     return ((Object[]) object).length;
224                 } else {
225                     return 1;
226                 }
227             }
228         }
229         return 0;
230     }
231 
232     @Nullable
233     private <T> Object doGet(final URI property, final Class<T> clazz) {
234         final int length = this.state.length;
235         for (int i = OFFSET_OF_PROPERTIES; i < length; i += 2) {
236             if (property.equals(this.state[i])) {
237                 final Object object = this.state[i + 1];
238                 if (object instanceof Object[]) {
239                     final Object[] array = (Object[]) object;
240                     final List<T> list = Lists.newArrayListWithCapacity(array.length);
241                     for (final Object element : array) {
242                         list.add(decode(element, clazz));
243                     }
244                     return list;
245                 } else {
246                     return decode(object, clazz);
247                 }
248             }
249         }
250         return null;
251     }
252 
253     private void doSet(final URI property, final Collection<Object> nodes) {
254         if ((Boolean) this.state[OFFSET_OF_SHARED]) {
255             this.state = this.state.clone();
256             this.state[OFFSET_OF_SHARED] = Boolean.FALSE;
257         }
258         final int length = this.state.length;
259         if (nodes.isEmpty()) {
260             for (int i = OFFSET_OF_PROPERTIES; i < length; i += 2) {
261                 if (property.equals(this.state[i])) {
262                     this.state[i] = null;
263                     this.state[i + 1] = null;
264                     return;
265                 }
266             }
267             return;
268         }
269         final Object value;
270         final int size = nodes.size();
271         if (size == 1) {
272             value = encode(Iterables.get(nodes, 0));
273         } else {
274             final Object[] array = new Object[size];
275             int index = 0;
276             for (final Object node : nodes) {
277                 array[index++] = encode(node);
278             }
279             value = array;
280         }
281         int nullIndex = -1;
282         for (int i = OFFSET_OF_PROPERTIES; i < length; i += 2) {
283             if (this.state[i] == null) {
284                 if (nullIndex < 0) {
285                     nullIndex = i;
286                 }
287             } else if (property.equals(this.state[i])) {
288                 this.state[i + 1] = value;
289                 return;
290             }
291         }
292         if (nullIndex >= 0) {
293             this.state[nullIndex] = property;
294             this.state[nullIndex + 1] = value;
295         } else {
296             final Object[] oldState = this.state;
297             this.state = new Object[length + LENGTH_INCREMENT];
298             System.arraycopy(oldState, 0, this.state, 0, length);
299             this.state[length] = property;
300             this.state[length + 1] = value;
301         }
302     }
303 
304     /**
305      * Creates a new record with no properties and ID assigned.
306      * 
307      * @return the created record
308      */
309     public static Record create() {
310         return new Record(null);
311     }
312 
313     /**
314      * Creates a new record with the ID and the types specified (property {@code rdf:type}), and
315      * no additional properties.
316      * 
317      * @param id
318      *            the ID of the new record, possibly null in order not to assign it
319      * @param types
320      *            the types of the record, assigned to property {@code rdf:type}
321      * @return the created record
322      */
323     public static Record create(final URI id, final URI... types) {
324         final Record record = new Record(id);
325         if (types.length > 0) {
326             record.set(RDF.TYPE, types);
327         }
328         return record;
329     }
330 
331     /**
332      * Creates a new record having the same ID and properties of the supplied record, possibly
333      * performing a deep-cloning (copy constructor). The difference between shallow- and
334      * deep-cloning lies in the handling of property values of {@code Record} type, which are
335      * shared by the source and cloned object in case of shallow-cloning, and cloned themselves in
336      * case of deep-cloning.
337      * 
338      * @param record
339      *            the reference record to clone
340      * @param deepClone
341      *            true to perform a deep-cloning, false to perform a shallow-cloning
342      * @return the created record clone
343      */
344     public static Record create(final Record record, final boolean deepClone) {
345         return new Record(record, deepClone);
346     }
347 
348     /**
349      * Returns the ID of this record.
350      * 
351      * @return the ID of this record, possibly null if not previously assigned
352      */
353     @Nullable
354     public synchronized URI getID() {
355         return doGetID();
356     }
357 
358     /**
359      * Sets the ID of this record.
360      * 
361      * @param id
362      *            the new ID of this record or null to clear it
363      * @return this record object, for call chaining
364      */
365     public synchronized Record setID(@Nullable final URI id) {
366         doSetID(id);
367         return this;
368     }
369 
370     /**
371      * Returns the system type for this record, i.e., the {@code rdf:type} URI under the
372      * {@code ks:} namespace, if any.
373      * 
374      * @return the system type or null if not set
375      * @throws IllegalArgumentException
376      *             in case multiple system types are bound to the record
377      */
378     @Nullable
379     public synchronized URI getSystemType() throws IllegalArgumentException {
380         URI result = null;
381         for (final URI type : get(RDF.TYPE, URI.class)) {
382             if (type.getNamespace().equals(KS.NAMESPACE)) {
383                 Preconditions.checkArgument(result == null, "Multiple system types: " + result
384                         + ", " + type);
385                 result = type;
386             }
387         }
388         return result;
389     }
390 
391     /**
392      * Returns all the properties currently defined for this record.
393      * 
394      * @return an immutable list with the properties currently defined for this record, without
395      *         repetitions and in no particular order
396      */
397     public synchronized List<URI> getProperties() {
398         return doGetProperties();
399     }
400 
401     /**
402      * Determines whether the property specified is null, i.e., it has no value.
403      * 
404      * @param property
405      *            the property to read
406      * @return true if the property has no value
407      */
408     public synchronized boolean isNull(final URI property) {
409         return doCount(property) == 0;
410     }
411 
412     /**
413      * Determines whether the property specified has at most one value.
414      * 
415      * @param property
416      *            the property to read
417      * @return true if the property has at most value; false if it has multiple values
418      */
419     public synchronized boolean isUnique(final URI property) {
420         return doCount(property) <= 1;
421     }
422 
423     /**
424      * Determines whether the property specified has been set to true. The method fails if the
425      * property has multiple values or has a non-boolean type; if this behaviour is not desired,
426      * use {@link #getUnique(URI, Class, Object)} specifying {@code Boolean.class} as the class
427      * and an appropriate default value to be returned in case of failure.
428      * 
429      * @param property
430      *            the property to read
431      * @return true if the property is set to true; false if the property has no value or has been
432      *         set to false
433      * @throws IllegalStateException
434      *             in case the property has multiple values
435      * @throws IllegalArgumentException
436      *             in case the property value is not of boolean type
437      */
438     public boolean isTrue(final URI property) throws IllegalStateException,
439             IllegalArgumentException {
440         final Boolean value = getUnique(property, Boolean.class);
441         return value != null && value.booleanValue();
442     }
443 
444     /**
445      * Determines whether the property specified has been set to false. The method fails if the
446      * property has multiple values or has a non-boolean type; if this behaviour is not desired,
447      * use {@link #getUnique(URI, Class, Object)} specifying {@code Boolean.class} as the class
448      * and an appropriate default value to be returned in case of failure.
449      * 
450      * @param property
451      *            the property to read
452      * @return true if the property is set to false; false if the property has no value or has
453      *         been set to true
454      * @throws IllegalStateException
455      *             in case the property has multiple values
456      * @throws IllegalArgumentException
457      *             in case the property value is not of boolean type
458      */
459     public boolean isFalse(final URI property) throws IllegalStateException,
460             IllegalArgumentException {
461         final Boolean value = getUnique(property, Boolean.class);
462         return value != null && !value.booleanValue();
463     }
464 
465     /**
466      * Returns the number of values assigned to the property specified. Calling this method may be
467      * faster that using {@link #get(URI)}.
468      * 
469      * @param property
470      *            the property
471      * @return the number of values
472      */
473     public synchronized int count(final URI property) {
474         return doCount(property);
475     }
476 
477     /**
478      * Returns the unique {@code Object} value of a property, or null if it has no value. Note
479      * that this method fails if the property has multiple values; if this is not the desired
480      * behaviour, use {@link #getUnique(URI, Class, Object)} supplying an appropriate type (could
481      * be {@code Object.class}) and default value to be returned in case of failure.
482      * 
483      * @param property
484      *            the property to read
485      * @return the unique {@code Object} value of the property; null if it has no value
486      * @throws IllegalStateException
487      *             in case the property has multiple values
488      */
489     @Nullable
490     public Object getUnique(final URI property) throws IllegalStateException {
491         return getUnique(property, Object.class);
492     }
493 
494     /**
495      * Returns the unique value of the property converted to an instance of a certain class, or
496      * null if the property has no value. Note that this method fails if the property has multiple
497      * values or its unique value cannot be converted to the requested class; if this is not the
498      * desired behavior, use {@link #getUnique(URI, Class, Object)} supplying an appropriate
499      * default value to be returned in case of failure.
500      * 
501      * @param property
502      *            the property to read
503      * @param valueClass
504      *            the class to convert the value to
505      * @param <T>
506      *            the type of result
507      * @return the unique value of the property, converted to the class specified; null if the
508      *         property has no value
509      * @throws IllegalStateException
510      *             in case the property has multiple values
511      * @throws IllegalArgumentException
512      *             in case the unique property value cannot be converted to the class specified
513      */
514     @SuppressWarnings("unchecked")
515     @Nullable
516     public <T> T getUnique(final URI property, final Class<T> valueClass)
517             throws IllegalStateException, IllegalArgumentException {
518         final Object result;
519         synchronized (this) {
520             result = doGet(property, valueClass);
521         }
522         if (result == null) {
523             return null;
524         } else if (result instanceof List<?>) {
525             final List<T> list = (List<T>) result;
526             final StringBuilder builder = new StringBuilder("Expected one value for property ")
527                     .append(property).append(", found ").append(list.size()).append(" values: ");
528             for (int i = 0; i < Math.min(3, list.size()); ++i) {
529                 builder.append(i > 0 ? ", " : "").append(list.get(i));
530             }
531             builder.append(list.size() > 3 ? ", ..." : "");
532             throw new IllegalStateException(builder.toString());
533         } else {
534             return (T) result;
535         }
536     }
537 
538     /**
539      * Returns the unique value of the property converted to an instance of a certain class, or
540      * the default value supplied in case of failure.
541      * 
542      * @param property
543      *            the property to read
544      * @param valueClass
545      *            the class to convert the value to
546      * @param defaultValue
547      *            the default value to return in case the property has no value
548      * @param <T>
549      *            the type of result
550      * @return the unique value of the property converted to the class specified, on success; the
551      *         default value supplied in case the property has no value, has multiple values or
552      *         its unique value cannot be converted to the class specified
553      */
554     @Nullable
555     public <T> T getUnique(final URI property, final Class<T> valueClass,
556             @Nullable final T defaultValue) {
557         try {
558             final T value = getUnique(property, valueClass);
559             return value == null ? defaultValue : value;
560         } catch (final IllegalStateException ex) {
561             return defaultValue;
562         } catch (final IllegalArgumentException ex) {
563             return defaultValue;
564         }
565     }
566 
567     /**
568      * Returns the {@code Object} values of the property specified.
569      * 
570      * @param property
571      *            the property to read
572      * @return an immutable list with the {@code Object} values of the property, without
573      *         repetitions, in no particular order and possibly empty
574      */
575     public List<Object> get(final URI property) {
576         return get(property, Object.class);
577     }
578 
579     /**
580      * Returns the values of the property converted to instances of a certain class. Note that
581      * this method fails if conversion is not possible for one or more of the property values; if
582      * this is not the desired behavior, use {@link #get(URI, Class, List)} specifying an
583      * appropriate default value to be returned in case of conversion failure.
584      * 
585      * @param property
586      *            the property to read
587      * @param valueClass
588      *            the class values have to be converted to
589      * @param <T>
590      *            the type of property values
591      * @return an immutable list with the values of the property, converted to the class
592      *         specified, possibly empty
593      * @throws IllegalArgumentException
594      *             in case one of the property values cannot be converted to the class specified
595      */
596     @SuppressWarnings("unchecked")
597     public <T> List<T> get(final URI property, final Class<T> valueClass)
598             throws IllegalArgumentException {
599         final Object result;
600         synchronized (this) {
601             result = doGet(property, valueClass);
602         }
603         if (result == null) {
604             return ImmutableList.of();
605         } else if (result instanceof List<?>) {
606             return (List<T>) result;
607         } else {
608             return ImmutableList.of((T) result);
609         }
610     }
611 
612     /**
613      * Returns the values of the property converted to instances of a certain class, or the
614      * default value supplied in case of failure or if the property has no values.
615      * 
616      * @param property
617      *            the property to read
618      * @param valueClass
619      *            the class values have to be converted to
620      * @param defaultValue
621      *            the default value to return in case conversion fails
622      * @param <T>
623      *            the type of property values
624      * @return an immutable list with the values of the property, converted to the class specified
625      *         and possibly empty, on success; the default value supplied in case the property has
626      *         no value or conversion fails for some value
627      */
628     public <T> List<T> get(final URI property, final Class<T> valueClass,
629             final List<T> defaultValue) {
630         try {
631             final List<T> values = get(property, valueClass);
632             return values.isEmpty() ? defaultValue : values;
633         } catch (final IllegalArgumentException ex) {
634             return defaultValue;
635         }
636     }
637 
638     /**
639      * Sets the values of the property specified. The method accepts one or more objects as the
640      * values; these objects can be {@code Record}s, {@code URI}s, {@code BNode}s,
641      * {@code Statement}s, {@code Literal}s, objects convertible to {@code Literal} or any array
642      * or iterable of the former types. Setting a property to null has the effect of clearing it.
643      * 
644      * @param property
645      *            the property to set
646      * @param first
647      *            the first value, array or iterable of values to set, possibly null
648      * @param other
649      *            additional values, arrays or iterables of values to set (if specified, will be
650      *            merged with {@code first}).
651      * @return this record object, for call chaining
652      * @throws IllegalArgumentException
653      *             if one of the supplied values has an unsupported type
654      */
655     public Record set(final URI property, @Nullable final Object first, final Object... other)
656             throws IllegalArgumentException {
657         Preconditions.checkNotNull(property);
658         final Set<Object> values = Sets.<Object>newHashSet();
659         Data.normalize(first, values);
660         Data.normalize(other, values);
661         synchronized (this) {
662             doSet(property, values);
663         }
664         return this;
665     }
666 
667     /**
668      * Adds one or more values to the property specified. The method accepts one or more objects
669      * as the values; these objects can be {@code Record}s, {@code URI}s, {@code BNode}s,
670      * {@code Statement}s, {@code Literal}s, objects convertible to {@code Literal} or any array
671      * or iterable of the former types.
672      * 
673      * @param property
674      *            the property to modify
675      * @param first
676      *            the first value, array or iterable of values to add, possibly null
677      * @param other
678      *            additional values, arrays or iterables of values to set (if specified, will be
679      *            merged with {@code first}).
680      * @return this record object, for call chaining
681      * @throws IllegalArgumentException
682      *             if one of the supplied values has an unsupported type
683      */
684     public Record add(final URI property, @Nullable final Object first, final Object... other)
685             throws IllegalArgumentException {
686         Preconditions.checkNotNull(property);
687         final List<Object> added = Lists.newArrayList();
688         Data.normalize(first, added);
689         Data.normalize(other, added);
690         if (!Iterables.isEmpty(added)) {
691             synchronized (this) {
692                 final Set<Object> values = Sets.newHashSet(get(property));
693                 final boolean changed = values.addAll(added);
694                 if (changed) {
695                     doSet(property, values);
696                 }
697             }
698         }
699         return this;
700     }
701 
702     /**
703      * Removes one or more values from the property specified. The method accepts one or more
704      * objects as the values; these objects can be {@code Record}s, {@code URI}s, {@code BNode}s,
705      * {@code Statement}s, {@code Literal}s, objects convertible to {@code Literal} or any array
706      * or iterable of the former types.
707      * 
708      * @param property
709      *            the property to modify
710      * @param first
711      *            the first value, array or iterable of values to remove, possibly null
712      * @param other
713      *            additional values, arrays or iterables of values to remove (if specified, will
714      *            be merged with {@code first}).
715      * @return this record object, for call chaining
716      * @throws IllegalArgumentException
717      *             if one of the supplied values has an unsupported type
718      */
719     public Record remove(final URI property, @Nullable final Object first, final Object... other)
720             throws IllegalArgumentException {
721         Preconditions.checkNotNull(property);
722         final List<Object> removed = Lists.newArrayList();
723         Data.normalize(first, removed);
724         Data.normalize(other, removed);
725         if (!removed.isEmpty()) {
726             synchronized (this) {
727                 final Set<Object> values = Sets.newHashSet(get(property));
728                 final boolean changed = values.removeAll(removed);
729                 if (changed) {
730                     doSet(property, values);
731                 }
732             }
733         }
734         return this;
735     }
736 
737     /**
738      * Retains only the properties specified, clearing the remaining ones. Note that the ID is not
739      * affected.
740      * 
741      * @param properties
742      *            an array with the properties to retain, possibly empty (in which case all the
743      *            stored properties will be cleared)
744      * @return this record object, for call chaining
745      */
746     public synchronized Record retain(final URI... properties) {
747         for (final URI property : doGetProperties()) {
748             boolean retain = false;
749             for (int i = 0; i < properties.length; ++i) {
750                 if (property.equals(properties[i])) {
751                     retain = true;
752                     break;
753                 }
754             }
755             if (!retain) {
756                 doSet(property, ImmutableSet.<Object>of());
757             }
758         }
759         return this;
760     }
761 
762     /**
763      * Clears the properties specified, or all the stored properties if no property is specified.
764      * Note that the ID is not affected.
765      * 
766      * @param properties
767      *            an array with the properties to retain, possibly empty (in which case all the
768      *            stored properties will be cleared)
769      * @return this record object, for call chaining
770      */
771     public synchronized Record clear(final URI... properties) {
772         final List<URI> propertiesToClear;
773         if (properties == null || properties.length == 0) {
774             propertiesToClear = doGetProperties();
775         } else {
776             propertiesToClear = Arrays.asList(properties);
777         }
778         for (final URI property : propertiesToClear) {
779             doSet(property, ImmutableSet.<Object>of());
780         }
781         return this;
782     }
783 
784     /**
785      * {@inheritDoc} Comparison is based on the record IDs only.
786      */
787     @Override
788     public int compareTo(final Record other) {
789         final URI thisID = getID();
790         final URI otherID = other.getID();
791         if (thisID == null) {
792             return otherID == null ? 0 : -1;
793         } else {
794             return otherID == null ? 1 : thisID.stringValue().compareTo(otherID.stringValue());
795         }
796     }
797 
798     /**
799      * {@inheritDoc} Two records are equal if they have the same IDs.
800      */
801     @Override
802     public boolean equals(final Object object) {
803         if (object == this) {
804             return true;
805         }
806         if (!(object instanceof Record)) {
807             return false;
808         }
809         final Record other = (Record) object;
810         return Objects.equal(getID(), other.getID());
811     }
812 
813     /**
814      * {@inheritDoc} The returned hash code depends only on the record ID.
815      */
816     @Override
817     public int hashCode() {
818         return Objects.hashCode(getID());
819     }
820 
821     /**
822      * Computes a string-valued hash code of the properties specified, or of all the available
823      * properties, if no URI is specified. Order of selected properties and order of values of
824      * each property do not matter. A cryptographic hash function is used. Collision probability
825      * is negligible. This method can be used to check whether two records have the same (subsets
826      * of) properties, by computing and comparing the respective hashes.
827      * 
828      * @param properties
829      *            the properties to hash.
830      * @return the computed hash code
831      */
832     public synchronized String hash(final URI... properties) {
833         final List<URI> propertiesToHash;
834         if (properties == null || properties.length == 0) {
835             propertiesToHash = doGetProperties();
836         } else {
837             propertiesToHash = Arrays.asList(properties);
838         }
839         final Hasher hasher = Hashing.md5().newHasher();
840         for (final URI property : propertiesToHash) {
841             final Object object = doGet(property, Object.class);
842             @SuppressWarnings("unchecked")
843             final Iterable<Object> nodes = object instanceof List<?> ? (List<Object>) object
844                     : ImmutableList.of(object);
845             for (final Object node : ((Ordering<Object>) Data.getTotalComparator())
846                     .sortedCopy(nodes)) {
847                 // TODO: this is not efficient! add Node.toBytes
848                 hasher.putString(Data.toString(node, null, true), Charsets.UTF_16LE);
849             }
850             hasher.putByte((byte) 0);
851         }
852         final StringBuilder builder = new StringBuilder(16);
853         final byte[] bytes = hasher.hash().asBytes();
854         int max = 52;
855         for (int i = 0; i < bytes.length; ++i) {
856             final int n = (bytes[i] & 0x7F) % max;
857             if (n < 26) {
858                 builder.append((char) (65 + n));
859             } else if (n < 52) {
860                 builder.append((char) (71 + n));
861             } else {
862                 builder.append((char) (n - 4));
863             }
864             max = 62;
865         }
866         return builder.toString();
867     }
868 
869     /**
870      * Returns a string representation of the record, optionally using the namespaces supplied and
871      * emitting record properties. This method extends {@code #toString()}, optionally allowing to
872      * emit also record properties and, recursively, properties of records nested in this record.
873      * 
874      * @param namespaces
875      *            the prefix-to-namespace mappings to be used when emitting property and value
876      *            URIs; if null, only non-abbreviated, full URIs will be emitted
877      * @param includeProperties
878      *            true if record properties should be emitted too
879      * @return a string representation of the record, computed based on the
880      *         {@code includeProperties} setting
881      */
882     public synchronized String toString(@Nullable final Map<String, String> namespaces,
883             final boolean includeProperties) {
884         final URI id = getID();
885         final String base = "Record " + (id == null ? "<no id>" : Data.toString(id, namespaces));
886         if (!includeProperties) {
887             return base;
888         }
889         final Integer oldIndent = INDENT_LEVEL.get();
890         try {
891             final int indent = oldIndent == null ? 1 : oldIndent + 1;
892             INDENT_LEVEL.set(indent + 1);
893             final StringBuilder builder = new StringBuilder(base).append(" {");
894             String propertySeparator = "\n";
895             final Ordering<Object> ordering = Ordering.from(Data.getTotalComparator());
896             for (final URI property : ordering.sortedCopy(doGetProperties())) {
897                 builder.append(propertySeparator).append(Strings.repeat(INDENT_STRING, indent));
898                 builder.append(Data.toString(property, namespaces));
899                 builder.append(" = ");
900                 final List<Object> values = ordering.sortedCopy(get(property));
901                 String valueSeparator = values.size() == 1 ? "" : "\n"
902                         + Strings.repeat(INDENT_STRING, indent + 1);
903                 for (final Object value : values) {
904                     builder.append(valueSeparator).append(Data.toString(value, namespaces, true));
905                     valueSeparator = ",\n" + Strings.repeat(INDENT_STRING, indent + 1);
906                 }
907                 propertySeparator = ";\n";
908             }
909             builder.append(" }");
910             return builder.toString();
911         } finally {
912             INDENT_LEVEL.set(oldIndent);
913         }
914     }
915 
916     /**
917      * {@inheritDoc} The returned string contains only the ID of the record.
918      */
919     @Override
920     public String toString() {
921         return toString(null, false);
922     }
923 
924     /**
925      * Performs record-to-RDF encoding by converting a stream of records in a stream of RDF
926      * statements. Parameter {@code types} specify additional types to be added to encoded
927      * records. Type information may be set to null (e.g., because unknown at the time the method
928      * is called): in this case, it will be read from metadata attribute {@code "types"} attached
929      * to the stream; reading will happen just before decoding will take place, i.e., when a
930      * terminal stream operation will be called.
931      * 
932      * @param stream
933      *            the stream of records to encode.
934      * @param types
935      *            the types to be added to each record of the stream, null if to be read from
936      *            stream metadata
937      * @return the resulting stream of statements
938      */
939     @SuppressWarnings("unchecked")
940     public static Stream<Statement> encode(final Stream<? extends Record> stream,
941             @Nullable final Iterable<? extends URI> types) {
942         Preconditions.checkNotNull(stream);
943         if (types != null) {
944             stream.setProperty("types", types);
945         }
946         final Stream<Record> records = (Stream<Record>) stream;
947         return records.transform(null, new Function<Handler<Statement>, Handler<Record>>() {
948 
949             @Override
950             public Handler<Record> apply(final Handler<Statement> handler) {
951                 final Iterable<? extends URI> types = stream.getProperty("types", Iterable.class);
952                 return new Encoder(handler, types);
953             }
954 
955         });
956     }
957 
958     /**
959      * Performs RDF-to-record decoding by converting a stream of RDF statements in a stream of
960      * records. Parameter {@code types} specify the types of records that have to be extracted
961      * from the statement stream, while parameter {@code chunked} specifies whether the input
962      * statement stream is chunked, i.e., organized as a sequence of statement chunks with each
963      * chunk containing the statements for a record (and its nested records). Chunked RDF streams
964      * noticeably speed up decoding, and are always produced by the KnowledgeStore API. Type and
965      * chunking information may be set to null (e.g., because unknown at the time the method is
966      * called): in this case, they will be read from metadata attributes attached to the stream,
967      * named {@code "types"} and {@code "chunked"}; reading will happen just before decoding will
968      * take place, i.e., when a terminal stream operation will be called.
969      * 
970      * @param stream
971      *            the stream of statements to decode
972      * @param types
973      *            the types of records to extract from the statement stream, null if to be read
974      *            from stream metadata
975      * @param chunked
976      *            true if the input statement stream is chunked, null if to be read from stream
977      *            metadata
978      * @return the resulting stream of records
979      */
980     public static Stream<Record> decode(final Stream<Statement> stream,
981             @Nullable final Iterable<? extends URI> types, @Nullable final Boolean chunked) {
982         Preconditions.checkNotNull(stream);
983         if (types != null) {
984             stream.setProperty("types", types);
985         }
986         if (chunked != null) {
987             stream.setProperty("chunked", chunked);
988         }
989         return stream.transform(null, new Function<Handler<Record>, Handler<Statement>>() {
990 
991             @SuppressWarnings("unchecked")
992             @Override
993             public Handler<Statement> apply(final Handler<Record> handler) {
994                 final Iterable<? extends URI> types = stream.getProperty("types", Iterable.class);
995                 final Boolean chunked = stream.getProperty("chunked", Boolean.class);
996                 return new Decoder(handler, types, chunked);
997             }
998 
999         });
1000     }
1001 
1002     private static class Encoder implements Handler<Record> {
1003 
1004         private final Handler<? super Statement> handler;
1005 
1006         private final Set<URI> types;
1007 
1008         Encoder(final Handler<? super Statement> handler, final Iterable<? extends URI> types) {
1009             this.handler = Preconditions.checkNotNull(handler);
1010             this.types = ImmutableSet.copyOf(types);
1011         }
1012 
1013         @Override
1014         public void handle(final Record record) throws Throwable {
1015             if (record != null) {
1016                 emit(record, getID(record), true);
1017             } else {
1018                 this.handler.handle(null);
1019             }
1020         }
1021 
1022         private void emit(final Record record, final URI subject, final boolean addType)
1023                 throws Throwable {
1024 
1025             if (addType) {
1026                 for (final URI type : this.types) {
1027                     emit(subject, RDF.TYPE, type);
1028                 }
1029             }
1030 
1031             final List<URI> properties = record.getProperties();
1032             final List<Record> subRecords = Lists.newArrayList();
1033 
1034             for (final URI property : properties) {
1035                 final List<Object> values = record.get(property);
1036                 for (final Object value : values) {
1037                     if (value instanceof Value) {
1038                         final Value v = (Value) value;
1039                         if (!addType || !property.equals(RDF.TYPE) || !this.types.contains(v)) {
1040                             emit(subject, property, v);
1041                         }
1042                     } else if (value instanceof Record) {
1043                         final Record rv = (Record) value;
1044                         emit(subject, property, getID(rv));
1045                         subRecords.add(rv);
1046                     } else if (value instanceof Statement) {
1047                         final Statement s = (Statement) value;
1048                         final URI id = hash(s);
1049                         emit(subject, property, id);
1050                         emit(id, RDF.SUBJECT, s.getSubject());
1051                         emit(id, RDF.PREDICATE, s.getPredicate());
1052                         emit(id, RDF.OBJECT, s.getObject());
1053                     } else {
1054                         throw new Error("Unexpected type for value: " + value);
1055                     }
1056                 }
1057             }
1058 
1059             for (final Record subRecord : subRecords) {
1060                 emit(subRecord, getID(subRecord), false);
1061             }
1062         }
1063 
1064         private void emit(final Resource s, final URI p, final Value o) throws Throwable {
1065             this.handler.handle(Data.getValueFactory().createStatement(s, p, o));
1066         }
1067 
1068         private URI hash(final Statement statement) {
1069             return Data.getValueFactory().createURI("triples:" + Data.hash(statement.toString()));
1070         }
1071 
1072         private URI getID(final Record record) {
1073             final URI id = record.getID();
1074             if (id == null) {
1075                 return Data.getValueFactory().createURI("bnode:" + record.hash());
1076             }
1077             return id;
1078         }
1079 
1080     }
1081 
1082     private static class Decoder implements Handler<Statement> {
1083 
1084         private final Handler<? super Record> handler;
1085 
1086         private final Set<URI> types;
1087 
1088         private final boolean chunked;
1089 
1090         private final UUID uuid;
1091 
1092         private final Map<URI, Node> nodes;
1093 
1094         private final List<Node> roots;
1095 
1096         private Node current;
1097 
1098         Decoder(final Handler<? super Record> handler, final Iterable<? extends URI> types,
1099                 final boolean chunked) {
1100             this.handler = Preconditions.checkNotNull(handler);
1101             this.types = ImmutableSet.copyOf(types);
1102             this.chunked = chunked;
1103             this.uuid = UUID.randomUUID(); // for skolemization
1104             this.nodes = this.chunked ? Maps.<URI, Node>newLinkedHashMap() : Maps
1105                     .<URI, Node>newHashMap();
1106             this.roots = Lists.newArrayList();
1107             this.current = null;
1108         }
1109 
1110         @Override
1111         public void handle(final Statement statement) throws RDFHandlerException {
1112 
1113             if (statement == null) {
1114                 flush(true);
1115                 return;
1116             }
1117 
1118             final Statement s = skolemize(statement);
1119 
1120             final URI subj = (URI) s.getSubject();
1121             final URI pred = s.getPredicate();
1122             final Value obj = s.getObject();
1123 
1124             if (this.current == null || !this.current.id().equals(subj)) {
1125                 this.current = this.nodes.get(subj);
1126                 if (this.current == null) {
1127                     this.current = new Node(subj);
1128                     this.nodes.put(subj, this.current);
1129                 }
1130             }
1131 
1132             this.current.add(s);
1133 
1134             if (pred.equals(RDF.TYPE) && this.types.contains(obj)) {
1135                 this.current.mark();
1136                 if (this.chunked && !this.roots.isEmpty()) {
1137                     flush(false);
1138                     final URI threshold = this.roots.get(this.roots.size() - 1).id();
1139                     final Iterator<URI> iterator = this.nodes.keySet().iterator();
1140                     while (true) {
1141                         final URI id = iterator.next();
1142                         iterator.remove();
1143                         if (id.equals(threshold)) {
1144                             break;
1145                         }
1146                     }
1147                     this.roots.clear();
1148                 }
1149                 this.roots.add(this.current);
1150             }
1151         }
1152 
1153         private Statement skolemize(final Statement statement) {
1154             boolean skolemized = false;
1155             Resource subj = statement.getSubject();
1156             if (subj instanceof BNode) {
1157                 subj = skolemize((BNode) subj);
1158                 skolemized = true;
1159             }
1160             Value obj = statement.getObject();
1161             if (obj instanceof BNode) {
1162                 obj = skolemize((BNode) obj);
1163                 skolemized = true;
1164             }
1165             if (skolemized) {
1166                 final URI pred = statement.getPredicate();
1167                 return Data.getValueFactory().createStatement(subj, pred, obj);
1168             }
1169             return statement;
1170         }
1171 
1172         private URI skolemize(final BNode bnode) {
1173             final String hash = Data.hash(this.uuid.getLeastSignificantBits(),
1174                     this.uuid.getMostSignificantBits(), bnode.getID());
1175             return Data.getValueFactory().createURI("bnode:" + hash);
1176         }
1177 
1178         private void flush(final boolean complete) throws RDFHandlerException {
1179             try {
1180                 final List<Node> queue = Lists.newLinkedList();
1181                 for (final Node root : this.roots) {
1182                     final Record record = (Record) root.visit(root, queue);
1183                     while (!queue.isEmpty()) {
1184                         final Node node = queue.remove(0);
1185                         node.complete(root, this.nodes, queue);
1186                     }
1187                     this.handler.handle(record);
1188                     if (Thread.interrupted()) {
1189                         throw new RDFHandlerException("Interrupted");
1190                     }
1191                 }
1192                 if (complete) {
1193                     this.handler.handle(null);
1194                 }
1195             } catch (final Throwable ex) {
1196                 Throwables.propagateIfPossible(ex, RDFHandlerException.class);
1197                 throw new RDFHandlerException(ex);
1198             }
1199         }
1200 
1201         private static class Node {
1202 
1203             private final URI id;
1204 
1205             private final List<Statement> statements;
1206 
1207             private Object value;
1208 
1209             private Node root;
1210 
1211             private boolean reified;
1212 
1213             private boolean result;
1214 
1215             Node(final URI id) {
1216                 this.id = id;
1217                 this.statements = Lists.newArrayList();
1218                 this.result = false;
1219             }
1220 
1221             URI id() {
1222                 return this.id;
1223             }
1224 
1225             void mark() {
1226                 this.result = true;
1227             }
1228 
1229             void add(final Statement statement) {
1230                 this.statements.add(statement);
1231                 final URI pred = statement.getPredicate();
1232                 this.reified = this.reified || pred.equals(RDF.SUBJECT)
1233                         || pred.equals(RDF.PREDICATE) || pred.equals(RDF.OBJECT);
1234             }
1235 
1236             Object visit(final Node root, final List<Node> queue) {
1237                 if (this.root != root) {
1238                     this.root = root;
1239                     if (this.reified) {
1240                         this.value = unreify();
1241                     } else {
1242                         this.value = Record.create((URI) this.statements.get(0).getSubject());
1243                         queue.add(this); // register in the queue so to fill the record next
1244                     }
1245                     return !this.result || this == root ? this.value : this.statements.get(0)
1246                             .getSubject();
1247                 } else if (this.value instanceof Statement) {
1248                     return this.value;
1249                 }
1250                 return this.statements.get(0).getSubject();
1251             }
1252 
1253             void complete(final Node root, final Map<URI, Node> nodes, final List<Node> queue) {
1254 
1255                 final Record record = (Record) this.value;
1256 
1257                 URI property = null;
1258                 final List<Object> values = Lists.newArrayList();
1259 
1260                 Collections.sort(this.statements, Data.getTotalComparator());
1261                 for (final Statement statement : this.statements) {
1262                     if (!statement.getPredicate().equals(property)) {
1263                         if (property != null) {
1264                             record.set(property, values);
1265                         }
1266                         property = statement.getPredicate();
1267                         values.clear();
1268                     }
1269                     Object value = statement.getObject();
1270                     if (value instanceof URI) {
1271                         final Node n = nodes.get(value);
1272                         if (n != null) {
1273                             value = n.visit(root, queue);
1274                         }
1275                     }
1276                     values.add(value);
1277                 }
1278                 record.set(property, values);
1279             }
1280 
1281             private Statement unreify() {
1282                 Resource subj = null;
1283                 URI pred = null;
1284                 Value obj = null;
1285                 for (final Statement statement : this.statements) {
1286                     final URI property = statement.getPredicate();
1287                     if (property.equals(RDF.SUBJECT)) {
1288                         subj = (Resource) statement.getObject();
1289                     } else if (property.equals(RDF.PREDICATE)) {
1290                         pred = (URI) statement.getObject();
1291                     } else if (property.equals(RDF.OBJECT)) {
1292                         obj = statement.getObject();
1293                     }
1294                 }
1295                 return Data.getValueFactory().createStatement(subj, pred, obj);
1296             }
1297 
1298         }
1299 
1300     }
1301 
1302 }