1   package eu.fbk.knowledgestore.datastore;
2   
3   import java.io.IOException;
4   import java.util.Map;
5   import java.util.Set;
6   
7   import javax.annotation.Nullable;
8   
9   import org.openrdf.model.URI;
10  
11  import eu.fbk.knowledgestore.data.Record;
12  import eu.fbk.knowledgestore.data.Stream;
13  import eu.fbk.knowledgestore.data.XPath;
14  import eu.fbk.knowledgestore.runtime.DataCorruptedException;
15  import eu.fbk.knowledgestore.vocabulary.KS;
16  
17  /**
18   * A {@code DataStore} transaction.
19   * <p>
 * A {@code DataTransaction} is a unit of work over the contents of a {@link DataStore} that
 * provides atomicity (i.e., changes are either completely stored or discarded), isolation (i.e.,
 * other transactions do not see the modifications of this transaction) and durability (i.e.,
 * changes are persisted across different program runs) guarantees.
24   * </p>
25   * <p>
 * A {@code DataTransaction} supports the following features:
27   * </p>
28   * <ul>
29   * <li>Lookup of records by ID, via method {@link #lookup(URI, Set, Set)};</li>
30   * <li>Matching of records based on type and optional condition, consisting either in the
31   * retrieval of (selected properties of) matching records (method
32   * {@link #retrieve(URI, XPath, Set)}) or of their count (method {@link #count(URI, XPath)});</li>
33   * <li>Matching of record combinations (method {@link #match(Map, Map, Map)});</li>
34   * <li>Storing and deletion of single records (methods {@link #store(URI, Record)},
35   * {@link #delete(URI, URI)}).</li>
36   * </ul>
37   * <p>
 * Note that the latter modification methods are not available for read-only transactions (an
 * {@link IllegalStateException} is thrown in that case); moreover, they can return to the caller
 * even if the operation has not yet completed (e.g., due to buffering), in which case it is
 * however guaranteed that subsequent read operations will be able to see the newly written data.
 * For all methods accepting a type URI parameter, that parameter can only be one of the
 * supported record types listed in {@link DataStore#SUPPORTED_TYPES}.
44   * </p>
45   * <p>
 * Transactions are terminated via {@link #end(boolean)}, whose parameter specifies whether
 * changes should be committed or not (this doesn't matter for read-only transactions). Method
 * {@code end()} always has the effect of terminating the transaction: if it throws an exception,
 * a rollback must be assumed, even if a commit was requested. In case the JVM is abruptly shut
 * down during a transaction, the effects of the transaction should be the same as if a rollback
 * was performed. As a particular case of {@code IOException}, method {@code end()} may throw a
 * {@link DataCorruptedException} in case neither a commit nor a rollback was possible and the
 * {@code DataStore} is left in some unpredictable state with no possibility of automatic
 * recovery.
55   * </p>
56   * <p>
 * {@code DataTransaction} objects are not required to be thread safe. Access by at most one
 * thread at a time is guaranteed externally. However, it must be allowed for operations to be
 * issued while streams from previous operations are still open; if a stream is open and a write
 * operation is performed that affects one of the objects still to be returned by the stream (or
 * makes an object returnable/not returnable by the stream), then the stream is allowed to return
 * either the previous state of the object or the new state.
63   * </p>
64   */
65  public interface DataTransaction {
66  
67      /**
68       * Returns a stream of records having the type and IDs specified.
69       * 
70       * @param type
71       *            the URI of the type of records to return
72       * @param ids
73       *            a set with the IDs of the records to return, not to be modified by the method
74       * @param properties
75       *            a set with the properties to return for matching records, not modified by the
76       *            method; if null, all the available properties must be returned
77       * @return a stream with the records matching the IDs and type specified, possibly empty and
78       *         in no particular order
79       * @throws IOException
80       *             in case some IO error occurs
81       * @throws IllegalArgumentException
82       *             in case the type specified is not supported
83       * @throws IllegalStateException
84       *             if the {@code DataTransaction} has been already ended
85       */
86      Stream<Record> lookup(URI type, Set<? extends URI> ids, @Nullable Set<? extends URI> properties)
87              throws IOException, IllegalArgumentException, IllegalStateException;
88  
89      /**
90       * Returns a stream of records having the type and matching the optional condition specified.
91       * 
92       * @param type
93       *            the URI of the type of records to return
94       * @param condition
95       *            an optional condition to be satisfied by matching records; if null, no condition
96       *            must be checked
97       * @param properties
98       *            a set with the properties to return for matching records, not modified by the
99       *            method; if null, all the available properties must be returned
100      * @return a stream over the records matching the condition and type specified, possibly empty
101      *         and in no particular order
102      * @throws IOException
103      *             in case some IO error occurs
104      * @throws IllegalArgumentException
105      *             in case the type specified is not supported
106      * @throws IllegalStateException
107      *             if the {@code DataTransaction} has been already ended
108      */
109     Stream<Record> retrieve(URI type, @Nullable XPath condition,
110             @Nullable Set<? extends URI> properties) throws IOException, IllegalArgumentException,
111             IllegalStateException;
112 
113     /**
114      * Counts the records having the type and matching the optional condition specified. This
115      * method performs similarly to {@link #retrieve(URI, XPath, Set)}, but returns only the
116      * number of matching instances instead of retrieving the corresponding {@code Record}
117      * objects.
118      * 
119      * @param type
120      *            the URI of the type of records to return
121      * @param condition
122      *            an optional condition to be satisfied by matching records; if null, no condition
123      *            must be checked
124      * @return the number of records matching the optional condition and type specified
125      * @throws IOException
126      *             in case some IO error occurs
127      * @throws IllegalArgumentException
128      *             in case the type specified is not supported
129      * @throws IllegalStateException
130      *             if the {@code DataTransaction} has been already ended
131      */
132     long count(URI type, @Nullable XPath condition) throws IOException, IllegalArgumentException,
133             IllegalStateException;
134 
135     /**
136      * Evaluates a {@code match} request with the parameters supplied. The operation:
137      * <ol>
138      * <li>Considers all the combinations {@code <resource, mention, entity, axiom>} such that
139      * <ul>
140      * <li>{@code mention} {@link KS#MENTION_OF} {@code resource};</li>
141      * <li>{@code mention} {@link KS#REFERS_TO} {@code entity} (optional if no condition or
142      * projection on entities);</li>
143      * <li>{@code mention} {@link KS#EXPRESSES} {@code axiom} (optional if no condition or
144      * projection on axioms).</li>
145      * </ul>
146      * </li>
147      * <li>Filters the combinations so that optional {@code conditions} / {@code ids} selections
148      * on resource, mention, entity and axiom components are satisfied.</li>
149      * <li>Perform projection with duplicate removal of filtered combinations, keeping only the
150      * components occurring in {@code properties.keySet()}, returning for each component the
151      * subset of properties of {@code properties.get(component_type_URI)}.</li>
152      * </ol>
153      * In the maps supplied as parameters, components are identified by their type URI, that is
154      * {@link KS#RESOURCE}, {@link KS#MENTION}, {@link KS#ENTITY} and {@link KS#AXIOM}.
155      * 
156      * @param conditions
157      *            a non-null map with optional component conditions, indexed by the component type
158      *            URI; note the map may be possibly empty or contain conditions only for a subset
159      *            of components
160      * @param ids
161      *            a non-null map with optional ID selections for different components, indexed by
162      *            the component type URI; note the map may be possibly empty or contain selections
163      *            only for a subset of components
164      * @param properties
165      *            a non-null, non-empty map with the properties to return for different
166      *            components, indexed by the component type URI; if the set of property URIs
167      *            mapped to a component is null or empty, then all the properties of the component
168      *            should be returned; if a component is not referenced in the map, then it must
169      *            not be returned
170      * @return a {@code Stream} of combination records
171      * @throws IOException
172      *             in case some IO error occurs
173      * @throws IllegalStateException
174      *             if the {@code DataTransaction} has been already ended
175      */
176     Stream<Record> match(Map<URI, XPath> conditions, final Map<URI, Set<URI>> ids,
177             final Map<URI, Set<URI>> properties) throws IOException, IllegalStateException;
178 
179     /**
180      * Stores a record in the {@code DataStore}. A record may or may not exist for the same ID; in
181      * case it exists, it is replaced by the newly specified record. In case the method call
182      * returns successfully, there is no guarantee that the write operation completed (e.g.,
183      * because of internal buffering); however, it is guaranteed (e.g., via internal flushing)
184      * that read operations called subsequently will see the result of the modification. In case
185      * the method call fails with an {@code IOException}, there is no guarantee that the
186      * {@code DataStore} is left in the same state it was at the time of calling.
187      * 
188      * @param type
189      *            the URI of the type of record to store, not null
190      * @param record
191      *            the record to store, not null
192      * @throws IOException
193      *             in case the operation failed, with no guarantee that the {@code DataStore} is
194      *             left in the same state if was when the method was called; note that this
195      *             exception may trigger a rollback on the caller side
196      * @throws IllegalStateException
197      *             if the {@code DataTransaction} has been already ended, or if it is read-only
198      */
199     void store(URI type, Record record) throws IOException, IllegalStateException;
200 
201     /**
202      * Deletes the record stored in the {@code DataStore} with the ID specified.A record may or
203      * may not be stored for the specified ID; in case it exists, it is deleted by the operation.
204      * In case the method call returns successfully, there is no guarantee that the write
205      * operation completed (e.g., because of internal buffering); however, it is guaranteed (e.g.,
206      * via internal flushing) that read operations called subsequently will see the result of the
207      * modification. In case the method call fails with an {@code IOException}, there is no
208      * guarantee that the {@code DataStore} is left in the same state it was at the time of
209      * calling.
210      * 
211      * @param type
212      *            the URI of the type of record to store, not null
213      * @param id
214      *            the ID of the record to delete, not null
215      * @throws IOException
216      *             in case the operation failed, with no guarantee that the {@code DataStore} is
217      *             left in the same state if was when the method was called; note that this
218      *             exception may trigger a rollback on the caller side
219      * @throws IllegalStateException
220      *             if the {@code DataTransaction} has been already ended, or if it is read-only
221      */
222     void delete(URI type, URI id) throws IOException, IllegalStateException;
223 
224     /**
225      * Ends the transaction, either committing or rolling back its changes (if any). This method
226      * always tries to terminate the transaction: if commit is requested but fails, a rollback is
227      * forced by the method and an {@code IOException} is thrown. If it is not possible either to
228      * commit or rollback, then the {@code DataStore} is possibly left in an unknown state and a
229      * {@code DataCorruptedException} is thrown to signal a data corruption situation that cannot
230      * be automatically recovered.
231      * 
232      * @param commit
233      *            <tt>true</tt> in case changes made by the transaction should be committed
234      * @throws IOException
235      *             in case some IO error occurs or the commit request cannot be satisfied for any
236      *             reason; it is however guaranteed that a forced rollback has been performed
237      * @throws DataCorruptedException
238      *             in case it was not possible either to commit or rollback, which implies the
239      *             state of the {@code DataStore} is unknown and automatic recovery is not
240      *             possible (hence, data is corrupted)
241      * @throws IllegalStateException
242      *             if the {@code DataTransaction} has been already ended
243      */
244     void end(boolean commit) throws DataCorruptedException, IOException, IllegalStateException;
245 
246 }
247 
248 // DESIGN NOTES
249 //
// XXX the 'union' merge criterion has a natural mapping in a HBase layout where the value is
// incorporated in the column name; when writing an attribute, this layout avoids the need to
// retrieve the previous values of an attribute in order to do the merge and compute the new
// values, which is more efficient in case a large number of values can be associated to the
// attribute; however, we do not expect this to be the case (apart from the 'isReferredBy'
// attribute, that is not stored however) -> ignoring this consideration, it seems OK to move all
// the logic related to merge criteria in the frontend
257 //
// XXX Coprocessors could be used in order to implement the merge and update primitives (the
// latter via a custom CoprocessorProtocol); still, they would need to implement: merge criteria,
// validation, update of related objects (e.g., to manipulate bidirectional relations). If we
// avoid coprocessors, then the KS server (= HBase client) would need to fetch the previous values
// for the object being modified and handle merge criteria, validation and enforcing of
// bidirectionality locally. This would require an additional communication between the KS server
// and the affected region server(s), whose cost depends on round-trip latency and bandwidth. We
// may ignore bandwidth (100+MBits/sec in a LAN) and use batching techniques (HBase batch calls)
// to distribute latency (~1ms) over multiple calls, so as to make it almost irrelevant. By
// adopting this approach, the benefits of using coprocessors seem greatly outweighed by their far
// greater implementation costs, hence they are not adopted
269 //
// XXX AggregateClient can be used to implement count (in future, we may extract more elaborate
// statistics introducing some kind of 'stats' primitive and a corresponding coprocessor)
272 //
// XXX an alternative way to delete records would be something like delete(condition), which would
// allow deleting a bunch of objects satisfying a condition without first retrieving them; still,
// it is unlikely the frontend may delete objects without looking at their data and fixing related
// objects, so a retrieval would still be needed in most cases; given also that delete
// performance is not as important as the performance of other operations, the decision is to
// stick with delete(object) which seems simpler to implement