1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
//! COLT from Wang/Willsey/Suciu

use std::hash::Hash;

use variadics::variadic_collections::VariadicCollection;
use variadics::{var_expr, var_type, PartialEqVariadic, SplitBySuffix, VariadicExt};

use crate::ght::{GeneralizedHashTrieNode, GhtGet, GhtInner, GhtLeaf};

/// Data structure design for our COLT is unique.
///
/// In the paper, the COLT is an unbalanced trie that "grows upward" from leaves lazily
/// on access via the `force` method.
/// Unfortunately, unbalanced tries break our types: a node's type to be defined via the
/// type of its children, recursively -- meaning all paths need to be the same type (and length)!
///
/// To work around this, our COLT is a variadic *list* GHTs (a forest) of increasing height,
/// starting with a trie of height 0 and continuing until a trie of height |key| - 1.
/// Our `force` method does not add a node above a leaf L as in the paper. Instead
/// it `take`s L from the current trie and merges it into the next trie to the right which is 1 taller.
//
/// The following trait provides the behavior we need from the nodes in a COLT forest. Every
/// `ColtForestNode` is a `GeneralizedHashTrieNode` with some extra methods.
pub trait ColtForestNode: GeneralizedHashTrieNode {
    /// result of `force`ing a node
    type Force: GeneralizedHashTrieNode;

    /// Force the generation of a parent node, as in the Wang/Willsey/Suciu COLT structure,
    /// to be merged into the next trie to the right.
    fn force(self) -> Option<Self::Force>;

    /// Force the generation of a parent node but retain ref to this node
    fn force_drain(&mut self) -> Option<Self::Force>;
}

// Force only acts on leaves
impl<Head, Node> ColtForestNode for GhtInner<Head, Node>
where
    Head: 'static + Hash + Eq + Clone,
    Node: 'static + ColtForestNode,
    <Node as GeneralizedHashTrieNode>::Schema:
        SplitBySuffix<var_type!(Head,  ...<Node as GeneralizedHashTrieNode>::SuffixSchema)>,
{
    type Force = Node; // where Node:GeneralizedHashTrieNode;
    fn force(self) -> Option<Self::Force> {
        None
    }

    fn force_drain(&mut self) -> Option<Self::Force> {
        None
    }
}

// Leaf case
impl<Schema, Head, Rest, Storage> ColtForestNode
    for GhtLeaf<Schema, var_type!(Head, ...Rest), Storage>
where
    Head: 'static + Clone + Hash + Eq,
    Rest: 'static + Clone + Hash + Eq + VariadicExt,
    Schema: 'static + Hash + Eq + Clone + VariadicExt + PartialEqVariadic,
    Rest: PartialEqVariadic,
    Schema: SplitBySuffix<var_type!(Head, ...Rest)>,
    Schema: SplitBySuffix<Rest>,
    <Schema as SplitBySuffix<(Head, Rest)>>::Prefix: Eq + Hash + Clone,
    <Schema as SplitBySuffix<Rest>>::Prefix: Eq + Hash + Clone,
    Storage: VariadicCollection<Schema = Schema> + Default + IntoIterator<Item = Schema>,
    GhtLeaf<Schema, Rest, Storage>: GeneralizedHashTrieNode<Schema = Schema, Storage = Storage>,
    GhtInner<Head, GhtLeaf<Schema, Rest, Storage>>:
        GeneralizedHashTrieNode<Schema = Schema, Storage = Storage>,
{
    type Force = GhtInner<Head, GhtLeaf<Schema, Rest, Storage>>;
    fn force(mut self) -> Option<Self::Force> {
        let mut retval = Self::Force::default();
        self.forced = true;
        for row in self.into_iter().unwrap() {
            retval.insert(row);
        }
        Some(retval)
    }

    fn force_drain(&mut self) -> Option<GhtInner<Head, GhtLeaf<Schema, Rest, Storage>>> {
        let mut retval = Self::Force::default();
        self.forced = true;
        for row in self.elements.drain() {
            retval.insert(row);
        }
        Some(retval)
    }
}

/// Emulate the `get` and iter` functions for a single Ght node
/// [`GhtGet`] across a forest of ColtForestNodes.
///
/// The "current" ColtGet node (corresponding to the "current" GhtGet node) at depth
/// d from the root is a variadic list of nodes, each at depth d in its their
/// respective trie in the forest, Tries of height d or smaller are omitted,
/// hence the first element in any ColtGet is a GhtLeaf.
pub trait ColtGet {
    /// Schema variadic: the schema of the relation stored in this COLT.
    /// This type is the same in all Tries and nodes of the COLT.
    type Schema: VariadicExt + Eq + Hash + Clone;
    /// The type of Storage
    /// This type is the same in all Tries and nodes of the COLT
    type Storage: VariadicCollection;
    /// SuffixSchema variadic: the suffix of the schema *from this node of the trie
    /// downward*. The first entry in this variadic is of type Head.
    /// This type is the same in all Tries of the COLT (but changes as we traverse downward)
    type SuffixSchema: VariadicExt + Eq + Hash + Clone;
    /// The type of the first column in the SuffixSchema
    /// This type is the same in all Tries of the COLT (but changes as we traverse downward)
    type Head: Eq + Hash;

    /// Type returned by [`Self::get`].
    type Get;

    /// Following the spec in Wang/Willsey/Suciu, on an Inner node this retrieves the value
    /// (child) associated with the given "head" key. It returns an `Option` containing a
    /// reference to the value if found, or `None` if not found.
    /// On a Leaf node, returns None.
    fn get(self, head: &Self::Head) -> Self::Get;

    /// Iterator for the "head" keys (from inner nodes) or nothing (from leaf nodes).
    fn iter(&self) -> impl Iterator<Item = Self::Head>;
}

/// `ColtGet` without the first (head) trie.
pub trait ColtGetTail<InnerToMerge>: ColtGet {
    /// merge an inner node into the head of this tail of the forest
    fn merge(&mut self, inner_to_merge: InnerToMerge);
}

impl<'a, Rest, Schema, SuffixSchema, Storage> ColtGet for var_type!(&'a mut GhtLeaf<Schema, SuffixSchema, Storage>, ...Rest)
where
    Rest: ColtGetTail<
        <GhtLeaf<Schema, SuffixSchema, Storage> as ColtForestNode>::Force,
        Storage = Storage,
    >,
    <Rest as ColtGet>::SuffixSchema: 'a,
    GhtLeaf<Schema, SuffixSchema, Storage>: ColtForestNode,
    Schema: Clone + Hash + Eq + VariadicExt,
    SuffixSchema: Clone + Hash + Eq + VariadicExt,
    Storage: VariadicCollection<Schema = Schema>,
{
    type Schema = Schema;
    type Head = Rest::Head;
    type SuffixSchema = SuffixSchema;
    type Get = Rest::Get;
    type Storage = Rest::Storage;

    fn get(self, head: &Self::Head) -> Self::Get {
        let (first, mut rest) = self;
        let forced = first.force_drain().unwrap();
        ColtGetTail::merge(&mut rest, forced);
        Rest::get(rest, head)
    }

    fn iter(&self) -> impl Iterator<Item = Self::Head> {
        std::iter::empty()
    }
}

// we only merge in GhtInner<Head, GhtLeaf<_>> nodes, so this
// should never be called.
impl<'a, Rest, Schema, SuffixSchema, T, Storage> ColtGetTail<T> for var_type!(&'a mut GhtLeaf<Schema, SuffixSchema, Storage>, ...Rest)
where
    Rest: ColtGetTail<
        <GhtLeaf<Schema, SuffixSchema, Storage> as ColtForestNode>::Force,
        Storage = Storage,
    >,
    <Rest as ColtGet>::SuffixSchema: 'a,
    GhtLeaf<Schema, SuffixSchema, Storage>: ColtForestNode,
    Schema: Clone + Hash + Eq + VariadicExt,
    SuffixSchema: Clone + Hash + Eq + VariadicExt,
    Storage: VariadicCollection<Schema = Schema>,
{
    fn merge(&mut self, _inner_to_merge: T) {
        panic!();
    }
}

impl<'a, Head, Head2, Rest, Node> ColtGet for var_type!(&'a mut GhtInner<Head, GhtInner<Head2, Node>>, ...Rest)
where
    Rest: ColtGet<Head = Head>,
    Head: Eq + Hash + Clone,
    Head2: Eq + Hash + Clone,
    Node: GeneralizedHashTrieNode,
    GhtInner<Head, GhtInner<Head2, Node>>: GeneralizedHashTrieNode<
        Head = Rest::Head,
        SuffixSchema = Rest::SuffixSchema,
        Schema = Rest::Schema,
        Storage = Rest::Storage,
    >,
    GhtInner<Head2, Node>: GeneralizedHashTrieNode<Schema = Rest::Schema, Storage = Rest::Storage>,
{
    type Schema = Rest::Schema;
    type Head = Rest::Head;
    type SuffixSchema = Rest::SuffixSchema;
    type Get = var_type!(&'a mut GhtInner<Head2, Node>, ...Rest::Get);
    type Storage = Rest::Storage;

    fn get(self, head: &Self::Head) -> Self::Get {
        let (first, rest) = self;
        // create a child entry here for this get, to absorb future forces
        // TODO(mingwei): extra clone here if entry already exists.
        let child = first.children.entry(head.clone()).or_default();
        var_expr!(child, ...Rest::get(rest, head))
    }

    fn iter(&self) -> impl Iterator<Item = Self::Head> {
        self.0.children.keys().cloned().chain(Rest::iter(&self.1))
    }
}

impl<'a, Head, Rest, Schema, ValType, Storage> ColtGet for var_type!(&'a mut GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>, ...Rest)
where
    Rest: ColtGet<Head = Head>,
    Head: Eq + Hash + Clone,
    Schema: Eq + Hash + Clone + PartialEqVariadic,
    ValType: Eq + Hash + Clone + PartialEqVariadic,
    Storage: VariadicCollection<Schema = Schema>,
    GhtLeaf<Schema, ValType, Storage>: GeneralizedHashTrieNode,
    Schema: 'static + Eq + VariadicExt + Hash + Clone + SplitBySuffix<ValType> + PartialEqVariadic,
    <Schema as SplitBySuffix<ValType>>::Prefix: Eq + Hash + Clone,
    GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>:
        GeneralizedHashTrieNode<Head = Head> + GhtGet,
    GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>:
        GeneralizedHashTrieNode<Head = Rest::Head, Schema = Rest::Schema, Storage = Rest::Storage>,
    GhtLeaf<Schema, ValType, Storage>:
        GeneralizedHashTrieNode<Schema = Rest::Schema, Storage = Rest::Storage> + GhtGet,
{
    type Schema = Rest::Schema;
    type Head = Rest::Head;
    type SuffixSchema = Rest::SuffixSchema;
    type Get = var_type!(&'a mut GhtLeaf<Schema, ValType, Storage>, ...Rest::Get);
    type Storage = Rest::Storage;

    fn get(self, head: &Self::Head) -> Self::Get {
        let (first, rest) = self;
        let child = first.children.entry(head.clone()).or_default();
        var_expr!(child, ...Rest::get(rest, head))
    }

    fn iter(&self) -> impl Iterator<Item = Self::Head> {
        self.0.children.keys().cloned().chain(Rest::iter(&self.1))
    }
}

impl<'a, Head, Rest, Schema, ValType, Storage>
    ColtGetTail<GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>> for var_type!(&'a mut GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>, ...Rest)
where
    Rest: ColtGet<Head = Head, Schema = Schema, Storage = Storage>,
    Head: Eq + Hash + Clone,
    Schema: Eq + Hash + Clone + PartialEqVariadic,
    ValType: Eq + Hash + Clone + PartialEqVariadic,
    Storage: VariadicCollection<Schema = Schema>,
    var_type!(&'a mut GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>, ...Rest):
        ColtGet<Head = Head, Schema = Schema, Storage = Storage>,
    GhtLeaf<Schema, ValType, Storage>: GeneralizedHashTrieNode<Schema = Schema>,
    Schema: 'static + Eq + VariadicExt + Hash + Clone + SplitBySuffix<ValType> + PartialEqVariadic,
    <Schema as SplitBySuffix<ValType>>::Prefix: Eq + Hash + Clone,
    GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>:
        GeneralizedHashTrieNode<Head = Head, Schema = Schema, Storage = Storage> + GhtGet,
{
    fn merge(&mut self, inner_to_merge: GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>) {
        let (head, _rest) = self;
        // can't use Merge with COLT bc columnstore is not a lattice!!
        head.merge_node(inner_to_merge);
    }
}

impl<'a, Head, Node> ColtGet for var_type!(&'a mut GhtInner<Head, Node>)
where
    GhtInner<Head, Node>: GeneralizedHashTrieNode,
    Head: Clone + Eq + Hash,
    Node: GeneralizedHashTrieNode,
{
    type Schema = <GhtInner<Head, Node> as GeneralizedHashTrieNode>::Schema;
    type SuffixSchema = <GhtInner<Head, Node> as GeneralizedHashTrieNode>::SuffixSchema;
    type Head = Head;
    type Get = var_type!(&'a mut Node);
    type Storage = Node::Storage;

    fn get(self, head: &Self::Head) -> Self::Get {
        let child = self.0.children.entry(head.clone()).or_default();
        var_expr!(child)
    }

    fn iter(&self) -> impl Iterator<Item = Self::Head> {
        self.0.children.keys().cloned()
    }
}
impl<Head, Schema, ValType, Storage> ColtGetTail<GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>> for var_type!(&mut GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>)
where
    GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>:
        GeneralizedHashTrieNode<Head = Head> + GhtGet,
    GhtLeaf<Schema, ValType, Storage>: GeneralizedHashTrieNode<Schema = Schema, Storage = Storage>,
    Head: Clone + Eq + Hash,
    Schema: Clone + Eq + Hash + VariadicExt,
    Storage: VariadicCollection<Schema = Schema>,
{
    fn merge(&mut self, inner_to_merge: GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>) {
        let (head, _rest) = self;
        // can't use Merge with COLT bc columnstore is not a lattice!!
        head.merge_node(inner_to_merge);
    }
}