1 module universal.core.coproduct;
2 
3 @("EXAMPLES") unittest
4 {
5 	import std.stdio;
6 	import std.exception;
7 
8 	auto u1 = Union!(
9 		q{a}, int,
10 		q{b}, int,
11 	)();
12 
13 	/*
14 		default initializes to first constructor
15 	*/
16 	assert(u1.a == 0);
17 
18 	/*
19 		accessing uninhabited ctor asserts
20 	*/
21 	assertThrown!Error(u1.b);
22 
23 	/*
24 		capable of builder-like syntax
25 	*/
26 	auto u2 = typeof(u1)().a(100);
27 	assert(u2.a == 100);
28 
29 	/*
30 		the inhabited ctor is conveniently checkable
31 	*/
32 	assert(u2.isCase!q{a});
33 	assert(! u2.isCase!q{b});
34 	/*
35 		note: the checked ctor must be a member of the union
36 	*/
37 	assert(!__traits(compiles, u2.isCase!q{none}));
38 
39 	/*
40 		unions are writable in-place
41 	*/
42 	u1.b = 9;
43 	assertThrown!Error(u1.a);
44 	assert(u1.b == 9);
45 
46 	/*
47 		so beware of accidental assignment
48 	*/
49 	auto u3 = u1.a(7);
50 	assert(u3.a == 7);
51 	assert(u1.a == 7);
52 
53 	/*
54 		unions without named fields identify their injections by a numerical index.
55 	*/
56 	auto u4 = Union!(int, int, string)();
57 	u4.inj!0(2);
58 	assert(u4.isCase!0);
59 	u4.inj!2("hello");
60 	assert(u4.isCase!2);
61 
62 	/*
63 		the generic universal coproduct function, visit
64 	*/
65 	assert(u1.visit!(
66 		q{a}, a => 100 * a,
67 		q{b}, b => 0,
68 	) == 700);
69 
70 	/*
71 		unions can be visited without naming fields, in this case functions are matched to injections by declaration order
72 	*/
73 	assert(u4.visit!(
74 		a => 100 * a,
75 		a => 50 * a,
76 		a => 0,
77 	) == 0);
78 
79 	/*
80 		for expressiveness and type safety, custom unions are easy to build
81 		using the Definition mixin also enables recursive data types
82 			note: this example is a class for expository purposes
83 			we could have just as easily done this with a struct and its pointer
84 	*/
85 	static class List(A) { mixin UnionInstance!(q{nil}, q{cons}, A, List); }
86 
87 	/*
88 		functions over unions can then be declared as recursive aliases
89 	*/
90 	alias sum = visit!(
91 		q{nil},  () => 0,
92 		q{cons}, (y,ys) => y + sum(ys)
93 	);
94 
95 	/*
96 		for convenience, we define helper ctors (generally a good practice in D)
97 	*/
98 	List!int nil() { return new List!int().nil; }
99 	List!int cons(int x, List!int xs) { return new List!int().cons(x, xs); }
100 
101 	assert(sum(nil) == 0);
102 	assert(sum(cons(42, nil)) == 42);
103 	assert(sum(cons(1, cons(5, cons(7, nil)))) == 13);
104 
105 	/*
106 		unions can also represent classification results
107 	*/
108 	with(
109 		2.classify!(
110 			q{odd}, a => a % 2,
111 			q{even}, _=> true,
112 		)
113 	) assert(even);
114 
115 	with(
116 		3.classify!(
117 			q{odd}, a => a % 2,
118 			q{even}, _=> true,
119 		)
120 	) assert(odd);
121 }
122 
123 /*
124 	This module implements the coproduct of normalized D types, here called Union, also known as disjoint union, tagged union, sum type, variant, Algebraic, or ADT.
125 
126 	A Union is basically a portable switch. Whereas Tuple contains several values at once, Union contains one value of several possible values.
127 
128 	This pattern is simple, useful, and commonplace. D doesn't quite support them out-of-the-box, though. There's the union keyword, which behaves like C's union, but it's only really useful when combined with a tag denoting which of the possible fields is actually inhabited. There's a std.variant module implementing this pattern, but it doesn't meet my needs for a number of reasons, which mainly revolve around expressiveness and introspective reasoning.
129 
130 	Expressiveness is important for unions for the same reason it's important for tuples - if you use them at all, you use them a lot. And unions pair well with tuples - with the former, you encapsulate "possibility" (specifically, a branch in your program), and with the latter, "multiplicity". Between the two, they can comfortably and concisely express a lot of ideas, and their simplicity (when taken individually) allows them to compose well.
131 
132 	In D, tuples are already a little awkward to use. I try to address that in the core.product module. I wanted to do the same for unions, but found that I couldn't really just add on top of Algebraic the way I did with Tuple. I want (1) unions to be as expressive as tuples, and (2) uniform expressive power with all functions involving unions and tuples.
133 
134 	First, to put "expressiveness" of a "pattern" on some solid ground, let's call it the number of instances of the template encapsulating that pattern.
135 
136 	In order to achieve goal (2), this implementation is based on the categorical product and coproduct. The definitions are the most minimal and generic definitions for these patterns, and so it stands to reason that, if uniform expressiveness is achieved throughout the definition, it can probably be extended to every other thing you could possibly do with it.
137 
138 	Tuple is an instance of a product while Algebraic and Union are coproducts. Products and corproducts are duals, which is probably why they pair so well together. The practical upshot of this is that they have a lot of inherent symmetries, and symmetries can be exploited to save work, complexity, and lines of code.
139 
140 	Looking at the diagrams in the definitions for product and coproduct on wikipedia, you can make the following identifications:
141 		In both diagrams, X₁,X₂,... identify the template parameters to Tuple or Algebraic.
142 		For the coproduct, the canonical injections are the constructors of Algebraic. f is visit, and f₁,f₂,... are the arguments to visit.
143 		For the product, the canonical projections are indexing each of the tuple elements, f is adjoin, and f₁,f₂,... are the arguments to adjoin.
144 
145 	Again, the goal is to make those identifications to be equal in expressiveness. So, how expressive is Tuple?
146 	There's one type of tuple for every possible list of types, at least. But in Phobos, tuples can have named fields, which are essentially aliasing the canonical projections. While otherwise identical tuples with different field names are practically isomorphic in D in many contexts, they do still represent a different type. So the tuple's expressiveness is equal to the number of lists of types, multiplied by every possible assignment of field names to the elements of those lists.
147 
148 	The templates Tuple and tuple can both be supplied named fields. But adjoin can't, and so there are less instances of adjoin than there are of tuple or Tuple. It falls short of its potential expressiveness. This is the reason for reimplementing it in core.product.
149 
150 	Algebraic also doesn't take field names, and so is immediately less expressive than Tuple. In addition, visit matches on types, and does not accept two functions with the same domain. This means that Algebraic's expressiveness is equal to the number of lists of types without repetition. Still infinite, but far less than Tuple.
151 
152 	So, if I were to lean heavily on Algebraic to be the union in a tuple/union combo, I wouldn't be able to propagate field name information, which hampers the scope of application for traits-based interfaces.  In the case of something like known in advance to be important, like ".length" in range, the definition is propagated from the top down. But take, say, a range representing some signal sampled in time, which has a "frequency" property. You may want to pass it through stride to perform some rough resampling, but you will lose frequency information. To handle cases like this, you either need some uniformity, an incredibly complex introspection mechanism, or to resign yourself to writing boilerplate.
153 
154 	Algebraic also can't naturally support recursive data types; it uses This, a special symbol defined which signals to the implementation of Algebraic to replace it, during compilation, with typeof(this). It's not that big of a deal, but it adds a little extra weight and, if the union is implemented a different way, it is unnecessary.
155 
156 	To put achieve goal (1), visit's matching behavior is completely different. For unions with unnamed fields, Union!(X1,X2...), each X represents a different case, even if some Xi == Xj. visit!(f1,f2,...) then matches f1 to case X1, and so on.
157 	Out-of-order matching is also possible. Unions can be given field names, which act as canonical injections when being set, and assert inhabitance upon being read. Each function argument of visit can be preceded by the injection's name, and this will override the index-based matching.
158 	Total Union expressiveness is the number of lists of any types, multiplied by every assignment of field names. Equal to the tuple.
159 
160 	For convenience, if the fields are named, they act as delimeters for the types, meaning that n-ary injections (and by extension, cases) can be defined without repeating Tuple over and over (and then relying on the normalization system).
161 
162 	To enable natural recursive data type definitions (as well as uniquely typed unions), the Union definition is available as a mixin template, UnionInstance. The custom union type (or its pointer type) can be passed as a type argument as though it were defined directly in the custom type. Functions meant to operate on unions will also operate on the custom union, while preserving its type and field names when applicable.
163 
164 	PS. I think that the main reason Algebraic falls short of my needs is because it is conflating two processes that I need separated: matching functions to arguments, and representing a union type. The latter is accomplished here, while the former is accomplished in core.match.
165 
166 */
167 template Union(ctors...)
168 {
169 	struct Union { mixin UnionInstance!ctors; }
170 }
171 mixin template UnionInstance(ctors...)
172 {
173 	union { mixin(Union.CodeGen.declUnion); }
174 	ubyte tag;
175 
176 	mixin(Union.CodeGen.declInjections);
177 
178 	static if(is(typeof(this) == struct))
179 		string toString() { return Union.toString(this); }
180 	static if(is(typeof(this) == class))
181 		override string toString() { return Union.toString(this); }
182 
183 	static struct Union
184 	{
185 		import std.meta;
186 		import universal.meta;
187 		import std.typetuple;
188 		import std.typecons;
189 		import std.range;
190 		import universal.core.product;
191 		import universal.core.apply;
192 		import std.conv : text;
193 		import std.format : format;
194 
195 		static:
196 
197 		alias ctorNames = Filter!(isString, ctors);
198 		enum width = ctorNames.length > 0? ctorNames.length : ctors.length;
199 
200 		template Args(uint i)
201 		{
202 			static if(ctorNames.length > 0)
203 			{
204 				enum idx(uint i) = staticIndexOf!(ctorNames[i], ctors);
205 
206 				static if(i == ctorNames.length-1)
207 					alias Args = ctors[ idx!i +1 .. $         ];
208 				else
209 					alias Args = ctors[ idx!i +1 .. idx!(i+1) ];
210 			}
211 			else
212 			{
213 				alias Args = ctors[i];
214 
215 			}
216 		}
217 		alias InArg(uint i) = Tuple!(Args!i);
218 		alias OutArg(uint i) = Universal!(Args!i);
219 
220 		template inj(uint i)
221 		{
222 			template inj(U) if(is(U.Union == Union))
223 			{
224 				auto ref U inj(auto ref U u, Args!i a)
225 				{
226 					mixin(text(q{u._}, i)) 
227 					= Tuple!(Args!i)(a);
228 
229 					u.tag = i;
230 
231 					return u;
232 				}
233 			}
234 		}
235 		template invInj(uint i)
236 		{
237 			template invInj(U) if(is(U.Union == Union))
238 			{
239         string errMsg(uint j)
240         {
241           static if(ctorNames.length > 0)
242             return format(
243               "%s accessed %s while inhabited by %s",
244               U.stringof, ctorNames[i], [ctorNames][j], 
245             );
246           else
247             return format(
248               "%s accessed case %d while inhabited by case %d",
249               U.stringof, i, j
250             );
251         }
252 
253 				auto ref OutArg!i invInj(auto ref U u) 
254         in{ assert(u.tag == i, errMsg(u.tag)); }body
255 				{ return mixin(text(q{u._}, i))[].identity; }
256 			}
257 		}
258 
259 		static string toString(U)(auto ref U u)
260 		if(is(U.Union == Union))
261 		{
262 			static if(ctorNames.length > 0)
263 				auto header = text(
264 					ctorNames.only[u.tag], ": ",
265 				);
266 			else
267 				enum header = "";
268 			
269 			return text(header, u.ulift!text); 
270 		}
271 
272 		struct CodeGen
273 		{
274             import std.range: iota;
275 
276 			enum declUnion = declare!unionDecl;
277 			enum declInjections = declare!injectionDecl;
278 
279 			enum declare(alias decl) = [staticMap!(decl, aliasSeqOf!(Union.width.iota))].join("\n");
280 
281 			static unionDecl(uint i)()
282 			{
283 				return format(q{
284 					Union.InArg!%d _%d;
285 				}, i, i);
286 			}
287 			static injectionDecl(uint i)()
288 			{
289 				static if(ctorNames.length > 0)
290 					enum name = ctorNames[i];
291 				else
292 					enum name = format(q{inj(uint i : %d)}, i);
293 
294 				auto injection = format(q{
295 					typeof(this) %s(Union.Args!%d args)
296 					{ return Union.inj!%d(this, args); }
297 				}, name, i, i);
298 
299 				auto inverse = format(q{
300 					Union.OutArg!%d %s()
301 					{ return Union.invInj!%d(this); }
302 				}, i, name, i);
303 
304 				return [ // avoid ambiguous setter definition for nullary ctors
305 					injection,
306 					InArg!i.length == 0 || is(OutArg!i == Unit)? 
307 						"" : inverse
308 				].join("\n");
309 			}
310 		}
311 	}
312 }
313 
314 /*
315 	Canonical injection function. For structs, injection constructs a new value. For class types, it operates in place. For in-place injection on some struct `x`, use `x.inj!i`.
316 */
317 template inject(uint i)
318 {
319 	template inject(U, A...) if(is(U.Union))
320 	{
321 		U inject(U u, A a)
322 		{ return U.Union.inj!i(U(), a); }
323 	}
324 }
325 
326 /*
327 	Universal property function. Takes a set of functions and applies the one which matches the inhabited value of the union. If the arguments to `visit` are interleaved with strings, the matching is performed against the union's field names. Otherwise, the functions are matched to injections by the order of their respective declarations (NOT by type as is the case with `std.variant.visit`).
328 */
329 template visit(dtors...)
330 {
331   import std.typetuple;
332   import std.typecons;
333   import universal.meta;
334   import universal.core.apply;
335   import std.range;
336 
337   alias dtorNames = Filter!(isString, dtors);
338   alias dtorFuncs = Filter!(isParameterized, dtors);
339 
340 	template visit(U, A...) if(is(U.Union))
341 	{
342 		auto visit(U u, A args)
343 		{
344 			with(U.Union)
345 			foreach(i; aliasSeqOf!(width.iota))
346 				if(i == u.tag)
347 				{
348 					static if(dtorNames.length > 0)
349 						enum j = staticIndexOf!(ctorNames[i], dtorNames);
350 					else
351 						enum j = i;
352 
353 					static assert(size_t(j) < width, format(
354 						"couldn't find %s among %s",
355 						ctorNames[i], [dtorNames].join(", ")
356 					));
357 
358 					return U.Union.invInj!i(u).apply!(dtorFuncs[j])(args);
359 				}
360 
361 			assert(0);
362 		}
363 	}
364 }
365 
366 /*
367 	Classifies a value according to the first given predicate which it matches.
368 	The classification is represented by a union inhabited by the injection associated (either by name or declaration order) with the predicate that it passed.
369 	Strings can be intereleaved in the predicates to name the fields of the tuple. 
370 	If the value cannot be classified, an assertion is thrown. To define a default classification which cannot fail, return true from the last predicate.
371 */
372 template classify(preds...)
373 {
374   import std.typetuple;
375   import universal.meta;
376   import universal.core.apply;
377 
378   alias predNames = Filter!(isString, preds);
379   alias predFuncs = Filter!(isParameterized, preds);
380 
381 	template classify(A...)
382 	{
383 		alias U = Union!(DeclSpecs!(
384 			predNames, Repeat!(predFuncs.length, Universal!A)
385 		));
386 
387 		U classify(A args)
388 		{
389 			U u;
390 
391 			foreach(i, pred; predFuncs)
392 				if(pred(args))
393 					return u.inject!i(args);
394 
395 			assert(0, "no predicates match");
396 		}
397 	}
398 }
399 
400 /*
401 	Returns true if the union is inhavited by the specified injection. If the injection is identified by a number, this refers to its order of declaration in the union. Otherwise the injection can be identified by name.
402 */
403 template isCase(uint ctor)
404 {
405 	template isCase(U) if(is(U.Union))
406 	{
407 		enum i = ctor;
408 
409 		bool isCase(U u) { return u.tag == i; }
410 	}
411 }
412 template isCase(string ctor)
413 {
414 	import std.meta;
415 	import std.range;
416 	import std.algorithm.searching;
417 	import std.typecons;
418 
419 	template isCase(U) if(is(U.Union) && is(typeof((){ static assert(U.Union.ctorNames.length > 0); })))
420 	{
421 		enum i = [U.Union.ctorNames].countUntil!(name => name == ctor);
422 		static assert(i > -1, ctor~" is not a case of "~[U.Union.ctorNames].stringof);
423 
424 		bool isCase(U u) { return u.tag == i; }
425 	}
426 }
427 
428 /*
429 	Applies a function (more likely a template) to whichever value inhabits the union.
430 	This helper function is useful for meta things, like diagnostic printing.
431 */
432 template ulift(alias f)
433 {
434   import universal.meta;
435   import std.meta;
436 
437   template ulift(U, A...) if(is(U.Union))
438   {
439     auto ulift(U u, A a)
440     { return u.visit!(Repeat!(U.Union.width, f))(a); }
441   }
442 }