1 module universal.core.coproduct; 2 3 @("EXAMPLES") unittest 4 { 5 import std.stdio; 6 import std.exception; 7 8 auto u1 = Union!( 9 q{a}, int, 10 q{b}, int, 11 )(); 12 13 /* 14 default initializes to first constructor 15 */ 16 assert(u1.a == 0); 17 18 /* 19 accessing uninhabited ctor asserts 20 */ 21 assertThrown!Error(u1.b); 22 23 /* 24 capable of builder-like syntax 25 */ 26 auto u2 = typeof(u1)().a(100); 27 assert(u2.a == 100); 28 29 /* 30 the inhabited ctor is conveniently checkable 31 */ 32 assert(u2.isCase!q{a}); 33 assert(! u2.isCase!q{b}); 34 /* 35 note: the checked ctor must be a member of the union 36 */ 37 assert(!__traits(compiles, u2.isCase!q{none})); 38 39 /* 40 unions are writable in-place 41 */ 42 u1.b = 9; 43 assertThrown!Error(u1.a); 44 assert(u1.b == 9); 45 46 /* 47 so beware of accidental assignment 48 */ 49 auto u3 = u1.a(7); 50 assert(u3.a == 7); 51 assert(u1.a == 7); 52 53 /* 54 unions without named fields identify their injections by a numerical index. 55 */ 56 auto u4 = Union!(int, int, string)(); 57 u4.inj!0(2); 58 assert(u4.isCase!0); 59 u4.inj!2("hello"); 60 assert(u4.isCase!2); 61 62 /* 63 the generic universal coproduct function, visit 64 */ 65 assert(u1.visit!( 66 q{a}, a => 100 * a, 67 q{b}, b => 0, 68 ) == 700); 69 70 /* 71 unions can be visited without naming fields, in this case functions are matched to injections by declaration order 72 */ 73 assert(u4.visit!( 74 a => 100 * a, 75 a => 50 * a, 76 a => 0, 77 ) == 0); 78 79 /* 80 for expressiveness and type safety, custom unions are easy to build 81 using the Definition mixin also enables recursive data types 82 note: this example is a class for expository purposes 83 we could have just as easily done this with a struct and its pointer 84 */ 85 static class List(A) { mixin UnionInstance!(q{nil}, q{cons}, A, List); } 86 87 /* 88 functions over unions can then be declared as recursive aliases 89 */ 90 alias sum = visit!( 91 q{nil}, () => 0, 92 q{cons}, (y,ys) => y + sum(ys) 93 ); 94 95 /* 96 for convenience, we define helper ctors (generally a good practice in D) 97 */ 98 List!int nil() { return new List!int().nil; } 99 List!int cons(int x, List!int xs) { return new List!int().cons(x, xs); } 100 101 assert(sum(nil) == 0); 102 assert(sum(cons(42, nil)) == 42); 103 assert(sum(cons(1, cons(5, cons(7, nil)))) == 13); 104 105 /* 106 unions can also represent classification results 107 */ 108 with( 109 2.classify!( 110 q{odd}, a => a % 2, 111 q{even}, _=> true, 112 ) 113 ) assert(even); 114 115 with( 116 3.classify!( 117 q{odd}, a => a % 2, 118 q{even}, _=> true, 119 ) 120 ) assert(odd); 121 } 122 123 /* 124 This module implements the coproduct of normalized D types, here called Union, also known as disjoint union, tagged union, sum type, variant, Algebraic, or ADT. 125 126 A Union is basically a portable switch. Whereas Tuple contains several values at once, Union contains one value of several possible values. 127 128 This pattern is simple, useful, and commonplace. D doesn't quite support them out-of-the-box, though. There's the union keyword, which behaves like C's union, but it's only really useful when combined with a tag denoting which of the possible fields is actually inhabited. There's a std.variant module implementing this pattern, but it doesn't meet my needs for a number of reasons, which mainly revolve around expressiveness and introspective reasoning. 129 130 Expressiveness is important for unions for the same reason it's important for tuples - if you use them at all, you use them a lot. And unions pair well with tuples - with the former, you encapsulate "possibility" (specifically, a branch in your program), and with the latter, "multiplicity". Between the two, they can comfortably and concisely express a lot of ideas, and their simplicity (when taken individually) allows them to compose well. 131 132 In D, tuples are already a little awkward to use. I try to address that in the core.product module. I wanted to do the same for unions, but found that I couldn't really just add on top of Algebraic the way I did with Tuple. I want (1) unions to be as expressive as tuples, and (2) uniform expressive power with all functions involving unions and tuples. 133 134 First, to put "expressiveness" of a "pattern" on some solid ground, let's call it the number of instances of the template encapsulating that pattern. 135 136 In order to achieve goal (2), this implementation is based on the categorical product and coproduct. The definitions are the most minimal and generic definitions for these patterns, and so it stands to reason that, if uniform expressiveness is achieved throughout the definition, it can probably be extended to every other thing you could possibly do with it. 137 138 Tuple is an instance of a product while Algebraic and Union are coproducts. Products and corproducts are duals, which is probably why they pair so well together. The practical upshot of this is that they have a lot of inherent symmetries, and symmetries can be exploited to save work, complexity, and lines of code. 139 140 Looking at the diagrams in the definitions for product and coproduct on wikipedia, you can make the following identifications: 141 In both diagrams, X₁,X₂,... identify the template parameters to Tuple or Algebraic. 142 For the coproduct, the canonical injections are the constructors of Algebraic. f is visit, and f₁,f₂,... are the arguments to visit. 143 For the product, the canonical projections are indexing each of the tuple elements, f is adjoin, and f₁,f₂,... are the arguments to adjoin. 144 145 Again, the goal is to make those identifications to be equal in expressiveness. So, how expressive is Tuple? 146 There's one type of tuple for every possible list of types, at least. But in Phobos, tuples can have named fields, which are essentially aliasing the canonical projections. While otherwise identical tuples with different field names are practically isomorphic in D in many contexts, they do still represent a different type. So the tuple's expressiveness is equal to the number of lists of types, multiplied by every possible assignment of field names to the elements of those lists. 147 148 The templates Tuple and tuple can both be supplied named fields. But adjoin can't, and so there are less instances of adjoin than there are of tuple or Tuple. It falls short of its potential expressiveness. This is the reason for reimplementing it in core.product. 149 150 Algebraic also doesn't take field names, and so is immediately less expressive than Tuple. In addition, visit matches on types, and does not accept two functions with the same domain. This means that Algebraic's expressiveness is equal to the number of lists of types without repetition. Still infinite, but far less than Tuple. 151 152 So, if I were to lean heavily on Algebraic to be the union in a tuple/union combo, I wouldn't be able to propagate field name information, which hampers the scope of application for traits-based interfaces. In the case of something like known in advance to be important, like ".length" in range, the definition is propagated from the top down. But take, say, a range representing some signal sampled in time, which has a "frequency" property. You may want to pass it through stride to perform some rough resampling, but you will lose frequency information. To handle cases like this, you either need some uniformity, an incredibly complex introspection mechanism, or to resign yourself to writing boilerplate. 153 154 Algebraic also can't naturally support recursive data types; it uses This, a special symbol defined which signals to the implementation of Algebraic to replace it, during compilation, with typeof(this). It's not that big of a deal, but it adds a little extra weight and, if the union is implemented a different way, it is unnecessary. 155 156 To put achieve goal (1), visit's matching behavior is completely different. For unions with unnamed fields, Union!(X1,X2...), each X represents a different case, even if some Xi == Xj. visit!(f1,f2,...) then matches f1 to case X1, and so on. 157 Out-of-order matching is also possible. Unions can be given field names, which act as canonical injections when being set, and assert inhabitance upon being read. Each function argument of visit can be preceded by the injection's name, and this will override the index-based matching. 158 Total Union expressiveness is the number of lists of any types, multiplied by every assignment of field names. Equal to the tuple. 159 160 For convenience, if the fields are named, they act as delimeters for the types, meaning that n-ary injections (and by extension, cases) can be defined without repeating Tuple over and over (and then relying on the normalization system). 161 162 To enable natural recursive data type definitions (as well as uniquely typed unions), the Union definition is available as a mixin template, UnionInstance. The custom union type (or its pointer type) can be passed as a type argument as though it were defined directly in the custom type. Functions meant to operate on unions will also operate on the custom union, while preserving its type and field names when applicable. 163 164 PS. I think that the main reason Algebraic falls short of my needs is because it is conflating two processes that I need separated: matching functions to arguments, and representing a union type. The latter is accomplished here, while the former is accomplished in core.match. 165 166 */ 167 template Union(ctors...) 168 { 169 struct Union { mixin UnionInstance!ctors; } 170 } 171 mixin template UnionInstance(ctors...) 172 { 173 union { mixin(Union.CodeGen.declUnion); } 174 ubyte tag; 175 176 mixin(Union.CodeGen.declInjections); 177 178 static if(is(typeof(this) == struct)) 179 string toString() { return Union.toString(this); } 180 static if(is(typeof(this) == class)) 181 override string toString() { return Union.toString(this); } 182 183 static struct Union 184 { 185 import std.meta; 186 import universal.meta; 187 import std.typetuple; 188 import std.typecons; 189 import std.range; 190 import universal.core.product; 191 import universal.core.apply; 192 import std.conv : text; 193 import std.format : format; 194 195 static: 196 197 alias ctorNames = Filter!(isString, ctors); 198 enum width = ctorNames.length > 0? ctorNames.length : ctors.length; 199 200 template Args(uint i) 201 { 202 static if(ctorNames.length > 0) 203 { 204 enum idx(uint i) = staticIndexOf!(ctorNames[i], ctors); 205 206 static if(i == ctorNames.length-1) 207 alias Args = ctors[ idx!i +1 .. $ ]; 208 else 209 alias Args = ctors[ idx!i +1 .. idx!(i+1) ]; 210 } 211 else 212 { 213 alias Args = ctors[i]; 214 215 } 216 } 217 alias InArg(uint i) = Tuple!(Args!i); 218 alias OutArg(uint i) = Universal!(Args!i); 219 220 template inj(uint i) 221 { 222 template inj(U) if(is(U.Union == Union)) 223 { 224 auto ref U inj(auto ref U u, Args!i a) 225 { 226 mixin(text(q{u._}, i)) 227 = Tuple!(Args!i)(a); 228 229 u.tag = i; 230 231 return u; 232 } 233 } 234 } 235 template invInj(uint i) 236 { 237 template invInj(U) if(is(U.Union == Union)) 238 { 239 string errMsg(uint j) 240 { 241 static if(ctorNames.length > 0) 242 return format( 243 "%s accessed %s while inhabited by %s", 244 U.stringof, ctorNames[i], [ctorNames][j], 245 ); 246 else 247 return format( 248 "%s accessed case %d while inhabited by case %d", 249 U.stringof, i, j 250 ); 251 } 252 253 auto ref OutArg!i invInj(auto ref U u) 254 in{ assert(u.tag == i, errMsg(u.tag)); }body 255 { return mixin(text(q{u._}, i))[].identity; } 256 } 257 } 258 259 static string toString(U)(auto ref U u) 260 if(is(U.Union == Union)) 261 { 262 static if(ctorNames.length > 0) 263 auto header = text( 264 ctorNames.only[u.tag], ": ", 265 ); 266 else 267 enum header = ""; 268 269 return text(header, u.ulift!text); 270 } 271 272 struct CodeGen 273 { 274 import std.range: iota; 275 276 enum declUnion = declare!unionDecl; 277 enum declInjections = declare!injectionDecl; 278 279 enum declare(alias decl) = [staticMap!(decl, aliasSeqOf!(Union.width.iota))].join("\n"); 280 281 static unionDecl(uint i)() 282 { 283 return format(q{ 284 Union.InArg!%d _%d; 285 }, i, i); 286 } 287 static injectionDecl(uint i)() 288 { 289 static if(ctorNames.length > 0) 290 enum name = ctorNames[i]; 291 else 292 enum name = format(q{inj(uint i : %d)}, i); 293 294 auto injection = format(q{ 295 typeof(this) %s(Union.Args!%d args) 296 { return Union.inj!%d(this, args); } 297 }, name, i, i); 298 299 auto inverse = format(q{ 300 Union.OutArg!%d %s() 301 { return Union.invInj!%d(this); } 302 }, i, name, i); 303 304 return [ // avoid ambiguous setter definition for nullary ctors 305 injection, 306 InArg!i.length == 0 || is(OutArg!i == Unit)? 307 "" : inverse 308 ].join("\n"); 309 } 310 } 311 } 312 } 313 314 /* 315 Canonical injection function. For structs, injection constructs a new value. For class types, it operates in place. For in-place injection on some struct `x`, use `x.inj!i`. 316 */ 317 template inject(uint i) 318 { 319 template inject(U, A...) if(is(U.Union)) 320 { 321 U inject(U u, A a) 322 { return U.Union.inj!i(U(), a); } 323 } 324 } 325 326 /* 327 Universal property function. Takes a set of functions and applies the one which matches the inhabited value of the union. If the arguments to `visit` are interleaved with strings, the matching is performed against the union's field names. Otherwise, the functions are matched to injections by the order of their respective declarations (NOT by type as is the case with `std.variant.visit`). 328 */ 329 template visit(dtors...) 330 { 331 import std.typetuple; 332 import std.typecons; 333 import universal.meta; 334 import universal.core.apply; 335 import std.range; 336 337 alias dtorNames = Filter!(isString, dtors); 338 alias dtorFuncs = Filter!(isParameterized, dtors); 339 340 template visit(U, A...) if(is(U.Union)) 341 { 342 auto visit(U u, A args) 343 { 344 with(U.Union) 345 foreach(i; aliasSeqOf!(width.iota)) 346 if(i == u.tag) 347 { 348 static if(dtorNames.length > 0) 349 enum j = staticIndexOf!(ctorNames[i], dtorNames); 350 else 351 enum j = i; 352 353 static assert(size_t(j) < width, format( 354 "couldn't find %s among %s", 355 ctorNames[i], [dtorNames].join(", ") 356 )); 357 358 return U.Union.invInj!i(u).apply!(dtorFuncs[j])(args); 359 } 360 361 assert(0); 362 } 363 } 364 } 365 366 /* 367 Classifies a value according to the first given predicate which it matches. 368 The classification is represented by a union inhabited by the injection associated (either by name or declaration order) with the predicate that it passed. 369 Strings can be intereleaved in the predicates to name the fields of the tuple. 370 If the value cannot be classified, an assertion is thrown. To define a default classification which cannot fail, return true from the last predicate. 371 */ 372 template classify(preds...) 373 { 374 import std.typetuple; 375 import universal.meta; 376 import universal.core.apply; 377 378 alias predNames = Filter!(isString, preds); 379 alias predFuncs = Filter!(isParameterized, preds); 380 381 template classify(A...) 382 { 383 alias U = Union!(DeclSpecs!( 384 predNames, Repeat!(predFuncs.length, Universal!A) 385 )); 386 387 U classify(A args) 388 { 389 U u; 390 391 foreach(i, pred; predFuncs) 392 if(pred(args)) 393 return u.inject!i(args); 394 395 assert(0, "no predicates match"); 396 } 397 } 398 } 399 400 /* 401 Returns true if the union is inhavited by the specified injection. If the injection is identified by a number, this refers to its order of declaration in the union. Otherwise the injection can be identified by name. 402 */ 403 template isCase(uint ctor) 404 { 405 template isCase(U) if(is(U.Union)) 406 { 407 enum i = ctor; 408 409 bool isCase(U u) { return u.tag == i; } 410 } 411 } 412 template isCase(string ctor) 413 { 414 import std.meta; 415 import std.range; 416 import std.algorithm.searching; 417 import std.typecons; 418 419 template isCase(U) if(is(U.Union) && is(typeof((){ static assert(U.Union.ctorNames.length > 0); }))) 420 { 421 enum i = [U.Union.ctorNames].countUntil!(name => name == ctor); 422 static assert(i > -1, ctor~" is not a case of "~[U.Union.ctorNames].stringof); 423 424 bool isCase(U u) { return u.tag == i; } 425 } 426 } 427 428 /* 429 Applies a function (more likely a template) to whichever value inhabits the union. 430 This helper function is useful for meta things, like diagnostic printing. 431 */ 432 template ulift(alias f) 433 { 434 import universal.meta; 435 import std.meta; 436 437 template ulift(U, A...) if(is(U.Union)) 438 { 439 auto ulift(U u, A a) 440 { return u.visit!(Repeat!(U.Union.width, f))(a); } 441 } 442 }