Language Builder
This end-to-end guide walks you through building a complete new programming language with CDTk, from token declarations through to an SDXF bundle that the UAB engine can compile.
Overview
We'll build ArrowLang — a small functional language that compiles to Python. Features: function declarations (fn) with arrow-annotated return types (-> int), let bindings, return statements, and if/else.
Step 1 — Create the Project
# Scaffold a class-library project named ArrowLang targeting .NET 10
dotnet new classlib --name ArrowLang --framework net10.0
# Run the remaining commands from inside the new project directory
cd ArrowLang
# Reference the CDTk project (path is relative to the ArrowLang directory)
dotnet add reference ../CSharp.CDTk/CSharp.CDTk.csproj
Step 2 — Declare Tokens & Roles
Create ArrowLangGrammar.cs. Declare all tokens as public static fields, most-specific first, and add a public static Map field named Structural for role assignments:
using System.Text;
using CDTk;
/// <summary>
/// Token declarations and structural role assignments for ArrowLang.
/// </summary>
/// <remarks>
/// Tokens are public static fields declared most-specific first: keywords,
/// multi-character operators, single-character operators, delimiters, and
/// finally the catch-all identifier/number/string classes. The declaration
/// order is significant and should not be rearranged.
/// </remarks>
public class ArrowLangGrammar : Grammar {
    // Keywords (most-specific, declared first)
    public static Token KW_FN = Kw("fn");
    public static Token KW_LET = Kw("let");
    public static Token KW_RETURN = Kw("return");
    public static Token KW_IF = Kw("if");
    public static Token KW_ELSE = Kw("else");
    public static Token KW_INT = Kw("int");
    public static Token KW_VOID = Kw("void");

    // Multi-character operators, declared ahead of the single-character
    // operators that share a prefix with them ("->" before "-", "==" before
    // "=", "<=" before "<") — presumably so the longer spelling wins.
    public static Token ARROW = Op("->");
    public static Token OP_EQ = Op("==");
    public static Token OP_NEQ = Op("!=");
    public static Token OP_LEQ = Op("<=");

    // Single-character operators
    public static Token PLUS = Op("+");
    public static Token MINUS = Op("-");
    public static Token STAR = Op("*");
    public static Token ASSIGN = Op("=");
    public static Token LT = Op("<");

    // Delimiters
    public static Token LBRACE = Punct("{");
    public static Token RBRACE = Punct("}");
    public static Token LPAREN = Punct("(");
    public static Token RPAREN = Punct(")");
    public static Token COMMA = Punct(",");
    public static Token SEMI = Punct(";");
    public static Token COLON = Punct(":");

    // Catch-all (lowest priority): identifiers, numbers and strings
    public static Token IDENT = Id();
    public static Token INT = Num();
    public static Token STR = Str();

    // Structural roles — auto-applied by CDTk.
    // Both type keywords share the "TypeKeyword" role; KW_LET is not assigned
    // a role here.
    public static Map Structural = new() {
        { KW_FN, "FuncKeyword" },
        { KW_RETURN, "ReturnKeyword" },
        { KW_IF, "IfKeyword" },
        { KW_ELSE, "ElseKeyword" },
        { KW_INT, "TypeKeyword" },
        { KW_VOID, "TypeKeyword" },
    };
}
Step 3 — Define Grammar Rules
Add rules to ArrowLangGrammar as public static Rule fields. Use Ref(() => ...) for recursive or forward references, and override RootRule to set the entry point:
// Expression (right-recursive): every binary alternative has the shape
// IDENT <operator> Expr, so the recursion is always on the right-hand side.
// The self-reference is passed as a lambda via Ref(...), so ExprRule is not
// read while its own initializer is still running.
// NOTE(review): the left operand of a binary operator is always IDENT, so
// input such as `1 + a` or `(a) + b` does not appear to match any
// alternative — confirm this restriction is intended. OP_NEQ and OP_LEQ are
// declared as tokens but have no alternative here.
public static Rule ExprRule = Alt(
    Seq(IDENT, PLUS, Ref(() => ExprRule)),
    Seq(IDENT, MINUS, Ref(() => ExprRule)),
    Seq(IDENT, STAR, Ref(() => ExprRule)),
    Seq(IDENT, OP_EQ, Ref(() => ExprRule)),
    Seq(IDENT, LT, Ref(() => ExprRule)),
    // Parenthesized sub-expression
    Seq(LPAREN, Ref(() => ExprRule), RPAREN),
    // Atoms: integer literal or bare identifier
    INT,
    IDENT
);
// Let binding: let x = expr;
// ExprRule is referenced directly (no Ref) because it is declared earlier.
public static Rule LetStmt = Seq(KW_LET, IDENT, ASSIGN, ExprRule, SEMI);
// Return statement: return expr;
// The expression is mandatory in this rule; a bare `return;` has no alternative.
public static Rule RetStmt = Seq(KW_RETURN, ExprRule, SEMI);
// If/else: if (expr) block, optionally followed by else block.
// Block is declared further down, so it is referenced through Ref(() => Block).
// The else branch must be a Block; there is no direct `else if` form.
public static Rule IfStmt = Seq(
    KW_IF, LPAREN, ExprRule, RPAREN, Ref(() => Block),
    Opt(Seq(KW_ELSE, Ref(() => Block)))
);
// Statement: one of let binding, return statement, or if/else.
public static Rule Statement = Alt(LetStmt, RetStmt, IfStmt);
// Block: { stmt* } — braces around a repetition of Statement
// (presumably zero or more, matching the stmt* notation).
public static Rule Block = Seq(LBRACE, Rep(Statement), RBRACE);
// Parameter: type name — the type is either the int keyword or an identifier.
public static Rule Param = Seq(Alt(KW_INT, IDENT), IDENT);
// Parameter list: a first Param followed by repeated ", Param" pairs; the
// whole list is wrapped in Opt so that empty parentheses are accepted.
public static Rule Params = Opt(Seq(Param, Rep(Seq(COMMA, Param))));
// Function: fn name(params) -> rettype { body }
// The "-> rettype" part is optional; rettype is int, void, or an identifier.
public static Rule FnDecl = Seq(
    KW_FN, IDENT, LPAREN, Params, RPAREN,
    Opt(Seq(ARROW, Alt(KW_INT, KW_VOID, IDENT))),
    Block
);
// Program is one or more function declarations
public static Rule Program = Rep1(FnDecl);
// Root of the grammar: RootRule returns the Program rule.
public override Rule RootRule => Program;
Step 4 — Implement Render()
Override Render() to emit Python from the translated SemanticTable:
/// <summary>
/// Renders every morphism in <paramref name="table"/> as a Python function definition.
/// </summary>
/// <param name="table">Semantic table whose <c>Morphisms</c> are emitted in iteration order.</param>
/// <returns>
/// Python source text: one <c>def</c> block per morphism, each followed by a blank line.
/// A morphism whose body has no non-blank lines is emitted with a single <c>pass</c>.
/// </returns>
/// <remarks>
/// Uses <c>StringBuilder</c>, which requires <c>using System.Text;</c> in the file.
/// </remarks>
public override string Render(SemanticTable table) {
    var sb = new StringBuilder();
    foreach (var fn in table.Morphisms) {
        // Python def header; fn.Domain is interpolated verbatim as the parameter list
        // (presumably already comma-separated).
        sb.AppendLine($"def {fn.Name}({fn.Domain}):");

        // TrimEntries combined with RemoveEmptyEntries trims each line first and then
        // drops the ones that end up empty, so whitespace-only lines (for example a
        // lone "\r" from CRLF input) are removed too.
        var bodyLines = fn.Body.Split('\n',
            StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);

        // A def with no statements is a Python syntax error; emit a placeholder.
        if (bodyLines.Length == 0) {
            sb.AppendLine(" pass");
        }

        // NOTE(review): every body line is trimmed and re-indented by one level, so
        // any nested indentation inside fn.Body is flattened — confirm that bodies
        // never contain nested Python blocks.
        foreach (var line in bodyLines) {
            sb.AppendLine($" {line}");
        }

        // Blank line between functions
        sb.AppendLine();
    }
    return sb.ToString();
}
Step 5 — Wire Up the Compiler
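Create a Program.cs that reads ArrowLang source and translates it to Python. Top-level statements only compile in an executable project, so put this file in a console app that references the ArrowLang library: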
using CDTk;
// Load the ArrowLang program to translate.
string arrowSource = File.ReadAllText("hello.arrow");

// Translate it: the ArrowLang grammar is passed first, the Python grammar second.
var generated = Compiler.CompileText(new ArrowLangGrammar(), new PythonGrammar(), arrowSource);

// Print the generated Python as-is (no extra trailing newline).
Console.Write(generated);
Step 6 — Bundle as SDXF
To distribute your language for use with the UAB engine without shipping C# DLLs, serialize the grammar pair as an SDXF binary:
// Serialize the input grammar (ArrowLang) and output grammar (Python) into an
// SDXF binary and write it to disk.
// NOTE(review): `sourceFiles` is not defined in this snippet — supply the
// collection of source files the encoder expects.
// NOTE(review): the API is spelled "Sdfx" while the format is called "SDXF" —
// confirm the identifier names against the CDTk library.
byte[] bundle = new SdfxEncoder(new ArrowLangGrammar(), new PythonGrammar())
    .Encode(sourceFiles);
File.WriteAllBytes("arrowlang.sdxf", bundle);

// Alternative: obtain only the encoded grammar bytes, e.g. for embedding.
byte[] grammarBytes = Compiler.EncodeGrammarSdfx(new ArrowLangGrammar());
Step 7 — Test the Pipeline
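Write a smoke test. CDTk's built-in testing infrastructure makes it easy to verify round-trips; the minimal check below simply translates a small ArrowLang program and inspects the generated Python: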
// Input ArrowLang source
var input = """
fn add(int a, int b) -> int {
return a + b;
}
""";

// Translate to Python
var py = Compiler.CompileText(new ArrowLangGrammar(), new PythonGrammar(), input);

// Verify the output contains the expected Python constructs. Explicit throws are
// used instead of Debug.Assert because Debug.Assert calls are compiled out of
// Release builds, which would let a broken pipeline pass silently.
foreach (var expected in new[] { "def add", "return a + b" }) {
    if (!py.Contains(expected)) {
        throw new InvalidOperationException(
            $"Smoke test failed: generated Python does not contain \"{expected}\".");
    }
}

Console.WriteLine("✓ ArrowLang → Python");
Console.WriteLine(py);
Complete Grammar Class
Complete example grammars for C#, Python, WASM, and LLVM IR are available on GitHub in the Testing/Grammars/ directory. Each grammar is ≈200 lines.