File: Grammar\GrammarGenerator.vb
Web Access
Project: src\src\Tools\Source\CompilerGeneratorTools\Source\VisualBasicSyntaxGenerator\VisualBasicSyntaxGenerator.vbproj (VisualBasicSyntaxGenerator)
' Licensed to the .NET Foundation under one or more agreements.
' The .NET Foundation licenses this file to you under the MIT license.
' See the LICENSE file in the project root for more information.
 
Imports System.Collections.Immutable
Imports System.Reflection
Imports System.Text.RegularExpressions
Imports Microsoft.CodeAnalysis.VisualBasic.Internal.VBSyntaxGenerator.Microsoft.CodeAnalysis.VisualBasic
Imports Microsoft.CodeAnalysis.VisualBasic.Internal.VBSyntaxGenerator.Microsoft.CodeAnalysis.VisualBasic.Symbols
 
Friend Class GrammarGenerator
    Inherits WriteUtils
 
    Private ReadOnly _seen As New HashSet(Of String)()
 
    Private _rules As Dictionary(Of String, List(Of Production))
    Private _lexicalRules As ImmutableArray(Of String)
    Private _majorRules As ImmutableArray(Of String)
 
    Public Sub New(parseTree As ParseTree)
        MyBase.New(parseTree)
    End Sub
 
    Friend Function Run() As String
        ' Syntax.xml refers to a special pseudo-element 'Modifier'.  Synthesize that for the grammar.
        Dim modifiers = GetMembers(Of DeclarationModifiers)().
            Select(Function(m) m.ToString() + "Keyword").Where(Function(n) GetSyntaxKind(n) <> SyntaxKind.None).ToList()
        Dim modifiersString = String.Join("|", modifiers.OrderBy(Function(a) a))
 
        _parseTree.NodeStructures.Add("Modifier", New ParseNodeStructure(
            New XElement("node-structure",
                         New XAttribute("name", "Modifier"),
                         New XAttribute("parent", "VisualBasicSyntaxNode"),
                         New XElement(
                            XName.Get("child", "http://schemas.microsoft.com/VisualStudio/Roslyn/Compiler"),
                            New XAttribute("name", "Keyword"),
                            New XAttribute("kind", modifiersString))), _parseTree))
 
        _rules = Me._parseTree.NodeStructures.Values.ToDictionary(
            Function(n) n.Name, Function(n) New List(Of Production)())
 
        Dim nonTokens = _parseTree.NodeStructures.Values.Where(Function(n) Not n.IsToken)
        Dim tokens = _parseTree.NodeStructures.Values.Where(Function(n) n.IsToken)
 
        ' Initial pass.  Set up base/inheritance rules and break out compound production
        ' rules so they're available for other productions to reference.
        Dim compoundNodes = New List(Of ParseNodeStructure)
 
        For Each structureNode In nonTokens.Concat(tokens)
            Dim parent = structureNode.ParentStructure
            If parent IsNot Nothing Then
                _rules(parent.Name).Add(RuleReference(structureNode.Name))
            End If
 
            ' VB syntax has a ton of nodes that are effectively 'compound' nodes.  i.e. there is
            ' EndBlockStatement (which is referenced by tons of other nodes, like EnumDeclaration,
            ' ClassDeclaration, WhileStatement, etc. etc.)  However, all these nodes reference a
            ' specific node-kind specialization of that compound node.  These specializations are
            ' what spell out that while EndBlock is defined as `'End' ('Class' | 'Enum' | 'Structure
            ' | ...)` that there are really N different specializations like `'End' 'Class'` `'End'
            ' 'Enum'` etc.  In these cases to make the generated grammar much nicer, we emit as those
            ' individual productions.
            '
            ' So, we first check if this production actually defines N different sub-kinds
            If structureNode.NodeKinds.Count >= 2 Then
                ' Then we see if there are children that having a matching number of kinds. When
                ' this happens, it's the case that each kind the child could be corresponds 1:1
                ' with all the different sub-kinds of hte production.
                Dim correspondingChildren = structureNode.Children.Where(
                    Function(c)
                        Dim childKinds = TryCast(c.ChildKind, List(Of ParseNodeKind))
                        Return childKinds IsNot Nothing AndAlso childKinds.Count = structureNode.NodeKinds.Count
                    End Function).ToArray()
 
                If correspondingChildren.Length > 0 Then
                    compoundNodes.Add(structureNode)
 
                    Dim correspondingChildrenKinds = correspondingChildren.ToDictionary(
                        Function(c) c,
                        Function(c) DirectCast(c.ChildKind, List(Of ParseNodeKind)))
 
                    ' We then iterate each different kind, pickin out the specific subkinds of of
                    ' each child that correspond to it.
                    For i = 0 To structureNode.NodeKinds.Count - 1
                        Dim nodeKind = structureNode.NodeKinds(i)
                        Dim local_i = i
                        Dim mappedChildren = structureNode.Children.Select(
                        Function(c)
                            If correspondingChildren.Contains(c) Then
                                Return c.WithChildKind(
                                    If(c.ChildKind(nodeKind.Name),
                                       correspondingChildrenKinds(c)(local_i)))
                            End If
 
                            Return c
                        End Function).ToList()
 
                        ' We then emit a production for the child kind.
                        _rules.Add(nodeKind.Name, New List(Of Production)())
                        _rules(nodeKind.Name).Add(HandleChildren(structureNode, mappedChildren))
 
                        ' And then point the top level production at each of these child productions.
                        _rules(structureNode.Name).Add(RuleReference(nodeKind.Name))
                    Next
                End If
            End If
        Next
 
        For Each structureNode In nonTokens.Concat(tokens)
            ' ignore compound nodes.  we handled it in the above loop.
            If compoundNodes.Contains(structureNode) Then
                Continue For
            End If
 
            If Not structureNode.Abstract Then
                Dim children = GetAllChildrenOfStructure(structureNode)
 
                ' Convert rules Like `a: (x | y) ...` into:
                ' a: x ...
                '  | y ...
                If children.Count > 0 Then
                    Dim child = children(0)
                    Dim kinds = TryCast(child.ChildKind, List(Of ParseNodeKind))
                    If kinds IsNot Nothing Then
                        Dim childStructure = GetCommonStructure(kinds)
                        If childStructure.IsToken Then
                            For Each kind In kinds
                                _rules(structureNode.Name).Add(HandleChildren(structureNode, children.Select(
                                    Function(c) If(c Is child, child.WithChildKind(kind), c))))
                            Next
                            Continue For
                        End If
                    End If
 
                    _rules(structureNode.Name).Add(HandleChildren(structureNode, children))
                End If
            End If
        Next
 
        ' Clear out this production as it's not really helpful to have a rule
        ' that points outward to all other tokens
        _rules("SkippedTokensTriviaSyntax").Clear()
        _rules("SkippedTokensTriviaSyntax").Add(RuleReference("SyntaxToken").Suffix("*"))
 
        _rules("PunctuationSyntax").Clear()
 
        ' The grammar will bottom out with certain lexical productions. Create rules for these.
        Dim lexicalRules = _rules.Values.SelectMany(Function(ps) ps).SelectMany(Function(p) p.ReferencedRules).
            Where(Function(r)
                      Dim productions As List(Of Production) = Nothing
                      Return Not _rules.TryGetValue(r, productions) OrElse productions.Count = 0
                  End Function).ToArray()
        For Each name In lexicalRules
            _rules(name) = New List(Of Production) From {New Production("/* see lexical specification */")}
        Next
 
        ' The grammar will bottom out with certain lexical productions. Create rules for these.
        _lexicalRules = tokens.Select(Function(t) t.Name).ToImmutableArray()
 
        ' Define a few major sections to help keep the grammar file naturally grouped.
        _majorRules = ImmutableArray.Create(
            "CompilationUnitSyntax", "StatementSyntax", "ExpressionSyntax", "TypeSyntax", "NameSyntax", "XmlNodeSyntax", "StructuredTriviaSyntax",
            "Modifier", "SyntaxToken", "PunctuationSyntax", "EmptyToken")
 
        Dim result = "// <auto-generated />" + Environment.NewLine + "grammar vb;" + Environment.NewLine
 
        ' Handle each major section first And then walk any rules Not hit transitively from them.
        For Each rule In _majorRules.Concat(_rules.Keys.OrderBy(Function(a) a))
            ProcessRule(rule, result)
        Next
 
        Return result
    End Function
 
    Private Shared Function Join(delim As String, productions As IEnumerable(Of Production)) As Production
        Return New Production(
            String.Join(delim, productions.Where(Function(p) p.Text.Length > 0)),
            productions.SelectMany(Function(p) p.ReferencedRules))
    End Function
 
    Private Function HandleChildren(structureNode As ParseNodeStructure, children As IEnumerable(Of ParseNodeChild), Optional delim As String = " ") As Production
        Return Join(delim, children.Select(
                    Function(child) HandleField(structureNode, child)))
    End Function
 
    Private Function HandleField(structureNode As ParseNodeStructure, child As ParseNodeChild) As Production
        If child.IsSeparated Then
            Return HandleSeparatedList(structureNode, child)
        ElseIf child.IsList Then
            Return HandleList(structureNode, child)
        Else
            Return HandleChildKind(structureNode, child, child.ChildKind).Suffix("?", [when]:=child.IsOptional)
        End If
    End Function
 
    Private Function HandleSeparatedList(structureNode As ParseNodeStructure, child As ParseNodeChild) As Production
        Dim childProduction = HandleChildKind(structureNode, child, child.ChildKind)
        Dim separatorProd = HandleChildKind(structureNode, child, child.SeparatorsKind)
 
        Return childProduction.Suffix(" (" + separatorProd.Text + " " + childProduction.Text + ")").
            Suffix("*", [when]:=child.MinCount < 2).Suffix("+", [when]:=child.MinCount >= 2).
            Parenthesize([when]:=child.MinCount = 0).Suffix("?", [when]:=child.MinCount = 0)
    End Function
 
    Private Function HandleList(structureNode As ParseNodeStructure, child As ParseNodeChild) As Production
        Return HandleChildKind(structureNode, child, child.ChildKind).Suffix("*")
    End Function
 
    Private Function HandleChildKind(structureNode As ParseNodeStructure,
                                     child As ParseNodeChild,
                                     childKind As Object) As Production
        If child.Name = "Modifiers" Then
            Return RuleReference("Modifier")
        End If
 
        ' VB syntax model is interesting, it will have base types that support certain kinds of
        ' child tokens/nodes.  Then, it will have derived types that then support a subset of those
        ' kinds.  For example, CastExpressionSyntax says that its keyword can be
        ' CType/DirectCast/TryCast.  However, there is information in Syntax.xml that says that if
        ' you have a DirectCastExpressionSyntax that the keyword is DirectCast only.  So when we're
        ' emitting the code for direct_cast_expression we don't want to say that it starts with
        ' 'DirectCast' and not either of the other keywords.
        Dim mappedKinds = GetMappedKinds(structureNode, child)
        Dim nodeKind = If(mappedKinds.nodeKind, TryCast(childKind, ParseNodeKind))
        Dim nodeKindList = If(mappedKinds.nodeKinds, TryCast(childKind, List(Of ParseNodeKind)))
 
        If nodeKind IsNot Nothing Then
            Return HandleNodeKind(nodeKind)
        ElseIf nodeKindList IsNot Nothing Then
            Dim common = GetCommonStructure(nodeKindList)
            If common.IsRoot Then
                Dim commons = nodeKindList.Select(Function(n) GetNonRootParent(n))
                Return Join(" | ",
                    commons.Select(Function(c) RuleReference(c.Name)).Distinct()).Parenthesize()
            ElseIf common IsNot Nothing AndAlso Not common.IsToken Then
                Return RuleReference(common.Name)
            Else
                Return Join(" | ",
                    nodeKindList.Select(Function(nk) HandleNodeKind(nk)).Distinct()).Parenthesize()
            End If
        End If
 
        Throw New NotImplementedException()
    End Function
 
    Private Function GetMappedKinds(structureNode As ParseNodeStructure,
                                    child As ParseNodeChild) As (nodeKind As ParseNodeKind, nodeKinds As List(Of ParseNodeKind))
        ' Ensure certain deferred collections have been computed.
        Dim unused1 = child.ChildKind()
        Dim unused2 = child.ChildKind("")
 
        If structureNode.NodeKinds IsNot Nothing Then
            If structureNode.NodeKinds.Count = 1 Then
                If child.KindForNodeKind IsNot Nothing Then
                    Dim kind As ParseNodeKind = Nothing
                    If child.KindForNodeKind.TryGetValue(structureNode.NodeKinds(0).Name, kind) Then
                        Return (kind, Nothing)
                    End If
                End If
            ElseIf structureNode.NodeKinds.Count > 1 Then
                Dim kindList = New List(Of ParseNodeKind)()
                For Each structureNodeKind In structureNode.NodeKinds
                    Dim kind As ParseNodeKind = Nothing
                    If child.KindForNodeKind.TryGetValue(structureNodeKind.Name, kind) Then
                        kindList.Add(kind)
                    End If
                Next
 
                If kindList.Count > 0 Then
                    Return (Nothing, kindList)
                End If
            End If
        End If
 
        Return Nothing
    End Function
 
    Private Function GetNonRootParent(n As ParseNodeKind) As ParseNodeStructure
        Dim current = n.NodeStructure
        While Not current.ParentStructure.IsRoot
            current = current.ParentStructure
        End While
 
        Return current
    End Function
 
    Private Function HandleNodeKind(nodeKind As ParseNodeKind) As Production
        If Not String.IsNullOrEmpty(nodeKind.TokenText) Then
            If nodeKind.TokenText = "\" Then
                Return New Production("'\\'")
            ElseIf nodeKind.TokenText = "'" Then
                Return New Production("'\''")
            Else
                Return New Production("'" + nodeKind.TokenText + "'")
            End If
        End If
 
        If nodeKind.Name = "EmptyToken" Then
            Return RuleReference("EmptyToken")
        ElseIf nodeKind.Name = "EndOfFileToken" Then
            Return New Production("")
        End If
 
        If _rules.ContainsKey(nodeKind.Name) Then
            Return RuleReference(nodeKind.Name)
        End If
 
        'If nodeKind.NodeStructure.Name = "EndBlockStatementSyntax" Then
        '    Return RuleReference(nodeKind.Name)
        'End If
 
        Return RuleReference(nodeKind.NodeStructure.Name)
    End Function
 
    Private Sub ProcessRule(name As String, ByRef result As String)
        If name <> "VisualBasicSyntaxNode" AndAlso _seen.Add(name) Then
            Dim sorted = _rules(name).Distinct().OrderBy(Function(v) v)
            If sorted.Any() Then
                result += Environment.NewLine + RuleReference(name).Text + Environment.NewLine + "  : " +
                          String.Join(Environment.NewLine + "  | ", sorted) + Environment.NewLine + "  ;" + Environment.NewLine
 
                ' Now proceed in depth-first fashion through the referenced rules to keep related rules
                ' close by. Don't recurse into major-sections to help keep them separated in grammar file.
                Dim references = sorted.SelectMany(Function(t) t.ReferencedRules)
                For Each referencedRule In references.Where(Function(r) Not _majorRules.Concat(_lexicalRules).Contains(r))
                    ProcessRule(referencedRule, result)
                Next
            End If
        End If
    End Sub
 
    Private Function RuleReference(name As String) As Production
        Dim trimmed = If(name.EndsWith("Syntax"), name.Substring(0, name.Length - "Syntax".Length), name)
        Return New Production(
            s_normalizationRegex.Replace(trimmed, "_").ToLower(),
            ImmutableArray.Create(name))
    End Function
 
    Private Shared Function GetSyntaxKind(name As String) As SyntaxKind
        Return GetMembers(Of SyntaxKind)().Where(Function(k) k.ToString() = name).SingleOrDefault()
    End Function
 
    Private Shared Function GetMembers(Of TEnum)() As IEnumerable(Of TEnum)
        Return GetType(TEnum).GetFields(BindingFlags.Public Or BindingFlags.Static).
            Select(Function(f) f.GetValue(Nothing)).OfType(Of TEnum)()
    End Function
 
    ' Converts a PascalCased name into snake_cased name.
    Private Shared ReadOnly s_normalizationRegex As New Regex(
        "(?<=[A-Z])(?=[A-Z][a-z]) | (?<=[^A-Z])(?=[A-Z]) | (?<=[A-Za-z])(?=[^A-Za-z])",
        RegexOptions.IgnorePatternWhitespace Or RegexOptions.Compiled)
 
    Friend Structure Production
        Implements IComparable(Of Production)
 
        Public ReadOnly Text As String
        Public ReadOnly ReferencedRules As ImmutableArray(Of String)
 
        Public Sub New(text As String, Optional referencedRules As IEnumerable(Of String) = Nothing)
            Me.Text = text
            Me.ReferencedRules = If(referencedRules Is Nothing, ImmutableArray(Of String).Empty, referencedRules.ToImmutableArray())
        End Sub
 
        Public Overrides Function Equals(obj As Object) As Boolean
            Return Text = DirectCast(obj, Production).Text
        End Function
 
        Public Overrides Function ToString() As String
            Return Text
        End Function
 
        Public Function CompareTo(other As Production) As Integer Implements IComparable(Of Production).CompareTo
            Return StringComparer.Ordinal.Compare(Me.Text, other.Text)
        End Function
 
        Public Function Prefix(val As String) As Production
            Return New Production(val + Me.Text, ReferencedRules)
        End Function
 
        Public Function Suffix(val As String, Optional [when] As Boolean = True) As Production
            Return If([when], New Production(Me.Text + val, ReferencedRules), Me)
        End Function
 
        Public Function Parenthesize(Optional [when] As Boolean = True) As Production
            Return If([when], Prefix("(").Suffix(")"), Me)
        End Function
    End Structure
End Class
 
Namespace Global.Microsoft.CodeAnalysis.VisualBasic
    Partial Friend Class GreenNode
        Public Const ListKind = 1
    End Class
End Namespace