Operator (!==) winning over keyword identifier (!==:)

Elixir allows keyword identifiers (a word followed by a colon, as in JSON syntax) to use operators, such as `!==`, as the identifier before the colon (`:`), but my current BNF is parsing the `!==` as just an operator instead of as the combined `!==:`.  Here is the Elixir.bnf:

{   parserClass="org.elixir_lang.parser.ElixirParser"   extends="com.intellij.extapi.psi.ASTWrapperPsiElement"   extends("atom|(at|binary|capture|unary)Operation|keywordIdentifier|value")=expression   extends("(addition|and|arrow|association|comparison|hat|inMatch|match|multiplication|or|pipe|relational|stab|two|type|when)Operation")=binaryOperation   psiClassPrefix="Elixir"   psiImplClassSuffix="Impl"   psiPackage="org.elixir_lang.psi"   psiImplPackage="org.elixir_lang.psi.impl"   elementTypeHolderClass="org.elixir_lang.psi.ElixirTypes"   elementTypeClass="org.elixir_lang.psi.ElixirElementType"   tokenTypeClass="org.elixir_lang.psi.ElixirTokenType"   tokens = [     COMMENT = "regexp:#[^\r\n]*(\n|\r|\r\n)?"   ] } // expressionList is optional to handle code-less file that contains only EOL between blank lines and order comment // lines elixirFile ::= EOL* (expressionList EOL*)? // In alphabetical order /* Unlike other binary operation, additionOperations cannot begin with EOLs: if there are EOLs, then the +/-    is interpreted as unaryOperation */ additionOperation ::= expression DUAL_OPERATOR EOL* expression andOperation ::= expression EOL* AND_OPERATOR EOL* expression arrowOperation ::= expression EOL* ARROW_OPERATOR EOL* expression associationOperation ::= expression EOL* ASSOCIATION_OPERATOR EOL* expression { rightAssociative = true } atom ::= COLON (ATOM_FRAGMENT | quote) atOperation ::= AT_OPERATOR EOL* expression fake binaryOperation ::= expression + {   methods = [     left = "/expr[0]"  // will be @NotNull as far as we have "+" in the expression     right = "/expr[1]" // "expr" is the name of the auto-calculated child property (singular or list)   ] } captureOperation ::= CAPTURE_OPERATOR EOL* expression charList ::= CHAR_LIST_PROMOTER              interpolatedCharListBody              CHAR_LIST_TERMINATOR charListHeredoc ::= CHAR_LIST_HEREDOC_PROMOTER EOL                     interpolatedCharListBody                     CHAR_LIST_HEREDOC_TERMINATOR 
comparisonOperation ::= expression EOL* COMPARISON_OPERATOR EOL* expression /* order of choices is lower precedence first as lower precedence operation are meant to enclose higher precedence    operators.    @see https://github.com/elixir-lang/elixir/blob/de39bbaca277002797e52ffbde617ace06233a2b/lib/elixir/src/elixir_parser.yrl#L44-L71 */ expression ::= stabOperation |                captureOperation |                inMatchOperation |                whenOperation |                typeOperation |                pipeOperation |                associationOperation |                matchOperation |                orOperation |                andOperation |                comparisonOperation |                relationalOperation |                arrowOperation |                twoOperation |                additionOperation |                multiplicationOperation |                hatOperation |                unaryOperation |                atOperation |                // TODO remove from expressions once containers are implemented                keywordIdentifier |                value private expressionList ::= expression (EOL+ expression)* hatOperation ::= expression EOL* HAT_OPERATOR EOL* expression inMatchOperation ::= expression EOL* IN_MATCH_OPERATOR EOL* expression private interpolatedCharListSigil ::= TILDE INTERPOLATING_CHAR_LIST_SIGIL_NAME CHAR_LIST_SIGIL_PROMOTER interpolatedCharListBody CHAR_LIST_SIGIL_TERMINATOR private interpolatedCharListBody ::= (interpolation | CHAR_LIST_FRAGMENT | VALID_ESCAPE_SEQUENCE)* private interpolatedHeredocCharListSigil ::= TILDE INTERPOLATING_CHAR_LIST_SIGIL_NAME CHAR_LIST_SIGIL_HEREDOC_PROMOTER EOL                                              interpolatedCharListBody                                              CHAR_LIST_SIGIL_HEREDOC_TERMINATOR private interpolatedHeredocRegex ::= TILDE INTERPOLATING_REGEX_SIGIL_NAME REGEX_HEREDOC_PROMOTER EOL                                      interpolatedRegexBody                       
               REGEX_HEREDOC_TERMINATOR SIGIL_MODIFIER* private interpolatedHeredocSigil ::= TILDE INTERPOLATING_SIGIL_NAME SIGIL_HEREDOC_PROMOTER EOL                                      interpolatedSigilBody                                      SIGIL_HEREDOC_PROMOTER SIGIL_MODIFIER* private interpolatedHeredocStringSigil ::= TILDE INTERPOLATING_STRING_SIGIL_NAME STRING_SIGIL_HEREDOC_PROMOTER EOL                                            interpolatedStringBody                                            STRING_SIGIL_HEREDOC_TERMINATOR private interpolatedHeredocWords ::= TILDE INTERPOLATING_WORDS_SIGIL_NAME WORDS_HEREDOC_PROMOTER EOL                                      interpolatedWordsBody                                      WORDS_HEREDOC_TERMINATOR SIGIL_MODIFIER* private interpolatedRegex ::= TILDE INTERPOLATING_REGEX_SIGIL_NAME REGEX_PROMOTER interpolatedRegexBody REGEX_TERMINATOR SIGIL_MODIFIER* private interpolatedRegexBody ::= (interpolation | REGEX_FRAGMENT | VALID_ESCAPE_SEQUENCE)* private interpolatedSigil ::= TILDE INTERPOLATING_SIGIL_NAME SIGIL_PROMOTER interpolatedSigilBody SIGIL_TERMINATOR SIGIL_MODIFIER* private interpolatedSigilBody ::= (interpolation | SIGIL_FRAGMENT | VALID_ESCAPE_SEQUENCE)* private interpolatedStringSigil ::= TILDE INTERPOLATING_STRING_SIGIL_NAME STRING_SIGIL_PROMOTER interpolatedStringBody STRING_SIGIL_TERMINATOR private interpolatedStringBody ::=  (interpolation | STRING_FRAGMENT | VALID_ESCAPE_SEQUENCE)* private interpolatedWordsBody ::= (interpolation | WORDS_FRAGMENT | VALID_ESCAPE_SEQUENCE)* interpolation ::= INTERPOLATION_START expressionList? 
INTERPOLATION_END keywordIdentifier ::= (ALIAS | BIT_STRING_OPERATOR | COMPARISON_OPERATOR | IDENTIFIER | MAP_OPERATOR | quote | TUPLE_OPERATOR) COLON private literalCharListBody ::= CHAR_LIST_FRAGMENT* private literalCharListSigil ::= TILDE LITERAL_CHAR_LIST_SIGIL_NAME CHAR_LIST_SIGIL_PROMOTER literalCharListBody CHAR_LIST_SIGIL_TERMINATOR private literalHeredocRegex ::= TILDE LITERAL_REGEX_SIGIL_NAME REGEX_HEREDOC_PROMOTER EOL                                 literalRegexBody                                 REGEX_HEREDOC_TERMINATOR SIGIL_MODIFIER* private literalHeredocSigil ::= TILDE LITERAL_SIGIL_NAME SIGIL_HEREDOC_PROMOTER EOL                                 literalSigilBody                                 SIGIL_HEREDOC_TERMINATOR SIGIL_MODIFIER* private literalHeredocStringSigil ::= TILDE LITERAL_STRING_SIGIL_NAME STRING_SIGIL_HEREDOC_PROMOTER EOL                                       literalStringBody                                       STRING_SIGIL_HEREDOC_TERMINATOR private literalHeredocWords ::= TILDE LITERAL_WORDS_SIGIL_NAME WORDS_HEREDOC_PROMOTER EOL                                 literalWordsBody                                 WORDS_HEREDOC_TERMINATOR SIGIL_MODIFIER* private literalRegex ::= TILDE LITERAL_SIGIL_NAME REGEX_PROMOTER literalRegexBody REGEX_TERMINATOR SIGIL_MODIFIER* private literalRegexBody ::= REGEX_FRAGMENT* private literalSigil ::= TILDE LITERAL_SIGIL_NAME SIGIL_PROMOTER literalSigilBody SIGIL_TERMINATOR SIGIL_MODIFIER* private literalSigilBody ::= SIGIL_FRAGMENT* private literalStringBody ::= STRING_FRAGMENT* private literalStringSigil ::= TILDE LITERAL_STRING_SIGIL_NAME STRING_SIGIL_PROMOTER literalStringBody STRING_SIGIL_TERMINATOR private literalWords ::=  TILDE LITERAL_SIGIL_NAME WORDS_PROMOTER literal WORDS_TERMINATOR SIGIL_MODIFIER* private literalWordsBody ::= WORDS_FRAGMENT* matchOperation ::= expression EOL* MATCH_OPERATOR EOL* expression { rightAssociative = true } multiplicationOperation ::= expression EOL* 
MULTIPLICATION_OPERATOR EOL* expression orOperation ::= expression EOL* OR_OPERATOR EOL* expression pipeOperation ::= expression EOL* PIPE_OPERATOR EOL* expression { rightAssociative = true } private quote ::= (charList | string) relationalOperation ::= expression EOL* RELATIONAL_OPERATOR EOL* expression sigil ::= interpolatedCharListSigil |           interpolatedHeredocCharListSigil |           interpolatedHeredocRegex |           interpolatedHeredocSigil |           interpolatedHeredocStringSigil |           interpolatedHeredocWords |           interpolatedRegex |           interpolatedSigil |           interpolatedStringSigil |           literalCharListSigil |           literalHeredocRegex |           literalHeredocSigil |           literalHeredocStringSigil |           literalHeredocWords |           literalRegex |           literalSigil |           literalStringSigil |           literalWords stabOperation ::= expression EOL* STAB_OPERATOR EOL* expression { rightAssociative = true } string ::= STRING_PROMOTER            interpolatedStringBody            STRING_TERMINATOR stringHeredoc ::= STRING_HEREDOC_PROMOTER EOL                   interpolatedStringBody                   STRING_HEREDOC_TERMINATOR twoOperation ::= expression EOL* TWO_OPERATOR EOL* expression { rightAssociative = true } typeOperation ::= expression EOL* TYPE_OPERATOR EOL* expression { rightAssociative = true } unaryOperation ::= (DUAL_OPERATOR | UNARY_OPERATOR) EOL* expression value ::= ALIAS | atom | BIT_STRING_OPERATOR | CHAR_TOKEN | NUMBER | charListHeredoc | IDENTIFIER | MAP_OPERATOR | quote | sigil | stringHeredoc | TUPLE_OPERATOR whenOperation ::= expression EOL* WHEN_OPERATOR EOL* expression { rightAssociative = true }


Here's the test file:

<<>>: # <three token operator>: !==: #&&&:


and the PSI tree

Elixir File(0,43)   ElixirComparisonOperationImpl(COMPARISON_OPERATION)(0,36)     ElixirKeywordIdentifierImpl(KEYWORD_IDENTIFIER)(0,5)       PsiElement(ElixirTokenType.BIT_STRING_OPERATOR)('<<>>')(0,4)       PsiElement(ElixirTokenType.COLON)(':')(4,5)     PsiElement(ElixirTokenType.EOL)('\n')(5,6)     PsiElement(ElixirTokenType.EOL)('\n')(6,7)     PsiComment(ElixirTokenType.COMMENT)('# <three token operator>:')(7,32)     PsiElement(ElixirTokenType.EOL)('\n')(32,33)     PsiElement(ElixirTokenType.COMPARISON_OPERATOR)('!==')(33,36)   PsiElement(ElixirTokenType.COLON)(':')(36,37)   PsiErrorElement:ElixirTokenType.ATOM_FRAGMENT, ElixirTokenType.CHAR_LIST_PROMOTER or ElixirTokenType.STRING_PROMOTER expected, got ' '(37,38)     PsiElement(ElixirTokenType.EOL)('\n')(37,38)   PsiComment(ElixirTokenType.COMMENT)('#&&&:')(38,43)


I expected

Elixir File(0,43)   ElixirKeywordIdentifierImpl(KEYWORD_IDENTIFIER)(0,5)     PsiElement(ElixirTokenType.BIT_STRING_OPERATOR)('<<>>')(0,4)     PsiElement(ElixirTokenType.COLON)(':')(4,5)   PsiElement(ElixirTokenType.EOL)('\n')(5,6)   PsiElement(ElixirTokenType.EOL)('\n')(6,7)   PsiComment(ElixirTokenType.COMMENT)('# <three token operator>:')(7,32)   PsiElement(ElixirTokenType.EOL)('\n')(32,33)
  ElixirKeywordIdentifierImpl(KEYWORD_IDENTIFIER)(33,37)
    PsiElement(ElixirTokenType.COMPARISON_OPERATOR)('!==')(33,36)
    PsiElement(ElixirTokenType.COLON)(':')(36,37)   PsiElement(ElixirTokenType.EOL)('\n')(37,38)   PsiComment(ElixirTokenType.COMMENT)('#&&&:')(38,43)


I figure I must have the precedence screwed up somewhere.  Any suggestions for what I can change in the BNF to get the keyword identifier version, `!==:`, favored over the operator version, `!==`?

5 comments
Comment actions Permalink

I thought I should add that I tried what I thought was the obvious precedence change of putting `keywordIdentifier` at the top of `expression` instead of near the bottom, and that didn't have any effect, which I don't actually understand.  The only way I got `!==:` to show up was to comment out `comparisonOperation`, which contains `!==` as a binary operator.

0
Comment actions Permalink

Please see the priority table via QuickDoc:

Priority table:
0: BINARY(stabOperation)
1: PREFIX(captureOperation)
2: BINARY(inMatchOperation)
3: BINARY(whenOperation)
4: BINARY(typeOperation)
5: BINARY(pipeOperation)
6: BINARY(associationOperation)
7: BINARY(matchOperation)
8: BINARY(orOperation)
9: BINARY(andOperation)
10: BINARY(comparisonOperation)
11: BINARY(relationalOperation)
12: BINARY(arrowOperation)
13: BINARY(twoOperation)
14: BINARY(additionOperation)
15: BINARY(multiplicationOperation)
16: BINARY(hatOperation)
17: PREFIX(unaryOperation)
18: PREFIX(atOperation)
19: ATOM(keywordIdentifier)      <----- my guess is that ATOM is not good
20: ATOM(value)

To say for sure, one needs to debug the generated parser.

0
Comment actions Permalink

I wasn't able to get the precedence to work in the BNF, but I took a hint from how Elixir itself handles the parsing in https://github.com/elixir-lang/elixir/blob/de39bbaca277002797e52ffbde617ace06233a2b/lib/elixir/src/elixir_tokenizer.erl and moved recognition of the `<operator>: <space>` sequence into the flex file instead:

/* <YYINITIAL> is first even though it isn't lexicographically first because it is the first state.    Rules that aren't dependent on detecting the end of INTERPOLATION can be shared between <YYINITIAL> and    <INTERPOLATION> */ <YYINITIAL, INTERPOLATION> {   {AND_OPERATOR} / {COLON}{SPACE}            { return ElixirTypes.OPERATOR_KEYWORD; }   {AND_OPERATOR}                             { return ElixirTypes.AND_OPERATOR; }   {ARROW_OPERATOR} / {COLON}{SPACE}          { return ElixirTypes.OPERATOR_KEYWORD; }   {ARROW_OPERATOR}                           { return ElixirTypes.ARROW_OPERATOR; }   {ASSOCIATION_OPERATOR} / {COLON}{SPACE}    { return ElixirTypes.OPERATOR_KEYWORD; }   {ASSOCIATION_OPERATOR}                     { return ElixirTypes.ASSOCIATION_OPERATOR; }   {ALIAS}                                    { return ElixirTypes.ALIAS; }   {AT_OPERATOR} / {COLON}{SPACE}             { return ElixirTypes.OPERATOR_KEYWORD; }   {AT_OPERATOR}                              { return ElixirTypes.AT_OPERATOR; }   {BIT_STRING_OPERATOR} / {COLON}{SPACE}     { return ElixirTypes.OPERATOR_KEYWORD; }   {BIT_STRING_OPERATOR}                      { return ElixirTypes.BIT_STRING_OPERATOR; }   {CAPTURE_OPERATOR} / {COLON}{SPACE}        { return ElixirTypes.OPERATOR_KEYWORD; }   {CAPTURE_OPERATOR}                         { return ElixirTypes.CAPTURE_OPERATOR; }   {EOL}                                      { return ElixirTypes.EOL; }   {ESCAPED_CONTROL_EOL}|{WHITE_SPACE}+       { return TokenType.WHITE_SPACE; }   {CHAR_TOKEN}                               { return ElixirTypes.CHAR_TOKEN; }   /* So that that atom of comparison operator consumes all 3 ':' instead of {TYPE_OPERATOR} consuming '::'      and ':' being leftover */   {COLON} / {TYPE_OPERATOR}                  { pushAndBegin(ATOM_START);                                                return ElixirTypes.COLON; }   {COLON} / {SPACE}                          { return ElixirTypes.COLON; }   // Must be after `{COLON} / 
{TYPE_OPERATOR}`, so that 3 ':' are consumed before 1.   {TYPE_OPERATOR}                            { return ElixirTypes.TYPE_OPERATOR; }   // Must be after {TYPE_OPERATOR}, so that 1 ':' is consumed after 2   {COLON}                                    { pushAndBegin(ATOM_START);                                                return ElixirTypes.COLON; }   {COMMENT}                                  { return ElixirTypes.COMMENT; }   {COMPARISON_OPERATOR} / {COLON}{SPACE}     { return ElixirTypes.OPERATOR_KEYWORD; }   {COMPARISON_OPERATOR}                      { return ElixirTypes.COMPARISON_OPERATOR; }   {DUAL_OPERATOR} / {COLON}{SPACE}           { return ElixirTypes.OPERATOR_KEYWORD; }   {DUAL_OPERATOR}                            { return ElixirTypes.DUAL_OPERATOR; }   {DECIMAL_FLOAT}                            { return ElixirTypes.NUMBER; }   {HAT_OPERATOR} / {COLON}{SPACE}            { return ElixirTypes.OPERATOR_KEYWORD; }   {HAT_OPERATOR}                             { return ElixirTypes.HAT_OPERATOR; }   {OR_OPERATOR} / {COLON}{SPACE}             { return ElixirTypes.OPERATOR_KEYWORD; }   // Must be before {IDENTIFIER} as "or" would be parsed as an identifier since it's a lowercase alphanumeric.   {OR_OPERATOR}                              { return ElixirTypes.OR_OPERATOR; }   {UNARY_OPERATOR} / {COLON}{SPACE}          { return ElixirTypes.OPERATOR_KEYWORD; }   // Must be before {IDENTIFIER} as "not" would be parsed as an identifier since it's a lowercase alphanumeric.   {UNARY_OPERATOR}                           { return ElixirTypes.UNARY_OPERATOR; }   // Must be before {IDENTIFIER} as "when" would be parsed as an identifier since it's a lowercase alphanumeric.   
{WHEN_OPERATOR}                            { return ElixirTypes.WHEN_OPERATOR; }   {IDENTIFIER}                               { return ElixirTypes.IDENTIFIER; }   {INTEGER}                                  { return ElixirTypes.NUMBER; }   {IN_MATCH_OPERATOR} / {COLON}{SPACE}       { return ElixirTypes.OPERATOR_KEYWORD; }   {IN_MATCH_OPERATOR}                        { return ElixirTypes.IN_MATCH_OPERATOR; }   {MAP_OPERATOR} / {COLON}{SPACE}            { return ElixirTypes.OPERATOR_KEYWORD; }   {MAP_OPERATOR}                             { return ElixirTypes.MAP_OPERATOR; }   {MATCH_OPERATOR} / {COLON}{SPACE}          { return ElixirTypes.OPERATOR_KEYWORD; }   {MATCH_OPERATOR}                           { return ElixirTypes.MATCH_OPERATOR; }   {MULTIPLICATION_OPERATOR} / {COLON}{SPACE} { return ElixirTypes.OPERATOR_KEYWORD; }   {MULTIPLICATION_OPERATOR}                  { return ElixirTypes.MULTIPLICATION_OPERATOR; }   {PIPE_OPERATOR} / {COLON}{SPACE}           { return ElixirTypes.OPERATOR_KEYWORD; }   {PIPE_OPERATOR}                            { return ElixirTypes.PIPE_OPERATOR; }   {RELATIONAL_OPERATOR} / {COLON}{SPACE}     { return ElixirTypes.OPERATOR_KEYWORD; }   {RELATIONAL_OPERATOR}                      { return ElixirTypes.RELATIONAL_OPERATOR; }   {STAB_OPERATOR} / {COLON}{SPACE}           { return ElixirTypes.OPERATOR_KEYWORD; }   {STAB_OPERATOR}                            { return ElixirTypes.STAB_OPERATOR; }   {STRUCT_OPERATOR} / {COLON}{SPACE}         { return ElixirTypes.OPERATOR_KEYWORD; }   {STRUCT_OPERATOR}                          { return ElixirTypes.STRUCT_OPERATOR; }   {TILDE}                                    { pushAndBegin(SIGIL);                                                return ElixirTypes.TILDE; }   {TUPLE_OPERATOR} / {COLON}{SPACE}          { return ElixirTypes.OPERATOR_KEYWORD; }   {TUPLE_OPERATOR}                           { return ElixirTypes.TUPLE_OPERATOR; }   {TWO_OPERATOR} / {COLON}{SPACE}            { return 
ElixirTypes.OPERATOR_KEYWORD; }   {TWO_OPERATOR}                             { return ElixirTypes.TWO_OPERATOR; }   {QUOTE_HEREDOC_PROMOTER}                   { startQuote(yytext());                                                return promoterType(); }   /* MUST be after {QUOTE_HEREDOC_PROMOTER} for <BODY, INTERPOLATION> as {QUOTE_HEREDOC_PROMOTER} is prefixed by      {QUOTE_PROMOTER} */   {QUOTE_PROMOTER}                           { startQuote(yytext());                                                return promoterType(); } }


The full code is here: https://github.com/KronicDeth/intellij-elixir/blob/b93ddbbbfaf911303023de23af3e6ad0ac49fe5e/src/org/elixir_lang/Elixir.flex.  So, I have a working way to distinguish operators and operators used as keyword identifiers, just not the way I was trying to do it.

0
Comment actions Permalink

Glad you've made it.

As far as Pratt expression parsing is concerned:

From the algorithm it is clear that ATOM and PREFIX are always parsed first, so their precedence does not really count.
That's why it is tricky to balance ATOM and BINARY parsing priorities.

0

Please sign in to leave a comment.