I am trying to use the Tokenizer engine to parse a script and to recognize thinBasic keywords and equates.
But how does it work?
I tried it this way (and a few other ways already):
Uses "console", "Tokenizer"
' User-defined main-type ids for tokenizer keys. They are passed as the main
' type to Tokenizer_KeyAdd in SetupKeywords and compared against the result of
' Tokenizer_KeyGetMainType in Tokenize.
' Only the first constant carries an explicit value; the remaining ones
' presumably auto-number upward from it (101, 102, 103) - TODO confirm against
' the Begin Const documentation.
Begin Const
%Token_TBKeyword = 100
%Token_TBEquate
%Token_Comment
%Token_Parenthesis
End Const
Function TBMain()
  ' Script entry point: registers the keyword/equate keys with the tokenizer,
  ' tokenizes this very script, then waits for a key press before ending.
  Local sKeywordFile As String

  ' Location of the keyword list shipped with thinAir.
  ' --- adjust this path if thinBasic is not installed on "C:\" !
  sKeywordFile = "c:\thinBasic\thinAir\Syntax\thinBasic\thinBasic_Keywords.ini"

  ' Step 1: load every keyword and register it as a tokenizer key.
  SetupKeywords(sKeywordFile)

  ' Step 2: run the tokenizer over the source of the running script.
  Tokenize(APP_SourceName)

  ' Step 3: keep the console open until the user presses a key.
  PrintL "------------------------- key to end"
  WaitKey
End Function
Sub SetupKeywords(ByVal sFile As String)
  ' Configures the tokenizer and registers one key per line of sFile.
  '
  ' sFile : path of a text file holding one keyword or equate per line
  '         (lines are split on $CRLF).
  '
  ' Entries whose first character is "$" or "%" are registered with main type
  ' %Token_TBEquate, all others with %Token_TBKeyword. The sub type of each
  ' key is its 1-based index in the sorted list.
  Local sWords() As String
  Local nWord    As Long
  Local lLead    As Long

  ' Character classes: let "#", "$" and "%" count as alphabetic characters,
  ' and let ":" act as a line separator.
  Tokenizer_Default_Char("#", %TOKENIZER_DEFAULT_ALPHA)
  Tokenizer_Default_Char("$", %TOKENIZER_DEFAULT_ALPHA)
  Tokenizer_Default_Char("%", %TOKENIZER_DEFAULT_ALPHA)
  Tokenizer_Default_Char(":", %TOKENIZER_DEFAULT_NEWLINE)

  ' Fixed keys: comment marker and the two parentheses. The parenthesis sub
  ' types (+1 / -1) are summed up by Tokenize to check nesting balance.
  Tokenizer_KeyAdd("'", %Token_Comment, 0)
  Tokenizer_KeyAdd("(", %Token_Parenthesis, 1)
  Tokenizer_KeyAdd(")", %Token_Parenthesis, -1)

  ' Read the keyword list, one array element per line.
  Parse File sFile, sWords, $CRLF

  ' A descending sort pushes empty elements to the end of the array ...
  Array Sort sWords, Descend
  ' ... where they are chopped off one at a time.
  While StrPtrLen(StrPtr(sWords(UBound(sWords)))) = 0
    ReDim Preserve sWords(UBound(sWords)-1)
  Wend
  ' Final order used for the key indices.
  Array Sort sWords, Ascend

  ' Register every remaining entry; the first byte decides the main type.
  For nWord = 1 To UBound(sWords)
    lLead = Peek(Byte, StrPtr(sWords(nWord)))
    If lLead = 36 Or lLead = 37 Then
      ' 36 = "$", 37 = "%" -> equate
      Tokenizer_KeyAdd(sWords(nWord), %Token_TBEquate, nWord)
    Else
      Tokenizer_KeyAdd(sWords(nWord), %Token_TBKeyword, nWord)
    End If
  Next
End Sub
Sub Tokenize(sFile As String)
  ' Loads sFile and prints one console line per token found by the tokenizer.
  '
  ' sFile : path of the script to tokenize. Nothing is printed if the file
  '         content is empty.
  '
  ' Built-in token types (quote, delimiter, number, end of line) are reported
  ' directly. Any other token is looked up through the value returned by
  ' Tokenizer_GetNextToken and reported as keyword, equate, parenthesis,
  ' comment or "other". Output pauses for a key press after every 21 tokens.
  ' A warning is printed at the end if "(" and ")" did not balance.
  Local sToken, sCode As String
  Local lPos, lMain, lSub, lParenthesis, lLines As Long
  Local pKey As DWord

  sCode = Load_File(sFile)
  If StrPtrLen(StrPtr(sCode)) = 0 Then Exit Sub

  lPos = 1
  Do
    ' lPos, lMain, sToken and lSub are filled in by the tokenizer on each call.
    pKey = Tokenizer_GetNextToken(sCode, lPos, lMain, sToken, lSub)
    Incr lLines   ' counts tokens processed (one printed line each) for paging

    Select Case lMain
      Case %TOKENIZER_FINISHED
        Exit Do
      Case %TOKENIZER_ERROR
        Exit Do
      Case %TOKENIZER_QUOTE
        PrintL "quoted string : " & sToken
      Case %TOKENIZER_DELIMITER
        PrintL "delimiter : " & sToken
      Case %TOKENIZER_NUMBER
        PrintL "number : " & sToken
      Case %TOKENIZER_EOL
        PrintL "(new line)"
      Case Else
        ' NOTE(review): this branch assumes the value returned by
        ' Tokenizer_GetNextToken is a key handle accepted by
        ' Tokenizer_KeyGetMainType / Tokenizer_KeyGetSubType - confirm against
        ' the Tokenizer module documentation. If the user main type is instead
        ' delivered in lMain, the inner Select should test lMain directly.
        ' NOTE(review): "'", "(" and ")" only reach this branch if the
        ' tokenizer does not already classify them as %TOKENIZER_DELIMITER -
        ' verify their default character class.
        Select Case Tokenizer_KeyGetMainType(pKey)
          Case %Token_TBKeyword
            PrintL "TBKeyword :" & sToken
          Case %Token_TBEquate
            PrintL "TBEquate :" & sToken
          Case %Token_Parenthesis
            ' Sub type is +1 for "(" and -1 for ")" (see SetupKeywords).
            lParenthesis += Tokenizer_KeyGetSubType(pKey)
            ' Fixed: the original read `PrintL = "parenthesis :" ...`; the
            ' stray "=" made the statement malformed.
            PrintL "parenthesis :" & sToken & Str$(lParenthesis)
          Case %Token_Comment
            ' Skip the remainder of the commented line.
            Tokenizer_MoveToEol(sCode, lPos, TRUE)
            PrintL "comment"
          Case Else
            PrintL "other token :" & sToken
        End Select
    End Select

    ' Simple pager so the output does not scroll away.
    If lLines > 20 Then
      PrintL "------------------- key to continue --------------"
      WaitKey
      lLines = 0
    EndIf
  Loop

  If lParenthesis <> 0 Then PrintL "found unbalanced parenthesis"
End Sub
Why does it not recognize my user tokens?
Bookmarks