This grammar was based on a lot of examples and was written before the specs were available, so it probably needs updating.
This grammar parses fine with Coco/R for C++.
/*
* Guy's attempt at writing a parser for LOLCODE (see http://lolcode.com/)
* It's not entirely LL(1) clean, but then the syntax is written for cats.
*/
$CD
COMPILER LOLCODE
CHARACTERS
/* Character sets referenced by the token definitions in the TOKENS section. */
letter = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" .
/* every letter except upper-case "N", which the grammar uses as an operator */
letternon = letter - "N" .
digit = "0123456789" .
/* named control characters and the blank */
cr = CHR(13) .
lf = CHR(10) .
tab = CHR(9) .
sp = CHR(32) .
/* body of a string literal: anything except the double quote, line ends and tab */
noquote = ANY - '"' - cr - lf - tab .
/* body of a regex literal: anything except the slash, line ends and tab */
regexpat = ANY - '/' - cr - lf - tab .
TOKENS
/*
* An identifier is a letter followed by letters/digits, except that the single
* letter "N" is not one: a one-character identifier must come from letternon,
* and the other alternative needs at least two characters.
*/
identifier = ( letternon | letter ( letter | digit ) ) { ( letter | digit ) } . /* hack to stop "N" because it is the concatenation operator */
/* unsigned run of decimal digits */
number = digit { digit } .
/* double-quoted, single-line string; there is no way to embed a double quote */
string = '"' { noquote } '"' .
/* slash-delimited, single-line pattern; there is no way to embed a slash */
regex = '/' { regexpat } '/' .
/*
* these are literal token keywords, but they have to be in tokens because
* they have spaces in them. Each gap is written as sp { sp }, i.e. one or
* more blanks (tabs and line ends are not accepted inside a keyword).
*/
CANHAS = "CAN" sp { sp } "HAS" .
IHASA = "I" sp { sp } "HAS" sp { sp } "A" .
OPENFILE = "OPEN" sp { sp } "FILE" .
IMINYR = "IM" sp { sp } "IN" sp { sp } "YR" .
IMOUTTAYR = "IM" sp { sp } "OUTTA" sp { sp } "YR" .
UPPINYR = "UPPIN" sp { sp } "YR" .
YARLY = "YA" sp { sp } "RLY" .
ORLY = "O" sp { sp } "RLY" .
NOWAI = "NO" sp { sp } "WAI" .
WTFIZ = "WTF" sp { sp } "IZ" .
AWSUMTHX = "AWSUM" sp { sp } "THX" .
ONOES = "O" sp { sp } "NOES" .
MAGICZON = "MAGICZ" sp { sp } "ON" .
/* Tabs and line ends are skipped between tokens. */
IGNORE tab + cr + lf
/*
* Line comments. They are closed by lf rather than cr so that both Unix
* (LF-only) and DOS (CR LF) sources work: an LF-only file contains no cr, so a
* comment closed by cr would never end there.
* Note that any word beginning with "BT" opens a comment, not just "BTW".
*/
COMMENTS FROM "BT" TO lf /* this should be "BTW" but cocor doesn't allow three chars */
COMMENTS FROM "^^" TO lf
PRODUCTIONS
/* Start symbol: an optional Block followed by end of input, so an empty file is accepted. */
LOLCODE = [ Block ] EOF .
/* A block is zero or more statements bracketed by HAI and KTHXBYE. */
Block = "HAI" { Statement } "KTHXBYE" .
/* A statement is either a simple keyword statement or a compound (structured) one. */
Statement = Keyword | CompoundState .
/*
* Simple statements. The alternatives, in order:
*   CAN HAS <id> ?                    -- presumably a library import
*   I HAS A <id> [ IZ <expr> ]        -- presumably a declaration with optional initial value
*   VISIBLE [ SAME-LINEZ ] <expr>     -- presumably output
*   INVISIBLE <expr>
*   UP <id> [ !! <number> ]           -- presumably increment, optional amount
*   NERF <id> [ !! <number> ]         -- presumably decrement, optional amount
*   KBYE
*   OPEN FILE <expr>
*   GIMMEH <id>                       -- presumably input
*   [ MAH ] <id> IZ <expr>            -- assignment
* KTHXBYE is already the terminator of Block, directly after { Statement },
* which is the likely reason it could not also be used as a statement here.
*/
Keyword = ( CANHAS identifier "?"
| IHASA identifier [ "IZ" Expression ]
| "VISIBLE" [ "SAME-LINEZ" ] Expression
| "INVISIBLE" Expression
| "UP" identifier [ "!!" Number ]
| "NERF" identifier [ "!!" Number ]
| "KBYE" /* this should probably be KTHXBYE but I can't get that to work */
| OPENFILE Expression
| "GIMMEH" identifier
| [ "MAH" ] identifier "IZ" Expression /* assignment */
) .
/*
* Structured statements: if, loop, case, try, or a nested HAI ... KTHXBYE block.
* Because Block is listed here, blocks can be nested inside one another.
*/
CompoundState = ( IfStatement
| LoopStatement
| CaseStatement
| TryStatement
| Block
) .
/* compound statements -- looping and other block structures */
/* Conditional: IZ <condition> followed by either the short or the long form below. */
IfStatement = "IZ" Condition IfElseBlock .
/*
* Short form: "?" and exactly one statement, with no else part.
* Long form:  O RLY ? YA RLY <statements> [ NO WAI <statements> ] KTHX
* The long form always ends in KTHX, so an else part can only belong to the
* if that the KTHX closes.
*/
IfElseBlock = ( "?" Statement
| ORLY "?"
YARLY
{ Statement }
[ NOWAI /* else */
{ Statement }
]
"KTHX"
) . /* funnily enough, lolcode avoids a hanging-else */
/*
* Loop:
*   IM IN YR <label> [ , UPPIN YR <id> [ !! <number> ] [ TILL <number> ] ]
*   <statements>
*   IM OUTTA YR <label> [ , WIF <condition> ]
* The two numbers are presumably the step and the limit of the counter.
* Both labels are plain identifiers; the grammar does not check that they match.
*/
LoopStatement = IMINYR identifier [ "," UPPINYR identifier [ "!!" Number ] [ "TILL" Number ] ]
{ Statement }
IMOUTTAYR identifier [ "," "WIF" Condition ] .
/*
* Switch: WTF IZ <expr> ? followed by zero or more OMG <literal> arms, an
* optional WTFBBQ default arm, and a mandatory closing KTHX.
*/
CaseStatement = WTFIZ Expression "?"
{ "OMG" Literal /* case */
{ Statement }
}
[ "WTFBBQ" { Statement } ] /* default case */
"KTHX" .
/*
* Try: PLZ <statement> ? AWSUM THX <statements> [ O NOES <statements> ] [ KTHX ]
* AWSUM THX presumably introduces the success branch and O NOES the failure branch.
* SYNC is Coco/R's marker for an error-recovery synchronisation point.
*/
TryStatement = SYNC "PLZ" Statement "?"
AWSUMTHX
{ Statement }
[ ONOES
{ Statement }
]
[ "KTHX" ] . /* this has a serious hanging-else like problem though. it could be fixed by making the block end required. */
/* now we define expressions and the like */
/*
* A condition is either a chain of expressions joined by two-word comparison
* operators, or a regular-expression match (MAGICZ ON <expr> WITH /regex/).
* The comparison part may repeat zero times, so a bare Expression is also a
* valid Condition.
*/
Condition = ( Expression { ( "BIGGER" "THAN" | "SMALLER" "THAN" | "SAMEZ" "AS" ) Expression } /* >, < & = */
| MAGICZON Expression "WITH" regex /* regular expressions */
) .
/* An expression is one or more Concated operands joined by the "N" operator. */
Expression = Concated { "N" Concated } . /* allow concatenation. this is a silly operator choice */
/* Either a Term, or an operator keyword followed by its two operands, which may be separated by "!!". */
Concated = Term | ( ( "ADDZ" | "MINUSEZ" ) Expression [ "!!" ] Expression ) . /* prefix (Polish) notation + & -: the operator comes before its operands */
/* Either a Factor, or the same operator-first form as Concated. */
Term = Factor | ( ( "TIMEZ" | "DIVIDEZ" | "POWERZ" ) Expression [ "!!" ] Expression ) . /* more prefix notation */
/* an operand with an optional NOT in front */
Factor = [ "NOT" ] Logic .
/* an operand is a variable name or a literal */
Logic = identifier | Literal .
Literal = Number | string
| "WORKZ0RZ" . /* true. false is presumably written NOT WORKZ0RZ, via the optional "NOT" in Factor */
Number = "NOTHING" | number . /* handle zero */
END LOLCODE.
On FreeBSD I build this with a Makefile that looks like this:
# Toolchain settings -- you will have to change these to suit your system.
# CC and CFLAGS are used both for compiling the generated sources and for linking.
CC = gcc
CFLAGS = -g
# Coco/R locations; these match FreeBSD systems.
# COCOR is the generator command line; CRFRAMES is passed with -D and presumably
# names the directory holding Coco/R's frame files (confirm for your install).
COCOR = /usr/local/bin/cocor -DCRFRAMES=/usr/local/lib/cocor
# LIB is the Coco/R library archive added at the end of the link line.
LIB = /usr/local/share/cocor/cplus2/cr_lib.a
# Objects that make up the lolcode binary, in link order.
OBJS = lolcod.o lolcods.o lolcodp.o

# Link the objects with the Coco/R library.
lolcode: $(OBJS)
	$(CC) $(CFLAGS) -o $@ $(OBJS) $(LIB)

# Run cocor on the grammar, then compile the C sources in one compiler
# invocation. This single recipe produces all three objects; lolcodp.o is the
# target that stands for the whole set.
lolcodp.o: lolcode.atg
	$(COCOR) lolcode.atg
	$(CC) $(CFLAGS) -c lolcod.c lolcodp.c lolcods.c

# lolcod.o and lolcods.o are by-products of the lolcodp.o recipe above.
# Declaring the dependency makes make finish that recipe before it goes looking
# for them, which keeps clean-tree and parallel (make -j) builds working.
lolcod.o lolcods.o: lolcodp.o
# Run the compiler over every example in ex/. When a run fails, print the
# matching listing file (ex/foo.lol -> ex/foo.lst); -L presumably asks lolcode
# to write that listing. The leading '-' tells make to carry on regardless of
# the loop's exit status.
.PHONY: test
test: lolcode
	-for i in ex/*.lol ; do \
	    echo "===> $${i}" ; \
	    ./lolcode -L "$${i}" ; \
	    if [ $$? -ne 0 ] ; then \
	        cat "$${i%lol}lst" ; \
	    else \
	        echo "test code $${i} compiled successfully" ; \
	    fi ; \
	    echo ; \
	done
# Remove objects, the binary, the generated lolcod* sources and headers,
# listing files and editor backups.
# NOTE: the trailing *.h deletes every header in this directory, not only the
# generated ones -- keep hand-written headers elsewhere.
.PHONY: clean
clean:
	rm -f *.o lolcode lolcod.c lolcod?.c lolcod.h lolcod?.h *.h
	rm -f *.lst *~ ex/*.lst ex/*~
(This uses Coco/R from my lang/cocor port.)
Do with it as you will. YMMV.