This grammar was written from a large number of example programs, before the LOLCODE specification was available, so it probably needs updating.

This grammar parses fine with Coco/R for C++.

/*
 * Guy's attempt at writing a parser for LOLCODE (see http://lolcode.com/)
 * It's not entirely LL(1) clean, but then the syntax is written for cats.
 */
$CD

COMPILER LOLCODE

CHARACTERS
  /* Character sets; these are the building blocks used by the TOKENS section below. */
  letter        = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" .
  letternon     = letter - "N" .    /* every letter except upper-case "N"; lower-case "n" is still included */
  digit         = "0123456789" .
  cr            = CHR(13) .    /* carriage return */
  lf            = CHR(10) .    /* line feed */
  tab           = CHR(9) .     /* horizontal tab */
  sp            = CHR(32) .    /* space; used to spell out the gaps inside multi-word keywords */
  noquote       = ANY - '"' - cr - lf - tab .    /* anything that may appear between the quotes of a string token */
  regexpat      = ANY - '/' - cr - lf - tab .    /* anything that may appear between the slashes of a regex token */

TOKENS
  /*
   * identifier is either a single letter other than upper-case "N", or any
   * run of two or more letters/digits that starts with a letter.  The only
   * spelling this shuts out is the lone "N".
   */
  identifier    = ( letternon | letter ( letter | digit ) ) { ( letter | digit ) } .    /* hack to stop "N" because it is the concatenation operator */
  number        = digit { digit } .    /* unsigned run of decimal digits */
  string        = '"' { noquote } '"' .    /* double-quoted, single line, no escape sequences */
  regex         = '/' { regexpat } '/' .    /* slash-delimited, single line, no escape sequences */

  /*
   * Multi-word keywords.  They are declared here as tokens, rather than
   * written as literals inside PRODUCTIONS, because they contain spaces.
   * Every gap is spelt "sp { sp }", i.e. one or more space characters;
   * tabs and line breaks are not accepted inside a keyword.
   */

  CANHAS        = "CAN" sp { sp } "HAS" .
  IHASA         = "I" sp { sp } "HAS" sp { sp } "A" .
  OPENFILE      = "OPEN" sp { sp } "FILE" .
  IMINYR        = "IM" sp { sp } "IN" sp { sp } "YR" .
  IMOUTTAYR     = "IM" sp { sp } "OUTTA" sp { sp } "YR" .
  UPPINYR       = "UPPIN" sp { sp } "YR" .
  YARLY         = "YA" sp { sp } "RLY" .
  ORLY          = "O" sp { sp } "RLY" .
  NOWAI         = "NO" sp { sp } "WAI" .
  WTFIZ         = "WTF" sp { sp } "IZ" .
  AWSUMTHX      = "AWSUM" sp { sp } "THX" .
  ONOES         = "O" sp { sp } "NOES" .
  MAGICZON      = "MAGICZ" sp { sp } "ON" .

/* Tabs and both line-ending characters are skipped between tokens. */
IGNORE tab + cr + lf

/*
 * Two line-comment forms, both running from the opening pair to the next cr.
 * NOTE(review): the terminator is cr (CHR(13)) only, not lf -- confirm how
 * this behaves on input whose lines end in a bare line feed.
 * NOTE(review): because the opener is just "BT", text other than "BTW" that
 * begins with those two letters presumably starts a comment as well -- verify.
 */
COMMENTS FROM "BT" TO cr    /* this should be "BTW" but cocor doesn't allow three chars */
COMMENTS FROM "^^" TO cr

PRODUCTIONS
  /* Start symbol: an optional Block followed by end of input, so an empty file is accepted. */
  LOLCODE       = [ Block ] EOF .

  /* A Block is any number of statements bracketed by "HAI" and "KTHXBYE".  Blocks may nest (see CompoundState). */
  Block         = "HAI" { Statement } "KTHXBYE" .

  /* A statement is either one of the simple keyword forms or a compound (block-structured) form. */
  Statement     = Keyword | CompoundState . 

  /*
   * Simple statements.  The notes on each alternative describe the syntax
   * only; the meanings given as "presumably" are read off the keyword names
   * and are not established by this grammar.
   */
  Keyword       = ( CANHAS identifier "?"    /* CAN HAS <identifier> ? */
                  | IHASA identifier [ "IZ" Expression ]    /* I HAS A <identifier>, optionally followed by IZ <expression>; presumably a declaration with initial value */
                  | "VISIBLE" [ "SAME-LINEZ" ] Expression    /* presumably output; "SAME-LINEZ" is an optional modifier */
                  | "INVISIBLE" Expression
                  | "UP" identifier [ "!!" Number ]    /* optional "!!" <Number> operand; presumably the step for an increment */
                  | "NERF" identifier [ "!!" Number ]    /* same shape as "UP"; presumably the matching decrement */
                  | "KBYE"    /* this should probably be KTHXBYE but I can't get that to work */
                  | OPENFILE Expression
                  | "GIMMEH" identifier
                  | [ "MAH" ] identifier "IZ" Expression    /* assignment; the leading "MAH" is optional */
                  ) .

  /* Compound statements: the four block-structured forms, plus a nested "HAI" ... "KTHXBYE" Block. */
  CompoundState = ( IfStatement
                  | LoopStatement 
                  | CaseStatement
                  | TryStatement
                  | Block
                  ) .

  /* compound statements -- looping and other block structures */

  /* "IZ" <Condition>, then either the one-statement form or the block form of IfElseBlock. */
  IfStatement   = "IZ" Condition IfElseBlock .

  /*
   * Short form: "?" and exactly one statement, with no else branch.
   * Block form: O RLY ? / YA RLY / statements / optional NO WAI + statements / KTHX.
   * Only the block form can carry an else, and it is always closed by "KTHX".
   */
  IfElseBlock   = ( "?" Statement
                  | ORLY "?" 
                    YARLY 
                    { Statement }
                    [ NOWAI    /* else */ 
                      { Statement }
                    ]
                    "KTHX"
                  ) .    /* funnily enough, lolcode avoids a hanging-else */

  /*
   * IM IN YR <identifier> [ , UPPIN YR <identifier> [ !! <Number> ] [ TILL <Number> ] ]
   *   statements
   * IM OUTTA YR <identifier> [ , WIF <Condition> ]
   * NOTE(review): nothing in this grammar requires the identifier after
   * IM OUTTA YR to match the one after IM IN YR.
   */
  LoopStatement = IMINYR identifier [ "," UPPINYR identifier [ "!!" Number ] [ "TILL" Number ] ]
                  { Statement } 
                  IMOUTTAYR identifier [ "," "WIF" Condition ] .

  /*
   * WTF IZ <Expression> ?, then zero or more "OMG" <Literal> arms, an
   * optional "WTFBBQ" arm, and a mandatory closing "KTHX".  Each arm holds
   * zero or more statements; the grammar has no explicit end-of-arm marker.
   */
  CaseStatement = WTFIZ Expression "?"
                  { "OMG" Literal    /* case */
                    { Statement } 
                  }
                  [ "WTFBBQ" { Statement } ]    /* default case */
                  "KTHX" .

  /*
   * "PLZ" <Statement> "?", then AWSUM THX + statements, an optional
   * O NOES + statements, and an optional closing "KTHX".
   * NOTE(review): the leading SYNC is presumably Coco/R's synchronisation
   * point marker for error recovery, not a LOLCODE token -- confirm against
   * the Coco/R manual.
   */
  TryStatement  = SYNC "PLZ" Statement "?" 
                  AWSUMTHX 
                  { Statement }
                  [ ONOES
                    { Statement } 
                  ] 
                  [ "KTHX" ] .    /* this has a serious hanging-else like problem though.  it could be fixed by making the block end required. */


  /* now we define expressions and the like */

  /*
   * A condition is either an expression followed by zero or more
   * comparisons (so a bare Expression is also a Condition), or a
   * MAGICZ ON <Expression> WITH <regex> match.
   */
  Condition     = ( Expression { ( "BIGGER" "THAN" | "SMALLER" "THAN" | "SAMEZ" "AS" ) Expression }    /* >, < & = */
                  | MAGICZON Expression "WITH" regex    /* regular expressions */
                  ) .

  Expression    = Concated { "N" Concated } .    /* allow concatenation.  this is a silly operator choice */

  /*
   * In the two productions below the operator keyword comes first and is
   * followed by both operands, separated by an optional "!!".  That is
   * prefix notation as far as this grammar goes, although the original
   * notes call it reverse polish.  Both operands are full Expressions.
   */
  Concated      = Term | ( ( "ADDZ" | "MINUSEZ" ) Expression [ "!!" ] Expression ) .    /* operator-first + & - */

  Term          = Factor | ( ( "TIMEZ" | "DIVIDEZ" | "POWERZ" ) Expression [ "!!" ] Expression ) .    /* operator-first *, / & power */

  Factor        = [ "NOT" ] Logic .    /* optional negation of a single operand */

  Logic         = identifier | Literal .
                
  Literal       = Number | string 
                | "WORKZ0RZ" .    /* true (note the digit zero in the keyword).  false is spelt with a leading "NOT", see Factor */

  Number        = "NOTHING" | number .    /* handle zero */

END LOLCODE.

On FreeBSD I build this with a Makefile that looks like this:

# Toolchain settings -- change these to suit your system.
CC     = gcc
CFLAGS = -g

# Coco/R locations; these match FreeBSD systems.
COCOR  = /usr/local/bin/cocor -DCRFRAMES=/usr/local/lib/cocor
LIB    = /usr/local/share/cocor/cplus2/cr_lib.a

# Link the three object files with the library named by $(LIB).
lolcode: lolcodp.o lolcods.o lolcod.o
		$(CC) $(CFLAGS) -o $@ lolcod.o lolcods.o lolcodp.o $(LIB)

# Run Coco/R over the grammar, then compile the three C sources in one go.
# This one recipe therefore produces lolcod.o and lolcods.o as well as
# lolcodp.o.  The dependency line at the end tells make so; without it a
# parallel build on a clean tree looks for the other two objects before
# they, or their sources, exist.
lolcodp.o:      lolcode.atg
		$(COCOR) lolcode.atg
		$(CC) $(CFLAGS) -c lolcod.c lolcodp.c lolcods.c
		touch lolcod.o lolcods.o    # keep the siblings no older than lolcodp.o so they are not rebuilt needlessly

lolcod.o lolcods.o: lolcodp.o

# Run ./lolcode -L over every ex/*.lol example.  When an example fails, print
# the file with the same name but a .lst extension; otherwise report success.
# Declared phony so that a file or directory called "test" cannot mask it.
# The leading "-" makes make ignore the exit status of the loop.
.PHONY: test
test:		lolcode
		-for i in ex/*.lol ; do \
			echo "===> $$i" ; \
			if ./lolcode -L "$$i" ; then \
				echo "test code $$i compiled successfully" ; \
			else \
				cat "$${i%lol}lst" ; \
			fi ; \
			echo ; \
		done

# Remove objects, the lolcode binary, the lolcod*.c / lolcod*.h files, listing
# files and editor backups.  Declared phony so that a file called "clean"
# cannot stop it from running.
# NOTE(review): the *.h glob deletes every header in this directory, not only
# the lolcod ones -- keep hand-written headers elsewhere.
.PHONY: clean
clean:
		rm -f *.o lolcode lolcod.c lolcod?.c lolcod.h lolcod?.h *.h
		rm -f *.lst *~ ex/*.lst ex/*~

(Using Coco/R out of my lang/cocor port)

Do with it as you will. YMMV.

implementations/lolcode-dot-atg.txt · Last modified: 2007/06/28 20:11 by ghalse
Recent changes RSS feed Creative Commons License Donate Driven by DokuWiki