Package PyFoam :: Package ThirdParty :: Package ply :: Module yacc
[hide private]
[frames] | no frames]

Source Code for Module PyFoam.ThirdParty.ply.yacc

   1  #----------------------------------------------------------------------------- 
   2  # ply: yacc.py 
   3  # 
   4  # Author(s): David M. Beazley (dave@dabeaz.com) 
   5  # 
   6  # Copyright (C) 2001-2007, David M. Beazley 
   7  # 
   8  # This library is free software; you can redistribute it and/or 
   9  # modify it under the terms of the GNU Lesser General Public 
  10  # License as published by the Free Software Foundation; either 
  11  # version 2.1 of the License, or (at your option) any later version. 
  12  #  
  13  # This library is distributed in the hope that it will be useful, 
  14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
  16  # Lesser General Public License for more details. 
  17  #  
  18  # You should have received a copy of the GNU Lesser General Public 
  19  # License along with this library; if not, write to the Free Software 
  20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
  21  #  
  22  # See the file COPYING for a complete copy of the LGPL. 
  23  # 
  24  # 
  25  # This implements an LR parser that is constructed from grammar rules defined 
  26  # as Python functions. The grammer is specified by supplying the BNF inside 
  27  # Python documentation strings.  The inspiration for this technique was borrowed 
  28  # from John Aycock's Spark parsing system.  PLY might be viewed as cross between 
  29  # Spark and the GNU bison utility. 
  30  # 
  31  # The current implementation is only somewhat object-oriented. The 
  32  # LR parser itself is defined in terms of an object (which allows multiple 
  33  # parsers to co-exist).  However, most of the variables used during table 
  34  # construction are defined in terms of global variables.  Users shouldn't 
  35  # notice unless they are trying to define multiple parsers at the same 
  36  # time using threads (in which case they should have their head examined). 
  37  # 
  38  # This implementation supports both SLR and LALR(1) parsing.  LALR(1) 
  39  # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu), 
  40  # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles, 
  41  # Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced 
  42  # by the more efficient DeRemer and Pennello algorithm. 
  43  # 
  44  # :::::::: WARNING ::::::: 
  45  # 
  46  # Construction of LR parsing tables is fairly complicated and expensive. 
  47  # To make this module run fast, a *LOT* of work has been put into 
  48  # optimization---often at the expensive of readability and what might 
  49  # consider to be good Python "coding style."   Modify the code at your 
  50  # own risk! 
  51  # ---------------------------------------------------------------------------- 
  52   
  53  __version__ = "2.3" 
  54   
  55  #----------------------------------------------------------------------------- 
  56  #                     === User configurable parameters === 
  57  # 
  58  # Change these to modify the default behavior of yacc (if you wish) 
  59  #----------------------------------------------------------------------------- 
  60   
  61  yaccdebug   = 1                # Debugging mode.  If set, yacc generates a 
  62                                 # a 'parser.out' file in the current directory 
  63   
  64  debug_file  = 'parser.out'     # Default name of the debugging file 
  65  tab_module  = 'parsetab'       # Default name of the table module 
  66  default_lr  = 'LALR'           # Default LR table generation method 
  67   
  68  error_count = 3                # Number of symbols that must be shifted to leave recovery mode 
  69   
  70  import re, types, sys, cStringIO, md5, os.path 
  71   
  72  # Exception raised for yacc-related errors 
73 -class YaccError(Exception): pass
74 75 # Available instance types. This is used when parsers are defined by a class. 76 # it's a little funky because I want to preserve backwards compatibility 77 # with Python 2.0 where types.ObjectType is undefined. 78 79 try: 80 _INSTANCETYPE = (types.InstanceType, types.ObjectType) 81 except AttributeError: 82 _INSTANCETYPE = types.InstanceType
83 - class object: pass # Note: needed if no new-style classes present
84 85 #----------------------------------------------------------------------------- 86 # === LR Parsing Engine === 87 # 88 # The following classes are used for the LR parser itself. These are not 89 # used during table construction and are independent of the actual LR 90 # table generation algorithm 91 #----------------------------------------------------------------------------- 92 93 # This class is used to hold non-terminal grammar symbols during parsing. 94 # It normally has the following attributes set: 95 # .type = Grammar symbol type 96 # .value = Symbol value 97 # .lineno = Starting line number 98 # .endlineno = Ending line number (optional, set automatically) 99 # .lexpos = Starting lex position 100 # .endlexpos = Ending lex position (optional, set automatically) 101
102 -class YaccSymbol(object):
103 - def __str__(self): return self.type
104 - def __repr__(self): return str(self)
105 106 # This class is a wrapper around the objects actually passed to each 107 # grammar rule. Index lookup and assignment actually assign the 108 # .value attribute of the underlying YaccSymbol object. 109 # The lineno() method returns the line number of a given 110 # item (or 0 if not defined). The linespan() method returns 111 # a tuple of (startline,endline) representing the range of lines 112 # for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) 113 # representing the range of positional information for a symbol. 114
115 -class YaccProduction:
116 - def __init__(self,s,stack=None):
117 self.slice = s 118 self.pbstack = [] 119 self.stack = stack
120 - def __getitem__(self,n):
121 if n >= 0: return self.slice[n].value 122 else: return self.stack[n].value
123
124 - def __setitem__(self,n,v):
125 self.slice[n].value = v
126
127 - def __getslice__(self,i,j):
128 return [s.value for s in self.slice[i:j]]
129
130 - def __len__(self):
131 return len(self.slice)
132
133 - def lineno(self,n):
134 return getattr(self.slice[n],"lineno",0)
135
136 - def linespan(self,n):
137 startline = getattr(self.slice[n],"lineno",0) 138 endline = getattr(self.slice[n],"endlineno",startline) 139 return startline,endline
140
141 - def lexpos(self,n):
142 return getattr(self.slice[n],"lexpos",0)
143
144 - def lexspan(self,n):
145 startpos = getattr(self.slice[n],"lexpos",0) 146 endpos = getattr(self.slice[n],"endlexpos",startpos) 147 return startpos,endpos
148
149 - def pushback(self,n):
150 if n <= 0: 151 raise ValueError, "Expected a positive value" 152 if n > (len(self.slice)-1): 153 raise ValueError, "Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1) 154 for i in range(0,n): 155 self.pbstack.append(self.slice[-i-1])
156 157 # The LR Parsing engine. This is defined as a class so that multiple parsers 158 # can exist in the same process. A user never instantiates this directly. 159 # Instead, the global yacc() function should be used to create a suitable Parser 160 # object. 161
162 -class Parser:
163 - def __init__(self,magic=None):
164 165 # This is a hack to keep users from trying to instantiate a Parser 166 # object directly. 167 168 if magic != "xyzzy": 169 raise YaccError, "Can't instantiate Parser. Use yacc() instead." 170 171 # Reset internal state 172 self.productions = None # List of productions 173 self.errorfunc = None # Error handling function 174 self.action = { } # LR Action table 175 self.goto = { } # LR goto table 176 self.require = { } # Attribute require table 177 self.method = "Unknown LR" # Table construction method used
178
179 - def errok(self):
180 self.errorok = 1
181
182 - def restart(self):
183 del self.statestack[:] 184 del self.symstack[:] 185 sym = YaccSymbol() 186 sym.type = '$end' 187 self.symstack.append(sym) 188 self.statestack.append(0)
189
190 - def parse(self,input=None,lexer=None,debug=0,tracking=0):
191 lookahead = None # Current lookahead symbol 192 lookaheadstack = [ ] # Stack of lookahead symbols 193 actions = self.action # Local reference to action table 194 goto = self.goto # Local reference to goto table 195 prod = self.productions # Local reference to production list 196 pslice = YaccProduction(None) # Production object passed to grammar rules 197 errorcount = 0 # Used during error recovery 198 199 # If no lexer was given, we will try to use the lex module 200 if not lexer: 201 import lex 202 lexer = lex.lexer 203 204 pslice.lexer = lexer 205 pslice.parser = self 206 207 # If input was supplied, pass to lexer 208 if input: 209 lexer.input(input) 210 211 # Tokenize function 212 get_token = lexer.token 213 214 statestack = [ ] # Stack of parsing states 215 self.statestack = statestack 216 symstack = [ ] # Stack of grammar symbols 217 self.symstack = symstack 218 219 pslice.stack = symstack # Put in the production 220 errtoken = None # Err token 221 222 # The start state is assumed to be (0,$end) 223 224 statestack.append(0) 225 sym = YaccSymbol() 226 sym.type = '$end' 227 symstack.append(sym) 228 state = 0 229 while 1: 230 # Get the next symbol on the input. If a lookahead symbol 231 # is already set, we just use that. Otherwise, we'll pull 232 # the next token off of the lookaheadstack or from the lexer 233 if debug > 1: 234 print 'state', state 235 if not lookahead: 236 if not lookaheadstack: 237 lookahead = get_token() # Get the next token 238 else: 239 lookahead = lookaheadstack.pop() 240 if not lookahead: 241 lookahead = YaccSymbol() 242 lookahead.type = '$end' 243 if debug: 244 errorlead = ("%s . 
%s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip() 245 246 # Check the action table 247 ltype = lookahead.type 248 t = actions[state].get(ltype) 249 250 if debug > 1: 251 print 'action', t 252 if t is not None: 253 if t > 0: 254 # shift a symbol on the stack 255 if ltype == '$end': 256 # Error, end of input 257 sys.stderr.write("yacc: Parse error. EOF\n") 258 return 259 statestack.append(t) 260 state = t 261 if debug > 1: 262 sys.stderr.write("%-60s shift state %s\n" % (errorlead, t)) 263 symstack.append(lookahead) 264 lookahead = None 265 266 # Decrease error count on successful shift 267 if errorcount: errorcount -=1 268 continue 269 270 if t < 0: 271 # reduce a symbol on the stack, emit a production 272 p = prod[-t] 273 pname = p.name 274 plen = p.len 275 276 # Get production function 277 sym = YaccSymbol() 278 sym.type = pname # Production name 279 sym.value = None 280 if debug > 1: 281 sys.stderr.write("%-60s reduce %d\n" % (errorlead, -t)) 282 283 if plen: 284 targ = symstack[-plen-1:] 285 targ[0] = sym 286 if tracking: 287 t1 = targ[1] 288 sym.lineno = t1.lineno 289 sym.lexpos = t1.lexpos 290 t1 = targ[-1] 291 sym.endlineno = getattr(t1,"endlineno",t1.lineno) 292 sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) 293 del symstack[-plen:] 294 del statestack[-plen:] 295 else: 296 if tracking: 297 sym.lineno = lexer.lineno 298 sym.lexpos = lexer.lexpos 299 targ = [ sym ] 300 pslice.slice = targ 301 302 # Call the grammar rule with our special slice object 303 p.func(pslice) 304 305 # If there was a pushback, put that on the stack 306 if pslice.pbstack: 307 lookaheadstack.append(lookahead) 308 for _t in pslice.pbstack: 309 lookaheadstack.append(_t) 310 lookahead = None 311 pslice.pbstack = [] 312 313 symstack.append(sym) 314 state = goto[statestack[-1]][pname] 315 statestack.append(state) 316 continue 317 318 if t == 0: 319 n = symstack[-1] 320 return getattr(n,"value",None) 321 322 if t == None: 323 if debug: 324 
sys.stderr.write(errorlead + "\n") 325 # We have some kind of parsing error here. To handle 326 # this, we are going to push the current token onto 327 # the tokenstack and replace it with an 'error' token. 328 # If there are any synchronization rules, they may 329 # catch it. 330 # 331 # In addition to pushing the error token, we call call 332 # the user defined p_error() function if this is the 333 # first syntax error. This function is only called if 334 # errorcount == 0. 335 if errorcount == 0 or self.errorok: 336 errorcount = error_count 337 self.errorok = 0 338 errtoken = lookahead 339 if errtoken.type == '$end': 340 errtoken = None # End of file! 341 if self.errorfunc: 342 global errok,token,restart 343 errok = self.errok # Set some special functions available in error recovery 344 token = get_token 345 restart = self.restart 346 tok = self.errorfunc(errtoken) 347 del errok, token, restart # Delete special functions 348 349 if self.errorok: 350 # User must have done some kind of panic 351 # mode recovery on their own. The 352 # returned token is the next lookahead 353 lookahead = tok 354 errtoken = None 355 continue 356 else: 357 if errtoken: 358 if hasattr(errtoken,"lineno"): lineno = lookahead.lineno 359 else: lineno = 0 360 if lineno: 361 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) 362 else: 363 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) 364 else: 365 sys.stderr.write("yacc: Parse error in input. EOF\n") 366 return 367 368 else: 369 errorcount = error_count 370 371 # case 1: the statestack only has 1 entry on it. If we're in this state, the 372 # entire parse has been rolled back and we're completely hosed. The token is 373 # discarded and we just keep going. 
374 375 if len(statestack) <= 1 and lookahead.type != '$end': 376 lookahead = None 377 errtoken = None 378 # Nuke the pushback stack 379 del lookaheadstack[:] 380 continue 381 382 # case 2: the statestack has a couple of entries on it, but we're 383 # at the end of the file. nuke the top entry and generate an error token 384 385 # Start nuking entries on the stack 386 if lookahead.type == '$end': 387 # Whoa. We're really hosed here. Bail out 388 return 389 390 if lookahead.type != 'error': 391 sym = symstack[-1] 392 if sym.type == 'error': 393 # Hmmm. Error is on top of stack, we'll just nuke input 394 # symbol and continue 395 lookahead = None 396 continue 397 t = YaccSymbol() 398 t.type = 'error' 399 if hasattr(lookahead,"lineno"): 400 t.lineno = lookahead.lineno 401 t.value = lookahead 402 lookaheadstack.append(lookahead) 403 lookahead = t 404 else: 405 symstack.pop() 406 statestack.pop() 407 408 continue 409 410 # Call an error function here 411 raise RuntimeError, "yacc: internal parser error!!!\n"
412 413 # ----------------------------------------------------------------------------- 414 # === Parser Construction === 415 # 416 # The following functions and variables are used to implement the yacc() function 417 # itself. This is pretty hairy stuff involving lots of error checking, 418 # construction of LR items, kernels, and so forth. Although a lot of 419 # this work is done using global variables, the resulting Parser object 420 # is completely self contained--meaning that it is safe to repeatedly 421 # call yacc() with different grammars in the same application. 422 # ----------------------------------------------------------------------------- 423 424 # ----------------------------------------------------------------------------- 425 # validate_file() 426 # 427 # This function checks to see if there are duplicated p_rulename() functions 428 # in the parser module file. Without this function, it is really easy for 429 # users to make mistakes by cutting and pasting code fragments (and it's a real 430 # bugger to try and figure out why the resulting parser doesn't work). Therefore, 431 # we just do a little regular expression pattern matching of def statements 432 # to try and detect duplicates. 433 # ----------------------------------------------------------------------------- 434
435 -def validate_file(filename):
436 base,ext = os.path.splitext(filename) 437 if ext != '.py': return 1 # No idea. Assume it's okay. 438 439 try: 440 f = open(filename) 441 lines = f.readlines() 442 f.close() 443 except IOError: 444 return 1 # Oh well 445 446 # Match def p_funcname( 447 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') 448 counthash = { } 449 linen = 1 450 noerror = 1 451 for l in lines: 452 m = fre.match(l) 453 if m: 454 name = m.group(1) 455 prev = counthash.get(name) 456 if not prev: 457 counthash[name] = linen 458 else: 459 sys.stderr.write("%s:%d: Function %s redefined. Previously defined on line %d\n" % (filename,linen,name,prev)) 460 noerror = 0 461 linen += 1 462 return noerror
463 464 # This function looks for functions that might be grammar rules, but which don't have the proper p_suffix.
465 -def validate_dict(d):
466 for n,v in d.items(): 467 if n[0:2] == 'p_' and type(v) in (types.FunctionType, types.MethodType): continue 468 if n[0:2] == 't_': continue 469 470 if n[0:2] == 'p_': 471 sys.stderr.write("yacc: Warning. '%s' not defined as a function\n" % n) 472 if 1 and isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1: 473 try: 474 doc = v.__doc__.split(" ") 475 if doc[1] == ':': 476 sys.stderr.write("%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix.\n" % (v.func_code.co_filename, v.func_code.co_firstlineno,n)) 477 except StandardError: 478 pass
479 480 # ----------------------------------------------------------------------------- 481 # === GRAMMAR FUNCTIONS === 482 # 483 # The following global variables and functions are used to store, manipulate, 484 # and verify the grammar rules specified by the user. 485 # ----------------------------------------------------------------------------- 486 487 # Initialize all of the global variables used during grammar construction
488 -def initialize_vars():
489 global Productions, Prodnames, Prodmap, Terminals 490 global Nonterminals, First, Follow, Precedence, LRitems 491 global Errorfunc, Signature, Requires 492 493 Productions = [None] # A list of all of the productions. The first 494 # entry is always reserved for the purpose of 495 # building an augmented grammar 496 497 Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all 498 # productions of that nonterminal. 499 500 Prodmap = { } # A dictionary that is only used to detect duplicate 501 # productions. 502 503 Terminals = { } # A dictionary mapping the names of terminal symbols to a 504 # list of the rules where they are used. 505 506 Nonterminals = { } # A dictionary mapping names of nonterminals to a list 507 # of rule numbers where they are used. 508 509 First = { } # A dictionary of precomputed FIRST(x) symbols 510 511 Follow = { } # A dictionary of precomputed FOLLOW(x) symbols 512 513 Precedence = { } # Precedence rules for each terminal. Contains tuples of the 514 # form ('right',level) or ('nonassoc', level) or ('left',level) 515 516 LRitems = [ ] # A list of all LR items for the grammar. These are the 517 # productions with the "dot" like E -> E . PLUS E 518 519 Errorfunc = None # User defined error handler 520 521 Signature = md5.new() # Digital signature of the grammar rules, precedence 522 # and other information. Used to determined when a 523 # parsing table needs to be regenerated. 524 525 Requires = { } # Requires list 526 527 # File objects used when creating the parser.out debugging file 528 global _vf, _vfc 529 _vf = cStringIO.StringIO() 530 _vfc = cStringIO.StringIO()
531 532 # ----------------------------------------------------------------------------- 533 # class Production: 534 # 535 # This class stores the raw information about a single production or grammar rule. 536 # It has a few required attributes: 537 # 538 # name - Name of the production (nonterminal) 539 # prod - A list of symbols making up its production 540 # number - Production number. 541 # 542 # In addition, a few additional attributes are used to help with debugging or 543 # optimization of table generation. 544 # 545 # file - File where production action is defined. 546 # lineno - Line number where action is defined 547 # func - Action function 548 # prec - Precedence level 549 # lr_next - Next LR item. Example, if we are ' E -> E . PLUS E' 550 # then lr_next refers to 'E -> E PLUS . E' 551 # lr_index - LR item index (location of the ".") in the prod list. 552 # lookaheads - LALR lookahead symbols for this item 553 # len - Length of the production (number of symbols on right hand side) 554 # ----------------------------------------------------------------------------- 555
556 -class Production:
557 - def __init__(self,**kw):
558 for k,v in kw.items(): 559 setattr(self,k,v) 560 self.lr_index = -1 561 self.lr0_added = 0 # Flag indicating whether or not added to LR0 closure 562 self.lr1_added = 0 # Flag indicating whether or not added to LR1 563 self.usyms = [ ] 564 self.lookaheads = { } 565 self.lk_added = { } 566 self.setnumbers = [ ]
567
568 - def __str__(self):
569 if self.prod: 570 s = "%s -> %s" % (self.name," ".join(self.prod)) 571 else: 572 s = "%s -> <empty>" % self.name 573 return s
574
575 - def __repr__(self):
576 return str(self)
577 578 # Compute lr_items from the production
579 - def lr_item(self,n):
580 if n > len(self.prod): return None 581 p = Production() 582 p.name = self.name 583 p.prod = list(self.prod) 584 p.number = self.number 585 p.lr_index = n 586 p.lookaheads = { } 587 p.setnumbers = self.setnumbers 588 p.prod.insert(n,".") 589 p.prod = tuple(p.prod) 590 p.len = len(p.prod) 591 p.usyms = self.usyms 592 593 # Precompute list of productions immediately following 594 try: 595 p.lrafter = Prodnames[p.prod[n+1]] 596 except (IndexError,KeyError),e: 597 p.lrafter = [] 598 try: 599 p.lrbefore = p.prod[n-1] 600 except IndexError: 601 p.lrbefore = None 602 603 return p
604
605 -class MiniProduction:
606 pass
607 608 # regex matching identifiers 609 _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') 610 611 # ----------------------------------------------------------------------------- 612 # add_production() 613 # 614 # Given an action function, this function assembles a production rule. 615 # The production rule is assumed to be found in the function's docstring. 616 # This rule has the general syntax: 617 # 618 # name1 ::= production1 619 # | production2 620 # | production3 621 # ... 622 # | productionn 623 # name2 ::= production1 624 # | production2 625 # ... 626 # ----------------------------------------------------------------------------- 627
628 -def add_production(f,file,line,prodname,syms):
629 630 if Terminals.has_key(prodname): 631 sys.stderr.write("%s:%d: Illegal rule name '%s'. Already defined as a token.\n" % (file,line,prodname)) 632 return -1 633 if prodname == 'error': 634 sys.stderr.write("%s:%d: Illegal rule name '%s'. error is a reserved word.\n" % (file,line,prodname)) 635 return -1 636 637 if not _is_identifier.match(prodname): 638 sys.stderr.write("%s:%d: Illegal rule name '%s'\n" % (file,line,prodname)) 639 return -1 640 641 for x in range(len(syms)): 642 s = syms[x] 643 if s[0] in "'\"": 644 try: 645 c = eval(s) 646 if (len(c) > 1): 647 sys.stderr.write("%s:%d: Literal token %s in rule '%s' may only be a single character\n" % (file,line,s, prodname)) 648 return -1 649 if not Terminals.has_key(c): 650 Terminals[c] = [] 651 syms[x] = c 652 continue 653 except SyntaxError: 654 pass 655 if not _is_identifier.match(s) and s != '%prec': 656 sys.stderr.write("%s:%d: Illegal name '%s' in rule '%s'\n" % (file,line,s, prodname)) 657 return -1 658 659 # See if the rule is already in the rulemap 660 map = "%s -> %s" % (prodname,syms) 661 if Prodmap.has_key(map): 662 m = Prodmap[map] 663 sys.stderr.write("%s:%d: Duplicate rule %s.\n" % (file,line, m)) 664 sys.stderr.write("%s:%d: Previous definition at %s:%d\n" % (file,line, m.file, m.line)) 665 return -1 666 667 p = Production() 668 p.name = prodname 669 p.prod = syms 670 p.file = file 671 p.line = line 672 p.func = f 673 p.number = len(Productions) 674 675 676 Productions.append(p) 677 Prodmap[map] = p 678 if not Nonterminals.has_key(prodname): 679 Nonterminals[prodname] = [ ] 680 681 # Add all terminals to Terminals 682 i = 0 683 while i < len(p.prod): 684 t = p.prod[i] 685 if t == '%prec': 686 try: 687 precname = p.prod[i+1] 688 except IndexError: 689 sys.stderr.write("%s:%d: Syntax error. 
Nothing follows %%prec.\n" % (p.file,p.line)) 690 return -1 691 692 prec = Precedence.get(precname,None) 693 if not prec: 694 sys.stderr.write("%s:%d: Nothing known about the precedence of '%s'\n" % (p.file,p.line,precname)) 695 return -1 696 else: 697 p.prec = prec 698 del p.prod[i] 699 del p.prod[i] 700 continue 701 702 if Terminals.has_key(t): 703 Terminals[t].append(p.number) 704 # Is a terminal. We'll assign a precedence to p based on this 705 if not hasattr(p,"prec"): 706 p.prec = Precedence.get(t,('right',0)) 707 else: 708 if not Nonterminals.has_key(t): 709 Nonterminals[t] = [ ] 710 Nonterminals[t].append(p.number) 711 i += 1 712 713 if not hasattr(p,"prec"): 714 p.prec = ('right',0) 715 716 # Set final length of productions 717 p.len = len(p.prod) 718 p.prod = tuple(p.prod) 719 720 # Calculate unique syms in the production 721 p.usyms = [ ] 722 for s in p.prod: 723 if s not in p.usyms: 724 p.usyms.append(s) 725 726 # Add to the global productions list 727 try: 728 Prodnames[p.name].append(p) 729 except KeyError: 730 Prodnames[p.name] = [ p ] 731 return 0
732 733 # Given a raw rule function, this function rips out its doc string 734 # and adds rules to the grammar 735
736 -def add_function(f):
737 line = f.func_code.co_firstlineno 738 file = f.func_code.co_filename 739 error = 0 740 741 if isinstance(f,types.MethodType): 742 reqdargs = 2 743 else: 744 reqdargs = 1 745 746 if f.func_code.co_argcount > reqdargs: 747 sys.stderr.write("%s:%d: Rule '%s' has too many arguments.\n" % (file,line,f.__name__)) 748 return -1 749 750 if f.func_code.co_argcount < reqdargs: 751 sys.stderr.write("%s:%d: Rule '%s' requires an argument.\n" % (file,line,f.__name__)) 752 return -1 753 754 if f.__doc__: 755 # Split the doc string into lines 756 pstrings = f.__doc__.splitlines() 757 lastp = None 758 dline = line 759 for ps in pstrings: 760 dline += 1 761 p = ps.split() 762 if not p: continue 763 try: 764 if p[0] == '|': 765 # This is a continuation of a previous rule 766 if not lastp: 767 sys.stderr.write("%s:%d: Misplaced '|'.\n" % (file,dline)) 768 return -1 769 prodname = lastp 770 if len(p) > 1: 771 syms = p[1:] 772 else: 773 syms = [ ] 774 else: 775 prodname = p[0] 776 lastp = prodname 777 assign = p[1] 778 if len(p) > 2: 779 syms = p[2:] 780 else: 781 syms = [ ] 782 if assign != ':' and assign != '::=': 783 sys.stderr.write("%s:%d: Syntax error. Expected ':'\n" % (file,dline)) 784 return -1 785 786 787 e = add_production(f,file,dline,prodname,syms) 788 error += e 789 790 791 except StandardError: 792 sys.stderr.write("%s:%d: Syntax error in rule '%s'\n" % (file,dline,ps)) 793 error -= 1 794 else: 795 sys.stderr.write("%s:%d: No documentation string specified in function '%s'\n" % (file,line,f.__name__)) 796 return error
797 798 799 # Cycle checking code (Michael Dyck) 800
801 -def compute_reachable():
802 ''' 803 Find each symbol that can be reached from the start symbol. 804 Print a warning for any nonterminals that can't be reached. 805 (Unused terminals have already had their warning.) 806 ''' 807 Reachable = { } 808 for s in Terminals.keys() + Nonterminals.keys(): 809 Reachable[s] = 0 810 811 mark_reachable_from( Productions[0].prod[0], Reachable ) 812 813 for s in Nonterminals.keys(): 814 if not Reachable[s]: 815 sys.stderr.write("yacc: Symbol '%s' is unreachable.\n" % s)
816
817 -def mark_reachable_from(s, Reachable):
818 ''' 819 Mark all symbols that are reachable from symbol s. 820 ''' 821 if Reachable[s]: 822 # We've already reached symbol s. 823 return 824 Reachable[s] = 1 825 for p in Prodnames.get(s,[]): 826 for r in p.prod: 827 mark_reachable_from(r, Reachable)
828 829 # ----------------------------------------------------------------------------- 830 # compute_terminates() 831 # 832 # This function looks at the various parsing rules and tries to detect 833 # infinite recursion cycles (grammar rules where there is no possible way 834 # to derive a string of only terminals). 835 # -----------------------------------------------------------------------------
836 -def compute_terminates():
837 ''' 838 Raise an error for any symbols that don't terminate. 839 ''' 840 Terminates = {} 841 842 # Terminals: 843 for t in Terminals.keys(): 844 Terminates[t] = 1 845 846 Terminates['$end'] = 1 847 848 # Nonterminals: 849 850 # Initialize to false: 851 for n in Nonterminals.keys(): 852 Terminates[n] = 0 853 854 # Then propagate termination until no change: 855 while 1: 856 some_change = 0 857 for (n,pl) in Prodnames.items(): 858 # Nonterminal n terminates iff any of its productions terminates. 859 for p in pl: 860 # Production p terminates iff all of its rhs symbols terminate. 861 for s in p.prod: 862 if not Terminates[s]: 863 # The symbol s does not terminate, 864 # so production p does not terminate. 865 p_terminates = 0 866 break 867 else: 868 # didn't break from the loop, 869 # so every symbol s terminates 870 # so production p terminates. 871 p_terminates = 1 872 873 if p_terminates: 874 # symbol n terminates! 875 if not Terminates[n]: 876 Terminates[n] = 1 877 some_change = 1 878 # Don't need to consider any more productions for this n. 879 break 880 881 if not some_change: 882 break 883 884 some_error = 0 885 for (s,terminates) in Terminates.items(): 886 if not terminates: 887 if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error': 888 # s is used-but-not-defined, and we've already warned of that, 889 # so it would be overkill to say that it's also non-terminating. 890 pass 891 else: 892 sys.stderr.write("yacc: Infinite recursion detected for symbol '%s'.\n" % s) 893 some_error = 1 894 895 return some_error
896 897 # ----------------------------------------------------------------------------- 898 # verify_productions() 899 # 900 # This function examines all of the supplied rules to see if they seem valid. 901 # -----------------------------------------------------------------------------
902 -def verify_productions(cycle_check=1):
903 error = 0 904 for p in Productions: 905 if not p: continue 906 907 for s in p.prod: 908 if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error': 909 sys.stderr.write("%s:%d: Symbol '%s' used, but not defined as a token or a rule.\n" % (p.file,p.line,s)) 910 error = 1 911 continue 912 913 unused_tok = 0 914 # Now verify all of the tokens 915 if yaccdebug: 916 _vf.write("Unused terminals:\n\n") 917 for s,v in Terminals.items(): 918 if s != 'error' and not v: 919 sys.stderr.write("yacc: Warning. Token '%s' defined, but not used.\n" % s) 920 if yaccdebug: _vf.write(" %s\n"% s) 921 unused_tok += 1 922 923 # Print out all of the productions 924 if yaccdebug: 925 _vf.write("\nGrammar\n\n") 926 for i in range(1,len(Productions)): 927 _vf.write("Rule %-5d %s\n" % (i, Productions[i])) 928 929 unused_prod = 0 930 # Verify the use of all productions 931 for s,v in Nonterminals.items(): 932 if not v: 933 p = Prodnames[s][0] 934 # sys.stderr.write("%s:%d: Warning. Rule '%s' defined, but not used.\n" % (p.file,p.line, s)) 935 unused_prod += 1 936 937 938 if unused_tok == 1: 939 sys.stderr.write("yacc: Warning. There is 1 unused token.\n") 940 if unused_tok > 1: 941 sys.stderr.write("yacc: Warning. There are %d unused tokens.\n" % unused_tok) 942 943 # if unused_prod == 1: 944 # sys.stderr.write("yacc: Warning. There is 1 unused rule.\n") 945 # if unused_prod > 1: 946 # sys.stderr.write("yacc: Warning. 
There are %d unused rules.\n" % unused_prod) 947 948 if yaccdebug: 949 _vf.write("\nTerminals, with rules where they appear\n\n") 950 ks = Terminals.keys() 951 ks.sort() 952 for k in ks: 953 _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]]))) 954 _vf.write("\nNonterminals, with rules where they appear\n\n") 955 ks = Nonterminals.keys() 956 ks.sort() 957 for k in ks: 958 _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]]))) 959 960 if (cycle_check): 961 compute_reachable() 962 error += compute_terminates() 963 # error += check_cycles() 964 return error
965 966 # ----------------------------------------------------------------------------- 967 # build_lritems() 968 # 969 # This function walks the list of productions and builds a complete set of the 970 # LR items. The LR items are stored in two ways: First, they are uniquely 971 # numbered and placed in the list _lritems. Second, a linked list of LR items 972 # is built for each production. For example: 973 # 974 # E -> E PLUS E 975 # 976 # Creates the list 977 # 978 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ] 979 # ----------------------------------------------------------------------------- 980
def build_lritems():
    """Walk the list of productions and build the complete set of LR items.

    Each LR item is given a unique number (lr_num, its index in the
    global LRitems list) and appended to LRitems.  In addition, a
    linked list of the items for each production is threaded through
    the lr_next attribute, starting at the production object itself.

    Fix: the original code computed ``lri = p.lr_item(0)`` before the
    loop and immediately overwrote it on the first iteration; that dead
    assignment has been removed.
    """
    for p in Productions:
        lastlri = p
        i = 0
        while 1:
            lri = p.lr_item(i)
            lastlri.lr_next = lri
            if not lri: break
            lri.lr_num = len(LRitems)
            LRitems.append(lri)
            lastlri = lri
            i += 1
994 995 # In order for the rest of the parser generator to work, we need to 996 # guarantee that no more lritems are generated. Therefore, we nuke 997 # the p.lr_item method. (Only used in debugging) 998 # Production.lr_item = None 999 1000 # ----------------------------------------------------------------------------- 1001 # add_precedence() 1002 # 1003 # Given a list of precedence rules, add to the precedence table. 1004 # ----------------------------------------------------------------------------- 1005
def add_precedence(plist):
    """Record a user-supplied list of precedence rules in Precedence.

    Each entry of plist is a tuple (assoc, term1, term2, ...) where
    assoc is 'left', 'right' or 'nonassoc'.  Level numbers increase
    with position in the list (later entries bind tighter).  Returns
    the number of non-fatal errors, or -1 on an invalid associativity.
    """
    level = 0
    errors = 0
    for entry in plist:
        level += 1
        try:
            assoc = entry[0]
            symbols = entry[1:]
            if assoc not in ('left', 'right', 'nonassoc'):
                sys.stderr.write("yacc: Invalid precedence '%s'\n" % assoc)
                return -1
            for sym in symbols:
                if sym in Precedence:
                    sys.stderr.write("yacc: Precedence already specified for terminal '%s'\n" % sym)
                    errors += 1
                    continue
                Precedence[sym] = (assoc, level)
        except:
            # Malformed entry (not a sequence, etc.) -- report and keep going,
            # matching the original best-effort behavior.
            sys.stderr.write("yacc: Invalid precedence table.\n")
            errors += 1

    return errors
1028 1029 # ----------------------------------------------------------------------------- 1030 # augment_grammar() 1031 # 1032 # Compute the augmented grammar. This is just a rule S' -> start where start 1033 # is the starting symbol. 1034 # ----------------------------------------------------------------------------- 1035
def augment_grammar(start=None):
    """Augment the grammar with the rule S' -> start.

    If no explicit start symbol is supplied, the name of the first
    real production (Productions[1]) is used.  The augmented rule is
    stored in slot 0 of Productions and the start symbol's usage list
    in Nonterminals is updated to reference it.
    """
    if not start:
        start = Productions[1].name
    p0 = Production(name="S'", prod=[start], number=0, len=1,
                    prec=('right', 0), func=None)
    p0.usyms = [start]
    Productions[0] = p0
    Nonterminals[start].append(0)
1042 1043 1044 # ------------------------------------------------------------------------- 1045 # first() 1046 # 1047 # Compute the value of FIRST1(beta) where beta is a tuple of symbols. 1048 # 1049 # During execution of compute_first1, the result may be incomplete. 1050 # Afterward (e.g., when called from compute_follow()), it will be complete. 1051 # -------------------------------------------------------------------------
def first(beta):
    """Compute FIRST1(beta) where beta is a sequence of grammar symbols.

    Returns a list of terminals that can begin a string derived from
    beta; '<empty>' is included when every symbol of beta can derive
    the empty string (in particular, for an empty beta).  While
    compute_first1() is still iterating, the global First sets may be
    incomplete, so the result may be partial until that fixpoint
    finishes.
    """
    result = []
    all_derive_empty = 1

    for sym in beta:
        sym_derives_empty = 0
        for t in First[sym]:
            if t == '<empty>':
                sym_derives_empty = 1
            elif t not in result:
                result.append(t)
        if not sym_derives_empty:
            # This symbol always produces something, so later symbols
            # of beta cannot contribute to FIRST(beta).
            all_derive_empty = 0
            break

    if all_derive_empty:
        result.append('<empty>')

    return result
1080 1081 1082 # FOLLOW(x) 1083 # Given a non-terminal. This function computes the set of all symbols 1084 # that might follow it. Dragon book, p. 189. 1085
def compute_follow(start=None):
    """Compute the FOLLOW set of every non-terminal (Dragon book, p. 189).

    FOLLOW(B) is the set of terminals that can appear immediately
    after B in some sentential form.  Results are accumulated in the
    global Follow dictionary.  If no start symbol is given, the name
    of the first real production is used.
    """
    # Add '$end' to the follow list of the start symbol
    for k in Nonterminals.keys():
        Follow[k] = [ ]

    if not start:
        start = Productions[1].name

    Follow[start] = [ '$end' ]

    # Fixpoint iteration: repeat full passes over the productions until
    # a pass adds nothing new.
    while 1:
        didadd = 0
        for p in Productions[1:]:
            # Here is the production set
            for i in range(len(p.prod)):
                B = p.prod[i]
                if Nonterminals.has_key(B):
                    # Okay. We got a non-terminal in a production.
                    # Everything in FIRST(beta) except <empty> follows B
                    # in the rule  p.name -> ... B beta
                    fst = first(p.prod[i+1:])
                    hasempty = 0
                    for f in fst:
                        if f != '<empty>' and f not in Follow[B]:
                            Follow[B].append(f)
                            didadd = 1
                        if f == '<empty>':
                            hasempty = 1
                    if hasempty or i == (len(p.prod)-1):
                        # Add elements of follow(a) to follow(b):
                        # beta can vanish (or is absent), so whatever
                        # follows the left-hand side also follows B.
                        for f in Follow[p.name]:
                            if f not in Follow[B]:
                                Follow[B].append(f)
                                didadd = 1
        if not didadd: break

    # Disabled debugging dump of the computed follow sets.
    if 0 and yaccdebug:
        _vf.write('\nFollow:\n')
        for k in Nonterminals.keys():
            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]])))
1124 1125 # ------------------------------------------------------------------------- 1126 # compute_first1() 1127 # 1128 # Compute the value of FIRST1(X) for all symbols 1129 # -------------------------------------------------------------------------
def compute_first1():
    """Compute FIRST1(X) for every grammar symbol X into the global First."""

    # Terminals: FIRST of a terminal is just the terminal itself.
    for t in Terminals.keys():
        First[t] = [t]

    First['$end'] = ['$end']
    First['#'] = ['#'] # what's this for?

    # Nonterminals:

    # Initialize to the empty set:
    for n in Nonterminals.keys():
        First[n] = []

    # Then propagate symbols until no change: a full pass that adds
    # nothing means the fixpoint has been reached.
    while 1:
        some_change = 0
        for n in Nonterminals.keys():
            for p in Prodnames[n]:
                for f in first(p.prod):
                    if f not in First[n]:
                        First[n].append( f )
                        some_change = 1
        if not some_change:
            break

    # Disabled debugging dump of the computed first sets.
    if 0 and yaccdebug:
        _vf.write('\nFirst:\n')
        for k in Nonterminals.keys():
            _vf.write("%-20s : %s\n" %
                (k, " ".join([str(s) for s in First[k]])))
1162 1163 # ----------------------------------------------------------------------------- 1164 # === SLR Generation === 1165 # 1166 # The following functions are used to construct SLR (Simple LR) parsing tables 1167 # as described on p.221-229 of the dragon book. 1168 # ----------------------------------------------------------------------------- 1169 1170 # Global variables for the LR parsing engine
def lr_init_vars():
    """(Re)initialize the module-level state used by the LR table engine."""
    global _lr_action, _lr_goto, _lr_method
    global _lr_goto_cache, _lr0_cidhash

    _lr_method = "Unknown"   # Which LR method was used to build the tables
    _lr_action = { }         # Action table
    _lr_goto = { }           # Goto table
    _lr_goto_cache = { }     # Cache of previously computed goto sets
    _lr0_cidhash = { }       # Maps id(itemset) -> state number
1180 1181 1182 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. 1183 # prodlist is a list of productions. 1184 1185 _add_count = 0 # Counter used to detect cycles 1186
def lr0_closure(I):
    """Compute the LR(0) closure of the item set I.

    Returns a new list containing every item of I plus, for each item
    whose dot precedes a non-terminal B, the initial items of all of
    B's productions (reached via the precomputed lrafter/lr_next
    links).  A per-call stamp (_add_count) marks items already added,
    which avoids duplicates without a membership test.
    """
    global _add_count

    _add_count += 1
    prodlist = Productions

    # Add everything in I to J
    J = I[:]
    didadd = 1
    while didadd:
        didadd = 0
        # Items appended to J during this scan are picked up either by
        # the same for-loop pass or by another round of the while loop.
        for j in J:
            for x in j.lrafter:
                # Skip items already stamped during this closure call.
                if x.lr0_added == _add_count: continue
                # Add B --> .G to J
                J.append(x.lr_next)
                x.lr0_added = _add_count
                didadd = 1

    return J
1207 1208 # Compute the LR(0) goto function goto(I,X) where I is a set 1209 # of LR(0) items and X is a grammar symbol. This function is written 1210 # in a way that guarantees uniqueness of the generated goto sets 1211 # (i.e. the same goto set will never be returned as two different Python 1212 # objects). With uniqueness, we can later do fast set comparisons using 1213 # id(obj) instead of element-wise comparison. 1214
def lr0_goto(I,x):
    """Compute the LR(0) goto set goto(I, x).

    I is a list of LR(0) items and x is a grammar symbol.  The result
    is memoized so that equal goto sets are always returned as the
    *same* Python object; later code exploits this to compare item
    sets by id() instead of element-wise.
    """
    # First we look for a previously cached entry
    g = _lr_goto_cache.get((id(I),x),None)
    if g: return g

    # Now we generate the goto set in a way that guarantees uniqueness
    # of the result

    # s is a trie keyed on the identity of successive kernel items;
    # walking it canonicalizes the generated goto set.
    s = _lr_goto_cache.get(x,None)
    if not s:
        s = { }
        _lr_goto_cache[x] = s

    gs = [ ]
    for p in I:
        n = p.lr_next
        if n and n.lrbefore == x:
            s1 = s.get(id(n),None)
            if not s1:
                s1 = { }
                s[id(n)] = s1
            gs.append(n)
            s = s1
    # '$end' marks the leaf of the trie where the finished set lives.
    g = s.get('$end',None)
    if not g:
        if gs:
            g = lr0_closure(gs)
            s['$end'] = g
        else:
            s['$end'] = gs
    _lr_goto_cache[(id(I),x)] = g
    return g
1247 1248 _lr0_cidhash = { } 1249 1250 # Compute the LR(0) sets of item function
def lr0_items():
    """Compute the canonical collection of LR(0) item sets.

    Returns the list C of item sets; as a side effect each set's state
    number is recorded in _lr0_cidhash keyed by the set's id().
    """
    # Seed with the closure of the augmented start production.
    C = [lr0_closure([Productions[0].lr_next])]
    for idx in range(len(C)):
        _lr0_cidhash[id(C[idx])] = idx

    # Worklist scan: C grows while we walk it, so index explicitly.
    n = 0
    while n < len(C):
        I = C[n]
        n += 1

        # Gather every symbol that could appear after a dot in this set.
        syms = {}
        for item in I:
            for s in item.usyms:
                syms[s] = None

        for s in syms.keys():
            g = lr0_goto(I, s)
            if not g:
                continue
            if id(g) in _lr0_cidhash:
                continue
            # A brand new state: number it and queue it for scanning.
            _lr0_cidhash[id(g)] = len(C)
            C.append(g)

    return C
1279 1280 # ----------------------------------------------------------------------------- 1281 # ==== LALR(1) Parsing ==== 1282 # 1283 # LALR(1) parsing is almost exactly the same as SLR except that instead of 1284 # relying upon Follow() sets when performing reductions, a more selective 1285 # lookahead set that incorporates the state of the LR(0) machine is utilized. 1286 # Thus, we mainly just have to focus on calculating the lookahead sets. 1287 # 1288 # The method used here is due to DeRemer and Pennelo (1982). 1289 # 1290 # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) 1291 # Lookahead Sets", ACM Transactions on Programming Languages and Systems, 1292 # Vol. 4, No. 4, Oct. 1982, pp. 615-649 1293 # 1294 # Further details can also be found in: 1295 # 1296 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", 1297 # McGraw-Hill Book Company, (1985). 1298 # 1299 # Note: This implementation is a complete replacement of the LALR(1) 1300 # implementation in PLY-1.x releases. That version was based on 1301 # a less efficient algorithm and it had bugs in its implementation. 1302 # ----------------------------------------------------------------------------- 1303 1304 # ----------------------------------------------------------------------------- 1305 # compute_nullable_nonterminals() 1306 # 1307 # Creates a dictionary containing all of the non-terminals that might produce 1308 # an empty production. 1309 # ----------------------------------------------------------------------------- 1310
def compute_nullable_nonterminals():
    """Return a dict whose keys are all non-terminals that can derive empty.

    Iterates to a fixpoint: a rule's left-hand side is nullable if the
    rule is empty, or if every symbol on its right-hand side is itself
    already known to be nullable.
    """
    nullable = {}
    prev_count = -1
    while len(nullable) != prev_count:
        prev_count = len(nullable)
        for p in Productions[1:]:
            if p.len == 0:
                nullable[p.name] = 1
                continue
            rhs_all_nullable = 1
            for sym in p.prod:
                if sym not in nullable:
                    rhs_all_nullable = 0
                    break
            if rhs_all_nullable:
                nullable[p.name] = 1
    return nullable
1326 1327 # ----------------------------------------------------------------------------- 1328 # find_nonterminal_trans(C) 1329 # 1330 # Given a set of LR(0) items, this functions finds all of the non-terminal 1331 # transitions. These are transitions in which a dot appears immediately before 1332 # a non-terminal. Returns a list of tuples of the form (state,N) where state 1333 # is the state number and N is the nonterminal symbol. 1334 # 1335 # The input C is the set of LR(0) items. 1336 # ----------------------------------------------------------------------------- 1337
def find_nonterminal_transitions(C):
    """Find all non-terminal transitions in the LR(0) item sets C.

    A non-terminal transition is a pair (state, N) such that some item
    in that state has its dot immediately before non-terminal N.
    Returns the list of such (state, N) tuples in discovery order,
    without duplicates.

    Fix: removed the original trailing ``state = state + 1``, which
    rebound the for-loop variable to a value that was immediately
    overwritten by the next iteration (dead code).
    """
    trans = []
    for state in range(len(C)):
        for p in C[state]:
            if p.lr_index < p.len - 1:
                t = (state,p.prod[p.lr_index+1])
                if Nonterminals.has_key(t[1]):
                    if t not in trans: trans.append(t)
    return trans
1348 1349 # ----------------------------------------------------------------------------- 1350 # dr_relation() 1351 # 1352 # Computes the DR(p,A) relationships for non-terminal transitions. The input 1353 # is a tuple (state,N) where state is a number and N is a nonterminal symbol. 1354 # 1355 # Returns a list of terminals. 1356 # ----------------------------------------------------------------------------- 1357
def dr_relation(C,trans,nullable):
    """Compute DR(p, A) for the non-terminal transition trans = (state, N).

    Returns the list of terminals that directly follow the transition,
    i.e. terminals a for which goto(state, N) contains an item with
    the dot immediately before a.  '$end' is added for the transition
    out of the start state on the start symbol.

    The 'nullable' parameter is not used here; it is kept so the
    signature matches the other relation functions invoked through
    the same lambda wrappers.

    Fix: removed the unused local ``dr_set = { }``.
    """
    state,N = trans
    terms = []

    g = lr0_goto(C[state],N)
    for p in g:
        if p.lr_index < p.len - 1:
            a = p.prod[p.lr_index+1]
            if Terminals.has_key(a):
                if a not in terms: terms.append(a)

    # This extra bit is to handle the start state
    if state == 0 and N == Productions[0].prod[0]:
        terms.append('$end')

    return terms
1375 1376 # ----------------------------------------------------------------------------- 1377 # reads_relation() 1378 # 1379 # Computes the READS() relation (p,A) READS (t,C). 1380 # ----------------------------------------------------------------------------- 1381
def reads_relation(C, trans, empty):
    """Compute the READS relation for the transition trans = (state, N).

    Returns a list of (successor_state, symbol) pairs, one for every
    nullable symbol (a key of 'empty') that appears directly after the
    dot in some item of goto(state, N).
    """
    state, N = trans
    g = lr0_goto(C[state], N)
    succ = _lr0_cidhash.get(id(g), -1)

    result = []
    for item in g:
        if item.lr_index < item.len - 1:
            sym = item.prod[item.lr_index + 1]
            if sym in empty:
                result.append((succ, sym))

    return result
1396 1397 # ----------------------------------------------------------------------------- 1398 # compute_lookback_includes() 1399 # 1400 # Determines the lookback and includes relations 1401 # 1402 # LOOKBACK: 1403 # 1404 # This relation is determined by running the LR(0) state machine forward. 1405 # For example, starting with a production "N : . A B C", we run it forward 1406 # to obtain "N : A B C ." We then build a relationship between this final 1407 # state and the starting state. These relationships are stored in a dictionary 1408 # lookdict. 1409 # 1410 # INCLUDES: 1411 # 1412 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). 1413 # 1414 # This relation is used to determine non-terminal transitions that occur 1415 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) 1416 # if the following holds: 1417 # 1418 # B -> LAT, where T -> epsilon and p' -L-> p 1419 # 1420 # L is essentially a prefix (which may be empty), T is a suffix that must be 1421 # able to derive an empty string. State p' must lead to state p with the string L. 1422 # 1423 # ----------------------------------------------------------------------------- 1424
def compute_lookback_includes(C,trans,nullable):
    """Determine the LOOKBACK and INCLUDES relations for all transitions.

    C        - list of LR(0) item sets
    trans    - list of non-terminal transitions (state, N)
    nullable - dict of nullable non-terminals

    Returns (lookdict, includedict):
      lookdict maps each (state, N) to a list of (final_state, item)
      lookback pairs, found by running each matching production
      forward through the LR(0) machine.
      includedict maps a transition to the transitions that INCLUDE
      it, i.e. (p,A) INCLUDES (p',B) when B -> L A T with T nullable
      and p' -L-> p.
    """

    lookdict = {}          # Dictionary of lookback relations
    includedict = {}       # Dictionary of include relations

    # Make a dictionary of non-terminal transitions
    dtrans = {}
    for t in trans:
        dtrans[t] = 1

    # Loop over all transitions and compute lookbacks and includes
    for state,N in trans:
        lookb = []
        includes = []
        for p in C[state]:
            if p.name != N: continue

            # Okay, we have a name match.  We now follow the production all the way
            # through the state machine until we get the . on the right hand side

            lr_index = p.lr_index
            j = state
            while lr_index < p.len - 1:
                lr_index = lr_index + 1
                t = p.prod[lr_index]

                # Check to see if this symbol and state are a non-terminal transition
                if dtrans.has_key((j,t)):
                    # Yes.  Okay, there is some chance that this is an includes relation
                    # the only way to know for certain is whether the rest of the
                    # production derives empty

                    li = lr_index + 1
                    while li < p.len:
                        if Terminals.has_key(p.prod[li]): break      # No forget it
                        if not nullable.has_key(p.prod[li]): break
                        li = li + 1
                    else:
                        # Appears to be a relation between (j,t) and (state,N)
                        # (the while-loop ran off the end: the suffix is nullable)
                        includes.append((j,t))

                g = lr0_goto(C[j],t)               # Go to next set
                j = _lr0_cidhash.get(id(g),-1)     # Go to next state

            # When we get here, j is the final state, now we have to locate the production
            for r in C[j]:
                if r.name != p.name: continue
                if r.len != p.len: continue
                i = 0
                # This look is comparing a production ". A B C" with "A B C ."
                while i < r.lr_index:
                    if r.prod[i] != p.prod[i+1]: break
                    i = i + 1
                else:
                    # Full match: r is p advanced to the end.
                    lookb.append((j,r))
        for i in includes:
            if not includedict.has_key(i): includedict[i] = []
            includedict[i].append((state,N))
        lookdict[(state,N)] = lookb

    return lookdict,includedict
1486 1487 # ----------------------------------------------------------------------------- 1488 # digraph() 1489 # traverse() 1490 # 1491 # The following two functions are used to compute set valued functions 1492 # of the form: 1493 # 1494 # F(x) = F'(x) U U{F(y) | x R y} 1495 # 1496 # This is used to compute the values of Read() sets as well as FOLLOW sets 1497 # in LALR(1) generation. 1498 # 1499 # Inputs: X - An input set 1500 # R - A relation 1501 # FP - Set-valued function 1502 # ------------------------------------------------------------------------------ 1503
def digraph(X,R,FP):
    """Compute F(x) = FP(x) U union{ F(y) | x R y } for every x in X.

    Driver for the DeRemer/Pennello digraph algorithm; the actual
    strongly-connected-component walk is performed by traverse().
    Returns the dictionary F mapping each element of X to its set.
    """
    marks = {}
    for node in X:
        marks[node] = 0
    stack = []
    F = {}
    for node in X:
        if marks[node] == 0:
            traverse(node, marks, stack, F, X, R, FP)
    return F
1513
def traverse(x,N,stack,F,X,R,FP):
    """Recursive SCC traversal used by digraph() (DeRemer & Pennello).

    N maps each node to 0 (unvisited), its depth-first stack depth
    (while on the stack), or sys.maxint (finished).  All nodes in the
    same strongly connected component end up sharing the root's F set.
    """
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y,N,stack,F,X,R,FP)
        # Propagate the lowest reachable stack depth and merge F sets.
        N[x] = min(N[x],N[y])
        for a in F.get(y,[]):
            if a not in F[x]: F[x].append(a)
    if N[x] == d:
        # x is the root of an SCC: pop the whole component, marking
        # each member finished and giving it the root's F value.
        N[stack[-1]] = sys.maxint
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = sys.maxint
            F[stack[-1]] = F[x]
            element = stack.pop()
1535 1536 # ----------------------------------------------------------------------------- 1537 # compute_read_sets() 1538 # 1539 # Given a set of LR(0) items, this function computes the read sets. 1540 # 1541 # Inputs: C = Set of LR(0) items 1542 # ntrans = Set of nonterminal transitions 1543 # nullable = Set of empty transitions 1544 # 1545 # Returns a set containing the read sets 1546 # ----------------------------------------------------------------------------- 1547
def compute_read_sets(C, ntrans, nullable):
    """Compute the Read() sets for all non-terminal transitions.

    C        - list of LR(0) item sets
    ntrans   - list of non-terminal transitions
    nullable - dict of nullable non-terminals

    Returns a dict mapping each transition to its read set.
    """
    def FP(x):
        # Direct-read terminals for transition x.
        return dr_relation(C, x, nullable)

    def R(x):
        # Transitions that x READS.
        return reads_relation(C, x, nullable)

    return digraph(ntrans, R, FP)
1553 1554 # ----------------------------------------------------------------------------- 1555 # compute_follow_sets() 1556 # 1557 # Given a set of LR(0) items, a set of non-terminal transitions, a readset, 1558 # and an include set, this function computes the follow sets 1559 # 1560 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} 1561 # 1562 # Inputs: 1563 # ntrans = Set of nonterminal transitions 1564 # readsets = Readset (previously computed) 1565 # inclsets = Include sets (previously computed) 1566 # 1567 # Returns a set containing the follow sets 1568 # ----------------------------------------------------------------------------- 1569
def compute_follow_sets(ntrans,readsets,inclsets):
    """Compute the LALR FOLLOW sets:

        Follow(p,A) = Read(p,A) U union{ Follow(p',B) | (p,A) INCLUDES (p',B) }

    ntrans   - list of non-terminal transitions
    readsets - previously computed read sets
    inclsets - previously computed INCLUDES relation

    Returns a dict mapping each transition to its follow set.
    """
    def FP(x):
        return readsets[x]

    def R(x):
        return inclsets.get(x, [])

    return digraph(ntrans, R, FP)
1575 1576 # ----------------------------------------------------------------------------- 1577 # add_lookaheads() 1578 # 1579 # Attaches the lookahead symbols to grammar rules. 1580 # 1581 # Inputs: lookbacks - Set of lookback relations 1582 # followset - Computed follow set 1583 # 1584 # This function directly attaches the lookaheads to productions contained 1585 # in the lookbacks set 1586 # ----------------------------------------------------------------------------- 1587
def add_lookaheads(lookbacks,followset):
    """Attach computed lookahead symbols directly to grammar productions.

    lookbacks - dict mapping a transition to its lookback pairs
                (state, production)
    followset - dict mapping a transition to its follow set

    For each lookback pair, the transition's follow set is merged into
    the production's per-state lookahead list (p.lookaheads[state]).
    """
    for trans, lb in lookbacks.items():
        f = followset.get(trans, [])
        # Loop over productions in lookback
        for state, p in lb:
            laheads = p.lookaheads.setdefault(state, [])
            for a in f:
                if a not in laheads:
                    laheads.append(a)
1597 1598 # ----------------------------------------------------------------------------- 1599 # add_lalr_lookaheads() 1600 # 1601 # This function does all of the work of adding lookahead information for use 1602 # with LALR parsing 1603 # ----------------------------------------------------------------------------- 1604
def add_lalr_lookaheads(C):
    """Run the complete LALR(1) lookahead computation over the LR(0) sets C.

    Orchestrates the individual steps (nullable set, non-terminal
    transitions, read sets, lookback/includes relations, follow sets)
    and finally attaches the lookaheads to the productions.
    """
    # Several of the relations below need the nullable non-terminals.
    nullable = compute_nullable_nonterminals()

    # All (state, non-terminal) transitions of the LR(0) machine.
    ntrans = find_nonterminal_transitions(C)

    # Read sets, then the lookback and includes relations.
    readsets = compute_read_sets(C, ntrans, nullable)
    lookd, included = compute_lookback_includes(C, ntrans, nullable)

    # LALR FOLLOW sets, finally merged into the productions' lookaheads.
    followsets = compute_follow_sets(ntrans, readsets, included)
    add_lookaheads(lookd, followsets)
1623 1624 # ----------------------------------------------------------------------------- 1625 # lr_parse_table() 1626 # 1627 # This function constructs the parse tables for SLR or LALR 1628 # -----------------------------------------------------------------------------
def lr_parse_table(method):
    """Construct the LR parse tables for the given method ('SLR' or 'LALR').

    Fills the module-level _lr_action and _lr_goto tables, one entry
    per LR(0) state.  Action values: positive = shift to that state,
    negative = reduce by that rule number, 0 = accept, None = error
    (nonassoc).  Shift/reduce and reduce/reduce conflicts are resolved
    with the Precedence table (default: shift) and reported via the
    debug streams.
    """
    global _lr_method
    goto = _lr_goto           # Goto array
    action = _lr_action       # Action array
    actionp = { }             # Action production array (temporary)

    _lr_method = method

    n_srconflict = 0
    n_rrconflict = 0

    if yaccdebug:
        sys.stderr.write("yacc: Generating %s parsing table...\n" % method)
        _vf.write("\n\nParsing method: %s\n\n" % method)

    # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
    # This determines the number of states

    C = lr0_items()

    if method == 'LALR':
        add_lalr_lookaheads(C)


    # Build the parser table, state by state
    st = 0
    for I in C:
        # Loop over each production in I
        actlist = [ ]              # List of actions
        st_action  = { }
        st_actionp = { }
        st_goto    = { }
        if yaccdebug:
            _vf.write("\nstate %d\n\n" % st)
            for p in I:
                _vf.write("    (%d) %s\n" % (p.number, str(p)))
            _vf.write("\n")

        for p in I:
            try:
                if p.len == p.lr_index + 1:
                    if p.name == "S'":
                        # Start symbol. Accept!
                        st_action["$end"] = 0
                        st_actionp["$end"] = p
                    else:
                        # We are at the end of a production.  Reduce!
                        # The lookaheads deciding the reduction come from
                        # the LALR lookahead sets or the SLR Follow sets.
                        if method == 'LALR':
                            laheads = p.lookaheads[st]
                        else:
                            laheads = Follow[p.name]
                        for a in laheads:
                            actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
                            r = st_action.get(a,None)
                            if r is not None:
                                # Whoa. Have a shift/reduce or reduce/reduce conflict
                                if r > 0:
                                    # Need to decide on shift or reduce here
                                    # By default we favor shifting. Need to add
                                    # some precedence rules here.
                                    sprec,slevel = Productions[st_actionp[a].number].prec
                                    rprec,rlevel = Precedence.get(a,('right',0))
                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                        # We really need to reduce here.
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        if not slevel and not rlevel:
                                            _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
                                            _vf.write("  ! shift/reduce conflict for %s resolved as reduce.\n" % a)
                                            n_srconflict += 1
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        # Nonassoc at equal precedence: error entry.
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the shift
                                        if not rlevel:
                                            _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
                                            _vf.write("  ! shift/reduce conflict for %s resolved as shift.\n" % a)
                                            n_srconflict +=1
                                elif r < 0:
                                    # Reduce/reduce conflict.   In this case, we favor the rule
                                    # that was defined first in the grammar file
                                    oldp = Productions[-r]
                                    pp = Productions[p.number]
                                    if oldp.line > pp.line:
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                    # sys.stderr.write("Reduce/reduce conflict in state %d\n" % st)
                                    n_rrconflict += 1
                                    _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, st_actionp[a].number, st_actionp[a]))
                                    _vf.write("  ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,st_actionp[a].number, st_actionp[a]))
                                else:
                                    sys.stderr.write("Unknown conflict in state %d\n" % st)
                            else:
                                st_action[a] = -p.number
                                st_actionp[a] = p
                else:
                    i = p.lr_index
                    a = p.prod[i+1]       # Get symbol right after the "."
                    if Terminals.has_key(a):
                        g = lr0_goto(I,a)
                        j = _lr0_cidhash.get(id(g),-1)
                        if j >= 0:
                            # We are in a shift state
                            actlist.append((a,p,"shift and go to state %d" % j))
                            r = st_action.get(a,None)
                            if r is not None:
                                # Whoa have a shift/reduce or shift/shift conflict
                                if r > 0:
                                    if r != j:
                                        sys.stderr.write("Shift/shift conflict in state %d\n" % st)
                                elif r < 0:
                                    # Do a precedence check.
                                    #   -  if precedence of reduce rule is higher, we reduce.
                                    #   -  if precedence of reduce is same and left assoc, we reduce.
                                    #   -  otherwise we shift
                                    rprec,rlevel = Productions[st_actionp[a].number].prec
                                    sprec,slevel = Precedence.get(a,('right',0))
                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
                                        # We decide to shift here... highest precedence to shift
                                        st_action[a] = j
                                        st_actionp[a] = p
                                        if not rlevel:
                                            n_srconflict += 1
                                            _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
                                            _vf.write("  ! shift/reduce conflict for %s resolved as shift.\n" % a)
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        # Nonassoc at equal precedence: error entry.
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the reduce
                                        if not slevel and not rlevel:
                                            n_srconflict +=1
                                            _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
                                            _vf.write("  ! shift/reduce conflict for %s resolved as reduce.\n" % a)

                                else:
                                    sys.stderr.write("Unknown conflict in state %d\n" % st)
                            else:
                                st_action[a] = j
                                st_actionp[a] = p

            except StandardError,e:
                print sys.exc_info()
                raise YaccError, "Hosed in lr_parse_table"

        # Print the actions associated with each terminal
        if yaccdebug:
            _actprint = { }
            for a,p,m in actlist:
                if st_action.has_key(a):
                    if p is st_actionp[a]:
                        _vf.write("    %-15s %s\n" % (a,m))
                        _actprint[(a,m)] = 1
            _vf.write("\n")
            for a,p,m in actlist:
                if st_action.has_key(a):
                    if p is not st_actionp[a]:
                        if not _actprint.has_key((a,m)):
                            _vf.write("  ! %-15s [ %s ]\n" % (a,m))
                            _actprint[(a,m)] = 1

        # Construct the goto table for this state
        if yaccdebug:
            _vf.write("\n")
        nkeys = { }
        for ii in I:
            for s in ii.usyms:
                if Nonterminals.has_key(s):
                    nkeys[s] = None
        for n in nkeys.keys():
            g = lr0_goto(I,n)
            j = _lr0_cidhash.get(id(g),-1)
            if j >= 0:
                st_goto[n] = j
                if yaccdebug:
                    _vf.write("    %-30s shift and go to state %d\n" % (n,j))

        action[st] = st_action
        actionp[st] = st_actionp
        goto[st] = st_goto

        st += 1

    if yaccdebug:
        if n_srconflict == 1:
            sys.stderr.write("yacc: %d shift/reduce conflict\n" % n_srconflict)
        if n_srconflict > 1:
            sys.stderr.write("yacc: %d shift/reduce conflicts\n" % n_srconflict)
        if n_rrconflict == 1:
            sys.stderr.write("yacc: %d reduce/reduce conflict\n" % n_rrconflict)
        if n_rrconflict > 1:
            sys.stderr.write("yacc: %d reduce/reduce conflicts\n" % n_rrconflict)
1820 1821 # ----------------------------------------------------------------------------- 1822 # ==== LR Utility functions ==== 1823 # ----------------------------------------------------------------------------- 1824 1825 # ----------------------------------------------------------------------------- 1826 # _lr_write_tables() 1827 # 1828 # This function writes the LR parsing tables to a file 1829 # ----------------------------------------------------------------------------- 1830
def lr_write_tables(modulename=tab_module,outputdir=''):
    """Write the generated LR parsing tables to <outputdir>/<modulename>.py.

    The emitted module contains the parsing method, a signature used
    for cache validation, the action and goto tables (name-factored
    into a compact form when 'smaller' is enabled), and the production
    table.  IOError is reported to stderr but not raised.
    """
    filename = os.path.join(outputdir,modulename) + ".py"
    try:
        f = open(filename,"w")

        f.write("""
# %s
# This file is automatically generated. Do not edit.

_lr_method = %s

_lr_signature = %s
""" % (filename, repr(_lr_method), repr(Signature.digest())))

        # Change smaller to 0 to go back to original tables
        smaller = 1

        # Factor out names to try and make smaller
        if smaller:
            # items maps terminal name -> ([state, ...], [action, ...]),
            # which compresses repeated terminal names out of the table.
            items = { }

            for s,nd in _lr_action.items():
                for name,v in nd.items():
                    i = items.get(name)
                    if not i:
                        i = ([],[])
                        items[name] = i
                    i[0].append(s)
                    i[1].append(v)

            f.write("\n_lr_action_items = {")
            for k,v in items.items():
                f.write("%r:([" % k)
                for i in v[0]:
                    f.write("%r," % i)
                f.write("],[")
                for i in v[1]:
                    f.write("%r," % i)

                f.write("]),")
            f.write("}\n")

            # Emit code that re-expands the factored table at import time.
            f.write("""
_lr_action = { }
for _k, _v in _lr_action_items.items():
   for _x,_y in zip(_v[0],_v[1]):
       if not _lr_action.has_key(_x):  _lr_action[_x] = { }
       _lr_action[_x][_k] = _y
del _lr_action_items
""")

        else:
            f.write("\n_lr_action = { ");
            for k,v in _lr_action.items():
                f.write("(%r,%r):%r," % (k[0],k[1],v))
            f.write("}\n");

        if smaller:
            # Factor out names to try and make smaller
            items = { }

            for s,nd in _lr_goto.items():
                for name,v in nd.items():
                    i = items.get(name)
                    if not i:
                        i = ([],[])
                        items[name] = i
                    i[0].append(s)
                    i[1].append(v)

            f.write("\n_lr_goto_items = {")
            for k,v in items.items():
                f.write("%r:([" % k)
                for i in v[0]:
                    f.write("%r," % i)
                f.write("],[")
                for i in v[1]:
                    f.write("%r," % i)

                f.write("]),")
            f.write("}\n")

            # Emit code that re-expands the factored goto table at import time.
            f.write("""
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
   for _x,_y in zip(_v[0],_v[1]):
       if not _lr_goto.has_key(_x): _lr_goto[_x] = { }
       _lr_goto[_x][_k] = _y
del _lr_goto_items
""")
        else:
            f.write("\n_lr_goto = { ");
            for k,v in _lr_goto.items():
                f.write("(%r,%r):%r," % (k[0],k[1],v))
            f.write("}\n");

        # Write production table
        f.write("_lr_productions = [\n")
        for p in Productions:
            if p:
                if (p.func):
                    f.write("  (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line))
                else:
                    f.write("  (%r,%d,None,None,None),\n" % (p.name, p.len))
            else:
                f.write("  None,\n")
        f.write("]\n")

        f.close()

    except IOError,e:
        print >>sys.stderr, "Unable to create '%s'" % filename
        print >>sys.stderr, e
        return
1945
def lr_read_tables(module=tab_module,optimize=0):
    """Try to load previously generated LR tables from the given module.

    Returns 1 on success (tables copied into the module-level globals),
    0 when the module is missing, incomplete, or its stored signature
    does not match the current grammar's.  With optimize set, the
    signature check is skipped and any importable table is accepted.
    """
    global _lr_action, _lr_goto, _lr_productions, _lr_method
    try:
        exec "import %s as parsetab" % module

        if (optimize) or (Signature.digest() == parsetab._lr_signature):
            _lr_action = parsetab._lr_action
            _lr_goto = parsetab._lr_goto
            _lr_productions = parsetab._lr_productions
            _lr_method = parsetab._lr_method
            return 1
        else:
            return 0

    except (ImportError,AttributeError):
        return 0
1962 1963 1964 # ----------------------------------------------------------------------------- 1965 # yacc(module) 1966 # 1967 # Build the parser module 1968 # ----------------------------------------------------------------------------- 1969
def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0,write_tables=1,debugfile=debug_file,outputdir=''):
    """Build an LR parser from grammar rules and return a Parser object.

    Grammar rules are taken from functions named ``p_*`` found either in
    *module* (a module object or an instance) or, when *module* is None,
    in the caller's global namespace.  Side effects: installs the built
    parser as the module-level ``parser`` and its parse method as the
    module-level ``parse``; may write a table module (*tabmodule*) and a
    debug file to *outputdir*.

    method          -- 'SLR' or 'LALR' table construction algorithm
    debug           -- enable debugging output / parser.out generation
    module          -- module or instance holding the grammar definitions
    tabmodule       -- name of the generated/loaded parse-table module
    start           -- starting grammar symbol (default: ldict['start'])
    check_recursion -- verify the grammar for infinite recursion
    optimize        -- skip signature checks and most validation
    write_tables    -- write the computed tables back to tabmodule
    debugfile       -- name of the debugging output file
    outputdir       -- directory for generated files

    Raises YaccError on any grammar or configuration problem.
    """
    global yaccdebug
    yaccdebug = debug

    initialize_vars()
    files = { }          # set of source files containing grammar rules (keys only)
    error = 0


    # Add parsing method to signature so a method change invalidates cached tables
    Signature.update(method)

    # If a "module" parameter was supplied, extract its dictionary.
    # Note: a module may in fact be an instance as well.

    if module:
        # User supplied a module object.
        if isinstance(module, types.ModuleType):
            ldict = module.__dict__
        elif isinstance(module, _INSTANCETYPE):
            # Instance: build a dict of its attributes via dir()/getattr()
            _items = [(k,getattr(module,k)) for k in dir(module)]
            ldict = { }
            for i in _items:
                ldict[i[0]] = i[1]
        else:
            raise ValueError,"Expected a module"

    else:
        # No module given.  We might be able to get information from the caller.
        # Throw an exception and unwind the traceback to get the globals
        # of whoever called yacc().

        try:
            raise RuntimeError
        except RuntimeError:
            e,b,t = sys.exc_info()
            f = t.tb_frame
            f = f.f_back           # Walk out to our calling function
            ldict = f.f_globals    # Grab its globals dictionary

    # Add starting symbol to signature
    if not start:
        start = ldict.get("start",None)
    if start:
        Signature.update(start)

    # If running in optimized mode, try to load pre-computed tables and
    # skip grammar analysis entirely.

    if (optimize and lr_read_tables(tabmodule,1)):
        # Read parse table: rebuild Productions as lightweight MiniProduction
        # records from the tuples stored in the table module.
        del Productions[:]
        for p in _lr_productions:
            if not p:
                Productions.append(None)
            else:
                m = MiniProduction()
                m.name = p[0]
                m.len = p[1]
                m.file = p[3]
                m.line = p[4]
                if p[2]:
                    # Re-bind the rule's action function from the user namespace
                    m.func = ldict[p[2]]
                Productions.append(m)

    else:
        # Get the tokens map
        if (module and isinstance(module,_INSTANCETYPE)):
            tokens = getattr(module,"tokens",None)
        else:
            tokens = ldict.get("tokens",None)

        if not tokens:
            raise YaccError,"module does not define a list 'tokens'"
        if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)):
            raise YaccError,"tokens must be a list or tuple."

        # Check to see if a requires dictionary is defined.
        requires = ldict.get("require",None)
        if requires:
            if not (isinstance(requires,types.DictType)):
                raise YaccError,"require must be a dictionary."

            for r,v in requires.items():
                try:
                    if not (isinstance(v,types.ListType)):
                        raise TypeError
                    # Split each dotted requirement string into components
                    v1 = [x.split(".") for x in v]
                    Requires[r] = v1
                except StandardError:
                    print >>sys.stderr, "Invalid specification for rule '%s' in require. Expected a list of strings" % r


        # Build the dictionary of terminals.  Each terminal maps to the
        # (initially empty) list of rules that use it, which later tells
        # us whether a declared token is actually used in the grammar.

        if 'error' in tokens:
            # 'error' is reserved for error-recovery productions
            print >>sys.stderr, "yacc: Illegal token 'error'. Is a reserved word."
            raise YaccError,"Illegal token name"

        for n in tokens:
            if Terminals.has_key(n):
                print >>sys.stderr, "yacc: Warning. Token '%s' multiply defined." % n
            Terminals[n] = [ ]

        Terminals['error'] = [ ]

        # Get the precedence map (if any)
        prec = ldict.get("precedence",None)
        if prec:
            if not (isinstance(prec,types.ListType) or isinstance(prec,types.TupleType)):
                raise YaccError,"precedence must be a list or tuple."
            add_precedence(prec)
            # Precedence changes must also invalidate cached tables
            Signature.update(repr(prec))

        for n in tokens:
            if not Precedence.has_key(n):
                Precedence[n] = ('right',0)         # Default, right associative, 0 precedence

        # Look for error handler
        ef = ldict.get('p_error',None)
        if ef:
            if isinstance(ef,types.FunctionType):
                ismethod = 0
            elif isinstance(ef, types.MethodType):
                ismethod = 1
            else:
                raise YaccError,"'p_error' defined, but is not a function or method."
            eline = ef.func_code.co_firstlineno
            efile = ef.func_code.co_filename
            files[efile] = None

            # p_error takes exactly one argument (plus self for methods)
            if (ef.func_code.co_argcount != 1+ismethod):
                raise YaccError,"%s:%d: p_error() requires 1 argument." % (efile,eline)
            global Errorfunc
            Errorfunc = ef
        else:
            print >>sys.stderr, "yacc: Warning. no p_error() function is defined."

        # Get the list of built-in functions with p_ prefix
        symbols = [ldict[f] for f in ldict.keys()
               if (type(ldict[f]) in (types.FunctionType, types.MethodType) and ldict[f].__name__[:2] == 'p_'
                   and ldict[f].__name__ != 'p_error')]

        # Check for non-empty symbols
        if len(symbols) == 0:
            raise YaccError,"no rules of the form p_rulename are defined."

        # Sort the symbols by line number so rules are added in source order
        symbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno))

        # Add all of the symbols to the grammar
        for f in symbols:
            if (add_function(f)) < 0:
                error += 1
            else:
                files[f.func_code.co_filename] = None

        # Make a signature of the docstrings (the docstrings ARE the grammar)
        for f in symbols:
            if f.__doc__:
                Signature.update(f.__doc__)

        lr_init_vars()

        if error:
            raise YaccError,"Unable to construct parser."

        # Only rebuild tables if cached ones are missing or stale
        if not lr_read_tables(tabmodule):

            # Validate files
            for filename in files.keys():
                if not validate_file(filename):
                    error = 1

            # Validate dictionary
            validate_dict(ldict)

            if start and not Prodnames.has_key(start):
                raise YaccError,"Bad starting symbol '%s'" % start

            augment_grammar(start)
            error = verify_productions(cycle_check=check_recursion)
            # NOTE(review): type(f) tests the dict KEY (a string), never a
            # function/method type, so otherfunc is always the empty list
            # here; this probably meant type(ldict[f]).  otherfunc is also
            # never used afterwards -- confirm before fixing.
            otherfunc = [ldict[f] for f in ldict.keys()
                     if (type(f) in (types.FunctionType,types.MethodType) and ldict[f].__name__[:2] != 'p_')]

            if error:
                raise YaccError,"Unable to construct parser."

            build_lritems()
            compute_first1()
            compute_follow(start)

            if method in ['SLR','LALR']:
                lr_parse_table(method)
            else:
                raise YaccError, "Unknown parsing method '%s'" % method

            if write_tables:
                lr_write_tables(tabmodule,outputdir)

            if yaccdebug:
                try:
                    # Dump the conflict/verification logs collected in the
                    # _vfc and _vf StringIO buffers to the debug file.
                    f = open(os.path.join(outputdir,debugfile),"w")
                    f.write(_vfc.getvalue())
                    f.write("\n\n")
                    f.write(_vf.getvalue())
                    f.close()
                except IOError,e:
                    print >>sys.stderr, "yacc: can't create '%s'" % debugfile,e

    # Made it here.  Create a parser object and set up its internal state.
    # Set global parse() method to bound method of parser object.

    p = Parser("xyzzy")
    p.productions = Productions
    p.errorfunc = Errorfunc
    p.action = _lr_action
    p.goto = _lr_goto
    p.method = _lr_method
    p.require = Requires

    global parse
    parse = p.parse

    global parser
    parser = p

    # Clean up all of the globals we created during table construction
    # (kept alive in optimized mode, where they were never fully built).
    if (not optimize):
        yacc_cleanup()
    return p
2201 2202 # yacc_cleanup function. Delete all of the global variables 2203 # used during table construction 2204
def yacc_cleanup():
    """Delete the module-level variables created during table construction.

    Called at the end of yacc() (unless running optimized) so the large
    intermediate grammar/table structures do not linger in module scope.
    """
    # Declare everything up front, then drop it all in the same order.
    global _lr_action, _lr_goto, _lr_method, _lr_goto_cache
    global Productions, Prodnames, Prodmap, Terminals
    global Nonterminals, First, Follow, Precedence, LRitems
    global Errorfunc, Signature, Requires
    global _vf, _vfc

    del _lr_action, _lr_goto, _lr_method, _lr_goto_cache
    del Productions, Prodnames, Prodmap, Terminals
    del Nonterminals, First, Follow, Precedence, LRitems
    del Errorfunc, Signature, Requires
    del _vf, _vfc
2219 2220 2221 # Stub that raises an error if parsing is attempted without first calling yacc()
def parse(*args,**kwargs):
    """Module-level stub; yacc() replaces this with the real parser's parse()."""
    raise YaccError("yacc: No parser built with yacc()")
2224