#!/usr/bin/awk -f
# Copyright 2010 The Go Authors.  All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

# This program implements the core idea from
#
#	Clinton L. Jeffery, Generating LR syntax error messages from examples,
#	ACM TOPLAS 25(5) (September 2003).  http://doi.acm.org/10.1145/937563.937566
# 
# It reads Bison's summary of a grammar followed by a file
# like go.errors, replacing lines beginning with % by the 
# yystate and yychar that will be active when an error happens
# while parsing that line.  
#
# Unlike the system described in the paper, the lines in go.errors
# give grammar symbol name lists, not actual program fragments.
# This is a little less programmer-friendly but doesn't require being
# able to run the text through lex.c.

BEGIN{
	bison = 1
	grammar = 0
	states = 0
	open = 0
}

# In Grammar section of y.output,
# record lhs and length of rhs for each rule.
bison && /^Grammar/ { grammar = 1 }
bison && /^(Terminals|state 0)/ { grammar = 0 }
grammar && NF>0 {
	if($2 != "|") {
		r = $2
		sub(/:$/, "", r)
	}
	rulelhs[$1] = r
	rulesize[$1] = NF-2
	if(rulesize[$1] == 1 && $3 == "%empty") {
		rulesize[$1] = 0
	}
	if(rulesize[$1] == 3 && $3 $4 $5 == "/*empty*/") {
		rulesize[$1] = 0
	}
}

# In state dumps, record shift/reduce actions.
bison && /^[Ss]tate 0/ { grammar = 0; states = 1 }

states && /^[Ss]tate / { state = $2 }
states { statetext[state] = statetext[state] $0 "\n" }

states && / shift/ {
	n = nshift[state]++
	if($0 ~ /and go to/)
		shift[state,n] = $7 # GNU Bison
	else
		shift[state,n] = $3 # Plan 9 Yacc
	shifttoken[state,n] = $1
	next
}
states && / (go to|goto)/ {
	n = nshift[state]++
	if($0 ~ /go to/)
		shift[state,n] = $5 # GNU Bison
	else
		shift[state,n] = $3 # Plan 9 Yacc
	shifttoken[state,n] = $1
	next
}
states && / reduce/ {
	n = nreduce[state]++
	if($0 ~ /reduce using rule/)
		reduce[state,n] = $5 # GNU Bison
	else
		reduce[state,n] = $3 # Plan 9 yacc
	reducetoken[state,n] = $1
	next
}

# Skip over the summary information printed by Plan 9 yacc.
/nonterminals$/,/^maximum spread/ { next }

# First // comment marks the beginning of the pattern file.
/^\/\// { bison = 0; grammar = 0; state = 0 }
bison { next }

# Treat % as first field on line as introducing a pattern (token sequence).
# Run it through the LR machine and print the induced "yystate, yychar,"
# at the point where the error happens.
$1 == "%" {
	nstack = 0
	state = 0
	f = 2
	tok = ""
	for(;;) {
		if(tok == "" && f <= NF) {
			tok = $f
			f++
		}
		found = 0
		for(j=0; j<nshift[state]; j++) {
			if(shifttoken[state,j] == tok) {
				# print "SHIFT " tok " " state " -> " shift[state,j]
				stack[nstack++] = state
				state = shift[state,j]
				found = 1
				tok = ""
				break
			}
		}
		if(found)
			continue
		for(j=0; j<nreduce[state]; j++) {
			t = reducetoken[state,j]
			if(t == tok || t == "$default" || t == ".") {
				stack[nstack++] = state
				rule = reduce[state,j]
				nstack -= rulesize[rule]
				state = stack[--nstack]
				lhs = rulelhs[rule]
				if(tok != "")
					--f
				tok = rulelhs[rule]
				# print "REDUCE " nstack " " state " " tok " rule " rule " size " rulesize[rule]
				found = 1
				break
			}
		}
		if(found)
			continue

		# No shift or reduce applied - found the error.
		printf("\t{%s, %s,\n", state, tok);
		open = 1;
		break
	}
	next
}

# Print other lines verbatim.
open && /,$/ {
	s = $0;
	sub(",", "},", s)
	print s
	open = 0
	next
}

open && /"$/ {
	print $0 "}"
	open = 0
	next
}

{print}
