原理P133页
思路
构造识别活前缀的DFA
若 A->α.Bβ 属于 CLOSURE(I), 则每一个形如 B->.γ 的项目也属于 CLOSURE(I)(其中 α、β、γ 为任意文法符号串)
根据DFA构建LR(0)分析表
运行效果
项目源码
#include <bits/stdc++.h>
using namespace std;
// Capacity used for every fixed-size table in this file (symbol arrays,
// per-state edge lists, done[], and both dimensions of ACTION / ACTION_num / GOTO).
const int N = 1005;
// m: number of non-terminals, n: number of terminals; both are assigned in main().
int m, n;
// A grammar production, or an LR(0) item when the alternatives contain a '.':
// one left-hand side plus a set of right-hand-side alternatives.
// Kept as a plain aggregate so that `node{left, right}` initialisation keeps working.
struct node {
    string left;        // left-hand side, e.g. "E" or "S'"
    set<string> right;  // right-hand-side alternatives (ordered by std::set)

    // Prints each alternative on its own line as " left->alternative".
    void print() const {
        for (const string &alt : right) {
            cout << " " << left << "->" << alt << endl;
        }
    }

    // Orders nodes by their left-hand side only.
    bool operator < (const node &k) const {
        return left < k.left;
    }

    // Returns "left->alt" for the first alternative in set order.
    // With no alternatives it returns just "left->" instead of dereferencing
    // the end iterator of an empty set.
    string to_string() const {
        string str = left;
        str += "->";
        if (!right.empty()) {
            str += *right.begin();
        }
        return str;
    }

    // Returns "left->alt1|alt2|..." with all alternatives in set order.
    string full_string() const {
        string str = left;
        str += "->";
        bool first = true;
        for (const string &alt : right) {
            if (!first) str += '|';
            str += alt;
            first = false;
        }
        return str;
    }
};
vector<node> productionList; // productions as entered (alternatives still grouped per line)
vector<node> productionListSplit; // productions with the '|' alternatives split into separate entries
set<string> not_end_charSet; // set of non-terminals
set<char> end_charSet; // set of terminals
string not_end_charSet_Array[N]; // non-terminals copied into an array (filled from index 1)
char end_charSet_Array[N]; // terminals copied into an array (filled from index 1)
struct info {
int id; // CLOSURE编号
node mainProduction; // CLOSURE集主产生式
node append_Production; // CLOSURE集附加产生式
info(int id, node mainProduction) {
this->id = id;
this->mainProduction = mainProduction;
}
void print() {
cout << "------------------------" << endl;
cout << "I" << id << ":\n";
mainProduction.print();
append_Production.print();
}
};
// One outgoing transition of an item set; stored in e[source state].
// Field order matters: edges are created with brace initialisation {v, arc}.
struct edge {
int v; // index of the target item set
char arc; // grammar symbol that labels the transition
};
vector<edge> e[N]; // e[i]: outgoing transitions of item set i
map<string, int> closure_mainProduction_vis; // main-production string -> item-set id + 1 (0 means not seen yet)
vector<info> CLOSURE; // all item sets, indexed by id
bool done[N]; // done[i]: item set i has already been expanded
char ACTION[N][N]; // ACTION[state][symbol char]: 'S' shift, 'r' reduce, 'A' accept
int ACTION_num[N][N]; // number printed after the action letter: shift target state or reduce production index
int GOTO[N][N]; // GOTO[state][non-terminal char]: target item set
// Prints a sample grammar between two separator lines to show the expected
// input format (one production per line, terminated by "$").
void print_tips() {
    static const char *const kSampleLines[] = {
        "---------------------",
        "S'->E",
        "E->aA|bB",
        "A->cA|d",
        "B->cB|d",
        "$",
        "---------------------",
    };
    for (const char *line : kSampleLines) {
        cout << line << endl;
    }
}
void splitProductionList() {
int sz = productionList.size();
for (int i = 0; i < sz; ++i) {
node production = productionList[i];
string left = production.left;
set<string>right = production.right;
for (auto it = right.begin(); it != right.end(); ++it) {
set<string> newSet;
newSet.insert(*it);
productionListSplit.push_back({left, newSet});
}
}
}
// Parses one production line such as "E->aA|bB" into a node.
// The text before the first "->" is the left-hand side; everything after it is
// split on '|' into right-hand-side alternatives. Only that first "->" is
// treated as the arrow, so '-' and '>' may appear as ordinary symbols on the
// right-hand side (e.g. "E->E-T").
// If the line contains no "->", the left-hand side is empty and the whole line
// is parsed as alternatives.
// An empty piece before a '|' is stored as an empty alternative; an empty final
// piece is ignored.
node split(string str) {
    string left;
    string body = str;
    const size_t arrow = str.find("->");
    if (arrow != string::npos) {
        left = str.substr(0, arrow);
        body = str.substr(arrow + 2);
    }

    set<string> right;
    string piece;
    for (char c : body) {
        if (c == '|') {
            right.insert(piece);  // close the current alternative
            piece.clear();
        } else {
            piece += c;
        }
    }
    if (!piece.empty()) right.insert(piece);

    node result = {left, right};
    return result;
}
void print_productionList() {
int sz = productionList.size();
for (int i = 0; i < sz; ++i) {
node cur = productionList[i];
cur.print();
}
}
// Copies both symbol sets into their array counterparts so they can be
// addressed by position. Both arrays are filled starting at index 1, in set order.
void set_ReflectTo_arr() {
    int slot = 0;
    for (const string &symbol : not_end_charSet) {
        not_end_charSet_Array[++slot] = symbol;
    }

    slot = 0;
    for (char symbol : end_charSet) {
        end_charSet_Array[++slot] = symbol;
    }
}
vector<int> get_productionIndexArray_by_notEndChar(string notEndChar) {
vector<int> indexArray;
int sz = productionList.size();
for (int i = 0; i < sz; ++i) {
node production = productionList[i];
if (production.left == notEndChar) {
indexArray.push_back(i);
}
}
return indexArray;
}
// Records the symbols of one production line in the global symbol sets.
// The text before the first "->" is inserted into not_end_charSet. Every
// character after the arrow that is neither '|' nor an upper-case letter is
// inserted into end_charSet. Only the first "->" is skipped as the arrow, so
// '-' and '>' used as right-hand-side symbols are recorded as terminals.
// If the line has no "->", the whole line is recorded as a non-terminal.
// The end marker '#' is always added to end_charSet.
void build_notEndCharSet_And_endCharSet(string str) {
    const size_t arrow = str.find("->");
    // substr(0, npos) yields the whole string when there is no arrow.
    not_end_charSet.insert(str.substr(0, arrow));

    if (arrow != string::npos) {
        for (size_t pos = arrow + 2; pos < str.size(); ++pos) {
            const char c = str[pos];
            if (c == '|') continue;
            // isupper requires a value representable as unsigned char.
            if (!isupper(static_cast<unsigned char>(c))) {
                end_charSet.insert(c);
            }
        }
    }
    end_charSet.insert('#');
}
// Returns s with its '.' marker moved one symbol to the right, e.g.
// "a.Bb" -> "aB.b". All '.' characters are removed and a single '.' is
// re-inserted right after the character that followed the first dot.
// If the first dot is the last character, the result contains no dot at all.
// If s has no dot, position 0 is used as the dot position, so a '.' is
// inserted after the character at index 1.
string getString_dot_next_position(string s) {
    const size_t found = s.find('.');
    // Index of the character after which the new dot is placed.
    const size_t marker = (found == string::npos ? 0 : found) + 1;

    string shifted;
    for (size_t idx = 0; idx < s.size(); ++idx) {
        const char ch = s[idx];
        if (ch != '.') {
            shifted += ch;
            if (idx == marker) shifted += ".";
        }
    }
    return shifted;
}
// Returns the transition symbol of an item: the character immediately after
// the first '.' in s. Returns EOF (converted to char) when s has no dot or the
// dot is the last character.
char get_arc(string s) {
    const size_t dot = s.find('.');
    if (dot != string::npos && dot + 1 < s.size()) {
        return s[dot + 1];
    }
    return EOF;
}
// Expands every item of cur_production, which belongs to item set
// pre_CLOSURE_ID, by moving its dot one symbol to the right.
//
//  * If the moved item contains no dot any more, the item is complete:
//    - for the augmented start symbol S' the accept action is stored under '#';
//    - otherwise a reduce action with the index of the matching entry of
//      productionListSplit (-1 if none matches) is stored for every terminal.
//  * Otherwise a transition labelled with the symbol that followed the dot is
//    added from pre_CLOSURE_ID to the item set whose main production is the
//    moved item. That item set is created on first use; if the dot is then in
//    front of an upper-case symbol, the productions of that symbol (with a
//    leading dot) become the new set's additional production.
//
// NOTE(review): an item set holds one main and one additional production, and
// the closure is not applied transitively; this function keeps that model.
void next_CLOSURE_By_Production(node cur_production, int pre_CLOSURE_ID) {
    const string &left = cur_production.left;
    for (const string &right_string : cur_production.right) {
        const char arc = get_arc(right_string);
        set<string> next_right;
        next_right.insert(getString_dot_next_position(right_string));
        node next_mainProduction = {left, next_right};
        const string next_item = next_mainProduction.to_string();

        // Completed item: accept or reduce.
        if (next_item.find('.') == string::npos) {
            if (left == "S'") {
                ACTION[pre_CLOSURE_ID][(int)'#'] = 'A';
            } else {
                // arc is EOF here, so it must not be used as a table index.
                // The production is located by exact match, so "A->d" cannot
                // be mistaken for a longer production such as "A->dB".
                int productionIndex = -1;
                int cnt = 0;
                for (auto production : productionListSplit) {
                    if (production.full_string() == next_item) {
                        productionIndex = cnt;
                        break;
                    }
                    ++cnt;
                }
                // LR(0): reduce on every terminal.
                for (int i = 1; i <= n; ++i) {
                    const int column = (int)end_charSet_Array[i];
                    ACTION[pre_CLOSURE_ID][column] = 'r';
                    ACTION_num[pre_CLOSURE_ID][column] = productionIndex;
                }
            }
            continue;
        }

        // The map stores id + 1, so 0 means "not seen yet".
        int &seen = closure_mainProduction_vis[next_item];
        if (seen) {
            e[pre_CLOSURE_ID].push_back({seen - 1, arc});
            continue;
        }

        // First time this item is reached: create its item set.
        const int new_id = static_cast<int>(CLOSURE.size());
        seen = new_id + 1;
        e[pre_CLOSURE_ID].push_back({new_id, arc});
        CLOSURE.push_back(info(new_id, next_mainProduction));

        // Closure step: dot directly in front of an upper-case symbol.
        const string &kernel_right = *next_right.begin();
        const int sz = kernel_right.size();
        for (int i = 0; i + 1 < sz; ++i) {
            if (kernel_right[i] != '.') continue;
            if (!isupper(static_cast<unsigned char>(kernel_right[i + 1]))) continue;

            const string notEndChar(1, kernel_right[i + 1]);
            const vector<int> indexArray = get_productionIndexArray_by_notEndChar(notEndChar);
            if (indexArray.empty()) continue;

            // Collect the alternatives of every input line defining this
            // symbol, so that none of them is overwritten by a later line.
            set<string> append_Production_right;
            for (int x : indexArray) {
                for (const string &alt : productionList[x].right) {
                    append_Production_right.insert("." + alt);
                }
            }
            node append_Production = {notEndChar, append_Production_right};
            CLOSURE.back().append_Production = append_Production;
        }
    }
}
// Expands cur_CLOSURE and then every item set that has not been expanded yet,
// including the ones created along the way, always taking the lowest
// unexpanded index next.
void next_CLOSURE(info cur_CLOSURE) {
    // Marks one item set as done and expands its main and additional productions.
    const auto expand = [](const info &state) {
        done[state.id] = true;
        next_CLOSURE_By_Production(state.mainProduction, state.id);
        next_CLOSURE_By_Production(state.append_Production, state.id);
    };

    expand(cur_CLOSURE);
    // CLOSURE may grow while we iterate, so the size is re-read each round and
    // each item set is copied before expansion (push_back can reallocate).
    for (size_t i = 0; i < CLOSURE.size(); ++i) {
        if (done[i]) continue;
        const info snapshot = CLOSURE[i];
        expand(snapshot);
    }
}
void print_CLOSURE() {
int sz = CLOSURE.size();
for (int i = 0; i < sz; ++i) {
CLOSURE[i].print();
}
}
void print_edge() {
cout << "--------------------------" << endl;
int sz = CLOSURE.size();
int cnt = 0;
for (int i = 0; i < sz; ++i) cnt += e[i].size();
printf("print the edge, numbers is: %d \n", cnt);
for (int i = 0; i < sz; ++i) {
for (auto p : e[i]) {
printf("%d->%d, arc: %c\n", i, p.v, p.arc);
}
}
cout << "--------------------------" << endl;
}
void print_form() {
int sz = CLOSURE.size();
for (int i = 0; i < sz; ++i) {
for (auto p : e[i]) {
int v = p.v;
char arc = p.arc;
if (isupper(arc)) { // GOTO表
GOTO[i][(int)arc] = v;
} else {
ACTION[i][(int)arc] = 'S';
ACTION_num[i][(int)arc] = v;
}
}
}
cout << "\t\t " << "ACTION" << endl;
cout << "------------------------------------------" << endl;
for (int j = 1; j <= n; ++j) {
cout << "\t" << end_charSet_Array[j];
}
cout << endl;
for (int i = 0; i < sz; ++i) {
cout << i << "\t";
for (int j = 1; j <= n; ++j) {
if (ACTION_num[i][(int)end_charSet_Array[j]] == 0) {
if (ACTION[i][end_charSet_Array[j]] == 'A') {
cout << "acc\t";
}
printf("\t");
}
else
printf("%c%d\t", ACTION[i][end_charSet_Array[j]], ACTION_num[i][(int)end_charSet_Array[j]]);
}
cout << endl;
}
cout << "------------------------------------------" << endl;
cout << "\t " << "GOTO" << endl;
cout << "------------------------------------------" << endl;
for (int j = 1; j <= n; ++j) {
if (not_end_charSet_Array[j] == "S\'") {
cout << "\t"; continue;
}
cout << "\t" << not_end_charSet_Array[j];
}
cout << endl;
for (int i = 0; i < sz; ++i) {
cout << i << "\t";
for (int j = 1; j <= m; ++j) {
if (GOTO[i][(int)not_end_charSet_Array[j][0]] == 0) {
printf("\t");
} else {
printf("%d\t", GOTO[i][(int)not_end_charSet_Array[j][0]]);
}
}
cout << endl;
}
cout << "------------------------------------------" << endl;
}
int main() {
string str;
printf("please input production (end-Sign is $):\n");
print_tips();
while (cin >> str && str[0] != '$') {
node n = split(str); // 对读入进行处理 F->P^F|P
productionList.push_back(n); // 存储每个产生式
build_notEndCharSet_And_endCharSet(str); // 构建非终结符集和终结符集
}
splitProductionList();
m = not_end_charSet.size(); // 非终结符个数
n = end_charSet.size(); // 终结符个数
set_ReflectTo_arr(); // 集合映射到数组, 方便操作
// 取第一个产生式(起始符号S’的产生式)
node n = productionList[0];
auto it = n.right.begin();
set<string> right;
right.insert("." + *it);
node mainProduction = {n.left, right};
if (closure_mainProduction_vis[mainProduction.to_string()]);
else {
CLOSURE.push_back(info(CLOSURE.size(), mainProduction)); // 向项目集中添加 CLOSURE
closure_mainProduction_vis[mainProduction.to_string()] = CLOSURE.size(); // map中存的是 closure的id + 1
}
vector<int> indexArray = get_productionIndexArray_by_notEndChar(*it);
for (auto x : indexArray) {
node production = productionList[x];
string left = production.left;
set<string> right = production.right;
set<string> append_Production_right;
for (auto it = right.begin(); it != right.end(); ++it) {
append_Production_right.insert("." + *it);
}
node append_Production = {left, append_Production_right};
CLOSURE[0].append_Production = append_Production;
}
next_CLOSURE(CLOSURE[0]);
print_CLOSURE(); // 打印CLOSURE集合
print_edge();
print_form(); // 打印LR(0)分析表
return 0;
}
/*
S'->E
E->aA|bB
A->cA|d
B->cB|d
$
*/