编译原理—LR(0)分析表的构造(C++实现)

原理:参见教材第 133 页

思路

  • 构造识别活前缀的DFA

    • 若项目 A -> a.Bb 属于 CLOSURE(I),则每一个形如 B -> .r 的项目也属于 CLOSURE(I)

  • 根据DFA构建LR(0)分析表

运行效果

项目源码

#include <bits/stdc++.h>

using namespace std;

// Dimension used for every fixed-size global array in this file
// (edge lists, done flags, ACTION / GOTO tables, symbol arrays).
const int N = 1005;

// m: number of non-terminals, n: number of terminals; both are assigned in main().
int m, n;

// A production (or dotted LR(0) item) of the form `left -> alt1 | alt2 | ...`.
// Kept as a plain aggregate so that `node{left, right}` initialisation keeps working.
struct node {
    string left;        // left-hand side symbol
    set<string> right;  // right-hand side alternatives (ordered and de-duplicated by the set)

    // Prints every alternative on its own line as "  left->alt".
    void print() const {
        for (const string &alt : right) {
            cout << "  " << left << "->" << alt << endl;
        }
    }

    // Orders nodes by their left-hand side only.
    bool operator < (const node &k) const {
        return left < k.left;
    }

    // Returns "left->first_alternative".
    // When there is no alternative at all the result is just "left->"
    // (previously this dereferenced end() of an empty set).
    string to_string() const {
        string str = left;
        str += "->";
        if (!right.empty()) {
            str += *right.begin();
        }
        return str;
    }

    // Returns "left->alt1|alt2|..." with the alternatives in set order.
    string full_string() const {
        string str = left;
        str += "->";
        bool first = true;
        for (const string &alt : right) {
            if (!first) str += '|';
            str += alt;
            first = false;
        }
        return str;
    }
};
vector<node> productionList; // productions as entered (all '|' alternatives of one line in one node)

vector<node> productionListSplit; // the same productions with '|' alternatives split into one node each

set<string> not_end_charSet; // set of non-terminals
set<char> end_charSet; // set of terminals
string not_end_charSet_Array[N]; // non-terminals copied into an array (filled starting at index 1)
char end_charSet_Array[N]; // terminals copied into an array (filled starting at index 1)

struct info {
    int id; // CLOSURE编号
    node mainProduction; // CLOSURE集主产生式
    node append_Production; // CLOSURE集附加产生式
    info(int id, node mainProduction) {
        this->id = id;
        this->mainProduction = mainProduction;
    }

    void print() {
        cout << "------------------------" << endl;
        cout << "I" << id << ":\n";
        mainProduction.print();
        append_Production.print();
    }
};

// A labelled transition between two item sets.
struct edge {
    int v;    // id of the target item set
    char arc; // grammar symbol labelling the transition
};
vector<edge> e[N]; // e[i]: outgoing transitions of item set i

map<string, int> closure_mainProduction_vis; // kernel item string -> item set id + 1 (0 means not seen yet)
vector<info> CLOSURE; // all item sets built so far


bool done[N]; // done[i]: item set i has already been expanded

char ACTION[N][N]; // ACTION[state][symbol code]: 'S' shift, 'r' reduce, 'A' accept
int ACTION_num[N][N]; // number that accompanies ACTION: shift target state or reduce production index

int GOTO[N][N]; // GOTO[state][non-terminal char code]: target state
// Shows a sample grammar (terminated by "$") framed by two separator lines,
// so the user can see the expected input format.
void print_tips() {
    const char *const separator = "---------------------";
    const char *const sample[] = {"S'->E", "E->aA|bB", "A->cA|d", "B->cB|d", "$"};

    cout << separator << endl;
    for (const char *line : sample) {
        cout << line << endl;
    }
    cout << separator << endl;
}

void splitProductionList() {
    int sz = productionList.size();
    for (int i = 0; i < sz; ++i) {
        node production = productionList[i];
        string left = production.left;
        set<string>right = production.right;
        for (auto it = right.begin(); it != right.end(); ++it) {
            set<string> newSet;
            newSet.insert(*it);
            productionListSplit.push_back({left, newSet});
        }
    }
}

// 处理产生式
// Parses one production line such as "E->aA|bB" into a node.
// '>' is ignored, '-' closes the left-hand side, '|' closes one alternative;
// a non-empty remainder at the end of the line becomes the last alternative.
node split(string str) {
    node parsed;
    string pending;
    for (size_t k = 0; k < str.size(); ++k) {
        switch (str[k]) {
            case '>':
                break;
            case '-':
                parsed.left = pending; // text collected so far is the left-hand side
                pending.clear();
                break;
            case '|':
                parsed.right.insert(pending); // one alternative finished
                pending.clear();
                break;
            default:
                pending += str[k];
        }
    }
    if (!pending.empty()) {
        parsed.right.insert(pending);
    }
    return parsed;
}

void print_productionList() {
    int sz = productionList.size();
    for (int i = 0; i < sz; ++i) {
        node cur = productionList[i];
        cur.print();
    }
}
// Copies both symbol sets into their companion arrays in set order.
// Both arrays are filled starting at index 1; index 0 is left untouched.
void set_ReflectTo_arr() {
    int slot = 1;
    for (const string &symbol : not_end_charSet) {
        not_end_charSet_Array[slot++] = symbol;
    }

    slot = 1;
    for (char symbol : end_charSet) {
        end_charSet_Array[slot++] = symbol;
    }
}

vector<int> get_productionIndexArray_by_notEndChar(string notEndChar) {
    vector<int> indexArray;
    int sz = productionList.size();
    for (int i = 0; i < sz; ++i) {
        node production = productionList[i];
        if (production.left == notEndChar) {
            indexArray.push_back(i);
        } 
    }
    return indexArray;
}


// 构建非终结符集和终结符集
// Updates the global symbol sets from one production line.
// Everything before the first '-' is recorded as a non-terminal; after it,
// every character that is not '-', '>', '|' or an upper-case letter is
// recorded as a terminal. The end marker '#' is always added as a terminal.
void build_notEndCharSet_And_endCharSet(string str) {
    const int sz = str.size();
    string head = "";
    int pos;
    for (pos = 0; pos < sz; ++pos) {
        if (str[pos] == '-') break;
        head += str[pos];
    }
    not_end_charSet.insert(head); // left-hand side is a non-terminal

    for (pos++; pos < sz; ++pos) {
        const char c = str[pos];
        if (c == '-' || c == '>' || c == '|') {
            continue;
        }
        // isupper() is undefined for negative char values, so go through unsigned char.
        if (!isupper(static_cast<unsigned char>(c))) {
            end_charSet.insert(c); // terminal
        }
    }
    end_charSet.insert('#');
}

// Returns s with every '.' removed and a single '.' re-inserted right after
// the character that followed the first '.', i.e. the dot is moved one symbol
// to the right. If no character follows the dot, the result contains no dot.
// When s has no '.', position 0 is used as the dot position.
string getString_dot_next_position(string s) {
    const string::size_type found = s.find('.');
    const string::size_type dot = (found == string::npos) ? 0 : found;

    string moved;
    for (string::size_type k = 0; k < s.size(); ++k) {
        const char ch = s[k];
        if (ch == '.') continue;
        moved.push_back(ch);
        if (k == dot + 1) moved.push_back('.');
    }
    return moved;
}
// 取转换条件(弧值)
// Returns the transition symbol of an item: the character right after the
// first '.', or EOF (converted to char) when there is no '.' or nothing follows it.
char get_arc(string s) {
    const string::size_type dot = s.find('.');
    const bool has_follower = dot != string::npos && dot + 1 < s.size();
    return has_follower ? s[dot + 1] : static_cast<char>(EOF);
}

// Expands one production of item set `pre_CLOSURE_ID`.
//
// For every dotted alternative of `cur_production` the dot is advanced by one symbol:
//   * if the advanced item has no dot left, the item was already complete, so
//     item set `pre_CLOSURE_ID` gets an accept entry (left side "S'") or a reduce
//     entry for every terminal, numbered by the index in productionListSplit;
//   * otherwise a transition labelled with the symbol after the dot is recorded,
//     creating the target item set if its kernel item has not been seen before.
//
// NOTE: an item set stores a single appended production, so when several
// productions qualify only the last one is kept, and appended productions are
// not expanded further. Non-terminals after the dot are taken as one character.
void next_CLOSURE_By_Production(node cur_production, int pre_CLOSURE_ID) {
    const string &left = cur_production.left;

    for (const string &right_string : cur_production.right) {
        const char arc = get_arc(right_string);
        set<string> next_right;
        next_right.insert(getString_dot_next_position(right_string));
        node next_mainProduction = {left, next_right};
        const string next_key = next_mainProduction.to_string();

        // Completed item: accept or reduce.
        if (next_key.find('.') == string::npos) {
            if (left == "S'") {
                ACTION[pre_CLOSURE_ID][static_cast<int>('#')] = 'A';
            } else {
                // Look up the production by exact text; a substring search would
                // confuse e.g. "A->d" with "A->dB".
                int productionIndex = -1;
                int cnt = 0;
                for (node &production : productionListSplit) {
                    if (production.full_string() == next_key) {
                        productionIndex = cnt;
                        break;
                    }
                    ++cnt;
                }

                // LR(0): reduce on every terminal. (There is no symbol after the
                // dot here, so nothing may be indexed by `arc`.)
                for (int i = 1; i <= n; ++i) {
                    const int column = static_cast<int>(end_charSet_Array[i]);
                    ACTION[pre_CLOSURE_ID][column] = 'r';
                    ACTION_num[pre_CLOSURE_ID][column] = productionIndex;
                }
            }
            continue;
        }

        // Kernel item already owns an item set: only record the transition.
        const auto seen = closure_mainProduction_vis.find(next_key);
        if (seen != closure_mainProduction_vis.end() && seen->second != 0) {
            e[pre_CLOSURE_ID].push_back({seen->second - 1, arc});
            continue;
        }

        // New item set.
        const int new_id = static_cast<int>(CLOSURE.size());
        closure_mainProduction_vis[next_key] = new_id + 1; // stored as id + 1, 0 means "unseen"
        e[pre_CLOSURE_ID].push_back({new_id, arc});
        CLOSURE.push_back(info(new_id, next_mainProduction));

        // If a non-terminal follows the dot, append its productions with a leading dot.
        for (const string &next_right_string : next_right) {
            const int sz = next_right_string.size();
            for (int i = 0; i + 1 < sz; ++i) {
                if (next_right_string[i] != '.') continue;
                if (!isupper(static_cast<unsigned char>(next_right_string[i + 1]))) continue;

                const string notEndChar(1, next_right_string[i + 1]);
                for (int x : get_productionIndexArray_by_notEndChar(notEndChar)) {
                    const node &production = productionList[x];
                    set<string> append_Production_right;
                    for (const string &alternative : production.right) {
                        append_Production_right.insert("." + alternative);
                    }
                    node append_Production = {production.left, append_Production_right};
                    CLOSURE.back().append_Production = append_Production;
                }
            }
        }
    }
}

// Expands the given item set (kernel production first, then the appended one)
// and then recursively expands every item set that is still marked as not done.
void next_CLOSURE(info cur_CLOSURE) {
    const int self = cur_CLOSURE.id;
    done[self] = true;

    next_CLOSURE_By_Production(cur_CLOSURE.mainProduction, self);
    next_CLOSURE_By_Production(cur_CLOSURE.append_Production, self);

    // The bound is taken once, after this item set has been expanded.
    const int known = CLOSURE.size();
    int idx = 0;
    while (idx < known) {
        if (done[idx] == false) {
            next_CLOSURE(CLOSURE[idx]);
        }
        ++idx;
    }
}

void print_CLOSURE() {
    int sz = CLOSURE.size();
    for (int i = 0; i < sz; ++i) {
        CLOSURE[i].print();
    }
}


void print_edge() {
    cout << "--------------------------" << endl;
    int sz = CLOSURE.size();

    int cnt = 0;
    for (int i = 0; i < sz; ++i) cnt += e[i].size(); 
    printf("print the edge, numbers is: %d \n", cnt);
    for (int i = 0; i < sz; ++i) {
        for (auto p : e[i]) {
            printf("%d->%d, arc: %c\n", i, p.v, p.arc);
        }
    }
    cout << "--------------------------" << endl;

}

void print_form() {
    int sz = CLOSURE.size();
    for (int i = 0; i < sz; ++i) {
        for (auto p : e[i]) {
            int v = p.v;
            char arc = p.arc;
            if (isupper(arc)) { // GOTO表
                GOTO[i][(int)arc] = v;
            } else {
                ACTION[i][(int)arc] = 'S';
                ACTION_num[i][(int)arc] = v;
            }
        }
    }

    cout << "\t\t    " << "ACTION" << endl;
    cout << "------------------------------------------" << endl;

    for (int j = 1; j <= n; ++j) {
        cout << "\t" << end_charSet_Array[j];
    }
    cout << endl;
    for (int i = 0; i < sz; ++i) {
        cout << i << "\t";
        for (int j = 1; j <= n; ++j) {
            if (ACTION_num[i][(int)end_charSet_Array[j]] == 0) {
                if (ACTION[i][end_charSet_Array[j]] == 'A') {
                    cout << "acc\t"; 
                }
                printf("\t");
            }
            else 
                printf("%c%d\t", ACTION[i][end_charSet_Array[j]], ACTION_num[i][(int)end_charSet_Array[j]]);
        }
        cout << endl;
    }
    cout << "------------------------------------------" << endl;

    cout << "\t    " << "GOTO" << endl;
    cout << "------------------------------------------" << endl;
    for (int j = 1; j <= n; ++j) {
        if (not_end_charSet_Array[j] == "S\'") {
            cout << "\t"; continue;
        }
        cout << "\t" << not_end_charSet_Array[j];
    }
    cout << endl;
    for (int i = 0; i < sz; ++i) {
        cout << i << "\t";
        for (int j = 1; j <= m; ++j) {
            if (GOTO[i][(int)not_end_charSet_Array[j][0]] == 0) {
                printf("\t");
            } else {
                printf("%d\t", GOTO[i][(int)not_end_charSet_Array[j][0]]);
            }
        }
        cout << endl;
    }
    cout << "------------------------------------------" << endl;
}

int main() {
    string str; 
    printf("please input production (end-Sign is $):\n");
    print_tips();
    while (cin >> str && str[0] != '$') {
        node n = split(str); // 对读入进行处理  F->P^F|P
        productionList.push_back(n); // 存储每个产生式
        build_notEndCharSet_And_endCharSet(str); // 构建非终结符集和终结符集
    }

    splitProductionList();

    m = not_end_charSet.size(); // 非终结符个数
    n = end_charSet.size(); // 终结符个数
    set_ReflectTo_arr(); // 集合映射到数组, 方便操作

    // 取第一个产生式(起始符号S’的产生式)

    node n = productionList[0];

    auto it = n.right.begin();

    set<string> right;
    right.insert("." + *it);

    node mainProduction = {n.left, right};

    if (closure_mainProduction_vis[mainProduction.to_string()]);
    else {
        CLOSURE.push_back(info(CLOSURE.size(), mainProduction)); // 向项目集中添加 CLOSURE
        closure_mainProduction_vis[mainProduction.to_string()] = CLOSURE.size(); // map中存的是 closure的id + 1
    }

    vector<int> indexArray = get_productionIndexArray_by_notEndChar(*it);

    for (auto x : indexArray) {
        node production = productionList[x];
        string left = production.left;
        set<string> right = production.right;
        set<string> append_Production_right;
        for (auto it = right.begin(); it != right.end(); ++it) {
            append_Production_right.insert("." + *it);
        }
        node append_Production = {left, append_Production_right};
        CLOSURE[0].append_Production = append_Production;
    }
    
    next_CLOSURE(CLOSURE[0]);

    print_CLOSURE(); // 打印CLOSURE集合

    print_edge();

    print_form(); // 打印LR(0)分析表
    
    return 0;
}

/*

S'->E
E->aA|bB
A->cA|d
B->cB|d
$

*/

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

xingxg.

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值