知识图谱数据导入代码

from py2neo import Graph, Node, Relationship, NodeMatcher
import pandas as pd
from pdb import set_trace


def load_data():
    # 加载数据
    # data = pd.read_excel('./santi.xlsx')
    # data = pd.read_excel('./mingchaonaxieshier.xlsx')
    data = pd.read_excel('./test.xlsx')
    start = data['S'].tolist()
    relation = data['P'].tolist()
    end = data['O'].tolist()
    start_list = [str(i) for i in start]
    relation_list = [str(i) for i in relation]
    end_list = [str(i) for i in end]
    link_dict = dict()
    link_dict['start'] = start_list
    link_dict['relation'] = relation_list
    link_dict['end'] = end_list
    df_data = pd.DataFrame(link_dict)
    return df_data


class DataToNeo4j:
    def __init__(self):
        link = Graph()
        self.graph = link

        self.start = 'start'
        self.end = 'end'

        self.graph.delete_all()   # 将之前的图  全部删除
        self.matcher = NodeMatcher(link)   # 为了查找

    def create_node(self, start, end):
        # 创建节点
        for name in start:
            node = Node(self.start, name=name)
            self.graph.create(node)

        for name in end:
            node = Node(self.end, name=name)
            self.graph.create(node)

    def create_relation(self, df_data):
        m = 0
        for m in range(0, len(df_data)):
            # print(list(self.matcher.match(self.start).where('_.name=' + "'" + df_data['start'][m] + "'")))
            # 相当于在'start'标签下找   name=某个名字的节点
            # print(list(self.matcher.match(self.end).where('_.name=' + "'" + df_data['end'][m] + "'")))
            # 相当于在'end'标签下找   name=某个名字的节点'
            # 然后为这两个节点创建关系
            try:
                rel = Relationship(
                    self.matcher.match(self.start).where('_.name=' + "'" + df_data['start'][m] + "'").first(),
                    df_data['relation'][m],
                    self.matcher.match(self.end).where('_.name=' + "'" + df_data['end'][m] + "'").first()
                )
                self.graph.create(rel)
            except AttributeError as e:
                print(e, m)


def data_extraction(df_data):
    node_start = []
    for i in df_data['start'].tolist():
        node_start.append(i)

    node_end = []
    for i in df_data['end'].tolist():
        node_end.append(i)

    # 去重
    node_start = list(set(node_start))
    node_end = list(set(node_end))
    return node_start, node_end


if __name__ == '__main__':
    df_data = load_data()
    # print(df_data.head())
    node_start, node_end = data_extraction(df_data)
    # 创建图
    create_data = DataToNeo4j()
    # 节点
    create_data.create_node(node_start, node_end)
    # 关系
    create_data.create_relation(df_data)
from py2neo import Graph, Node, Relationship, NodeMatcher
import pandas as pd
from pdb import set_trace


def load_data():
    # 加载数据
    data = pd.read_excel('./santi.xlsx')
    # data = pd.read_excel('./mingchaonaxieshier.xlsx')
    # data = pd.read_excel('./test.xlsx')
    start = data['S'].tolist()
    relation = data['P'].tolist()
    end = data['O'].tolist()
    start_list = [str(i) for i in start]
    relation_list = [str(i) for i in relation]
    end_list = [str(i) for i in end]
    link_dict = dict()
    link_dict['start'] = start_list
    link_dict['relation'] = relation_list
    link_dict['end'] = end_list
    df_data = pd.DataFrame(link_dict)
    return df_data


class DataToNeo4j:
    def __init__(self):
        link = Graph()
        self.graph = link

        self.start = 'start'
        self.end = 'end'

        self.graph.delete_all()   # 将之前的图  全部删除
        self.matcher = NodeMatcher(link)   # 为了查找

    def create_node(self, start, end):
        # 创建节点
        temp = []
        temp.extend(start)
        temp.extend(end)
        temp = list(set(temp))
        for t in temp:
            node = Node(self.start, name=t)
            self.graph.create(node)


        # for name in start:
        #     node = Node(self.start, name=name)
        #     self.graph.create(node)
        #
        # for name in end:
        #     node = Node(self.end, name=name)
        #     self.graph.create(node)

    def create_relation(self, df_data):
        m = 0
        for m in range(0, len(df_data)):
            # print(list(self.matcher.match(self.start).where('_.name=' + "'" + df_data['start'][m] + "'")))
            # 相当于在'start'标签下找   name=某个名字的节点
            # print(list(self.matcher.match(self.end).where('_.name=' + "'" + df_data['end'][m] + "'")))
            # 相当于在'end'标签下找   name=某个名字的节点'
            # 然后为这两个节点创建关系
            try:
                rel = Relationship(
                    self.matcher.match(self.start).where('_.name=' + "'" + df_data['start'][m] + "'").first(),
                    df_data['relation'][m],
                    self.matcher.match(self.start).where('_.name=' + "'" + df_data['end'][m] + "'").first()
                )
                self.graph.create(rel)
            except AttributeError as e:
                print(e, m)


def data_extraction(df_data):
    node_start = []
    for i in df_data['start'].tolist():
        node_start.append(i)

    node_end = []
    for i in df_data['end'].tolist():
        node_end.append(i)

    # 去重
    node_start = list(set(node_start))
    node_end = list(set(node_end))
    return node_start, node_end


if __name__ == '__main__':
    df_data = load_data()
    # print(df_data.head())
    node_start, node_end = data_extraction(df_data)
    # 创建图
    create_data = DataToNeo4j()
    # 节点
    create_data.create_node(node_start, node_end)
    # 关系
    create_data.create_relation(df_data)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值