import enum
import io
from logging import getLogger
import os
import platform
from typing import Union, Tuple, Optional
import cachetools
import rwlock
from .node import Node, FreelistNode
from .const import (
ENDIAN, PAGE_REFERENCE_BYTES, OTHERS_BYTES, TreeConf, FRAME_TYPE_BYTES
)
logger = getLogger(__name__)
class ReachedEndOfFile(Exception):
    """Raised when a read hits the end of the file before getting all the requested data."""
def open_file_in_dir(path: str) -> Tuple[io.FileIO, Optional[int]]:
    """Open a file and its directory.

    The file is opened in binary mode and created if it does not exist.
    Both file descriptors must be closed after use to prevent them from
    leaking.

    On Windows, the directory is not opened, as it is useless.

    :param path: path of the file to open or create
    :return: tuple of (file object, directory fd or None on Windows)
    :raises ValueError: if the parent directory does not exist
    """
    # An empty dirname means the file lives in the current directory.
    directory = os.path.dirname(path) or '.'
    if not os.path.isdir(directory):
        raise ValueError('No directory {}'.format(directory))

    try:
        # EAFP: try exclusive creation first. This avoids the race where
        # another process creates the file between an existence check and
        # the subsequent open call.
        file_fd = open(path, mode='x+b', buffering=0)
    except FileExistsError:
        file_fd = open(path, mode='r+b', buffering=0)

    if platform.system() == 'Windows':
        # Opening a directory is not possible on Windows, but that is not
        # a problem since Windows does not need to fsync the directory in
        # order to persist metadata
        dir_fd = None
    else:
        # Keep a directory fd around so callers can fsync the directory
        # and make file-creation metadata durable.
        dir_fd = os.open(directory, os.O_RDONLY)

    return file_fd, dir_fd
def write_to_file(file_fd: io.FileIO, dir_fileno: Optional[int],
                  data: bytes, fsync: bool=True):
    """Write *data* in full to *file_fd*, retrying on partial writes.

    When *fsync* is true, the file (and its directory if *dir_fileno*
    is given) is flushed to stable storage afterwards.
    """
    total = len(data)
    done = 0
    # A raw write may be partial; keep writing the remaining tail.
    while done < total:
        done += file_fd.write(data[done:])
    if fsync:
        fsync_file_and_dir(file_fd.fileno(), dir_fileno)
def fsync_file_and_dir(file_fileno: int, dir_fileno: Optional[int]):
    """Flush OS buffers for the file, and for its directory when given."""
    os.fsync(file_fileno)
    if dir_fileno is None:
        # No directory fd (e.g. on Windows): nothing more to sync.
        return
    os.fsync(dir_fileno)
def read_from_file(file_fd: io.FileIO, start: int, stop: int) -> bytes:
    """Read and return the bytes in the range [start, stop) of *file_fd*.

    :raises ReachedEndOfFile: if the file ends before *stop*.
    """
    expected = stop - start
    assert expected >= 0
    file_fd.seek(start)

    # Raw reads may return fewer bytes than asked; accumulate chunks
    # and join once at the end instead of concatenating repeatedly.
    chunks = []
    remaining = expected
    while remaining > 0:
        chunk = file_fd.read(remaining)
        if chunk == b'':
            raise ReachedEndOfFile('Read until the end of file')
        chunks.append(chunk)
        remaining -= len(chunk)

    data = b''.join(chunks)
    assert len(data) == expected
    return data
class FakeCache:
    """A no-op stand-in for an LRU cache.

    cachetools does not accept maxsize=0, so this class is used when
    caching is disabled: lookups always miss, stores are discarded.
    """

    def get(self, k):
        # Always a cache miss.
        return None

    def __setitem__(self, key, value):
        # Discard the value silently.
        pass

    def clear(self):
        # Nothing is ever stored, so nothing to clear.
        pass
class FileMemory:
__slots__ = ['_filename', '_tree_conf', '_lock', '_cache', '_fd',
'_dir_fd', '_wal', 'last_page', '_freelist_start_page',
'_root_node_page']
def __init__(self, filename: str, tree_conf: TreeConf,
             cache_size: int=512):
    """Open (or create) the tree file and its WAL.

    :param filename: path of the B+tree file on disk
    :param tree_conf: configuration of the tree
    :param cache_size: number of deserialized nodes kept in memory;
                       0 disables caching entirely
    """
    self._filename = filename
    self._tree_conf = tree_conf
    self._lock = rwlock.RWLock()
    # cachetools refuses maxsize=0, hence the no-op FakeCache.
    self._cache = (
        FakeCache() if cache_size == 0
        else cachetools.LRUCache(maxsize=cache_size)
    )
    self._fd, self._dir_fd = open_file_in_dir(filename)

    self._wal = WAL(filename, tree_conf.page_size)
    if self._wal.needs_recovery:
        # A WAL left over from a crash: replay it before anything else.
        self.perform_checkpoint(reopen_wal=True)

    # Derive the next available page from the current file size.
    last_byte = self._fd.seek(0, io.SEEK_END)
    self.last_page = last_byte // self._tree_conf.page_size
    self._freelist_start_page = 0

    # Todo: Remove this, it should only be in Tree
    self._root_node_page = 0
def get_node(self, page: int):
    """Return the node stored at *page*: cache first, then WAL, then file.

    The cache is not there to prevent hitting the disk, the OS is already
    very good at that.  It exists to avoid paying the price of
    deserializing page data into a Node object, which is expensive in
    Python.  Since there is at most a single writer, writing to the cache
    on `set_node` is safe as long as the cache is invalidated when a
    transaction is rolled back.
    """
    cached = self._cache.get(page)
    if cached is not None:
        return cached

    # Cache miss: prefer the version in the WAL (falsy when absent),
    # falling back to the main file.
    data = self._wal.get_page(page) or self._read_page(page)
    node = Node.from_page_data(self._tree_conf, data=data, page=page)
    self._cache[node.page] = node
    return node
def set_node(self, node: Node):
    """Persist *node* through the WAL and refresh its cache entry."""
    page_number = node.page
    self._wal.set_page(page_number, node.dump())
    self._cache[page_number] = node
def del_node(self, node: Node):
    """Recycle the page occupied by *node* via the freelist."""
    self._insert_in_freelist(node.page)
def del_page(self, page: int):
    """Recycle *page* via the freelist."""
    self._insert_in_freelist(page)
@property
def read_transaction(self):
    """Context manager holding the reader lock for the duration of a read."""
    memory = self

    class _ReadTransaction:

        def __enter__(self):
            memory._lock.reader_lock.acquire()

        def __exit__(self, exc_type, exc_val, exc_tb):
            memory._lock.reader_lock.release()

    return _ReadTransaction()
@property
def write_transaction(self):
    """Context manager holding the writer lock.

    On a clean exit the WAL is committed; on an exception it is rolled
    back and the cache cleared.
    """
    memory = self

    class _WriteTransaction:

        def __enter__(self):
            memory._lock.writer_lock.acquire()

        def __exit__(self, exc_type, exc_val, exc_tb):
            if exc_type:
                # An error in the middle of a write transaction: roll
                # back and clear the cache, because the writer may have
                # partially modified cached Nodes.
                memory._wal.rollback()
                memory._cache.clear()
            else:
                memory._wal.commit()
            memory._lock.writer_lock.release()

    return _WriteTransaction()
@property
def next_available_page(self) -> int:
    """Return a page number free for use, recycling freelist pages first."""
    recycled = self._pop_from_freelist()
    if recycled is not None:
        return recycled

    # Freelist is empty: grow the file by one page.
    self.last_page += 1
    return self.last_page
def _traverse_free_list(self) -> Tuple[Optional[FreelistNode],
                                       Optional[FreelistNode]]:
    """Walk the freelist and return (second-to-last node, last node).

    Both are None when the freelist is empty; the second-to-last is None
    when the freelist holds a single node.
    """
    if self._freelist_start_page == 0:
        # A start page of 0 marks an empty freelist.
        return None, None

    previous = None
    current = self.get_node(self._freelist_start_page)
    while current.next_page is not None:
        previous = current
        current = self.get_node(current.next_page)
    return previous, current
def _insert_in_freelist(self, page: int):
    """Insert a page at the end of the freelist."""
    _, tail = self._traverse_free_list()

    # The recycled page becomes the new terminal freelist node.
    self.set_node(FreelistNode(self._tree_conf, page=page, next_page=None))

    if tail is not None:
        # Link the previous tail to the new one.
        tail.next_page = page
        self.set_node(tail)
    else:
        # Freelist was empty: record the new starting point in metadata.
        self._freelist_start_page = page
        self.set_metadata(None, None)
def _pop_from_freelist(self) -> Optional[int]:
    """Detach the last freelist node and return its page, or None if empty."""
    before_tail, tail = self._traverse_free_list()
    if tail is None:
        # Freelist is completely empty, nothing to pop.
        return None

    if before_tail is not None:
        # Truncate the list just before the popped node.
        before_tail.next_page = None
        self.set_node(before_tail)
    else:
        # The popped node was the only one: mark the freelist empty in
        # the metadata.
        self._freelist_start_page = 0
        self.set_metadata(None, None)
    return tail.page
# Todo: make metadata as a normal Node
def get_metadata(self) -> tuple:
try:
data = self._read_page(0)
except ReachedEndOfFile:
raise ValueError('Metadata not set yet')
end_root_node_page = PAGE_REFERENCE_BYTES
root_node_page = int.from_bytes(
data[0:end_root_node_page], ENDIAN
)
end_p
没有合适的资源?快使用搜索试试~ 我知道了~
Python 3的磁盘B +树-Python开发

共23个文件
py:17个
rst:1个
gitignore:1个

需积分: 50 4 下载量 178 浏览量
2021-05-25
17:37:09
上传
评论 1
收藏 27KB ZIP 举报
温馨提示
Bplustree Python 3的磁盘B +树。它感觉像是字典,但存储在磁盘上。 什么时候使用? 当要存储的数据不适合存储在内存中时当需要持久存储数据时当保持键i Bplustree Python 3的磁盘B + tree。感觉就像是字典,但存储在磁盘上。 什么时候使用? 当要存储的数据不适合存储在存储器中时当需要持久存储数据时当保持键的顺序很重要时该项目正在开发中:文件的格式可能会在版本之间改变。 不要用作您的主要数据源。 快速入门使用pip安装Bplustree:pip install bplustree创建存储在文件中的B + tree索引,并将其用于:>>> from bplustr
资源详情
资源评论
资源推荐
收起资源包目录



























共 23 条
- 1
























绘画窝
- 粉丝: 34
上传资源 快速赚钱
我的内容管理 展开
我的资源 快来上传第一个资源
我的收益
登录查看自己的收益我的积分 登录查看自己的积分
我的C币 登录后查看C币余额
我的收藏
我的下载
下载帮助


最新资源
- 大数据时代翻译职业化的教学模式研究.docx
- 《软件工程实用教程》第11章软件项目管理.ppt
- (源码)基于LQR算法的机器人路径规划与跟踪系统.zip
- PLC控制的自动售货机设计-(2).doc
- 基于VC的网络聊天系统研究设计与实现.doc
- 安全管理事业部-:网上银行网络通讯安全解决方案.ppt
- 企业项目管理中的沟通和成本.docx
- 利用MATLAB实现连续信号采样与重构仿真课程设计.doc
- 大数据时代高校图书馆学科竞争力分析系统研究.docx
- 浅析网络战争中的国际法问题.docx
- 煤矿立井提升系统安全性分析及管理.docx
- 全国计算机等级测验二级java上机题库.doc
- 交通线路选择软件的研究与设计开发与实现研究与设计开发.doc
- 室内蜂窝移动通网络技术概述.doc
- 数学实验云计算辅助教学平台的建设初探.docx
- (源码)基于Node.js的个人博客网站.zip
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈



安全验证
文档复制为VIP权益,开通VIP直接复制

评论0