import enum
import io
from logging import getLogger
import os
import platform
from typing import Union, Tuple, Optional
import cachetools
import rwlock
from .node import Node, FreelistNode
from .const import (
ENDIAN, PAGE_REFERENCE_BYTES, OTHERS_BYTES, TreeConf, FRAME_TYPE_BYTES
)
logger = getLogger(__name__)
class ReachedEndOfFile(Exception):
    """Raised when a read hits the end of the file before getting all the requested data."""
def open_file_in_dir(path: str) -> Tuple[io.FileIO, Optional[int]]:
    """Open a file and its directory.

    The file is opened in binary mode and created if it does not exist.
    Both file descriptors must be closed after use to prevent them from
    leaking.

    On Windows, the directory is not opened, as it is useless.

    :param path: path of the file to open or create
    :return: tuple of (file object, directory fd or None on Windows)
    :raises ValueError: if the parent directory does not exist
    """
    # An empty dirname means the file lives in the current directory.
    directory = os.path.dirname(path) or '.'
    if not os.path.isdir(directory):
        raise ValueError('No directory {}'.format(directory))

    try:
        # EAFP: try exclusive creation first. This avoids the race where
        # another process creates the file between an existence check and
        # the subsequent open call.
        file_fd = open(path, mode='x+b', buffering=0)
    except FileExistsError:
        file_fd = open(path, mode='r+b', buffering=0)

    if platform.system() == 'Windows':
        # Opening a directory is not possible on Windows, but that is not
        # a problem since Windows does not need to fsync the directory in
        # order to persist metadata
        dir_fd = None
    else:
        # Keep a directory fd around so callers can fsync the directory
        # and make file-creation metadata durable.
        dir_fd = os.open(directory, os.O_RDONLY)

    return file_fd, dir_fd
def write_to_file(file_fd: io.FileIO, dir_fileno: Optional[int],
                  data: bytes, fsync: bool=True):
    """Write *data* in full to *file_fd*, retrying on partial writes.

    When *fsync* is true, the file (and its directory if *dir_fileno*
    is given) is flushed to stable storage afterwards.
    """
    total = len(data)
    done = 0
    # A raw write may be partial; keep writing the remaining tail.
    while done < total:
        done += file_fd.write(data[done:])
    if fsync:
        fsync_file_and_dir(file_fd.fileno(), dir_fileno)
def fsync_file_and_dir(file_fileno: int, dir_fileno: Optional[int]):
    """Flush OS buffers for the file, and for its directory when given."""
    os.fsync(file_fileno)
    if dir_fileno is None:
        # No directory fd (e.g. on Windows): nothing more to sync.
        return
    os.fsync(dir_fileno)
def read_from_file(file_fd: io.FileIO, start: int, stop: int) -> bytes:
    """Read and return the bytes in the range [start, stop) of *file_fd*.

    :raises ReachedEndOfFile: if the file ends before *stop*.
    """
    expected = stop - start
    assert expected >= 0
    file_fd.seek(start)

    # Raw reads may return fewer bytes than asked; accumulate chunks
    # and join once at the end instead of concatenating repeatedly.
    chunks = []
    remaining = expected
    while remaining > 0:
        chunk = file_fd.read(remaining)
        if chunk == b'':
            raise ReachedEndOfFile('Read until the end of file')
        chunks.append(chunk)
        remaining -= len(chunk)

    data = b''.join(chunks)
    assert len(data) == expected
    return data
class FakeCache:
    """A no-op stand-in for an LRU cache.

    cachetools does not accept maxsize=0, so this class is used when
    caching is disabled: lookups always miss, stores are discarded.
    """

    def get(self, k):
        # Always a cache miss.
        return None

    def __setitem__(self, key, value):
        # Discard the value silently.
        pass

    def clear(self):
        # Nothing is ever stored, so nothing to clear.
        pass
class FileMemory:
__slots__ = ['_filename', '_tree_conf', '_lock', '_cache', '_fd',
'_dir_fd', '_wal', 'last_page', '_freelist_start_page',
'_root_node_page']
def __init__(self, filename: str, tree_conf: TreeConf,
             cache_size: int=512):
    """Open (or create) the tree file and its WAL.

    :param filename: path of the B+tree file on disk
    :param tree_conf: configuration of the tree
    :param cache_size: number of deserialized nodes kept in memory;
                       0 disables caching entirely
    """
    self._filename = filename
    self._tree_conf = tree_conf
    self._lock = rwlock.RWLock()
    # cachetools refuses maxsize=0, hence the no-op FakeCache.
    self._cache = (
        FakeCache() if cache_size == 0
        else cachetools.LRUCache(maxsize=cache_size)
    )
    self._fd, self._dir_fd = open_file_in_dir(filename)

    self._wal = WAL(filename, tree_conf.page_size)
    if self._wal.needs_recovery:
        # A WAL left over from a crash: replay it before anything else.
        self.perform_checkpoint(reopen_wal=True)

    # Derive the next available page from the current file size.
    last_byte = self._fd.seek(0, io.SEEK_END)
    self.last_page = last_byte // self._tree_conf.page_size
    self._freelist_start_page = 0

    # Todo: Remove this, it should only be in Tree
    self._root_node_page = 0
def get_node(self, page: int):
    """Return the node stored at *page*: cache first, then WAL, then file.

    The cache is not there to prevent hitting the disk, the OS is already
    very good at that.  It exists to avoid paying the price of
    deserializing page data into a Node object, which is expensive in
    Python.  Since there is at most a single writer, writing to the cache
    on `set_node` is safe as long as the cache is invalidated when a
    transaction is rolled back.
    """
    cached = self._cache.get(page)
    if cached is not None:
        return cached

    # Cache miss: prefer the version in the WAL (falsy when absent),
    # falling back to the main file.
    data = self._wal.get_page(page) or self._read_page(page)
    node = Node.from_page_data(self._tree_conf, data=data, page=page)
    self._cache[node.page] = node
    return node
def set_node(self, node: Node):
    """Persist *node* through the WAL and refresh its cache entry."""
    page_number = node.page
    self._wal.set_page(page_number, node.dump())
    self._cache[page_number] = node
def del_node(self, node: Node):
    """Recycle the page occupied by *node* via the freelist."""
    self._insert_in_freelist(node.page)
def del_page(self, page: int):
    """Recycle *page* via the freelist."""
    self._insert_in_freelist(page)
@property
def read_transaction(self):
    """Context manager holding the reader lock for the duration of a read."""
    memory = self

    class _ReadTransaction:

        def __enter__(self):
            memory._lock.reader_lock.acquire()

        def __exit__(self, exc_type, exc_val, exc_tb):
            memory._lock.reader_lock.release()

    return _ReadTransaction()
@property
def write_transaction(self):
    """Context manager holding the writer lock.

    On a clean exit the WAL is committed; on an exception it is rolled
    back and the cache cleared.
    """
    memory = self

    class _WriteTransaction:

        def __enter__(self):
            memory._lock.writer_lock.acquire()

        def __exit__(self, exc_type, exc_val, exc_tb):
            if exc_type:
                # An error in the middle of a write transaction: roll
                # back and clear the cache, because the writer may have
                # partially modified cached Nodes.
                memory._wal.rollback()
                memory._cache.clear()
            else:
                memory._wal.commit()
            memory._lock.writer_lock.release()

    return _WriteTransaction()
@property
def next_available_page(self) -> int:
    """Return a page number free for use, recycling freelist pages first."""
    recycled = self._pop_from_freelist()
    if recycled is not None:
        return recycled

    # Freelist is empty: grow the file by one page.
    self.last_page += 1
    return self.last_page
def _traverse_free_list(self) -> Tuple[Optional[FreelistNode],
                                       Optional[FreelistNode]]:
    """Walk the freelist and return (second-to-last node, last node).

    Both are None when the freelist is empty; the second-to-last is None
    when the freelist holds a single node.
    """
    if self._freelist_start_page == 0:
        # A start page of 0 marks an empty freelist.
        return None, None

    previous = None
    current = self.get_node(self._freelist_start_page)
    while current.next_page is not None:
        previous = current
        current = self.get_node(current.next_page)
    return previous, current
def _insert_in_freelist(self, page: int):
    """Insert a page at the end of the freelist."""
    _, tail = self._traverse_free_list()

    # The recycled page becomes the new terminal freelist node.
    self.set_node(FreelistNode(self._tree_conf, page=page, next_page=None))

    if tail is not None:
        # Link the previous tail to the new one.
        tail.next_page = page
        self.set_node(tail)
    else:
        # Freelist was empty: record the new starting point in metadata.
        self._freelist_start_page = page
        self.set_metadata(None, None)
def _pop_from_freelist(self) -> Optional[int]:
    """Detach the last freelist node and return its page, or None if empty."""
    before_tail, tail = self._traverse_free_list()
    if tail is None:
        # Freelist is completely empty, nothing to pop.
        return None

    if before_tail is not None:
        # Truncate the list just before the popped node.
        before_tail.next_page = None
        self.set_node(before_tail)
    else:
        # The popped node was the only one: mark the freelist empty in
        # the metadata.
        self._freelist_start_page = 0
        self.set_metadata(None, None)
    return tail.page
# Todo: make metadata as a normal Node
def get_metadata(self) -> tuple:
try:
data = self._read_page(0)
except ReachedEndOfFile:
raise ValueError('Metadata not set yet')
end_root_node_page = PAGE_REFERENCE_BYTES
root_node_page = int.from_bytes(
data[0:end_root_node_page], ENDIAN
)
end_p
没有合适的资源?快使用搜索试试~ 我知道了~
Python 3的磁盘B +树-Python开发

共23个文件
py:17个
rst:1个
gitignore:1个

需积分: 50 4 下载量 178 浏览量
2021-05-25
17:37:09
上传
评论 1
收藏 27KB ZIP 举报
温馨提示
Bplustree Python 3的磁盘B +树。它感觉像是字典,但存储在磁盘上。 什么时候使用? 当要存储的数据不适合存储在内存中时当需要持久存储数据时当保持键i Bplustree Python 3的磁盘B + tree。感觉就像是字典,但存储在磁盘上。 什么时候使用? 当要存储的数据不适合存储在存储器中时当需要持久存储数据时当保持键的顺序很重要时该项目正在开发中:文件的格式可能会在版本之间改变。 不要用作您的主要数据源。 快速入门使用pip安装Bplustree:pip install bplustree创建存储在文件中的B + tree索引,并将其用于:>>> from bplustr
资源详情
资源评论
资源推荐
收起资源包目录



























共 23 条
- 1
























绘画窝
- 粉丝: 34
上传资源 快速赚钱
我的内容管理 展开
我的资源 快来上传第一个资源
我的收益
登录查看自己的收益我的积分 登录查看自己的积分
我的C币 登录后查看C币余额
我的收藏
我的下载
下载帮助


最新资源
- 大数据时代翻译职业化的教学模式研究.docx
- 《软件工程实用教程》第11章软件项目管理.ppt
- (源码)基于LQR算法的机器人路径规划与跟踪系统.zip
- PLC控制的自动售货机设计-(2).doc
- 基于VC的网络聊天系统研究设计与实现.doc
- 安全管理事业部-:网上银行网络通讯安全解决方案.ppt
- 企业项目管理中的沟通和成本.docx
- 利用MATLAB实现连续信号采样与重构仿真课程设计.doc
- 大数据时代高校图书馆学科竞争力分析系统研究.docx
- 浅析网络战争中的国际法问题.docx
- 煤矿立井提升系统安全性分析及管理.docx
- 全国计算机等级测验二级java上机题库.doc
- 交通线路选择软件的研究与设计开发与实现研究与设计开发.doc
- 室内蜂窝移动通网络技术概述.doc
- 数学实验云计算辅助教学平台的建设初探.docx
- (源码)基于Node.js的个人博客网站.zip
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈



安全验证
文档复制为VIP权益,开通VIP直接复制

评论0