Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/fact/record_file.ex
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ defmodule Fact.RecordFile do
end

# Resolves the location of the record file for `record_id` and returns it
# wrapped in an `{:ok, path}` tuple.
# NOTE(review): this hunk shows both sides of a one-line change (1 addition,
# 1 deletion). Only the last expression is the function's result; the
# `Path.join/2` form above it is the pre-change line.
defp path(context, record_id) do
{:ok, Path.join(Storage.records_path(context), record_id)}
{:ok, Storage.records_path(context, record_id)}
end

defp process_write_results(results) do
Expand Down
20 changes: 20 additions & 0 deletions lib/fact/seam/parsers.ex
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,24 @@ defmodule Fact.Seam.Parsers do
end

def parse_non_neg_integer(_), do: :error

@doc """
Parses a value as an integer and verifies it is within the specified range.

Accepts either an integer or a binary holding a base-10 integer (an optional
leading sign is allowed). Returns `{:ok, integer}` when the value lies within
`min..max` inclusive.

Returns `:error` for everything else: integers outside the range, binaries
that are empty, non-numeric, or have characters left over after the number
(for example `"1.5"`), and values of any other type.

## Examples

    iex> Fact.Seam.Parsers.parse_integer_range("2", 0, 3)
    {:ok, 2}

    iex> Fact.Seam.Parsers.parse_integer_range(4, 0, 3)
    :error

    iex> Fact.Seam.Parsers.parse_integer_range("1.5", 0, 3)
    :error
"""
@doc since: "0.2.0"
@spec parse_integer_range(term(), integer(), integer()) :: {:ok, integer()} | :error
def parse_integer_range(value, min, max)
    when is_integer(value) and value >= min and value <= max do
  {:ok, value}
end

def parse_integer_range(value, min, max) when is_binary(value) do
  # Integer.parse/1 reports malformed input as a return value, so no exception
  # has to be rescued. Requiring an empty remainder rejects partial parses
  # such as "1.5" or "5abc".
  case Integer.parse(value) do
    {integer, ""} -> parse_integer_range(integer, min, max)
    _ -> :error
  end
end

# Out-of-range integers and every non-integer, non-binary value end up here.
def parse_integer_range(_, _, _), do: :error
end
5 changes: 4 additions & 1 deletion lib/fact/seam/storage/registry.ex
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,8 @@ defmodule Fact.Seam.Storage.Registry do
* `Fact.Seam.Storage.Standard.V1`
"""
use Fact.Seam.Registry,
impls: [Fact.Seam.Storage.Standard.V1]
impls: [
Fact.Seam.Storage.Standard.V1,
Fact.Seam.Storage.Standard.V2
]
end
163 changes: 163 additions & 0 deletions lib/fact/seam/storage/standard/v2.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
defmodule Fact.Seam.Storage.Standard.V2 do

Check warning on line 1 in lib/fact/seam/storage/standard/v2.ex

View workflow job for this annotation

GitHub Actions / Build and test (1.15.x, 26, true)

function records_path/2 required by behaviour Fact.Seam.Storage is not implemented (in module Fact.Seam.Storage.Standard.V2)

Check warning on line 1 in lib/fact/seam/storage/standard/v2.ex

View workflow job for this annotation

GitHub Actions / Build and test (1.13.x, 25, false)

function records_path/2 required by behaviour Fact.Seam.Storage is not implemented (in module Fact.Seam.Storage.Standard.V2)

Check warning on line 1 in lib/fact/seam/storage/standard/v2.ex

View workflow job for this annotation

GitHub Actions / Build and test (1.14.x, 26, false)

function records_path/2 required by behaviour Fact.Seam.Storage is not implemented (in module Fact.Seam.Storage.Standard.V2)
@moduledoc """
Standard V2 implementation of the `Fact.Seam.Storage` seam.

This module creates 0 to 3 character buckets (i.e. sub-directories) for events.
Directories with a large number of files can cause performance and operational issues.

Most filesystems store directory entries in data structures (like B-trees or linear lists) that
degrade as the entry count grows. Listing, searching, or opening files requires scanning or
traversing these structures, which becomes slower as directories grow into thousands or millions of files.

### Tool Limitations

Many common tools struggle with huge directories.

* Shell glob expansion can exceed argument length limits or consume excessive memory
* `ls` becomes slow and unwieldy
* File browsers may hang or become unresponsive
* Backup tools and file synchronization can slow dramatically

### Inode and Metadata Overhead

Directory metadata must often be read into memory. A directory with millions of entries can
consume significant memory just for the directory itself, separate from the files it contains.

### Bucket Configurations

This implementation will create a sub-directory within the base `records_path`. The default event
record encoding is base16, with a default bucket_length of 2, which results in 256 bucket
directories for storing events, `00` to `ff`. Using an alternate encoding for record file names or
increasing the bucket length will increase the number of buckets.

|encoding| bucket_length: 1| bucket_length: 2| bucket_length: 3|
|--|--|--|--|
|base16|16|256|4,096|
|base32|32|1,024|32,768|
|base64url|64|4,096|262,144|

> #### Too many buckets {: .warning}
>
> Having too many buckets is also undesirable; it is recommended not to exceed 4,096.
> Configure the system accordingly.
>

> #### Future {: .info}
>
> A future storage implementation may add support for nested buckets.

"""
use Fact.Seam.Storage,
family: :standard,
version: 2

import Fact.Seam.Parsers, only: [parse_directory: 1, parse_integer_range: 3]

@typedoc """
The configuration options for the Standard v2 storage seam impl.

  * `:path` - The base path to the database directory. Required when building the struct.
  * `:bucket_length` - The length of event bucket directories, from `0` to `3`.
    Defaults to `2`, the same value reported by `default_options/0`.
"""
@typedoc since: "0.2.0"
@type t :: %__MODULE__{
        path: Path.t(),
        bucket_length: 0..3
      }

@enforce_keys [:path]
# A struct built without :bucket_length used to carry nil, which violates t()
# and makes String.slice/3 fail in records_path/3; default it to 2 instead.
defstruct [:path, bucket_length: 2]

@doc """
Get the default configuration options.

Returns a plain map, not a `t()` struct: `:path` defaults to `nil` and
`:bucket_length` defaults to `2`.
"""
@doc since: "0.2.0"
@spec default_options() :: %{path: nil, bucket_length: non_neg_integer()}
@impl true
def default_options(), do: %{path: nil, bucket_length: 2}

@doc """
Gets the specification for the configuration options.

The result is a map keyed by option name (`:path` and `:bucket_length`). Each
entry holds an `:allowed` value, a one-argument `:parse` function, and the
`:error` atom associated with that option. `:bucket_length` is parsed as an
integer in the range `0..3`.
"""
@doc since: "0.2.0"
@impl true
def option_specs() do
  path_spec = %{
    allowed: :any,
    parse: &parse_directory/1,
    error: :invalid_path_option
  }

  bucket_length_spec = %{
    allowed: :any,
    parse: &parse_integer_range(&1, 0, 3),
    error: :invalid_bucket_length
  }

  %{path: path_spec, bucket_length: bucket_length_spec}
end

@doc """
Creates the directory structure used for events and indexes.

Creates, in order, the base directory, the records directory and the indices
directory, then writes a `.gitignore` containing `*` into the base directory.
Returns `{:ok, path}` with the base path when every step succeeds; otherwise
the first non-`:ok` result from `File` is returned unchanged.
"""
@doc since: "0.2.0"
@spec initialize_storage(t(), keyword()) :: {:ok, Path.t()} | {:error, term()}
@impl true
def initialize_storage(%__MODULE__{path: root} = storage, opts) do
  with :ok <- File.mkdir_p(root),
       :ok <- storage |> records_path(nil, opts) |> File.mkdir_p(),
       :ok <- storage |> indices_path(opts) |> File.mkdir_p(),
       :ok <- root |> Path.join(".gitignore") |> File.write("*") do
    {:ok, root}
  end
end

@doc """
Gets the configured base path for the database.

The value is returned exactly as stored in the struct's `:path` field; the
options argument is ignored.
"""
@doc since: "0.2.0"
@spec path(t(), keyword()) :: Path.t()
@impl true
def path(%__MODULE__{} = storage, _opts) do
  storage.path
end

@doc """
Gets the path to the base directory for records, or the path to a specific record.

  * With a `nil` record id, returns the `events` directory under the base path.
  * With a binary record id, the first `bucket_length` characters of the id
    name a bucket sub-directory of `events`. That directory is created when it
    is missing, and the returned path points at the record inside it.

The options argument is ignored.
"""
@doc since: "0.2.0"
@spec records_path(t(), nil | Fact.record_id(), keyword()) :: Path.t()
@impl true
def records_path(%__MODULE__{path: path}, nil, _opts) do
  Path.join(path, "events")
end

def records_path(%__MODULE__{path: path, bucket_length: bucket_length}, record_id, _opts)
    when is_binary(record_id) do
  bucket = String.slice(record_id, 0, bucket_length)
  bucket_path = Path.join([path, "events", bucket])

  # File.mkdir_p/1 succeeds when the directory already exists, so there is no
  # separate existence check that could race with a concurrent writer creating
  # the same bucket. The result is intentionally discarded, as before, because
  # this function's contract is to return a path.
  _ = File.mkdir_p(bucket_path)

  Path.join(bucket_path, record_id)
end

@doc """
Gets the path to the base directory for all indexes.

This is the `indices` directory directly under the configured base path; the
options argument is ignored.
"""
@doc since: "0.2.0"
@spec indices_path(t(), keyword()) :: Path.t()
@impl true
def indices_path(%__MODULE__{path: base}, _opts) do
  Path.join(base, "indices")
end

@doc """
Gets the path to the directory containing the ledger.

In this implementation that is the configured base path itself; the options
argument is ignored.
"""
@doc since: "0.2.0"
@spec ledger_path(t(), keyword()) :: Path.t()
@impl true
def ledger_path(%__MODULE__{} = storage, _opts) do
  storage.path
end

@doc """
Gets the path to the directory containing the lock file.

In this implementation that is the configured base path itself; the options
argument is ignored.
"""
@doc since: "0.2.0"
@spec locks_path(t(), keyword()) :: Path.t()
@impl true
def locks_path(%__MODULE__{} = storage, _opts) do
  storage.path
end
end
7 changes: 6 additions & 1 deletion lib/fact/storage.ex
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,10 @@ defmodule Fact.Storage do

There are two implementations, `Fact.Seam.Storage.Standard.V1` and `Fact.Seam.Storage.Standard.V2`; the default is `Fact.Seam.Storage.Standard.V2`.
"""
use Fact.Seam.Storage.Adapter
use Fact.Seam.Storage.Adapter,
allowed_impls: [
{:standard, 1},
{:standard, 2}
],
default_impl: {:standard, 2}
end
1 change: 1 addition & 0 deletions mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ defmodule Fact.MixProject do
Fact.Seam.Storage,
Fact.Seam.Storage.Adapter,
Fact.Seam.Storage.Standard.V1,
Fact.Seam.Storage.Standard.V2,
Fact.Seam.Storage.Registry
]
],
Expand Down
61 changes: 61 additions & 0 deletions test/fact/seam/parsers_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
defmodule Fact.Seam.ParsersTest do
  use ExUnit.Case

  alias Fact.Seam.Parsers

  @moduletag :capture_log

  doctest Parsers

  # Sanity check that the module under test is compiled and loadable.
  test "module exists" do
    info = Parsers.module_info()
    assert is_list(info)
  end

  describe "parse_integer_range/3" do
    # Every case below, apart from the negative-range test, uses the inclusive range 1..10.

    test "accepts integers within the range" do
      for value <- [5, 1, 10] do
        assert {:ok, ^value} = Parsers.parse_integer_range(value, 1, 10)
      end
    end

    test "accepts negative integers within the range" do
      assert Parsers.parse_integer_range(-5, -10, -1) === {:ok, -5}
      assert Parsers.parse_integer_range(0, -10, 10) === {:ok, 0}
    end

    test "rejects integers below the minimum" do
      assert Parsers.parse_integer_range(0, 1, 10) == :error
      assert Parsers.parse_integer_range(-1, 0, 10) == :error
    end

    test "rejects integers above the maximum" do
      for value <- [11, 100] do
        assert Parsers.parse_integer_range(value, 1, 10) == :error
      end
    end

    test "accepts and parses valid string integers within the range" do
      for {input, expected} <- [{"5", 5}, {"1", 1}, {"10", 10}] do
        assert {:ok, ^expected} = Parsers.parse_integer_range(input, 1, 10)
      end
    end

    test "rejects string integers outside the range" do
      for input <- ["0", "11"] do
        assert Parsers.parse_integer_range(input, 1, 10) == :error
      end
    end

    test "rejects non-numeric strings" do
      for input <- ["abc", "1.5", ""] do
        assert Parsers.parse_integer_range(input, 1, 10) == :error
      end
    end

    test "rejects non-integer and non-string values" do
      for value <- [5.0, nil, :five, [], %{}] do
        assert Parsers.parse_integer_range(value, 1, 10) == :error
      end
    end
  end
end
Loading