commit ce024ba87a5bc1394e2130340b83243976a9ec12 Author: Andy Burke Date: Fri Jun 13 20:40:28 2025 -0700 feature: initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2af9080 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +deno.lock +tests/data +.fsdb diff --git a/README.md b/README.md new file mode 100644 index 0000000..8747ab0 --- /dev/null +++ b/README.md @@ -0,0 +1,75 @@ +# Disk Storage System + +We use the disk instead of a database to reduce complexity. We leave the hard +optimization to the filesystem layer. + +## API + + `collection.create(T)` - creates an object of type T, saving it to the disk + `collection.get(id) : T` - gets an object of type T based on the id field configured + `collection.update(T)` - updates an object of type T, saving it to the disk + `collection.delete(T)` - removes an object from the system and the disk + `collection.find(criteria)` - find all objects that match the criteria *that have an index* + +## CLI + +``` +[you@machine:~/] fsdb users create '{ + "id": "able-fish-door-with-star-snow-idea-edge-salt-many", + "email": "commandlinetestuser@domain.com", + "phone": "213-555-1234", + "value": "By the way a horse is a lemon from the right perspective." +}' +created: { + "id": "able-fish-door-with-star-snow-idea-edge-salt-many", + "email": "commandlinetestuser@domain.com", + "phone": "213-555-1234", + "value": "By the way a horse is a lemon from the right perspective." +} + +[you@machine:~/] fsdb users update '{ + "id": "able-fish-door-with-star-snow-idea-edge-salt-many", + "email": "commandlinetestuser@domain.com", + "phone": "213-555-1234", + "value": "By the way a horse is a lemon from the right angle." +}' +updated: { + "id": "able-fish-door-with-star-snow-idea-edge-salt-many", + "email": "commandlinetestuser@domain.com", + "phone": "213-555-1234", + "value": "By the way a horse is a lemon from the right angle." 
+} + +[you@machine:~/] fsdb users get able-fish-door-with-star-snow-idea-edge-salt-many +{ + "id": "able-fish-door-with-star-snow-idea-edge-salt-many", + "email": "commandlinetestuser@domain.com", + "phone": "213-555-1234", + "value": "By the way a horse is a lemon from the right angle." +} + +[you@machine:~/] fsdb users delete '{ + "id": "able-fish-door-with-star-snow-idea-edge-salt-many", + "email": "commandlinetestuser@domain.com", + "phone": "213-555-1234", + "value": "By the way a horse is a lemon from the right angle." +}' +deleted: { + "id": "able-fish-door-with-star-snow-idea-edge-salt-many", + "email": "commandlinetestuser@domain.com", + "phone": "213-555-1234", + "value": "By the way a horse is a lemon from the right angle." +} +``` + +## Indexers + +### Symlinks + +We create symlinks on the disk to help find objects more quickly and to allow +for browsing the data as a human. + +### SQLite + +TODO: index everything into a sqlite setup as well? would give a way to run +SQL against data still stored on disk in a nicely human browsable format. 
diff --git a/cli.ts b/cli.ts new file mode 100644 index 0000000..4e71b5b --- /dev/null +++ b/cli.ts @@ -0,0 +1,156 @@ +import { parseArgs } from '@std/cli/parse-args'; +import meta from './deno.json' with { type: 'json' }; +import * as fsdb from './fsdb.ts'; +import * as path from '@std/path'; +import * as fs from '@std/fs'; +import sanitize from './utils/sanitize.ts'; + +type COMMAND = 'get' | 'create' | 'update' | 'delete' | 'find'; + +const settings = parseArgs(Deno.args, { + boolean: ['help', 'version'], + alias: { + help: 'h', + root: 'r', + version: 'v' + }, + default: { + root: './.fsdb' + } +}); + +if (settings.help) { + console.log(`Usage: + + fsdb + [--h(elp)] + [--v(ersion)] + [--r(oot) ] + [input] + +Examples: + + fsdb items create '{ "id": "able-fish-door-tall-wait-dark-dark-nose-tall-very", "value": "test" }' + + fsdb items get able-fish-door-tall-wait-dark-dark-nose-tall-very + + +Options: + -h, --help Show this help message + -v, --version Show the version number + -r, --root Set the root directory of the database +`); + Deno.exit(0); +} + +if (settings.version) { + console.log(meta.version ?? 'unknown'); + Deno.exit(0); +} + +const collection_name: string | number | undefined = settings._.shift(); +if (typeof collection_name !== 'string') { + console.error('You must specify a collection name.'); + Deno.exit(1); +} + +const command: COMMAND | undefined = settings._.shift() as COMMAND; +if (!command) { + console.error('You must specify a valid command.'); + Deno.exit(1); +} + +const DB_ROOT = typeof settings.root === 'string' ? 
path.resolve(settings.root) : './.fsdb'; + +const collection_info_path: string = path.resolve(path.join(DB_ROOT, sanitize(collection_name), '.fsdb.collection.json')); +if (!fs.existsSync(collection_info_path)) { + console.error('fsdb command line interface cannot create collections at this time'); + Deno.exit(1); +} + +const collection_info = JSON.parse(Deno.readTextFileSync(collection_info_path)); +const collection_directory = path.dirname(collection_info_path); + +const collection: fsdb.FSDB_COLLECTION = new fsdb.FSDB_COLLECTION({ + name: collection_info.name, + root: collection_directory, + id_field: collection_info.id_field +}); + +switch (command) { + case 'get': { + const id_to_get: string = settings._.shift() as string; + if (typeof id_to_get !== 'string') { + console.error('You must specify an id for the item to get from the collection.'); + Deno.exit(1); + } + + const item: any = await collection.get(id_to_get); + if (item === null || typeof item === 'undefined') { + console.error(`No item found in collection "${collection_name}" with id: "${id_to_get}"`); + Deno.exit(1); + } + + console.log(JSON.stringify(item, null, 4)); + break; + } + case 'create': { + const item_json_from_command_line: string | number | undefined = settings._.shift(); + let item_json = item_json_from_command_line; + if (typeof item_json !== 'string') { + item_json = await new Response(Deno.stdin.readable).text(); + } + + if (typeof item_json !== 'string') { + console.error('You must specify some json for the item to create it in the collection.'); + Deno.exit(1); + } + + const item: any = JSON.parse(item_json); + const created_item: any = await collection.create(item); + + console.log('created: ' + JSON.stringify(created_item, null, 4)); + break; + } + case 'update': { + const item_json_from_command_line: string | number | undefined = settings._.shift(); + let item_json = item_json_from_command_line; + if (typeof item_json !== 'string') { + item_json = await new 
Response(Deno.stdin.readable).text(); + } + + if (typeof item_json !== 'string') { + console.error('You must specify some json for the item to update it in the collection.'); + Deno.exit(1); + } + + const item: any = JSON.parse(item_json); + const updated_item: any = await collection.update(item); + + console.log('updated: ' + JSON.stringify(updated_item, null, 4)); + break; + } + case 'delete': { + const item_json_from_command_line: string | number | undefined = settings._.shift(); + let item_json = item_json_from_command_line; + if (typeof item_json !== 'string') { + item_json = await new Response(Deno.stdin.readable).text(); + } + + if (typeof item_json !== 'string') { + console.error('You must specify some json for the item to delete it in from the collection.'); + Deno.exit(1); + } + + const item: any = JSON.parse(item_json); + const deleted_item: any = await collection.delete(item); + + console.log('deleted: ' + JSON.stringify(deleted_item, null, 4)); + break; + } + case 'find': + console.error('find is not yet implemented - need to find a way to restore the collection indexes on load'); + Deno.exit(1); +} + +Deno.exit(0); diff --git a/deno.json b/deno.json new file mode 100644 index 0000000..17ad92e --- /dev/null +++ b/deno.json @@ -0,0 +1,44 @@ +{ + "name": "@andyburke/fsdb", + "version": "0.0.2", + "license": "MIT", + "exports": { + ".": "./fsdb.ts", + "./cli": "./cli.ts" + }, + + "tasks": { + "lint": "deno lint", + "fmt": "deno fmt", + "test": "cd tests && DENO_ENV=test TEST_DATA_STORAGE_ROOT=./data/$(date --iso-8601=seconds) deno test --allow-env --allow-read --allow-write --fail-fast --trace-leaks ./", + "fsdb": "deno run --allow-env --allow-read --allow-write cli.ts" + }, + + "fmt": { + "include": ["**/*.ts"], + "options": { + "useTabs": true, + "lineWidth": 140, + "indentWidth": 4, + "singleQuote": true, + "proseWrap": "preserve", + "trailingCommas": "never" + } + }, + "lint": { + "include": ["**/*.ts"], + "rules": { + "tags": ["recommended"], + 
"exclude": ["no-explicit-any"] + } + }, + "imports": { + "@andyburke/lurid": "jsr:@andyburke/lurid@^0.2.0", + "@ndaidong/txtgen": "jsr:@ndaidong/txtgen@^4.0.1", + "@std/assert": "jsr:@std/assert@^1.0.13", + "@std/cli": "jsr:@std/cli@^1.0.20", + "@std/crypto": "jsr:@std/crypto@^1.0.5", + "@std/fs": "jsr:@std/fs@^1.0.18", + "@std/path": "jsr:@std/path@^1.1.0" + } +} diff --git a/fsdb.ts b/fsdb.ts new file mode 100644 index 0000000..b6bb7b9 --- /dev/null +++ b/fsdb.ts @@ -0,0 +1,255 @@ +import * as fs from '@std/fs'; +import * as path from '@std/path'; +import by_lurid from './organizers/by_lurid.ts'; +import { Optional } from './utils/optional.ts'; + +export type FSDB_COLLECTION_CONFIG = { + name: string; + id_field: string; + indexers?: Record>; + organize: (id: string) => string[]; + root: string; +}; +export type FSDB_COLLECTION_CONFIG_INPUT = Optional; + +export type FSDB_SEARCH_OPTIONS = { + limit: number; + offset?: number; +}; + +export interface FSDB_INDEXER { + set_fsdb_root(root: string): void; + index(item: T, authoritative_path: string): Promise; + remove(item: T, authoritative_path: string): Promise; + lookup(value: string, options?: FSDB_SEARCH_OPTIONS): Promise; +} + +export class FSDB_COLLECTION> { + private config: FSDB_COLLECTION_CONFIG; + public INDEX: Record>; + + constructor(input_config: FSDB_COLLECTION_CONFIG_INPUT) { + this.config = { + ...{ + id_field: 'id', + organize: by_lurid, + root: `./.fsdb/${input_config?.name ?? 'unknown'}` + }, + ...(input_config ?? {}) + }; + + this.INDEX = this.config.indexers ?? 
{}; + for (const indexer of Object.values(this.INDEX)) { + indexer.set_fsdb_root(this.config.root); + } + + let existing_collection_info: any = undefined; + try { + const existing_collection_info_content: string = Deno.readTextFileSync( + path.resolve(path.join(this.config.root), '.fsdb.collection.json') + ); + existing_collection_info = JSON.parse(existing_collection_info_content); + } catch (error) { + if (!(error instanceof Deno.errors.NotFound)) { + throw error; + } + } + + if (existing_collection_info) { + if (this.config.name !== existing_collection_info.name) { + console.warn('Mismatching collection name, maybe the collection was renamed? Be cautious.'); + } + + if (this.config.root !== existing_collection_info.root) { + console.warn('Mismatching collection root, maybe the collection was moved on disk? Be cautious.'); + } + + if (this.config.id_field !== existing_collection_info.id_field) { + console.warn('Mismatching collection id field, maybe the data format has changed? Be cautious.'); + } + + if ( + Object.keys(this.config.indexers ?? {}).sort().join('|') !== + Object.keys(existing_collection_info.indexers ?? {}).sort().join('|') + ) { + console.warn('Mismatching collection indexes, maybe the code was updated to add or drop an index? Be cautious.'); + } + } + + const collection_info_file_path: string = path.resolve(path.join(this.config.root, '.fsdb.collection.json')); + const collection_info_json: string = JSON.stringify(this.config, null, 4); + Deno.mkdirSync(path.dirname(collection_info_file_path), { + recursive: true + }); + Deno.writeTextFileSync(collection_info_file_path, collection_info_json); + } + + public get_organized_item_path(item: any, id_field?: string): string { + const id: string = item[id_field ?? 
this.config.id_field]; + const path_elements: string[] = this.config.organize(id); + const resolved_item_path = path.resolve(path.join(this.config.root, ...path_elements)); + return resolved_item_path; + } + + public get_organized_id_path(id: string): string { + return this.get_organized_item_path({ id }, 'id'); + } + + private async ensure_item_path(item: any, id_field?: string): Promise { + const organized_item_path: string = this.get_organized_item_path(item, id_field); + const organized_item_dir: string = path.dirname(organized_item_path); + await fs.ensureDir(organized_item_dir); + + return organized_item_path; + } + + private async write_item(item: T, override_path?: string): Promise { + const item_path: string = override_path ?? await this.ensure_item_path(item, this.config.id_field); + Deno.writeTextFileSync(item_path, JSON.stringify(item, null, 1)); + + if (this.config.indexers) { + for (const indexer of Object.values(this.config.indexers)) { + await (indexer as FSDB_INDEXER).index(item, item_path); + } + } + } + + async get(id: string): Promise { + const id_path: string = this.get_organized_id_path(id); + const item_exists: boolean = await fs.exists(id_path); + + if (!item_exists) { + return null; + } + + const content: string = await Deno.readTextFile(id_path); + return JSON.parse(content); + } + + async create(item: T): Promise { + const item_path: string = this.get_organized_item_path(item); + const item_exists: boolean = await fs.exists(item_path); + + if (item_exists) { + throw new Error('item already exists', { + cause: 'item_exists' + }); + } + + await this.write_item(item); + + return item; + } + + async update(item: T): Promise { + const item_path: string = this.get_organized_item_path(item); + const item_exists: boolean = await fs.exists(item_path); + + if (!item_exists) { + throw new Error('item does not exist', { + cause: 'item_does_not_exist' + }); + } + + await this.write_item(item, item_path); + + return item; + } + + async delete(item: T): 
Promise { + const item_path = this.get_organized_item_path(item); + const item_exists = await fs.exists(item_path); + + if (!item_exists) { + return null; + } + + if (this.config.indexers) { + for (const indexer of Object.values(this.config.indexers)) { + await (indexer as FSDB_INDEXER).remove(item, item_path); + } + } + + await Deno.remove(item_path); + + let dir = path.dirname(item_path); + do { + const files = Deno.readDirSync(dir); + let has_files = false; + for (const _ of files) { + has_files = true; + break; + } + + if (has_files) { + dir = ''; + break; + } + + await Deno.remove(dir); + dir = path.dirname(dir); + } while (dir.length); + return item; + } + + async find(criteria: Record, input_options?: FSDB_SEARCH_OPTIONS): Promise { + if (Deno.env.get('FSDB_PERF')) performance.mark('fsdb_find_begin'); + + const options: FSDB_SEARCH_OPTIONS = { + ...{ + limit: 100, + offset: 0 + }, + ...(input_options ?? {}) + }; + + const results: T[] = []; + const item_paths: string[] = []; + + // for each key in the search + // see if we have an index for it + // if we have an index, use that index to put any search path right at the beginning of the list + // + // once we have a list of items to search + // apply offset + // for each item, load it + // let matched = false; + // for each key in search + // if the item matches this key/value, matched = true; break; + // if limit reached, break; + // + // return matched items + + for (const search_key of Object.keys(criteria)) { + const indexer_for_search_key: FSDB_INDEXER | undefined = this.INDEX[search_key]; + const value: string = criteria[search_key]; + if (indexer_for_search_key) { + item_paths.push(...await indexer_for_search_key.lookup(value, input_options)); + } + } + + const limit = options?.limit ?? 100; + const offset = options?.offset ?? 
0; + let counter = 0; + + for await (const item_path of item_paths) { + if (counter < offset) { + ++counter; + continue; + } + + const content = await Deno.readTextFile(item_path); + results.push(JSON.parse(content)); + ++counter; + + if (counter >= (offset + limit)) { + break; + } + } + + if (Deno.env.get('FSDB_PERF')) performance.mark('fsdb_find_end'); + if (Deno.env.get('FSDB_PERF')) console.dir(performance.measure('fsdb find time', 'fsdb_find_begin', 'fsdb_find_end')); + + return results; + } +} diff --git a/indexers/symlinks.ts b/indexers/symlinks.ts new file mode 100644 index 0000000..5e6d86c --- /dev/null +++ b/indexers/symlinks.ts @@ -0,0 +1,207 @@ +import * as fs from '@std/fs'; +import { FSDB_INDEXER, FSDB_SEARCH_OPTIONS } from '../fsdb.ts'; +import * as path from '@std/path'; +import sanitize from '../utils/sanitize.ts'; + +interface FSDB_INDEXER_SYMLINKS_CONFIG_SHARED { + name: string; + root?: string; + id_field?: string; + to_many?: boolean; + organize?: (value: string) => string[]; +} + +interface FSDB_INDEXER_SYMLINKS_CONFIG_WITH_FIELD extends FSDB_INDEXER_SYMLINKS_CONFIG_SHARED { + field: string; + get_values_to_index?: never; +} + +interface FSDB_INDEXER_SYMLINKS_CONFIG_WITH_GET_VALUE extends FSDB_INDEXER_SYMLINKS_CONFIG_SHARED { + field?: never; + get_values_to_index: (item: T) => string[]; +} + +export type FSDB_INDEXER_SYMLINKS_CONFIG = FSDB_INDEXER_SYMLINKS_CONFIG_WITH_FIELD | FSDB_INDEXER_SYMLINKS_CONFIG_WITH_GET_VALUE; + +async function cleanup_empty_directories(initial_path: string): Promise { + let current_directory: string = path.dirname(path.resolve(initial_path)); + let has_children: boolean = false; + const removed: string[] = []; + do { + for await (const _dir_entry of Deno.readDir(current_directory)) { + has_children = true; + break; + } + + if (!has_children) { + const removed_dir = current_directory; + await Deno.remove(removed_dir); + current_directory = path.dirname(removed_dir); + removed.push(removed_dir); + if 
(current_directory === Deno.cwd()) { + break; + } + } + } while (!has_children && current_directory.length); + + return removed; +} + +export class FSDB_INDEXER_SYMLINKS implements FSDB_INDEXER { + constructor(private config: FSDB_INDEXER_SYMLINKS_CONFIG) { + this.config.id_field = this.config.id_field ?? 'id'; + } + + public set_fsdb_root(root: string) { + this.config.root = this.config.root ?? path.resolve(path.join(root, '.indexes/', sanitize(this.config.name))); + } + + private get_values_to_index(item: any): string[] { + if (this.config.get_values_to_index) { + return this.config.get_values_to_index(item); + } + + const typed_field: any = this.config.field as keyof T; + + const value: string = item[typed_field]; + return [value]; + } + + async lookup(value: string, options?: FSDB_SEARCH_OPTIONS): Promise { + if (typeof this.config.root !== 'string') { + throw new Error('root should have been set by FSDB instance'); + } + + if (typeof this.config.organize === 'undefined') { + throw new Error('symlink indexer must have an organizer set!'); + } + + const results: string[] = []; + + const organized_paths: string[] = this.config.organize(value); + if (organized_paths.length === 0) { + return results; + } + + if (this.config.to_many) { + const filename: string = organized_paths.pop() ?? ''; // remove filename + const parsed_filename = path.parse(filename); + organized_paths.push(parsed_filename.name); // add back filename without extension for a directory + organized_paths.push('*'); // wildcard to get all references + } + + const limit = options?.limit ?? 100; + const offset = options?.offset ?? 
0; + let counter = 0; + + const glob_pattern = path.resolve(path.join(this.config.root, ...organized_paths)); + for await (const item_file of fs.expandGlob(glob_pattern)) { + const file_info: Deno.FileInfo = await Deno.lstat(item_file.path); + if (file_info.isSymlink) { + if (counter < offset) { + ++counter; + continue; + } + + const resolved_item_path = await Deno.readLink(item_file.path); + results.push(resolved_item_path); + ++counter; + } + + if (counter >= (offset + limit)) { + break; + } + } + + return results; + } + + async index(item: T, authoritative_path: string): Promise { + if (typeof this.config.root !== 'string') { + throw new Error('root should have been set by FSDB instance'); + } + + if (typeof this.config.organize === 'undefined') { + throw new Error('symlink indexer must have an organizer set!'); + } + + const results: string[] = []; + const values: string[] = this.get_values_to_index(item); + if (values.length === 0) { + return results; + } + + for (const value of values) { + const organized_paths: string[] = this.config.organize(value); + if (organized_paths.length === 0) { + continue; + } + + if (this.config.to_many) { + const filename: string = organized_paths.pop() ?? 
''; // remove filename + const parsed_filename = path.parse(filename); + organized_paths.push(parsed_filename.name); // add back filename without extension for a directory + + const item_id: string = item[this.config.id_field as keyof T] as string; + if (typeof item_id !== 'string') { + continue; + } + + organized_paths.push(`${item_id}.json`); + } + + const symlink_path = path.resolve(path.join(this.config.root, ...organized_paths)); + const item_dir = path.dirname(authoritative_path); + const reverse_link_path = path.join(item_dir, `.index.symlink.${this.config.name}.${sanitize(value)}`); + + // clean up old indexes + try { + const previous_symlink_index_link_path = await Deno.readLink(reverse_link_path); + await Deno.remove(previous_symlink_index_link_path); + + if (this.config.to_many) { + await cleanup_empty_directories(previous_symlink_index_link_path); + } + + await Deno.remove(reverse_link_path); + } catch (error) { + if (!(error instanceof Deno.errors.NotFound)) { + throw error; + } + } + + // create index symlink and reverse link + await Deno.mkdir(path.dirname(symlink_path), { recursive: true }); + await fs.ensureSymlink(authoritative_path, symlink_path); + await fs.ensureSymlink(symlink_path, reverse_link_path); + + results.push(symlink_path); + } + + return results; + } + + async remove(item: T, item_path: string): Promise { + const results: string[] = []; + const values: string[] = this.get_values_to_index(item); + const item_dir: string = path.dirname(item_path); + + for (const value of values) { + const item_dir_reverse_link: string = path.join(item_dir, `.index.symlink.${this.config.name}.${sanitize(value)}`); + if (!fs.existsSync(item_dir_reverse_link)) { + continue; + } + + const index_symlink_path: string = await Deno.readLink(item_dir_reverse_link); + if (fs.existsSync(index_symlink_path)) { + await Deno.remove(index_symlink_path); + results.push(index_symlink_path); + } + + await Deno.remove(item_dir_reverse_link); + results.push(...await 
cleanup_empty_directories(index_symlink_path)); + } + + return results; + } +} diff --git a/organizers/by_character.ts b/organizers/by_character.ts new file mode 100644 index 0000000..52457f0 --- /dev/null +++ b/organizers/by_character.ts @@ -0,0 +1,21 @@ +import sanitize from '../utils/sanitize.ts'; + +export default function by_character(value: string): string[] { + const result: string[] = []; + + // Replace invalid filename characters and leading dots + const sanitized_value = sanitize(value); + const characters_remaining = sanitized_value.split(''); + + let previous_characters = ''; + while (characters_remaining.length) { + previous_characters += characters_remaining.shift(); + result.push(previous_characters); + if (result.length >= 3) { + break; + } + } + + result.push(`${sanitized_value}.json`); + return result; +} diff --git a/organizers/by_email.ts b/organizers/by_email.ts new file mode 100644 index 0000000..886be49 --- /dev/null +++ b/organizers/by_email.ts @@ -0,0 +1,24 @@ +import sanitize from '../utils/sanitize.ts'; + +const EMAIL_REGEX = /^(?.+)@(?(?.+)\.(?.+))$/; +export default function by_email(email: string): string[] { + const { groups: { username, domain, hostname, tld } } = { + groups: { + username: undefined, + domain: undefined, + hostname: undefined, + tld: undefined + }, + ...(email.match(EMAIL_REGEX) ?? 
{}) + }; + + if (typeof username === 'undefined' || typeof domain === 'undefined' || typeof hostname === 'undefined' || typeof tld === 'undefined') { + return []; + } + + // for example, a symlinking index based on this organizer might look like: + // fsdb root index tld domain email + // [ ][ V ][ V ][ V ][ V ] + // /path/to/db/root/.indexes/email/com/example.com/soandso@example.com + return [sanitize(tld), sanitize(domain), sanitize(email), `${sanitize(email)}.json`]; +} diff --git a/organizers/by_lurid.ts b/organizers/by_lurid.ts new file mode 100644 index 0000000..f3fb6ff --- /dev/null +++ b/organizers/by_lurid.ts @@ -0,0 +1,18 @@ +import sanitize from '../utils/sanitize.ts'; + +export default function by_lurid(id: string): string[] { + // Replace invalid filename characters and leading dots + const sanitized_id = sanitize(id); + + // assuming a lurid, eg: able-fish-cost-them-post-many-form-hope-wife-born + // ./able-fish-cost-them/able-fish-cost-them-post-many-form/able-fish-cost-them-post-many-form-hope-wife-born.json + + const result: string[] = [ + sanitized_id.slice(0, 14), + sanitized_id.slice(0, 34), + sanitized_id, + `${sanitized_id}.json` + ]; + + return result; +} diff --git a/organizers/by_phone.ts b/organizers/by_phone.ts new file mode 100644 index 0000000..f85f38b --- /dev/null +++ b/organizers/by_phone.ts @@ -0,0 +1,35 @@ +import sanitize from '../utils/sanitize.ts'; + +const PHONE_REGEX = + /^(?\+?\d{1,3})?[\s.x-]?\(?(?\d{3})\)?[\s.x-]?(?\d{3})[\s.x-]?(?\d{4})$/; +export default function by_phone(phone: string): string[] { + const { groups: { country_code, area_code, central_office_code, subscriber_code } } = { + groups: { + country_code: undefined, + area_code: undefined, + central_office_code: undefined, + subscriber_code: undefined + }, + ...(phone.match(PHONE_REGEX) ?? 
{}) + }; + + if ( + typeof area_code !== 'string' || typeof central_office_code !== 'string' || + typeof subscriber_code !== 'string' + ) { + return []; + } + + const normalized_number = `${sanitize(area_code)}-${sanitize(central_office_code)}-${sanitize(subscriber_code)}`; + + // for example, a symlinking index based on this organizer might look like: + // fsdb root index country_code office_code area_code phone + // /path/to/db/root/.indexes/phone/1/213/555/213-555-1234 + return [ + sanitize(country_code ?? '1'), + sanitize(area_code), + sanitize(central_office_code), + normalized_number, + `${normalized_number}.json` + ]; +} diff --git a/tests/01_module_import.test.ts b/tests/01_module_import.test.ts new file mode 100644 index 0000000..683e0a6 --- /dev/null +++ b/tests/01_module_import.test.ts @@ -0,0 +1,13 @@ +import * as asserts from '@std/assert'; + +Deno.test({ + name: 'import fsdb', + permissions: { + env: true + }, + fn: async () => { + const fsdb = await import('../fsdb.ts'); + + asserts.assert(fsdb); + } +}); diff --git a/tests/02_store_and_retrieve_an_item.test.ts b/tests/02_store_and_retrieve_an_item.test.ts new file mode 100644 index 0000000..6776836 --- /dev/null +++ b/tests/02_store_and_retrieve_an_item.test.ts @@ -0,0 +1,39 @@ +import * as asserts from '@std/assert'; +import * as fsdb from '../fsdb.ts'; +import { get_data_dir } from './helpers.ts'; + +Deno.test({ + name: 'store and retrieve an item', + permissions: { + env: true, + read: ['./'], + write: ['./data'] + }, + fn: async () => { + type ITEM = { + id: string; + value: string; + }; + + const items: fsdb.FSDB_COLLECTION = new fsdb.FSDB_COLLECTION({ + name: 'test-02-items', + root: get_data_dir() + '/test-02-items' + }); + + asserts.assert(items); + + const item = { + id: 'blah-fish-test-with-cozy-home-down-here-yall-work', + value: 'the blah fish test, of course' + }; + + const stored_item = await items.create(item); + + asserts.assertObjectMatch(stored_item, item); + + const fetched_item = 
await items.get(item.id); + + asserts.assert(fetched_item); + asserts.assertObjectMatch(fetched_item, stored_item); + } +}); diff --git a/tests/03_index_items.test.ts b/tests/03_index_items.test.ts new file mode 100644 index 0000000..8d7223d --- /dev/null +++ b/tests/03_index_items.test.ts @@ -0,0 +1,88 @@ +import * as asserts from '@std/assert'; +import * as fsdb from '../fsdb.ts'; +import { FSDB_INDEXER_SYMLINKS } from '../indexers/symlinks.ts'; +import { get_data_dir, random_email_address, random_phone_number } from './helpers.ts'; +import lurid from '@andyburke/lurid'; +import by_email from '../organizers/by_email.ts'; +import by_character from '../organizers/by_character.ts'; +import by_phone from '../organizers/by_phone.ts'; +import { sentence } from 'jsr:@ndaidong/txtgen'; + +Deno.test({ + name: 'index some items', + permissions: { + env: true, + + // https://github.com/denoland/deno/discussions/17258 + read: true, + write: true + }, + fn: async () => { + type ITEM = { + id: string; + email: string; + phone: string; + value: string; + }; + + const item_collection: fsdb.FSDB_COLLECTION = new fsdb.FSDB_COLLECTION({ + name: 'test-03-items', + root: get_data_dir() + '/test-03-items', + indexers: { + email: new FSDB_INDEXER_SYMLINKS({ + name: 'email', + field: 'email', + organize: by_email + }), + phone: new FSDB_INDEXER_SYMLINKS({ + name: 'phone', + field: 'phone', + organize: by_phone + }), + by_character_test: new FSDB_INDEXER_SYMLINKS({ + name: 'by_character_test', + organize: by_character, + get_values_to_index: (item: ITEM) => item.value.split(/\W/).filter((word) => word.length > 3), + to_many: true + }) + } + }); + + asserts.assert(item_collection); + + const items: ITEM[] = []; + for (let i = 0; i < 10; ++i) { + const item = { + id: lurid(), + email: random_email_address(), + phone: random_phone_number(), + value: sentence() + }; + + items.push(item); + + const stored_item: ITEM = await item_collection.create(item); + + 
asserts.assertObjectMatch(stored_item, item);
+		}
+
+		for (const item of items) {
+			const fetched_by_email: ITEM[] = await item_collection.find({ email: item.email });
+			asserts.assertLess(fetched_by_email.length, items.length);
+			asserts.assertGreater(fetched_by_email.length, 0);
+			asserts.assert(fetched_by_email.find((email_item) => email_item.id === item.id));
+
+			const fetched_by_phone: ITEM[] = await item_collection.find({ phone: item.phone });
+			asserts.assertLess(fetched_by_phone.length, items.length);
+			asserts.assertGreater(fetched_by_phone.length, 0);
+			asserts.assert(fetched_by_phone.find((phone_item) => phone_item.id === item.id));
+
+			const words_in_value: string[] = item.value.split(/\W/).filter((word) => word.length > 3);
+			const random_word_in_value: string = words_in_value[Math.floor(Math.random() * words_in_value.length)];
+			const fetched_by_word_in_value: ITEM[] = await item_collection.find({ by_character_test: random_word_in_value });
+			asserts.assertLess(fetched_by_word_in_value.length, items.length);
+			asserts.assertGreater(fetched_by_word_in_value.length, 0);
+			asserts.assert(fetched_by_word_in_value.find((word_in_value_item) => word_in_value_item.id === item.id));
+		}
+	}
+});
diff --git a/tests/04_indexing_sanity_checks.test.ts b/tests/04_indexing_sanity_checks.test.ts
new file mode 100644
index 0000000..cd3dbb3
--- /dev/null
+++ b/tests/04_indexing_sanity_checks.test.ts
@@ -0,0 +1,126 @@
+import * as asserts from '@std/assert';
+import * as fsdb from '../fsdb.ts';
+import { FSDB_INDEXER_SYMLINKS } from '../indexers/symlinks.ts';
+import { get_data_dir, random_email_address, random_phone_number } from './helpers.ts';
+import lurid from '@andyburke/lurid';
+import by_email from '../organizers/by_email.ts';
+import by_character from '../organizers/by_character.ts';
+import by_phone from '../organizers/by_phone.ts';
+import { sentence } from 'jsr:@ndaidong/txtgen';
+
+/**
+ * Splits `value` on non-word characters and keeps the words longer than three
+ * characters. Shared by the `by_character_test` indexer and the lookups below so
+ * both always agree on which words are indexed.
+ */
+function words_longer_than_three(value: string): string[] {
+	return value.split(/\W/).filter((word) => word.length > 3);
+}
+
+/** Returns a random element of `values`, or `undefined` when `values` is empty. */
+function pick_random<T>(values: readonly T[]): T | undefined {
+	return values[Math.floor(Math.random() * values.length)];
+}
+
+/*
+ * Creates ten random items, checks that each can be found through the email,
+ * phone and by-character indexes, then deletes all but the first item and checks
+ * that the deleted items no longer come back from those lookups.
+ */
+Deno.test({
+	name: 'index some items',
+	permissions: {
+		env: true,
+
+		// https://github.com/denoland/deno/discussions/17258
+		read: true,
+		write: true
+	},
+	fn: async () => {
+		type ITEM = {
+			id: string;
+			email: string;
+			phone: string;
+			value: string;
+		};
+
+		const item_collection: fsdb.FSDB_COLLECTION = new fsdb.FSDB_COLLECTION({
+			name: 'test-04-items',
+			root: get_data_dir() + '/test-04-items',
+			indexers: {
+				email: new FSDB_INDEXER_SYMLINKS({
+					name: 'email',
+					field: 'email',
+					organize: by_email
+				}),
+				phone: new FSDB_INDEXER_SYMLINKS({
+					name: 'phone',
+					field: 'phone',
+					organize: by_phone
+				}),
+				by_character_test: new FSDB_INDEXER_SYMLINKS({
+					name: 'by_character_test',
+					organize: by_character,
+					get_values_to_index: (item: ITEM) => words_longer_than_three(item.value),
+					to_many: true
+				})
+			}
+		});
+
+		asserts.assert(item_collection);
+
+		const items: ITEM[] = [];
+		for (let i = 0; i < 10; ++i) {
+			const item = {
+				id: lurid(),
+				email: random_email_address(),
+				phone: random_phone_number(),
+				value: sentence()
+			};
+
+			items.push(item);
+
+			const stored_item: ITEM = await item_collection.create(item);
+
+			asserts.assertObjectMatch(stored_item, item);
+		}
+
+		for (const item of items) {
+			const fetched_by_email: ITEM[] = await item_collection.find({ email: item.email });
+			asserts.assertLess(fetched_by_email.length, items.length);
+			asserts.assertGreater(fetched_by_email.length, 0);
+			asserts.assert(fetched_by_email.find((email_item) => email_item.id === item.id));
+
+			const fetched_by_phone: ITEM[] = await item_collection.find({ phone: item.phone });
+			asserts.assertLess(fetched_by_phone.length, items.length);
+			asserts.assertGreater(fetched_by_phone.length, 0);
+			asserts.assert(fetched_by_phone.find((phone_item) => phone_item.id === item.id));
+
+			// A value with no word longer than three characters has nothing indexed to look up.
+			const random_word_in_value = pick_random(words_longer_than_three(item.value));
+			if (random_word_in_value !== undefined) {
+				const fetched_by_word_in_value: ITEM[] = await item_collection.find({ by_character_test: random_word_in_value });
+				asserts.assertLess(fetched_by_word_in_value.length, items.length);
+				asserts.assertGreater(fetched_by_word_in_value.length, 0);
+				asserts.assert(fetched_by_word_in_value.find((word_in_value_item) => word_in_value_item.id === item.id));
+			}
+		}
+
+		// leave one item behind so the whole db for this test doesn't get cleaned up so I can hand-review it
+		for (const item of items.slice(1)) {
+			await item_collection.delete(item);
+
+			const fetched_by_email: ITEM[] = await item_collection.find({ email: item.email });
+			asserts.assertFalse(fetched_by_email.find((email_item) => email_item.id === item.id));
+
+			const fetched_by_phone: ITEM[] = await item_collection.find({ phone: item.phone });
+			asserts.assertFalse(fetched_by_phone.find((phone_item) => phone_item.id === item.id));
+
+			const random_word_in_value = pick_random(words_longer_than_three(item.value));
+			if (random_word_in_value !== undefined) {
+				const fetched_by_word_in_value: ITEM[] = await item_collection.find({ by_character_test: random_word_in_value });
+				asserts.assertFalse(fetched_by_word_in_value.find((word_in_value_item) => word_in_value_item.id === item.id));
+			}
+		}
+	}
+});
diff --git a/tests/helpers.ts b/tests/helpers.ts
new file mode 100644
index 0000000..3533287
--- /dev/null
+++ b/tests/helpers.ts
@@ -0,0 +1,73 @@
+import lurid from '@andyburke/lurid';
+import { convert_to_words } from '@andyburke/lurid/word_bytes';
+
+// Top-level domains that random_email_address() picks from.
+const TLDs: string[] = [
+	'com',
+	'org',
+	'net',
+	'edu',
+	'gov',
+	'nexus',
+	'shop',
+	'unreasonablylongtldname'
+];
+
+// Shared scratch buffer; it is refilled with random bytes before every use.
+const random_byte_buffer: Uint8Array = new Uint8Array(3);
+
+/** Returns a random element of `values` (`undefined` at runtime if `values` is empty). */
+function get_a_random_array_element<T>(values: readonly T[]): T {
+	return values[Math.floor(Math.random() * values.length)];
+}
+
+/** Builds a random `name@domain.tld` address; name and domain come from `convert_to_words`. */
+export function random_email_address(): string {
+	crypto.getRandomValues(random_byte_buffer);
+	const name = convert_to_words(random_byte_buffer).join('-');
+
+	crypto.getRandomValues(random_byte_buffer);
+	const domain = convert_to_words(random_byte_buffer).join('-');
+
+	const tld = get_a_random_array_element(TLDs);
+	return `${name}@${domain}.${tld}`;
+}
+
+/** Builds a random username by hyphen-joining the `convert_to_words` output for random bytes. */
+export function random_username(): string {
+	crypto.getRandomValues(random_byte_buffer);
+	return convert_to_words(random_byte_buffer).join('-');
+}
+
+const country_codes: string[] = ['', '+1', '1', '01', '219', '40', '506', '999'];
+const digits: number[] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
+const joinings: string[] = [' ', '-', '.'];
+
+/** Returns `count` random decimal digits concatenated into one string. */
+function random_digits(count: number): string {
+	return Array.from({ length: count }, () => get_a_random_array_element(digits)).join('');
+}
+
+/**
+ * Builds a random phone-number-like string: an optional country code followed by
+ * groups of three, three and four digits, with each separator picked at random
+ * from space, dash and dot.
+ */
+export function random_phone_number(): string {
+	const country_code = get_a_random_array_element(country_codes);
+	const area_code = random_digits(3);
+	const central_office_code = random_digits(3);
+	const subscriber_code = random_digits(4);
+
+	return `${country_code}${country_code ? get_a_random_array_element(joinings) : ''}${area_code}${
+		get_a_random_array_element(joinings)
+	}${central_office_code}${get_a_random_array_element(joinings)}${subscriber_code}`;
+}
+
+// Fallback storage root, generated once when this module is loaded.
+const DATA_DIR = lurid();
+
+/** Returns the `TEST_DATA_STORAGE_ROOT` environment variable if set, otherwise the generated fallback. */
+export function get_data_dir(): string {
+	return Deno.env.get('TEST_DATA_STORAGE_ROOT') ?? DATA_DIR;
+}
diff --git a/utils/optional.ts b/utils/optional.ts
new file mode 100644
index 0000000..4a4634c
--- /dev/null
+++ b/utils/optional.ts
@@ -0,0 +1,2 @@
+/** Makes the keys `K` of `T` optional while leaving every other property of `T` unchanged. */
+export type Optional<T, K extends keyof T> = Pick<Partial<T>, K> & Omit<T, K>;
diff --git a/utils/sanitize.ts b/utils/sanitize.ts
new file mode 100644
index 0000000..6973d6d
--- /dev/null
+++ b/utils/sanitize.ts
@@ -0,0 +1,9 @@
+/**
+ * Rewrites characters in `path`: a leading `.` is prefixed with `__`, every `/`
+ * becomes `__fwdslash__` and every `*` becomes `__star__`.
+ */
+export default function sanitize(path: string): string {
+	return path.replace(/^\./, '__.')
+		.replace(/\//g, '__fwdslash__')
+		.replace(/\*/g, '__star__');
+}