diff --git a/indexers/symlinks.ts b/indexers/symlinks.ts index 7c875b4..96db604 100644 --- a/indexers/symlinks.ts +++ b/indexers/symlinks.ts @@ -7,6 +7,7 @@ import * as fs from '@std/fs'; import { FSDB_INDEXER, FSDB_SEARCH_OPTIONS } from '../fsdb.ts'; import * as path from '@std/path'; import sanitize from '../utils/sanitize.ts'; +import { walk, WALK_ENTRY } from '../utils/walk.ts'; interface FSDB_INDEXER_SYMLINKS_CONFIG_SHARED { name: string; @@ -14,7 +15,7 @@ interface FSDB_INDEXER_SYMLINKS_CONFIG_SHARED { id_field?: string; to_many?: boolean; organize?: (value: string) => string[]; - organize_id?: (value: string) => string[]; + organize_id?: (value: string, organized: string) => string[]; } interface FSDB_INDEXER_SYMLINKS_CONFIG_WITH_FIELD extends FSDB_INDEXER_SYMLINKS_CONFIG_SHARED { @@ -93,7 +94,6 @@ export class FSDB_INDEXER_SYMLINKS implements FSDB_INDEXER { const filename: string = organized_paths.pop() ?? ''; // remove filename const parsed_filename = path.parse(filename); organized_paths.push(parsed_filename.name); // add back filename without extension for a directory - organized_paths.push('*'); // wildcard to get all references } const limit = options?.limit ?? 100; @@ -109,20 +109,37 @@ export class FSDB_INDEXER_SYMLINKS implements FSDB_INDEXER { }); } - const glob_pattern = path.resolve(path.join(this.config.root, ...organized_paths)); - for await (const item_file of fs.expandGlob(glob_pattern)) { - const file_info: Deno.FileInfo = await Deno.lstat(item_file.path); + const resolved_path = path.resolve(path.join(this.config.root, ...organized_paths)); + for await ( + const entry of walk(resolved_path, { + filter: (entry: WALK_ENTRY): boolean => { + const extension = path.extname(entry.path); + if (extension.toLowerCase() !== '.json') { + return false; + } + + if (entry.info.isDirectory) { + return false; + } + + return true; + }, + sort: options?.sort + }) + ) { + const file_info: Deno.FileInfo = await Deno.lstat(entry.path); if (file_info.isSymlink) { if (counter < offset) { ++counter; continue; } - const resolved_item_path = await Deno.readLink(item_file.path); - const normalized_path = path.normalize(path.resolve(path.dirname(item_file.path), resolved_item_path)); + const resolved_item_path = await Deno.readLink(entry.path); + const normalized_path = path.normalize(path.resolve(path.dirname(entry.path), resolved_item_path)); if (Deno.env.get('FSDB_DEBUG')) { console.dir({ + entry, resolved_item_path, normalized_path }); @@ -211,7 +228,7 @@ export class FSDB_INDEXER_SYMLINKS implements FSDB_INDEXER { } if (this.config.organize_id) { - organized_paths.push(...this.config.organize_id(item_id)); + organized_paths.push(...this.config.organize_id(item_id, parsed_filename.name)); } else { organized_paths.push(`${item_id}.json`); } diff --git a/tests/04_indexing_sanity_checks.test.ts b/tests/04_indexing_sanity_checks.test.ts index 4019d1c..bbb6240 100644 --- a/tests/04_indexing_sanity_checks.test.ts +++ b/tests/04_indexing_sanity_checks.test.ts @@ -1,21 +1,22 @@ -import * as asserts from "@std/assert"; -import * as fsdb from "../fsdb.ts"; -import { FSDB_INDEXER_SYMLINKS } from "../indexers.ts"; -import { get_data_dir, random_email_address, random_phone_number } from "./helpers.ts"; -import lurid from "@andyburke/lurid"; -import by_email from "../organizers/by_email.ts"; -import by_character from "../organizers/by_character.ts"; -import by_phone from "../organizers/by_phone.ts"; -import { sentence } from "jsr:@ndaidong/txtgen"; +import * as asserts from '@std/assert'; +import * as fsdb from '../fsdb.ts'; +import { FSDB_INDEXER_SYMLINKS } from '../indexers.ts'; +import { get_data_dir, random_email_address, random_phone_number } from './helpers.ts'; +import lurid from '@andyburke/lurid'; +import by_email from '../organizers/by_email.ts'; +import by_character from '../organizers/by_character.ts'; +import by_phone from '../organizers/by_phone.ts'; +import { sentence } from 'jsr:@ndaidong/txtgen'; +import { by_lurid } from '@andyburke/fsdb/organizers'; Deno.test({ - name: "index some items", + name: 'index some items', permissions: { env: true, // https://github.com/denoland/deno/discussions/17258 read: true, - write: true, + write: true }, fn: async () => { type ITEM = { @@ -27,40 +28,47 @@ Deno.test({ }; const item_collection: fsdb.FSDB_COLLECTION = new fsdb.FSDB_COLLECTION({ - name: "test-04-items", - root: get_data_dir() + "/test-04-items", + name: 'test-04-items', + root: get_data_dir() + '/test-04-items', indexers: { email: new FSDB_INDEXER_SYMLINKS({ - name: "email", - field: "email", - organize: by_email, + name: 'email', + field: 'email', + organize: by_email }), phone: new FSDB_INDEXER_SYMLINKS({ - name: "phone", - field: "phone", - organize: by_phone, + name: 'phone', + field: 'phone', + organize: by_phone }), stable: new FSDB_INDEXER_SYMLINKS({ - name: "stable", - field: "stable", + name: 'stable', + field: 'stable', to_many: true, - organize: by_character, + organize: by_character + }), + custom_organizing_test: new FSDB_INDEXER_SYMLINKS({ + name: 'custom_organizing_test', + organize: (word) => [word], + organize_id: (id: string) => { + return [id.substring(0, 4), ...by_lurid(id)]; + }, + get_values_to_index: (item: ITEM) => item.value.split(/\W/).filter((word) => word.length > 3), + to_many: true }), by_character_test: new FSDB_INDEXER_SYMLINKS({ - name: "by_character_test", + name: 'by_character_test', organize: by_character, - get_values_to_index: (item: ITEM) => - item.value.split(/\W/).filter((word) => word.length > 3), - to_many: true, + get_values_to_index: (item: ITEM) => item.value.split(/\W/).filter((word) => word.length > 3), + to_many: true }), by_possibly_undefined: new FSDB_INDEXER_SYMLINKS({ - name: "by_possibly_undefined", + name: 'by_possibly_undefined', organize: by_character, - get_values_to_index: (item: ITEM) => - item.email.indexOf(".com") > 0 ? [item.email] : [], - to_many: true, - }), - }, + get_values_to_index: (item: ITEM) => item.email.indexOf('.com') > 0 ? [item.email] : [], + to_many: true + }) + } }); asserts.assert(item_collection); @@ -71,8 +79,8 @@ Deno.test({ id: lurid(), email: random_email_address(), phone: random_phone_number(), - stable: "stable", - value: sentence(), + stable: 'stable', + value: sentence() }; items.push(item); @@ -100,8 +108,7 @@ Deno.test({ const words_in_value: string[] = item.value .split(/\W/) .filter((word) => word.length > 3); - const random_word_in_value: string = - words_in_value[Math.floor(Math.random() * words_in_value.length)]; + const random_word_in_value: string = words_in_value[Math.floor(Math.random() * words_in_value.length)]; const fetched_by_word_in_value: ITEM[] = ( await item_collection.find({ by_character_test: random_word_in_value }) ).map((entry) => entry.load()); @@ -109,8 +116,19 @@ Deno.test({ asserts.assertGreater(fetched_by_word_in_value.length, 0); asserts.assert( fetched_by_word_in_value.find( - (word_in_value_item) => word_in_value_item.id === item.id, - ), + (word_in_value_item) => word_in_value_item.id === item.id + ) + ); + + const fetched_by_custom_organization_index: ITEM[] = ( + await item_collection.find({ custom_organizing_test: random_word_in_value }) + ).map((entry) => entry.load()); + asserts.assertLess(fetched_by_custom_organization_index.length, items.length); + asserts.assertGreater(fetched_by_custom_organization_index.length, 0); + asserts.assert( + fetched_by_custom_organization_index.find( + (word_in_value_item) => word_in_value_item.id === item.id + ) ); } @@ -119,7 +137,7 @@ Deno.test({ asserts.assert(random_item); const criteria: Record = { - stable: "stable", + stable: 'stable' }; if (Math.random() < 0.5) { @@ -154,5 +172,5 @@ Deno.test({ // ) => entry.load()); // asserts.assertFalse(fetched_by_word_in_value.find((word_in_value_item) => word_in_value_item.id === item.id)); // } - }, + } }); diff --git a/utils/walk.ts b/utils/walk.ts index e31b401..537ca1b 100644 --- a/utils/walk.ts +++ b/utils/walk.ts @@ -27,12 +27,11 @@ export async function* walk( const entries: WALK_ENTRY[] = []; - for await (const dir_entry of Deno.readDir(root_path)) { - const full_path = path.join(root, dir_entry.name); - const info = await Deno.lstat(full_path); - const entry = { - path: full_path, - info, + const root_info: Deno.FileInfo = await Deno.lstat(root_path); + if (!root_info.isDirectory) { + const entry: WALK_ENTRY = { + path: root_path, + info: root_info, depth, load: function () { return JSON.parse(Deno.readTextFileSync(this.path)) as T; @@ -40,6 +39,21 @@ export async function* walk( }; entries.push(entry); + } else { + for await (const dir_entry of Deno.readDir(root_path)) { + const full_path = path.join(root, dir_entry.name); + const info = await Deno.lstat(full_path); + const entry = { + path: full_path, + info, + depth, + load: function () { + return JSON.parse(Deno.readTextFileSync(this.path)) as T; + } + }; + + entries.push(entry); + } } if (sort) {