fix: recurse properly into index directories when looking up indexed items

This commit is contained in:
Andy Burke 2025-11-06 22:00:01 -08:00
parent e46f9cefb7
commit 8f70191586
3 changed files with 103 additions and 54 deletions

View file

@ -7,6 +7,7 @@ import * as fs from '@std/fs';
import { FSDB_INDEXER, FSDB_SEARCH_OPTIONS } from '../fsdb.ts';
import * as path from '@std/path';
import sanitize from '../utils/sanitize.ts';
import { walk, WALK_ENTRY } from '../utils/walk.ts';
interface FSDB_INDEXER_SYMLINKS_CONFIG_SHARED {
name: string;
@ -14,7 +15,7 @@ interface FSDB_INDEXER_SYMLINKS_CONFIG_SHARED {
id_field?: string;
to_many?: boolean;
organize?: (value: string) => string[];
organize_id?: (value: string) => string[];
organize_id?: (value: string, organized: string) => string[];
}
interface FSDB_INDEXER_SYMLINKS_CONFIG_WITH_FIELD extends FSDB_INDEXER_SYMLINKS_CONFIG_SHARED {
@ -93,7 +94,6 @@ export class FSDB_INDEXER_SYMLINKS<T> implements FSDB_INDEXER<T> {
const filename: string = organized_paths.pop() ?? ''; // remove filename
const parsed_filename = path.parse(filename);
organized_paths.push(parsed_filename.name); // add back filename without extension for a directory
organized_paths.push('*'); // wildcard to get all references
}
const limit = options?.limit ?? 100;
@ -109,20 +109,37 @@ export class FSDB_INDEXER_SYMLINKS<T> implements FSDB_INDEXER<T> {
});
}
const glob_pattern = path.resolve(path.join(this.config.root, ...organized_paths));
for await (const item_file of fs.expandGlob(glob_pattern)) {
const file_info: Deno.FileInfo = await Deno.lstat(item_file.path);
const resolved_path = path.resolve(path.join(this.config.root, ...organized_paths));
for await (
const entry of walk(resolved_path, {
filter: (entry: WALK_ENTRY<T>): boolean => {
const extension = path.extname(entry.path);
if (extension.toLowerCase() !== '.json') {
return false;
}
if (entry.info.isDirectory) {
return false;
}
return true;
},
sort: options?.sort
})
) {
const file_info: Deno.FileInfo = await Deno.lstat(entry.path);
if (file_info.isSymlink) {
if (counter < offset) {
++counter;
continue;
}
const resolved_item_path = await Deno.readLink(item_file.path);
const normalized_path = path.normalize(path.resolve(path.dirname(item_file.path), resolved_item_path));
const resolved_item_path = await Deno.readLink(entry.path);
const normalized_path = path.normalize(path.resolve(path.dirname(entry.path), resolved_item_path));
if (Deno.env.get('FSDB_DEBUG')) {
console.dir({
entry,
resolved_item_path,
normalized_path
});
@ -211,7 +228,7 @@ export class FSDB_INDEXER_SYMLINKS<T> implements FSDB_INDEXER<T> {
}
if (this.config.organize_id) {
organized_paths.push(...this.config.organize_id(item_id));
organized_paths.push(...this.config.organize_id(item_id, parsed_filename.name));
} else {
organized_paths.push(`${item_id}.json`);
}

View file

@ -1,21 +1,22 @@
import * as asserts from "@std/assert";
import * as fsdb from "../fsdb.ts";
import { FSDB_INDEXER_SYMLINKS } from "../indexers.ts";
import { get_data_dir, random_email_address, random_phone_number } from "./helpers.ts";
import lurid from "@andyburke/lurid";
import by_email from "../organizers/by_email.ts";
import by_character from "../organizers/by_character.ts";
import by_phone from "../organizers/by_phone.ts";
import { sentence } from "jsr:@ndaidong/txtgen";
import * as asserts from '@std/assert';
import * as fsdb from '../fsdb.ts';
import { FSDB_INDEXER_SYMLINKS } from '../indexers.ts';
import { get_data_dir, random_email_address, random_phone_number } from './helpers.ts';
import lurid from '@andyburke/lurid';
import by_email from '../organizers/by_email.ts';
import by_character from '../organizers/by_character.ts';
import by_phone from '../organizers/by_phone.ts';
import { sentence } from 'jsr:@ndaidong/txtgen';
import { by_lurid } from '@andyburke/fsdb/organizers';
Deno.test({
name: "index some items",
name: 'index some items',
permissions: {
env: true,
// https://github.com/denoland/deno/discussions/17258
read: true,
write: true,
write: true
},
fn: async () => {
type ITEM = {
@ -27,40 +28,47 @@ Deno.test({
};
const item_collection: fsdb.FSDB_COLLECTION<ITEM> = new fsdb.FSDB_COLLECTION<ITEM>({
name: "test-04-items",
root: get_data_dir() + "/test-04-items",
name: 'test-04-items',
root: get_data_dir() + '/test-04-items',
indexers: {
email: new FSDB_INDEXER_SYMLINKS<ITEM>({
name: "email",
field: "email",
organize: by_email,
name: 'email',
field: 'email',
organize: by_email
}),
phone: new FSDB_INDEXER_SYMLINKS<ITEM>({
name: "phone",
field: "phone",
organize: by_phone,
name: 'phone',
field: 'phone',
organize: by_phone
}),
stable: new FSDB_INDEXER_SYMLINKS<ITEM>({
name: "stable",
field: "stable",
name: 'stable',
field: 'stable',
to_many: true,
organize: by_character,
organize: by_character
}),
custom_organizing_test: new FSDB_INDEXER_SYMLINKS<ITEM>({
name: 'custom_organizing_test',
organize: (word) => [word],
organize_id: (id: string) => {
return [id.substring(0, 4), ...by_lurid(id)];
},
get_values_to_index: (item: ITEM) => item.value.split(/\W/).filter((word) => word.length > 3),
to_many: true
}),
by_character_test: new FSDB_INDEXER_SYMLINKS<ITEM>({
name: "by_character_test",
name: 'by_character_test',
organize: by_character,
get_values_to_index: (item: ITEM) =>
item.value.split(/\W/).filter((word) => word.length > 3),
to_many: true,
get_values_to_index: (item: ITEM) => item.value.split(/\W/).filter((word) => word.length > 3),
to_many: true
}),
by_possibly_undefined: new FSDB_INDEXER_SYMLINKS<ITEM>({
name: "by_possibly_undefined",
name: 'by_possibly_undefined',
organize: by_character,
get_values_to_index: (item: ITEM) =>
item.email.indexOf(".com") > 0 ? [item.email] : [],
to_many: true,
}),
},
get_values_to_index: (item: ITEM) => item.email.indexOf('.com') > 0 ? [item.email] : [],
to_many: true
})
}
});
asserts.assert(item_collection);
@ -71,8 +79,8 @@ Deno.test({
id: lurid(),
email: random_email_address(),
phone: random_phone_number(),
stable: "stable",
value: sentence(),
stable: 'stable',
value: sentence()
};
items.push(item);
@ -100,8 +108,7 @@ Deno.test({
const words_in_value: string[] = item.value
.split(/\W/)
.filter((word) => word.length > 3);
const random_word_in_value: string =
words_in_value[Math.floor(Math.random() * words_in_value.length)];
const random_word_in_value: string = words_in_value[Math.floor(Math.random() * words_in_value.length)];
const fetched_by_word_in_value: ITEM[] = (
await item_collection.find({ by_character_test: random_word_in_value })
).map((entry) => entry.load());
@ -109,8 +116,19 @@ Deno.test({
asserts.assertGreater(fetched_by_word_in_value.length, 0);
asserts.assert(
fetched_by_word_in_value.find(
(word_in_value_item) => word_in_value_item.id === item.id,
),
(word_in_value_item) => word_in_value_item.id === item.id
)
);
const fetched_by_custom_organization_index: ITEM[] = (
await item_collection.find({ custom_organizing_test: random_word_in_value })
).map((entry) => entry.load());
asserts.assertLess(fetched_by_custom_organization_index.length, items.length);
asserts.assertGreater(fetched_by_custom_organization_index.length, 0);
asserts.assert(
fetched_by_custom_organization_index.find(
(word_in_value_item) => word_in_value_item.id === item.id
)
);
}
@ -119,7 +137,7 @@ Deno.test({
asserts.assert(random_item);
const criteria: Record<string, string> = {
stable: "stable",
stable: 'stable'
};
if (Math.random() < 0.5) {
@ -154,5 +172,5 @@ Deno.test({
// ) => entry.load());
// asserts.assertFalse(fetched_by_word_in_value.find((word_in_value_item) => word_in_value_item.id === item.id));
// }
},
}
});

View file

@ -27,6 +27,19 @@ export async function* walk<T>(
const entries: WALK_ENTRY<T>[] = [];
const root_info: Deno.FileInfo = await Deno.lstat(root_path);
if (!root_info.isDirectory) {
const entry: WALK_ENTRY<T> = {
path: root_path,
info: root_info,
depth,
load: function () {
return JSON.parse(Deno.readTextFileSync(this.path)) as T;
}
};
entries.push(entry);
} else {
for await (const dir_entry of Deno.readDir(root_path)) {
const full_path = path.join(root, dir_entry.name);
const info = await Deno.lstat(full_path);
@ -41,6 +54,7 @@ export async function* walk<T>(
entries.push(entry);
}
}
if (sort) {
entries.sort(sort);