Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change search to look at entire blocks instead of just individual lines #58

Merged
merged 1 commit into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,13 @@ export default function VolumeDataProvider({
return override;
};

const initialPageNum = parseInt(useSearchParams().get('page') ?? '', 10) || null;
const initialHighlightBlock = parseInt(useSearchParams().get('block') ?? '', 10) || null;
const initialPageValue = useSearchParams().get('page');
const initialPageNum = initialPageValue !== null ? parseInt(initialPageValue, 10) : null;
const blockValue = useSearchParams().get('block');
const initialHighlightBlock = blockValue !== null ? parseInt(blockValue, 10) : null;

const getCurrentPage = () => {
const page = initialPageNum ? initialPageNum - 1 : (volume.readings[0]?.page ?? 0);
const page = initialPageNum !== null ? initialPageNum - 1 : (volume.readings[0]?.page ?? 0);
if (page > 0 && getUseTwoPages()) {
if (getFirstPageIsCover() && page % 2 === 0) {
return page - 1;
Expand All @@ -82,7 +84,7 @@ export default function VolumeDataProvider({

const [currentPage, setCurrentPage] = useState(getCurrentPage());
const [highlightBlock, setHighlightBlock] = useState<HighlightBlock>(
initialHighlightBlock ? {
initialHighlightBlock !== null ? {
page: getCurrentPage(), block: initialHighlightBlock,
} : null,
);
Expand Down
2 changes: 2 additions & 0 deletions app/api/page/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ export async function POST(request: NextRequest) {

const fileData = Buffer.from(await file.arrayBuffer());
const fileName = getFileHash(fileData);
const blockText = ocrData?.blocks.map((block: any) => block.lines.join('')) ?? [];

await fs.writeFile(
`${process.env.IMAGE_PATH}/${volumeId}/${fileName}`,
Expand All @@ -61,6 +62,7 @@ export async function POST(request: NextRequest) {
ocr: ocrData,
fileName,
uploadedById: session.user.userId,
blockText,
},
});

Expand Down
68 changes: 29 additions & 39 deletions app/api/search/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export async function GET(request: NextRequest) {
}
const { searchParams } = new URL(request.url);
const q = searchParams.get('q')?.trim();
const offset = parseInt(searchParams.get('offset') ?? '0', 10);

if (q === '') {
return NextResponse.json([]);
Expand All @@ -26,49 +27,38 @@ export async function GET(request: NextRequest) {
}

const rawQuery = `
SELECT
page.number,
page."volumeId",
"Volume".number AS "volumeNumber",
"Series".id AS "seriesId",
"Series"."japaneseName" AS "japaneseName",
"Series"."englishName" AS "englishName",
text,
score,
CASE WHEN "Reading".id IS NULL THEN false ELSE true END AS isReading,
CAST(block_number - 1 AS SMALLINT) AS "blockNumber"
FROM (
SELECT
number,
"volumeId",
score,
string_agg(line, '') text,
block_number
FROM (
SELECT
"Page".id,
"Page".number,
"volumeId",
ocr ->> 'blocks' blocks,
pgroonga_score("Page".tableoid, "Page".ctid) AS score
FROM "Page"
INNER JOIN "Volume" ON "Volume".id = "Page"."volumeId"
INNER JOIN "Series" ON "Series".id = "Volume"."seriesId"
WHERE ocr ->> 'blocks' &@ $1 ${nsfwFilter}
),
jsonb_array_elements(blocks::jsonb) WITH ORDINALITY AS t(block, block_number),
jsonb_array_elements_text(block -> 'lines') AS line
WHERE block &@ $1
GROUP BY id, number, "volumeId", block, score, block_number
"Page".id as "pageId",
CAST((idx - 1) AS INT) AS "blockNumber",
t.text_element AS text,
"Page".number,
"Page"."volumeId" as volumeid,
"Volume".number AS "volumeNumber",
"Series".id AS "seriesId",
"Series"."japaneseName" AS "japaneseName",
"Series"."englishName" AS "englishName",
(CASE WHEN "Reading".id IS NOT NULL THEN true ELSE false END) AS "isReading"
FROM "Page"
LEFT JOIN "Reading" ON "Reading"."userId" = $2 AND "Reading"."volumeId" = "Page"."volumeId"
INNER JOIN "Volume" ON "Volume".id = "Page"."volumeId"
INNER JOIN "Series" ON "Series".id = "Volume"."seriesId"
CROSS JOIN LATERAL unnest("blockText") WITH ORDINALITY AS t(text_element, idx)
WHERE
"Page"."blockText" &@ $1
AND
t.text_element &@ $1
${nsfwFilter}
ORDER BY "isReading" DESC, "Page"."volumeId", number ASC
LIMIT 20
) page
JOIN "Volume" ON "Volume".id = "volumeId"
JOIN "Series" ON "Series".id = "Volume"."seriesId"
LEFT JOIN "Reading" ON "Reading"."volumeId" = "Volume"."id" AND "Reading"."userId" = $2
ORDER BY isReading DESC, score, number ASC
OFFSET $3
`;

const pages: SearchResult[] = await prisma.$queryRawUnsafe(rawQuery, q, session.user.userId);
const pages: SearchResult[] = await prisma.$queryRawUnsafe(
rawQuery,
q,
session.user.userId,
offset,
);

if (pages == null) {
return NextResponse.json({ error: 'No such lines' }, { status: 404 });
Expand Down
45 changes: 39 additions & 6 deletions components/SearchBar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,35 +7,61 @@ import { SearchResult } from 'search';
export function SearchBar() {
const [search, setSearch] = useState('');
const [searchResults, setSearchResults] = useState<SearchResult[]>([]);
const [offset, setOffset] = useState(0);
const [searchIsExhausted, setSearchIsExhausted] = useState(false);
const [isLoading, setIsLoading] = useState(false);
const ref = useRef<HTMLDivElement>(null);
const [
searchTimeoutId, setSearchTimeoutId,
] = useState<ReturnType<typeof setTimeout> | null>(null);
const [searchAbortController, setSearchAbortController] = useState<AbortController | null>(null);

/**
 * Scroll handler for the search-results dropdown. When the container has been
 * scrolled to its bottom, it requests the next batch of results by moving
 * `offset` to the number of results already loaded.
 *
 * Nothing happens while a request is in flight (`isLoading`) or once the
 * search has been marked as exhausted (`searchIsExhausted`).
 *
 * @param event - Scroll event; only the scroll metrics of `currentTarget`
 *   (the element the handler is attached to) are read.
 */
const onScroll = (event: {
  currentTarget: { clientHeight: number; scrollTop: number; scrollHeight: number };
}) => {
  if (searchIsExhausted || isLoading) {
    return;
  }

  const { clientHeight, scrollTop, scrollHeight } = event.currentTarget;
  // `scrollTop` may be fractional while `scrollHeight` / `clientHeight` are
  // rounded integers, so an exact comparison can miss the bottom on zoomed or
  // high-DPI displays. Treat "within one pixel of the end" as the bottom.
  const remainingPixels = scrollHeight - clientHeight - scrollTop;
  if (remainingPixels <= 1) {
    setOffset(searchResults.length);
  }
};

useEffect(() => {
setSearchResults([]);
setOffset(0);
setSearchIsExhausted(false);
}, [search]);

useEffect(() => {
(async () => {
if (searchTimeoutId) {
clearTimeout(searchTimeoutId);
}

setIsLoading(true);
const newTimeoutId = setTimeout(async () => {
if (searchAbortController && !searchAbortController.signal.aborted) {
searchAbortController.abort();
}
const newAbortController = new AbortController();
setSearchAbortController(newAbortController);
await fetch(`/api/search?q=${search}`, { signal: newAbortController.signal })
await fetch(`/api/search?q=${search}&offset=${offset}`, { signal: newAbortController.signal })
.then(async (results) => {
setSearchAbortController(null);
setSearchResults(await results.json() as SearchResult[]);
const newResults = await results.json() as SearchResult[];
if (newResults.length === 0) {
setSearchIsExhausted(true);
return;
}
setSearchResults((originalSearchResults) => [...originalSearchResults, ...newResults]);
})
.catch(() => {});
.catch(() => {})
.finally(() => { setIsLoading(false); });
}, 300);
setSearchTimeoutId(newTimeoutId);
})();
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [search]);
}, [search, offset]);

return (
<div className="w-full dropdown dropdown-end" id="search">
Expand All @@ -57,15 +83,15 @@ export function SearchBar() {
}
}}
/>
<div className="overflow-auto top-14 z-50 flex-col max-h-96 rounded-md w-[24rem] dropdown-content bg-base-300">
<div className="overflow-auto top-14 z-50 flex-col max-h-96 rounded-md w-[24rem] dropdown-content bg-base-300" onScroll={onScroll}>
<ul
className="menu menu-compact"
// use ref to calculate the width of parent
style={{ width: ref.current?.clientWidth }}
>
{searchResults.map((item, index) => (
<li
key={`${item.seriesId}-${item.volumeNumber}-${item.number}-${item.text}`}
key={`${item.pageId}-${item.blockNumber}`}
tabIndex={index + 1}
className="w-full border-b border-b-base-content/10"
>
Expand Down Expand Up @@ -95,6 +121,13 @@ export function SearchBar() {
</Link>
</li>
))}
{isLoading && (
<li className="w-full">
<div className="flex flex-col">
<p className="center">Loading...</p>
</div>
</li>
)}
</ul>
</div>
</div>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
-- Adds the "blockText" column to "Page" and backfills it from the OCR JSON
-- already stored in "Page".ocr. The schema change and the backfill run
-- together in a single SERIALIZABLE transaction.
BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE;

-- New column: an array of text, one element per OCR block (nullable, no default).
ALTER TABLE "Page" ADD COLUMN "blockText" TEXT[];

-- Backfill. The innermost query expands ocr->'blocks' into one row per block
-- and each block's 'lines' into one row per line, then concatenates the lines
-- of a block (in line order, no separator) into a single string. The middle
-- query collects those strings per page into an array ordered by block index.
-- NOTE(review): a block whose 'lines' array is empty yields no rows from the
-- CROSS JOIN LATERAL and is therefore left out of the array, so array
-- positions may not match positions in ocr->'blocks' for such pages. Pages
-- with no blocks at all get no row in agg_blocks and keep "blockText" NULL.
-- Confirm both cases are acceptable for consumers that index into this array.
UPDATE "Page"
SET "blockText" = agg_blocks.all_blocks
FROM (
SELECT id, array_agg(block_lines ORDER BY block_index) AS all_blocks
FROM (
SELECT "Page".id, block_index, string_agg(line, '' ORDER BY line_index) AS block_lines
FROM "Page",
LATERAL jsonb_array_elements(ocr->'blocks') WITH ORDINALITY AS block(block_content, block_index)
CROSS JOIN LATERAL jsonb_array_elements_text(block_content->'lines') WITH ORDINALITY AS line(line, line_index)
GROUP BY "Page".id, block_index
) AS blocks
GROUP BY id
) AS agg_blocks
WHERE "Page".id = agg_blocks.id;

COMMIT;

-- Executed after COMMIT, i.e. outside the transaction above.
-- Presumably removes an earlier index of the same name so that a later
-- migration can recreate it on "blockText" -- TODO confirm.
DROP INDEX IF EXISTS pgroonga_blocks_index;
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- CreateIndex
-- PGroonga index over the "blockText" text-array column of "Page".
-- Presumably this is what serves the `&@` matches against "blockText" in the
-- search query -- confirm against the PGroonga operator documentation.
CREATE INDEX pgroonga_blocks_index ON public."Page" USING pgroonga ("blockText");
2 changes: 2 additions & 0 deletions prisma/schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,12 @@ model Page {
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
uploadedById String
blockText String[]
uploadedBy User @relation(fields: [uploadedById], references: [id])
volume Volume @relation(fields: [volumeId], references: [id])

@@unique([number, volumeId], name: "volumeNum")
@@index([blockText], map: "pgroonga_blocks_index")
}

model Reading {
Expand Down
2 changes: 1 addition & 1 deletion types/search.d.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
declare module 'search' {
/**
 * One row returned by the search API: a single OCR text block that matched
 * the query, together with the page, volume and series it belongs to.
 */
export interface SearchResult {
/** Id of the page containing the matching block ("Page".id in the search query). */
pageId: number;
/** Page number within its volume ("Page".number). */
number: number;
/** Id of the volume the page belongs to; note the all-lowercase key, matching the query's `volumeid` alias. */
volumeid: string;
/** Number of the volume ("Volume".number). */
volumeNumber: number;
/** Id of the series the volume belongs to. */
seriesId: string;
/** Japanese name of the series. */
japaneseName: string;
/** English name of the series. */
englishName: string;
/** Text of the matching block (one element of the page's "blockText" array). */
text: string;
/** NOTE(review): the block-based search query does not appear to select a score column; confirm whether this field is still populated. */
score: number;
/** True when a "Reading" row exists for the requesting user and this page's volume. */
isReading: boolean;
/** Zero-based position of the matching block within the page's "blockText" array. */
blockNumber: number;
}
Expand Down
Loading