Skip to content

Commit

Permalink
feat: 支持多页正文规则 (#57)
Browse files Browse the repository at this point in the history
  • Loading branch information
aooiuu committed Aug 22, 2024
1 parent ab26703 commit 802a815
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 64 deletions.
30 changes: 19 additions & 11 deletions docs/rule/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ outline: deep
"userAgent": "",
"enableDiscover": false,
"discoverUrl": "",
"discoverNextUrl": "",
"discoverList": "",
"discoverTags": "",
"discoverName": "",
Expand All @@ -48,26 +49,30 @@ outline: deep
"chapterRoads": "",
"chapterRoadName": "",
"chapterUrl": "",
"chapterNextUrl": "",
"chapterList": "",
"chapterName": "",
"chapterResult": "",
"contentUrl": "",
"contentNextUrl": "",
"contentItems": ""
}
```

```typescript
export interface Rule {
// ===== 通用字段 =====
host: string; // 域名
id: string; // uuid
name: string; // 书源名称
sort: number; // 书源排序
contentType: ContentType; // 书源类型
cookies?: string;
loadJs: string; // 全局JS脚本
author: string; // 规则作者
userAgent: string; // Headers JSON字符串

// 搜索
enableSearch?: boolean; // 搜索 - 启用
// ===== 解析流程 - 搜索 =====
enableSearch: boolean; // 搜索 - 启用
searchUrl: string; // 搜索 - 地址
searchList: string; // 搜索 - 列表
searchCover: string; // 搜索 - 封面
Expand All @@ -77,16 +82,19 @@ export interface Rule {
searchDescription: string; // 搜索 - 描述
searchResult: string; // 搜索 - 结果

// 章节列表
// ===== 解析流程 - 章节列表 =====
chapterUrl: string; // 章节列表 - 请求地址
chapterName: string; // 章节列表 - 标题
chapterList: string; // 章节列表 - 列表
chapterCover: string; // 章节列表 - 封面
chapterTime: string; // 章节列表 - 时间
chapterResult: string; // 章节列表 - 结果
contentItems: string; // 章节列表 - 内容
chapterResult: string; // 章节列表 - 结果 ? 暂未使用
enableMultiRoads: boolean; // 启用多线路 暂不支持
chapterRoads: string; // 线路列表 暂不支持
chapterNextUrl: string; // 章节列表下一页地址

// 发现
// ===== 解析流程 - 发现页 =====
enableDiscover: boolean; // 发现页 - 是否启用
discoverUrl: string; // 发现页 - 请求地址
discoverList: string; // 发现页 - 列表
Expand All @@ -95,14 +103,14 @@ export interface Rule {
discoverAuthor: string; // 发现页 - 作者
discoverDescription: string; // 发现页 - 描述
discoverResult: string; // 发现页 - 结果
// discoverItems: string; // ? 暂未使用
// discoverItems: string
discoverTags: string;
discoverChapter: string;
discoverNextUrl?: string;
discoverNextUrl: string; // 下一页地址

// 线路
enableMultiRoads: boolean; // 启用多线路
chapterRoads: string; // 线路列表
// ===== 解析流程 - 正文 =====
contentUrl: string;
contentNextUrl: string;
}

enum ContentType {
Expand Down
88 changes: 68 additions & 20 deletions packages/core/src/analyzer/RuleManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,28 +124,76 @@ export class RuleManager {
return chapterItems;
}

/**
 * Loads the content items of one chapter, following pagination when the rule asks for it.
 *
 * URL selection per page:
 *  - page 1 uses `rule.contentUrl`, falling back to `lastResult` when that rule is empty;
 *  - later pages use the value extracted by `rule.contentNextUrl` from the previous
 *    page body, when that rule is configured;
 *  - otherwise, if the URL rule references `page` / `$page`, the same URL rule is
 *    evaluated again with the incremented `page` exposed to the JS environment.
 * The literal URL rule `'null'` skips the network request and evaluates
 * `rule.contentItems` against an empty body.
 *
 * Any error while loading or parsing a page is logged and ends pagination; the items
 * collected from earlier pages are still returned.
 *
 * @param lastResult Result string handed over from the chapter step; used as the
 *   fallback URL and exposed to rule scripts as `lastResult`.
 * @returns Items from all loaded pages, in page order. For `ContentType.NOVEL` the
 *   items are re-split into trimmed lines with runs of blank lines collapsed.
 */
async getContent(lastResult: string): Promise<string[]> {
  JSEngine.setEnvironment({
    result: lastResult
  });

  const hasNextUrlRule = !!this.rule.contentNextUrl;
  const url = this.rule.contentUrl || lastResult;
  // Matches a `$page` placeholder or a bare `page` identifier inside the URL rule.
  const pagePattern = /(\$page)|((^|[^a-zA-Z'"_/-])page([^a-zA-Z0-9'"]|$))/;
  // Only consulted when no explicit next-page rule exists (same precedence as before).
  const isPagedUrl = !hasNextUrlRule && pagePattern.test(url);

  const result: string[] = [];
  let next = '';

  for (let page = 1; ; page++) {
    let contentUrlRule = '';
    if (page === 1) {
      contentUrlRule = url;
    } else if (hasNextUrlRule) {
      contentUrlRule = next;
    } else if (isPagedUrl) {
      contentUrlRule = url;
    }

    // Nothing left to request: single-page rule, or the next-page rule yielded nothing.
    if (!contentUrlRule) {
      break;
    }

    try {
      let contentUrl = '';
      let body = '';

      if (contentUrlRule !== 'null') {
        const res = await fetch(await this.parseUrl(contentUrlRule), '', lastResult, this.rule);
        contentUrl = res.params.url;
        body = res.body;
      }

      JSEngine.setEnvironment({
        page,
        lastResult,
        result: body,
        baseUrl: contentUrl
      });

      next = hasNextUrlRule ? await this.analyzerManager.getString(this.rule.contentNextUrl, body) : '';

      let list = await this.analyzerManager.getStringList(this.rule.contentItems, body);
      if (this.rule.contentType === ContentType.NOVEL) {
        list = list
          .join('\n')
          .replace(/\n+/g, '\n')
          .trim()
          .split('\n')
          .map((e) => e.trim());
      }

      // In `page`-placeholder mode there is no explicit end marker, so a follow-up page
      // without any non-empty item is treated as the end. Without this, a site that
      // answers out-of-range pages with an empty 200 response would loop forever.
      const isEmptyPage = !list.some((item) => item !== '');
      if (isPagedUrl && page > 1 && isEmptyPage) {
        break;
      }

      // Collect items for every content type; previously only NOVEL items were
      // appended, which left manga/video results empty.
      result.push(...list);
    } catch (error) {
      console.warn(error);
      break;
    }
  }

  return result;
}

// 获取获取分类
Expand Down
45 changes: 22 additions & 23 deletions packages/rule-utils/src/rule.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,18 @@ export enum ContentType {
}

export interface Rule {
// ===== 通用字段 =====
host: string; // 域名
id: string; // uuid
name: string; // 书源名称
sort: number; // 书源排序
contentType: ContentType; // 书源类型
cookies?: string;
loadJs?: string; // 全局JS脚本
loadJs: string; // 全局JS脚本
author: string; // 规则作者
userAgent: string; // Headers JSON字符串

// 搜索
enableSearch?: boolean; // 搜索 - 启用
// ===== 解析流程 - 搜索 =====
enableSearch: boolean; // 搜索 - 启用
searchUrl: string; // 搜索 - 地址
searchList: string; // 搜索 - 列表
searchCover: string; // 搜索 - 封面
Expand All @@ -32,17 +33,19 @@ export interface Rule {
searchDescription: string; // 搜索 - 描述
searchResult: string; // 搜索 - 结果

// 章节列表
// ===== 解析流程 - 章节列表 =====
chapterUrl: string; // 章节列表 - 请求地址
chapterName: string; // 章节列表 - 标题
chapterList: string; // 章节列表 - 列表
chapterCover: string; // 章节列表 - 封面
chapterTime: string; // 章节列表 - 时间
chapterResult: string; // 章节列表 - 结果

contentItems: string; // 章节列表 - 内容
enableMultiRoads: boolean; // 启用多线路 暂不支持
chapterRoads: string; // 线路列表 暂不支持
chapterNextUrl: string; // 章节列表下一页地址

// 发现
// ===== 解析流程 - 发现页 =====
enableDiscover: boolean; // 发现页 - 是否启用
discoverUrl: string; // 发现页 - 请求地址
discoverList: string; // 发现页 - 列表
Expand All @@ -54,32 +57,23 @@ export interface Rule {
// discoverItems: string
discoverTags: string;
discoverChapter: string;
discoverNextUrl?: string;

// 线路
enableMultiRoads: boolean; // 启用多线路
chapterRoads: string; // 线路列表
discoverNextUrl: string; // 下一页地址

// 几种形式
// 1.纯文本:
// 如: "userAgent": "Mozilla/5.0 xxx"
// 2.JSON文本
// 如: "userAgent": "{\"User-Agent\":\"Mozilla/5.0 xxx\",\"Cookie\":\"token=123;\"}"
// 3.JSON对象
// 如: "userAgent": {Cookie: ""}
userAgent?: string; // Headers JSON字符串
// ===== 解析流程 - 正文 =====
contentUrl: string;
contentNextUrl: string;

// ===== 暂不支持 =====
createTime?: number;
modifiedTime?: number;

enableUpload?: boolean;
icon?: string;
group?: string;
useCryptoJS?: boolean;
searchTags?: string;
chapterRoadName?: string;
contentUrl?: string;
viewStyle?: number;
cookies?: string;
}

export function createRule(rule: Partial<Rule>): Rule {
Expand All @@ -91,7 +85,9 @@ export function createRule(rule: Partial<Rule>): Rule {
createTime: now,
modifiedTime: now,
enableUpload: false,
author: 'AnyReader',
author: '',
loadJs: '',
cookies: '',
name: '',
host: '',
icon: '',
Expand All @@ -102,6 +98,7 @@ export function createRule(rule: Partial<Rule>): Rule {
userAgent: '',
enableDiscover: false,
discoverUrl: '',
discoverNextUrl: '',
discoverList: '',
discoverTags: '',
discoverName: '',
Expand All @@ -127,10 +124,12 @@ export function createRule(rule: Partial<Rule>): Rule {
chapterRoads: '',
chapterRoadName: '',
chapterUrl: '',
chapterNextUrl: '',
chapterList: '',
chapterName: '',
chapterResult: '',
contentUrl: '',
contentNextUrl: '',
contentItems: '',
viewStyle: 0
},
Expand Down
7 changes: 0 additions & 7 deletions packages/shared/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,10 @@
"ws": "^8.18.0"
},
"devDependencies": {
"@rollup/plugin-alias": "^5.0.0",
"@rollup/plugin-commonjs": "^25.0.2",
"@rollup/plugin-json": "^6.0.0",
"@rollup/plugin-node-resolve": "^15.1.0",
"@types/encoding-japanese": "^2.0.5",
"@types/lodash-es": "^4.17.12",
"@types/ws": "^8.5.11",
"reflect-metadata": "^0.2.2",
"rollup": "^3.26.0",
"rollup-plugin-dts": "^5.3.0",
"rollup-plugin-esbuild": "^5.0.0",
"ts-jest": "^29.1.1",
"typescript": "^5.5.3"
}
Expand Down
30 changes: 30 additions & 0 deletions packages/shared/src/entity/ResourceRule.ts
Original file line number Diff line number Diff line change
Expand Up @@ -326,4 +326,34 @@ export class ResourceRule {
})
@JoinColumn({ name: 'id' })
extra!: RuleExtra;

/**
 * Rule for the next-page address of the body content.
 * Stored in the `content_next_url` text column; defaults to an empty string.
 */
@Column({
name: 'content_next_url',
type: 'text',
default: '',
comment: '正文下一页地址'
})
contentNextUrl!: string;

/**
 * Rule for the next-page address of the chapter list.
 * Stored in the `chapter_next_url` text column; defaults to an empty string.
 */
@Column({
name: 'chapter_next_url',
type: 'text',
default: '',
comment: '章节列表下一页地址'
})
chapterNextUrl!: string;

/**
 * Rule for the next-page address of the discover page.
 * Stored in the `discover_next_url` text column; defaults to an empty string.
 * NOTE(review): unlike the two sibling next-URL columns, no column `comment` is set here.
 */
@Column({
name: 'discover_next_url',
type: 'text',
default: ''
})
discoverNextUrl!: string;

/**
 * Global JS script of the rule (presumably mirrors `Rule.loadJs` — confirm against the mapping code).
 * Stored in the `load_js` text column; defaults to an empty string.
 */
@Column({
name: 'load_js',
type: 'text',
default: ''
})
loadJs!: string;
}
9 changes: 8 additions & 1 deletion packages/web/src/pages/pc/rule-info/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,14 @@ export const FORM_ITEMS: FormItem[] = [

{
prop: 'contentUrl',
label: '地址',
label: '正文地址',
show: (item: Rule) => [ContentType.NOVEL, ContentType.VIDEO, ContentType.MANGA].includes(item.contentType),
formStep: 4,
debug: true
},
{
prop: 'contentNextUrl',
label: '正文下一页地址',
show: (item: Rule) => [ContentType.NOVEL, ContentType.VIDEO, ContentType.MANGA].includes(item.contentType),
formStep: 4,
debug: true
Expand Down
8 changes: 6 additions & 2 deletions packages/web/src/pages/pc/rules/index.vue
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
<div ref="tableWarpRef" class="flex-1 overflow-hidden" @drop="drop" @dragover.prevent @dragenter.prevent>
<a-table
row-key="id"
:loading="loading"
:loading="loading || loading2"
:pagination="{
defaultPageSize: 10,
showTotal: (total: number) => `总数: ${total}`
Expand Down Expand Up @@ -517,7 +517,11 @@ function delTimeoutRules() {
// };
// }
const { drop, dropFile } = useDropRules(({ count }) => {
const {
drop,
dropFile,
loading: loading2
} = useDropRules(({ count }) => {
message.success({
content: `导入${count}条数据`
});
Expand Down

0 comments on commit 802a815

Please sign in to comment.