跳转至

API 文档

__description__ = 'A useful CLI tool for downloading posts in Kemono.cr / .su / .party' module-attribute

__title__ = 'KToolBox' module-attribute

__version__ = 'v0.24.0' module-attribute

__main__

main()

Source code in ktoolbox/__main__.py
10
11
12
13
14
15
16
17
18
19
20
21
def main():
    """
    Command line entry point

    When the first command line argument is ``-v`` or ``--version`` the version
    string is printed and nothing else runs. Otherwise ``logger_init`` and
    ``uvloop_init`` are called and control is handed to ``fire.Fire``.
    A ``KeyboardInterrupt`` raised along the way is logged as an error instead
    of propagating.
    """
    try:
        # The version flag is handled here, before Fire takes over argument parsing
        first_arg = sys.argv[1:2]
        if first_arg and first_arg[0] in ('-v', '--version'):
            print(__version__)
            return

        logger_init(cli_use=True)
        uvloop_init()
        fire.Fire(KToolBoxCli)
    except KeyboardInterrupt:
        logger.error("KToolBox was interrupted by the user")

action

ActionRet

Bases: BaseRet[_T]

Return data model of action call

Source code in ktoolbox/action/base.py
10
11
12
class ActionRet(BaseRet[_T]):
    """
    Return data model of action call

    Declares no fields or methods of its own; everything is inherited from ``BaseRet``.
    """

FetchInterruptError

Bases: Exception

Exception for interrupt of data fetching

Source code in ktoolbox/action/fetch.py
11
12
13
14
15
16
class FetchInterruptError(Exception):
    """
    Exception for interrupt of data fetching

    :param args: Positional arguments forwarded to ``Exception``
    :param ret: Return data related to the interruption, kept on the instance as ``ret``
    """

    def __init__(self, *args, ret: BaseRet = None):
        # Keep the return data first, then let ``Exception`` store the positional arguments
        self.ret = ret
        super().__init__(*args)

ret = ret instance-attribute

__init__(*args, ret=None)

Source code in ktoolbox/action/fetch.py
14
15
16
def __init__(self, *args, ret: BaseRet = None):
    """
    Initialize the exception

    :param args: Positional arguments passed on to the parent initializer
    :param ret: Value stored on the instance as ``ret``, defaults to ``None``
    """
    super().__init__(*args)
    self.ret = ret

create_job_from_creator(service, creator_id, path, *, all_pages=False, offset=0, length=50, save_creator_indices=False, mix_posts=None, start_time, end_time, keywords=None, keywords_exclude=None) async

Create a list of download jobs from a creator

Parameters:

Name Type Description Default
service str

The service where the post is located

required
creator_id str

The ID of the creator

required
path Path

The path for downloading posts, which needs to be sanitized

required
all_pages bool

Fetch all posts, offset and length will be ignored if enabled

False
offset int

Result offset (or start offset)

0
length Optional[int]

The number of posts to fetch

50
save_creator_indices bool

Record CreatorIndices data.

False
mix_posts bool

Save all files from different posts at same path, save_creator_indices will be ignored if enabled

None
start_time Optional[datetime]

Start time of the time range

required
end_time Optional[datetime]

End time of the time range

required
keywords Optional[Set[str]]

Set of keywords to filter posts by title (case-insensitive)

None
keywords_exclude Optional[Set[str]]

Set of keywords to exclude posts by title (case-insensitive)

None
Source code in ktoolbox/action/job.py
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
async def create_job_from_creator(
        service: str,
        creator_id: str,
        path: Path,
        *,
        all_pages: bool = False,
        offset: int = 0,
        length: Optional[int] = 50,
        save_creator_indices: bool = False,
        mix_posts: bool = None,
        start_time: Optional[datetime],
        end_time: Optional[datetime],
        keywords: Optional[Set[str]] = None,
        keywords_exclude: Optional[Set[str]] = None
) -> ActionRet[List[Job]]:
    """
    Create a list of download jobs from a creator

    Posts are fetched page by page, trimmed to the requested window, filtered by
    publish time and title keywords, and then turned into jobs through
    ``create_job_from_post``. When fetching is interrupted, the return data carried
    by ``FetchInterruptError`` is converted into the returned ``ActionRet``.

    :param service: The service where the post is located
    :param creator_id: The ID of the creator
    :param path: The path for downloading posts, which needs to be sanitized
    :param all_pages: Fetch all posts, ``length`` will be ignored if enabled \
     (``offset`` is still applied)
    :param offset: Result offset (or start offset)
    :param length: The number of posts to fetch, ``None`` is treated like ``all_pages``
    :param save_creator_indices: Record ``CreatorIndices`` data.
    :param mix_posts: Save all files from different posts at same path, \
     ``save_creator_indices`` will be ignored if enabled. \
     ``None`` falls back to ``config.job.mix_posts``
    :param start_time: Start time of the time range
    :param end_time: End time of the time range
    :param keywords: Set of keywords to filter posts by title (case-insensitive)
    :param keywords_exclude: Set of keywords to exclude posts by title (case-insensitive)
    :return: ``ActionRet`` whose data is the list of created jobs
    """
    mix_posts = config.job.mix_posts if mix_posts is None else mix_posts

    # The posts API is paged with a stepping of 50, so the request starts at the page
    # containing ``offset`` and the first ``skip`` posts of that page are dropped afterwards.
    page_size = 50
    skip = offset % page_size
    start_offset = offset - skip
    fetch_all = all_pages or length is None
    if fetch_all:
        page_limit = None
    elif length > 0:
        # Ceiling division: the window ``skip + length`` may spill into one more page
        # than ``length`` alone would need (e.g. offset=49, length=2 needs two pages).
        page_limit = -(-(skip + length) // page_size)
    else:
        page_limit = 0

    # Get posts
    logger.info(f"Start fetching posts from creator {creator_id}")
    post_list: List[Post] = []
    if page_limit is None or page_limit > 0:
        fetched_pages = 0
        try:
            async for part in fetch_creator_posts(service=service, creator_id=creator_id, o=start_offset):
                post_list += part
                fetched_pages += 1
                # Stop right after the last needed page instead of requesting one more
                if page_limit is not None and fetched_pages >= page_limit:
                    break
        except FetchInterruptError as e:
            return ActionRet(**e.ret.model_dump(mode="python"))

    post_list = post_list[skip:]
    if not fetch_all:
        post_list = post_list[:length]

    # Filter posts by publish time
    if start_time or end_time:
        post_list = list(filter_posts_by_date(post_list, start_time, end_time))

    # Filter posts by keywords
    if keywords:
        post_list = list(filter_posts_by_keywords(post_list, keywords))

    # Filter out posts by exclude keywords
    if keywords_exclude:
        post_list = list(filter_posts_by_keywords_exclude(post_list, keywords_exclude))

    logger.info(f"Get {len(post_list)} posts after filtering, start creating jobs")

    # Generate and save ``CreatorIndices``
    if not mix_posts and save_creator_indices:
        # Record the same directory that is used for the post jobs below
        # (year/month grouping plus ``generate_post_path_name``)
        posts_path = {
            post.id: generate_grouped_post_path(post, path) / generate_post_path_name(post)
            for post in post_list
        }
        indices = CreatorIndices(
            creator_id=creator_id,
            service=service,
            posts={post.id: post for post in post_list},
            posts_path=posts_path
        )
        async with aiofiles.open(
                path / DataStorageNameEnum.CreatorIndicesData.value,
                "w",
                encoding="utf-8"
        ) as f:
            await f.write(indices.model_dump_json(indent=config.json_dump_indent))

    if config.job.include_revisions:
        logger.warning("`job.include_revisions` is enabled and will fetch post revisions, "
                       "which may take time. Disable if not needed.")
    if config.job.extract_content or config.job.extract_external_links or config.job.extract_content_images:
        logger.warning(
            "`job.extract_content` or `job.extract_external_links` or `job.extract_content_images` is enabled "
            "and will fetch post content one by one, which may take time. Disable if not needed.")

    job_list: List[Job] = []
    for post in post_list:
        # Get post path
        if mix_posts:
            post_path = path
        else:
            # Apply year/month grouping if enabled
            grouped_base_path = generate_grouped_post_path(post, path)
            post_path = grouped_base_path / generate_post_path_name(post)

        # Generate jobs for the main post
        try:
            job_list += await create_job_from_post(
                post=post,
                post_path=post_path,
                post_dir=not mix_posts,
                dump_post_data=not mix_posts
            )
        except FetchInterruptError as e:
            return ActionRet(**e.ret.model_dump(mode="python"))

        # If include_revisions is enabled, fetch and download revisions for this post
        if config.job.include_revisions and not mix_posts:
            try:
                revisions_ret = await get_post_revisions_api(
                    service=service,
                    creator_id=creator_id,
                    post_id=post.id
                )
                if revisions_ret and revisions_ret.data:
                    for revision in revisions_ret.data:
                        if revision.revision_id:  # Only process actual revisions
                            revision_path = post_path / config.job.post_structure.revisions / generate_post_path_name(
                                revision)
                            try:
                                revision_jobs = await create_job_from_post(
                                    post=revision,
                                    post_path=revision_path,
                                    dump_post_data=True
                                )
                            except FetchInterruptError as e:
                                return ActionRet(**e.ret.model_dump(mode="python"))
                            job_list += revision_jobs
            except Exception as e:
                # Revisions are best-effort: a failure is logged and the remaining posts continue
                logger.warning(f"Failed to fetch revisions for post {post.id}: {e}")

    return ActionRet(data=job_list)

create_job_from_post(post, post_path, *, post_dir=True, dump_post_data=True) async

Create a list of download jobs from post data

Parameters:

Name Type Description Default
post Union[Post, Revision]

post data

required
post_path Path

Path of the post directory, which needs to be sanitized

required
post_dir bool

Whether to create post directory

True
dump_post_data bool

Whether to dump post data (post.json) in post directory

True

Raises:

Type Description
FetchInterruptError

If fetching post content fails

Source code in ktoolbox/action/job.py
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
def _is_filename_allowed(filename: str) -> bool:
    """Check ``filename`` against ``config.job.allow_list`` and ``config.job.block_list``"""
    allow_list = config.job.allow_list
    # An empty allow list allows everything; otherwise at least one pattern must match
    if allow_list and not any(fnmatch(filename, pattern) for pattern in allow_list):
        return False
    return not any(fnmatch(filename, pattern) for pattern in config.job.block_list)


async def create_job_from_post(
        post: Union[Post, Revision],
        post_path: Path,
        *,
        post_dir: bool = True,
        dump_post_data: bool = True
) -> List[Job]:
    """
    Create a list of download jobs from post data

    Besides building the jobs this function writes to disk: it creates the post
    directory (and its sub-directories when ``post_dir`` is enabled), dumps the
    post data, and writes the content / external links files when the matching
    ``config.job`` options are enabled.

    :param post: post data
    :param post_path: Path of the post directory, which needs to be sanitized
    :param post_dir: Whether to create post directory
    :param dump_post_data: Whether to dump post data (post.json) in post directory
    :return: Jobs for the attachments, the post file and the content images
    :raise FetchInterruptError: If fetching post content fails
    """
    post_path.mkdir(parents=True, exist_ok=True)

    # Load ``PostStructureConfiguration``
    if post_dir:
        attachments_path = post_path / config.job.post_structure.attachments  # attachments
        attachments_path.mkdir(exist_ok=True)
        content_path = post_path / config.job.post_structure.content  # content
        content_path.parent.mkdir(exist_ok=True)
        external_links_path = post_path / config.job.post_structure.external_links  # external_links
        external_links_path.parent.mkdir(exist_ok=True)
    else:
        attachments_path = post_path
        content_path = None
        external_links_path = None

    if dump_post_data:
        async with aiofiles.open(str(post_path / DataStorageNameEnum.PostData.value), "w", encoding="utf-8") as f:
            await f.write(
                post.model_dump_json(indent=config.json_dump_indent)
            )

    # Filter and create jobs for ``Post.attachment``
    jobs: List[Job] = []
    sequential_counter = 1  # Counter for sequential filenames, shared with content images below
    if config.job.download_attachments:
        for attachment in post.attachments:
            if not attachment.path:
                continue
            # Prefer the attachment name, fall back to the name in the server path
            file_path_obj = Path(attachment.name) if is_valid_filename(attachment.name) else Path(
                urlparse(attachment.path).path
            )
            if not _is_filename_allowed(file_path_obj.name):
                continue

            # Check if file extension should be excluded from sequential naming
            should_use_sequential = (config.job.sequential_filename and
                                     file_path_obj.suffix.lower() not in config.job.sequential_filename_excludes)
            if should_use_sequential:
                basic_filename = f"{sequential_counter}{file_path_obj.suffix}"
                sequential_counter += 1
            else:
                basic_filename = file_path_obj.name
            alt_filename = generate_filename(post, basic_filename, config.job.filename_format)
            jobs.append(
                Job(
                    path=attachments_path,
                    alt_filename=alt_filename,
                    server_path=attachment.path,
                    type=PostFileTypeEnum.Attachment,
                    post=post
                )
            )

    # Filter and create jobs for ``Post.file``
    if config.job.download_file and post.file and post.file.path:
        post_file_name = Path(post.file.name) if is_valid_filename(post.file.name) else Path(
            urlparse(post.file.path).path
        )
        post_file_name = Path(generate_filename(post, post_file_name.name, config.job.post_structure.file))
        if _is_filename_allowed(post_file_name.name):
            jobs.append(
                Job(
                    path=post_path,
                    alt_filename=post_file_name.name,
                    server_path=post.file.path,
                    type=PostFileTypeEnum.File,
                    post=post
                )
            )

    # ``post.substring`` is used to determine if the post has content, but it's only partial
    if (post.content or post.substring) and post_dir and (
            config.job.extract_content or config.job.extract_external_links or config.job.extract_content_images
    ):
        # If post has no content, fetch it from get_post API
        if not post.content:
            get_post_ret = await get_post_api(
                service=post.service,
                creator_id=post.user,
                post_id=post.id,
                revision_id=post.revision_id if isinstance(post, Revision) else None
            )
            if get_post_ret:
                post = get_post_ret.data.post
            else:
                logger.error(
                    generate_msg(
                        "Failed to fetch post content",
                        post_name=post.title or "Unknown",
                        post_id=post.id,
                        creator_id=post.user,
                        service=post.service
                    )
                )
                raise FetchInterruptError(ret=get_post_ret)

        # If post content is still empty, skip content extraction
        if post.content:
            # Write content file
            if config.job.extract_content:
                async with aiofiles.open(content_path, "w", encoding=config.downloader.encoding) as f:
                    await f.write(post.content)

            # Extract and write external links file
            if config.job.extract_external_links:
                external_links = extract_external_links(post.content, config.job.external_link_patterns)
                if external_links:
                    async with aiofiles.open(external_links_path, "w", encoding=config.downloader.encoding) as f:
                        # Write each link on a separate line
                        for link in sorted(external_links):
                            await f.write(f"{link}\n")

            # Extract content images
            if config.job.extract_content_images:
                for image_src in extract_content_images(post.content):
                    if not image_src or not image_src.strip():
                        continue

                    # noinspection HttpUrlsUsage
                    if image_src.startswith('/') and not image_src.startswith('//'):
                        # Site-relative path, used as the server path directly
                        image_path = image_src
                    elif image_src.startswith(('http://', 'https://')):
                        # Absolute URL - extract path
                        image_path = urlparse(image_src).path
                    else:
                        # Skip data URLs, protocol-relative URLs, or other non-path sources
                        continue

                    if not image_path or not image_path.strip():
                        continue

                    image_file_path = Path(image_path)

                    # Decide on the final name first, so the allow/block list is checked against
                    # the name that is really used and excluded extensions keep their own name
                    should_use_sequential = (config.job.sequential_filename and
                                             image_file_path.suffix.lower() not in config.job.sequential_filename_excludes)
                    if should_use_sequential:
                        basic_filename = f"{sequential_counter}{image_file_path.suffix}"
                    else:
                        basic_filename = image_file_path.name
                    alt_filename = generate_filename(post, basic_filename, config.job.filename_format)

                    if not _is_filename_allowed(alt_filename):
                        continue

                    # Only consume a sequence number for images that actually produce a job
                    if should_use_sequential:
                        sequential_counter += 1

                    # NOTE(review): unlike the attachment and file jobs above, no ``post`` is
                    # passed to these jobs - confirm that this is intended
                    jobs.append(
                        Job(
                            path=attachments_path,
                            alt_filename=alt_filename,
                            server_path=image_path,
                            type=PostFileTypeEnum.Attachment
                        )
                    )

    return jobs

extract_content_images(content)

Extract image sources from HTML content

Parameters:

Name Type Description Default
content str

HTML content string

required

Returns:

Type Description
List[str]

List of image source URLs/paths

Source code in ktoolbox/action/utils.py
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
def extract_content_images(content: str) -> List[str]:
    """
    Extract image sources from HTML content

    Empty content yields an empty list without parsing. A parsing failure is
    logged as a warning and also yields an empty list.

    :param content: HTML content string
    :return: List of image source URLs/paths
    """
    if content:
        image_parser = _ContentImageParser()
        try:
            image_parser.feed(content)
        except Exception as e:
            logger.warning(f"Failed to parse HTML content for images: {e}")
        else:
            return image_parser.image_sources
    return []

fetch_creator_posts(service, creator_id, o=0) async

Fetch posts from a creator

Parameters:

Name Type Description Default
service str

The service where the post is located

required
creator_id str

The ID of the creator

required
o int

Result offset, stepping of 50 is enforced

0

Returns:

Type Description
AsyncGenerator[List[Post], Any]

Async generator that yields lists of posts

Raises:

Type Description
FetchInterruptError

Exception for interrupt of data fetching

Source code in ktoolbox/action/fetch.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
async def fetch_creator_posts(service: str, creator_id: str, o: int = 0) -> AsyncGenerator[List[Post], Any]:
    """
    Fetch posts from a creator

    Requests one page after another, advancing the offset by ``SEARCH_STEP`` each
    time, and stops after a page that holds fewer than ``SEARCH_STEP`` posts.

    :param service: The service where the post is located
    :param creator_id: The ID of the creator
    :param o: Result offset, stepping of 50 is enforced
    :return: Async generator of several list of posts
    :raise FetchInterruptError: Exception for interrupt of data fetching
    """
    next_offset = o
    while True:
        ret = await get_creator_post(service=service, creator_id=creator_id, o=next_offset)
        if not ret:
            # Hand the failed return data to the caller through the exception
            raise FetchInterruptError(ret=ret)
        yield ret.data
        # A short page is the last one
        if len(ret.data) < SEARCH_STEP:
            return
        next_offset += SEARCH_STEP

filter_posts_by_date(post_list, start_date, end_date)

Filter posts by publish date range

Parameters:

Name Type Description Default
post_list List[Post]

List of posts

required
start_date Optional[datetime]

Start time of the time range

required
end_date Optional[datetime]

End time of the time range

required
Source code in ktoolbox/action/utils.py
168
169
170
171
172
173
174
175
176
177
178
179
180
181
def filter_posts_by_date(
        post_list: List[Post],
        start_date: Optional[datetime],
        end_date: Optional[datetime]
) -> Generator[Post, Any, Any]:
    """
    Filter posts by publish date range

    Lazily yields every post of ``post_list`` accepted by ``_match_post_date``,
    keeping the original order.

    :param post_list: List of posts
    :param start_date: Start time of the time range
    :param end_date: End time of the time range
    """
    for post in post_list:
        if _match_post_date(post, start_date, end_date):
            yield post

filter_posts_by_indices(posts, indices)

Compare and filter posts by CreatorIndices data

Only keep posts that are new or were edited after the last download.

Parameters:

Name Type Description Default
posts List[Post]

Posts to filter

required
indices CreatorIndices

CreatorIndices data to use

required

Returns:

Type Description
Tuple[List[Post], CreatorIndices]

An updated List[Post] and a new, updated CreatorIndices instance

Source code in ktoolbox/action/utils.py
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
def filter_posts_by_indices(posts: List[Post], indices: CreatorIndices) -> Tuple[List[Post], CreatorIndices]:
    """
    Compare and filter posts by ``CreatorIndices`` data

    Only keep posts that are not recorded in ``indices`` yet or that were edited after
    the recorded version. A post without an edit time is never considered newer than
    its recorded version; a recorded version without an edit time is considered older
    than any post that has one.

    :param posts: Posts to filter
    :param indices: ``CreatorIndices`` data to use, it is not modified
    :return: A updated ``List[Post]`` and updated **new** ``CreatorIndices`` instance
    """

    def is_new_or_edited(post) -> bool:
        if post.id not in indices.posts:
            return True
        # ``edited`` may be empty, comparing ``None`` with a datetime would raise ``TypeError``
        if post.edited is None:
            return False
        recorded_edited = indices.posts[post.id].edited
        return recorded_edited is None or post.edited > recorded_edited

    new_list = [post for post in posts if is_new_or_edited(post)]
    # Work on a deep copy so that the caller's indices stay untouched
    new_indices = indices.model_copy(deep=True)
    for post in new_list:
        new_indices.posts[post.id] = post
    return new_list, new_indices

filter_posts_by_keywords(post_list, keywords)

Filter posts by keywords in title

Parameters:

Name Type Description Default
post_list List[Post]

List of posts

required
keywords Optional[Set[str]]

Set of keywords to search for (case-insensitive), None means no filtering

required
Source code in ktoolbox/action/utils.py
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
def filter_posts_by_keywords(
        post_list: List[Post],
        keywords: Optional[Set[str]]
) -> Generator[Post, Any, Any]:
    """
    Filter posts by keywords in title

    Lazily yields the posts accepted by ``match_post_keywords``; without keywords
    every post is yielded unchanged.

    :param post_list: List of posts
    :param keywords: Set of keywords to search for (case-insensitive), None means no filtering
    """
    if keywords:
        for post in post_list:
            if match_post_keywords(post, keywords):
                yield post
    else:
        # Nothing to filter by, pass everything through
        yield from post_list

filter_posts_by_keywords_exclude(post_list, keywords_exclude)

Filter out posts that contain any of the specified keywords in title

Parameters:

Name Type Description Default
post_list List[Post]

List of posts

required
keywords_exclude Optional[Set[str]]

Set of keywords to exclude (case-insensitive), None means no filtering

required
Source code in ktoolbox/action/utils.py
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
def filter_posts_by_keywords_exclude(
        post_list: List[Post],
        keywords_exclude: Optional[Set[str]]
) -> Generator[Post, Any, Any]:
    """
    Filter out posts that contain any of the specified keywords in title

    Lazily yields the posts rejected by ``match_post_keywords``; without exclude
    keywords every post is yielded unchanged.

    :param post_list: List of posts
    :param keywords_exclude: Set of keywords to exclude (case-insensitive), None means no filtering
    """
    if keywords_exclude:
        for post in post_list:
            # Drop posts that match any of the exclude keywords
            if match_post_keywords(post, keywords_exclude):
                continue
            yield post
    else:
        yield from post_list

generate_filename(post, basic_name, filename_format)

Generate download filename

Source code in ktoolbox/action/utils.py
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def generate_filename(post: Post, basic_name: str, filename_format: str) -> str:
    """
    Generate download filename

    The extension of ``basic_name`` is split off, the remaining part is passed to
    ``filename_format`` as the positional field ``{}`` together with the post
    attributes as named fields, and the extension is appended again before the
    result is sanitized.

    :param post: Post that provides the values for the named fields
    :param basic_name: Original filename, including its extension
    :param filename_format: Format string of the filename (without extension)
    :return: Sanitized filename
    :raise SystemExit: If ``filename_format`` contains an unknown named field
    """
    suffix = Path(basic_name).suffix
    if suffix:
        # Remove only the last occurrence of the extension; ``str.replace`` would also
        # strip the same text from the middle of the name (e.g. ``a.jpg.jpg`` -> ``a``)
        head, _, tail = basic_name.rpartition(suffix)
        basic_name_filename = head + tail
    else:
        basic_name_filename = basic_name
    try:
        return sanitize_filename(
            filename_format.format(
                basic_name_filename,
                id=post.id,
                user=post.user,
                service=post.service,
                title=post.title,
                added=post.added.strftime(TIME_FORMAT) if post.added else "",
                published=post.published.strftime(TIME_FORMAT) if post.published else "",
                edited=post.edited.strftime(TIME_FORMAT) if post.edited else ""
            ) + suffix
        )
    except KeyError as e:
        logger.error(
            f"`JobConfiguration.filename_format` or `PostStructureConfiguration.file` contains invalid key: {e}")
        # ``exit()`` is provided by the ``site`` module and is not guaranteed to exist
        # in every runtime, raising ``SystemExit`` directly has the same effect
        raise SystemExit(1)

generate_grouped_post_path(post, base_path)

Generate the full path for a post considering year/month grouping.

Parameters:

Name Type Description Default
post Post

Post object

required
base_path Path

Base path (usually creator directory)

required

Returns:

Type Description
Path

Full path where the post should be saved

Source code in ktoolbox/action/utils.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
def generate_grouped_post_path(post: Post, base_path: Path) -> Path:
    """
    Generate the full path for a post considering year/month grouping.

    The month directory is only added below a year directory, so
    ``config.job.group_by_month`` has no effect unless ``config.job.group_by_year``
    is enabled as well.

    :param post: Post object
    :param base_path: Base path (usually creator directory)
    :return: Full path where the post should be saved
    """
    if not config.job.group_by_year:
        return base_path

    year_path = base_path / generate_year_dirname(post)
    if not config.job.group_by_month:
        return year_path

    return year_path / generate_month_dirname(post)

generate_month_dirname(post)

Generate month directory name for post grouping.

Source code in ktoolbox/action/utils.py
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def generate_month_dirname(post: Post) -> str:
    """
    Generate month directory name for post grouping.

    :param post: Post whose publish date (or added date as fallback) is used
    :return: Sanitized directory name built from ``config.job.month_dirname_format``, \
     or ``"unknown"`` if the post has neither date
    :raise SystemExit: If ``config.job.month_dirname_format`` contains an unknown named field
    """
    # Use published date, fall back to added date
    post_date = post.published or post.added
    if not post_date:
        return "unknown"

    try:
        return sanitize_filename(
            config.job.month_dirname_format.format(
                year=post_date.year,
                month=post_date.month
            )
        )
    except KeyError as e:
        logger.error(f"`JobConfiguration.month_dirname_format` contains invalid key: {e}")
        # ``exit()`` is provided by the ``site`` module and is not guaranteed to exist
        # in every runtime, raising ``SystemExit`` directly has the same effect
        raise SystemExit(1)

generate_post_path_name(post)

Generate directory name for post to save.

Source code in ktoolbox/action/utils.py
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def generate_post_path_name(post: Post) -> str:
    """
    Generate directory name for post to save.

    :param post: Post that provides the values for the named fields
    :return: The post ID if the post has no title, otherwise the sanitized name \
     built from ``config.job.post_dirname_format``
    :raise SystemExit: If ``config.job.post_dirname_format`` contains an unknown named field
    """
    if not post.title:
        return post.id

    try:
        return sanitize_filename(
            config.job.post_dirname_format.format(
                id=post.id,
                user=post.user,
                service=post.service,
                title=post.title,
                added=post.added.strftime(TIME_FORMAT) if post.added else "",
                published=post.published.strftime(TIME_FORMAT) if post.published else "",
                edited=post.edited.strftime(TIME_FORMAT) if post.edited else ""
            )
        )
    except KeyError as e:
        logger.error(f"`JobConfiguration.post_dirname_format` contains invalid key: {e}")
        # ``exit()`` is provided by the ``site`` module and is not guaranteed to exist
        # in every runtime, raising ``SystemExit`` directly has the same effect
        raise SystemExit(1)

generate_year_dirname(post)

Generate year directory name for post grouping.

Source code in ktoolbox/action/utils.py
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
def generate_year_dirname(post: Post) -> str:
    """
    Generate year directory name for post grouping.

    :param post: Post whose publish date (or added date as fallback) is used
    :return: Sanitized directory name built from ``config.job.year_dirname_format``, \
     or ``"unknown"`` if the post has neither date
    :raise SystemExit: If ``config.job.year_dirname_format`` contains an unknown named field
    """
    # Use published date, fall back to added date
    post_date = post.published or post.added
    if not post_date:
        return "unknown"

    try:
        return sanitize_filename(
            config.job.year_dirname_format.format(
                year=post_date.year
            )
        )
    except KeyError as e:
        logger.error(f"`JobConfiguration.year_dirname_format` contains invalid key: {e}")
        # ``exit()`` is provided by the ``site`` module and is not guaranteed to exist
        # in every runtime, raising ``SystemExit`` directly has the same effect
        raise SystemExit(1)

match_post_keywords(post, keywords)

Check if the post contains any of the specified keywords.

Parameters:

Name Type Description Default
post Post

Target post object

required
keywords Set[str]

Set of keywords to search for (case-insensitive)

required

Returns:

Type Description
bool

Whether the post contains any of the keywords in title

Source code in ktoolbox/action/utils.py
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
def match_post_keywords(post: Post, keywords: Set[str]) -> bool:
    """
    Tell whether the title of a post contains at least one keyword.

    :param post: Target post object
    :param keywords: Set of keywords to search for (case-insensitive)
    :return: ``True`` if no keywords are given or any keyword occurs in the title
    """
    # Nothing to filter by: every post matches
    if not keywords:
        return True

    # Only the title is searched; a missing title behaves like an empty one
    title = post.title.lower() if post.title else ""
    lowered_keywords = (word.lower() for word in keywords)
    return any(word in title for word in lowered_keywords)

search_creator(id=None, name=None, service=None) async

Search creator with multiple keywords support.

Parameters:

Name Type Description Default
id str

The ID of the creator

None
name str

The name of the creator

None
service str

The service for the creator

None
Source code in ktoolbox/action/search.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
async def search_creator(id: str = None, name: str = None, service: str = None) -> BaseRet[Iterator[Creator]]:
    """
    Search creator with multiple keywords support.

    Every criterion that is not ``None`` must match; ``name`` is matched as a substring.

    :param id: The ID of the creator
    :param name: The name of the creator
    :param service: The service for the creator
    :return: Result whose data is an iterator over the matching creators \
    (an empty iterator if fetching the creator list failed)
    """
    ret = await get_creators()
    if not ret:
        # Propagate the failed result, but keep ``data`` iterable for callers
        failed_ret = BaseRet.model_validate(ret.model_dump())
        failed_ret.data = iter([])
        return failed_ret

    def is_match(creator: Creator):
        """Filter creators with attributes"""
        return (
            (id is None or creator.id == id)
            and (name is None or name in creator.name)
            and (service is None or creator.service == service)
        )

    return ActionRet(data=filter(is_match, ret.data))

search_creator_post(id=None, name=None, service=None, q=None, o=None) async

Search posts from creator with multiple keywords support.

Parameters:

Name Type Description Default
id str

The ID of the creator

None
name str

The name of the creator

None
service str

The service for the creator

None
q str

Search query

None
o str

Result offset, stepping of 50 is enforced

None
Source code in ktoolbox/action/search.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
async def search_creator_post(
        id: str = None,
        name: str = None,
        service: str = None,
        q: str = None,
        o: str = None
) -> BaseRet[List[Post]]:
    """
    Search posts from creator with multiple keywords support.

    At least one of ``id``, ``name`` and ``service`` must be given.

    :param id: The ID of the creator
    :param name: The name of the creator
    :param service: The service for the creator
    :param q: Search query
    :param o: Result offset, stepping of 50 is enforced
    """

    async def run_search(**kwargs):
        # No criterion at all: report the missing parameters
        if not any([id, name, service]):
            return ActionRet(
                code=RetCodeEnum.MissingParameter,
                message=generate_msg(
                    "Missing `id`, `name`, `service` parameter, at least given one of them.",
                    **kwargs
                )
            )

        # Both ``id`` and ``service`` are known: query the creator directly
        if id is not None and service:
            ret = await get_creator_post(
                service=service,
                creator_id=id,
                q=q,
                o=o
            )
            if ret:
                return ActionRet(data=ret.data)
            return ret

        # Otherwise resolve ``id`` and ``service`` through the creator search first
        creators_ret = await search_creator(id=id, name=name, service=service)
        if not creators_ret:
            return ActionRet(**creators_ret.model_dump(mode="python"))

        found_posts: List[Post] = []
        for creator in creators_ret.data:
            ret = await get_creator_post(
                service=creator.service,
                creator_id=creator.id,
                q=q,
                o=o
            )
            if ret:
                found_posts += ret.data
        return ActionRet(data=found_posts)

    return await run_search(id=id, name=name, service=service, q=q, o=o)

base

__all__ = ['ActionRet'] module-attribute

ActionRet

Bases: BaseRet[_T]

Return data model of action call

Source code in ktoolbox/action/base.py
10
11
12
class ActionRet(BaseRet[_T]):
    """Return data model of action call"""
    # Adds nothing to ``BaseRet``; the docstring alone forms the class body

fetch

__all__ = ['FetchInterruptError', 'fetch_creator_posts'] module-attribute

FetchInterruptError

Bases: Exception

Exception for interrupt of data fetching

Source code in ktoolbox/action/fetch.py
11
12
13
14
15
16
class FetchInterruptError(Exception):
    """
    Exception for interrupt of data fetching

    The failed return value is kept on the ``ret`` attribute.
    """

    def __init__(self, *args, ret: BaseRet = None):
        # Store the return value that caused the interruption (may be ``None``)
        self.ret = ret
        super().__init__(*args)
ret = ret instance-attribute
__init__(*args, ret=None)
Source code in ktoolbox/action/fetch.py
14
15
16
def __init__(self, *args, ret: BaseRet = None):
    """Keep the given ``ret`` on the instance and forward ``args`` to the base class."""
    self.ret = ret
    super().__init__(*args)

fetch_creator_posts(service, creator_id, o=0) async

Fetch posts from a creator

Parameters:

Name Type Description Default
service str

The service where the post is located

required
creator_id str

The ID of the creator

required
o int

Result offset, stepping of 50 is enforced

0

Returns:

Type Description
AsyncGenerator[List[Post], Any]

Async generator yielding several lists of posts

Raises:

Type Description
FetchInterruptError

Exception for interrupt of data fetching

Source code in ktoolbox/action/fetch.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
async def fetch_creator_posts(service: str, creator_id: str, o: int = 0) -> AsyncGenerator[List[Post], Any]:
    """
    Fetch posts from a creator

    Keeps requesting posts, advancing the offset by ``SEARCH_STEP`` each time,
    until a request returns fewer than ``SEARCH_STEP`` posts.

    :param service: The service where the post is located
    :param creator_id: The ID of the creator
    :param o: Result offset, stepping of 50 is enforced
    :return: Async generator yielding several lists of posts
    :raise FetchInterruptError: Exception for interrupt of data fetching
    """
    current_offset = o
    while True:
        ret = await get_creator_post(service=service, creator_id=creator_id, o=current_offset)
        if not ret:
            # Hand the failed result to the caller through the exception
            raise FetchInterruptError(ret=ret)
        yield ret.data
        if len(ret.data) < SEARCH_STEP:
            # A short list ends the iteration
            return
        current_offset += SEARCH_STEP

job

__all__ = ['create_job_from_post', 'create_job_from_creator'] module-attribute

create_job_from_creator(service, creator_id, path, *, all_pages=False, offset=0, length=50, save_creator_indices=False, mix_posts=None, start_time, end_time, keywords=None, keywords_exclude=None) async

Create a list of download jobs from a creator

Parameters:

Name Type Description Default
service str

The service where the post is located

required
creator_id str

The ID of the creator

required
path Path

The path for downloading posts, which needs to be sanitized

required
all_pages bool

Fetch all posts, offset and length will be ignored if enabled

False
offset int

Result offset (or start offset)

0
length Optional[int]

The number of posts to fetch

50
save_creator_indices bool

Record CreatorIndices data.

False
mix_posts bool

Save all files from different posts at same path, save_creator_indices will be ignored if enabled

None
start_time Optional[datetime]

Start time of the time range

required
end_time Optional[datetime]

End time of the time range

required
keywords Optional[Set[str]]

Set of keywords to filter posts by title (case-insensitive)

None
keywords_exclude Optional[Set[str]]

Set of keywords to exclude posts by title (case-insensitive)

None
Source code in ktoolbox/action/job.py
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
async def create_job_from_creator(
        service: str,
        creator_id: str,
        path: Path,
        *,
        all_pages: bool = False,
        offset: int = 0,
        length: Optional[int] = 50,
        save_creator_indices: bool = False,
        mix_posts: bool = None,
        start_time: Optional[datetime],
        end_time: Optional[datetime],
        keywords: Optional[Set[str]] = None,
        keywords_exclude: Optional[Set[str]] = None
) -> ActionRet[List[Job]]:
    """
    Create a list of download jobs from a creator

    Posts are fetched page by page, cut to the requested ``offset`` / ``length``
    window, filtered by date and keywords, and then turned into jobs
    (optionally including post revisions).

    :param service: The service where the post is located
    :param creator_id: The ID of the creator
    :param path: The path for downloading posts, which needs to be sanitized
    :param all_pages: Fetch all posts, ``offset`` and ``length`` will be ignored if enabled
    :param offset: Result offset (or start offset)
    :param length: The number of posts to fetch, ``None`` means no limit
    :param save_creator_indices: Record ``CreatorIndices`` data.
    :param mix_posts: Save all files from different posts at same path, \
     ``save_creator_indices`` will be ignored if enabled
    :param start_time: Start time of the time range
    :param end_time: End time of the time range
    :param keywords: Set of keywords to filter posts by title (case-insensitive)
    :param keywords_exclude: Set of keywords to exclude posts by title (case-insensitive)
    :return: The created jobs, or the failed result if fetching was interrupted
    """
    mix_posts = config.job.mix_posts if mix_posts is None else mix_posts

    # Get posts
    logger.info(f"Start fetching posts from creator {creator_id}")
    page_size = 50  # Offsets are stepped by 50
    in_page_offset = offset % page_size
    start_offset = offset - in_page_offset
    if all_pages or length is None:
        page_limit = None  # No limit: read until the generator is exhausted
    else:
        # The window starts ``in_page_offset`` posts into the first page,
        # so that many extra posts must be covered (ceiling division)
        page_limit = -(-(in_page_offset + length) // page_size)

    post_list: List[Post] = []
    try:
        if page_limit is None or page_limit > 0:
            pages_fetched = 0
            async for part in fetch_creator_posts(service=service, creator_id=creator_id, o=start_offset):
                post_list += part
                pages_fetched += 1
                # Stop before the generator requests a page that is not needed
                if page_limit is not None and pages_fetched >= page_limit:
                    break
    except FetchInterruptError as e:
        return ActionRet(**e.ret.model_dump(mode="python"))

    post_list = post_list[in_page_offset:]
    if not all_pages:
        post_list = post_list[:length]

    # Filter posts by publish time
    if start_time or end_time:
        post_list = list(filter_posts_by_date(post_list, start_time, end_time))

    # Filter posts by keywords
    if keywords:
        post_list = list(filter_posts_by_keywords(post_list, keywords))

    # Filter out posts by exclude keywords
    if keywords_exclude:
        post_list = list(filter_posts_by_keywords_exclude(post_list, keywords_exclude))

    logger.info(f"Get {len(post_list)} posts after filtering, start creating jobs")

    # Generate ``CreatorIndices``
    if not mix_posts and save_creator_indices:
        # Record the same directory that is used for the jobs below
        posts_path = {
            post.id: generate_grouped_post_path(post, path) / generate_post_path_name(post)
            for post in post_list
        }
        indices = CreatorIndices(
            creator_id=creator_id,
            service=service,
            posts={post.id: post for post in post_list},
            posts_path=posts_path
        )
        async with aiofiles.open(
                path / DataStorageNameEnum.CreatorIndicesData.value,
                "w",
                encoding="utf-8"
        ) as f:
            await f.write(indices.model_dump_json(indent=config.json_dump_indent))

    if config.job.include_revisions:
        logger.warning("`job.include_revisions` is enabled and will fetch post revisions, "
                       "which may take time. Disable if not needed.")
    if config.job.extract_content or config.job.extract_external_links or config.job.extract_content_images:
        logger.warning(
            "`job.extract_content` or `job.extract_external_links` or `job.extract_content_images` is enabled "
            "and will fetch post content one by one, which may take time. Disable if not needed.")

    job_list: List[Job] = []
    for post in post_list:
        # Get post path
        if mix_posts:
            post_path = path
        else:
            # Apply year/month grouping if enabled
            grouped_base_path = generate_grouped_post_path(post, path)
            post_path = grouped_base_path / generate_post_path_name(post)

        # Generate jobs for the main post
        try:
            job_list += await create_job_from_post(
                post=post,
                post_path=post_path,
                post_dir=not mix_posts,
                dump_post_data=not mix_posts
            )
        except FetchInterruptError as e:
            return ActionRet(**e.ret.model_dump(mode="python"))

        # If include_revisions is enabled, fetch and download revisions for this post
        if config.job.include_revisions and not mix_posts:
            try:
                revisions_ret = await get_post_revisions_api(
                    service=service,
                    creator_id=creator_id,
                    post_id=post.id
                )
                if revisions_ret and revisions_ret.data:
                    for revision in revisions_ret.data:
                        if revision.revision_id:  # Only process actual revisions
                            revision_path = post_path / config.job.post_structure.revisions / generate_post_path_name(
                                revision)
                            try:
                                revision_jobs = await create_job_from_post(
                                    post=revision,
                                    post_path=revision_path,
                                    dump_post_data=True
                                )
                            except FetchInterruptError as e:
                                return ActionRet(**e.ret.model_dump(mode="python"))
                            job_list += revision_jobs
            except Exception as e:
                # Best effort: a failure on revisions must not abort the whole job creation
                logger.warning(f"Failed to fetch revisions for post {post.id}: {e}")

    return ActionRet(data=job_list)

create_job_from_post(post, post_path, *, post_dir=True, dump_post_data=True) async

Create a list of download jobs from post data

Parameters:

Name Type Description Default
post Union[Post, Revision]

post data

required
post_path Path

Path of the post directory, which needs to be sanitized

required
post_dir bool

Whether to create post directory

True
dump_post_data bool

Whether to dump post data (post.json) in post directory

True

Raises:

Type Description
FetchInterruptError

If fetching post content fails

Source code in ktoolbox/action/job.py
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
def _filename_allowed(filename: str) -> bool:
    """Tell whether ``filename`` passes ``config.job.allow_list`` and ``config.job.block_list``."""
    allow_list = config.job.allow_list
    if allow_list and not any(fnmatch(filename, pattern) for pattern in allow_list):
        return False
    return not any(fnmatch(filename, pattern) for pattern in config.job.block_list)


async def create_job_from_post(
        post: Union[Post, Revision],
        post_path: Path,
        *,
        post_dir: bool = True,
        dump_post_data: bool = True
) -> List[Job]:
    """
    Create a list of download jobs from post data

    Besides building the jobs this creates the post directories and, depending on
    the configuration, writes the post data, content and external links files.

    :param post: post data
    :param post_path: Path of the post directory, which needs to be sanitized
    :param post_dir: Whether to create post directory
    :param dump_post_data: Whether to dump post data (post.json) in post directory
    :return: Jobs for the attachments, the post file and the images found in the content
    :raise FetchInterruptError: If fetching post content fails
    """
    post_path.mkdir(parents=True, exist_ok=True)

    # Load ``PostStructureConfiguration``
    if post_dir:
        # ``parents=True`` keeps nested structure paths from failing
        attachments_path = post_path / config.job.post_structure.attachments  # attachments
        attachments_path.mkdir(parents=True, exist_ok=True)
        content_path = post_path / config.job.post_structure.content  # content
        content_path.parent.mkdir(parents=True, exist_ok=True)
        external_links_path = post_path / config.job.post_structure.external_links  # external_links
        external_links_path.parent.mkdir(parents=True, exist_ok=True)
    else:
        attachments_path = post_path
        content_path = None
        external_links_path = None

    if dump_post_data:
        async with aiofiles.open(str(post_path / DataStorageNameEnum.PostData.value), "w", encoding="utf-8") as f:
            await f.write(
                post.model_dump_json(indent=config.json_dump_indent)
            )

    # Filter and create jobs for ``Post.attachment``
    jobs: List[Job] = []
    sequential_counter = 1  # Counter for sequential filenames, shared with content images
    if config.job.download_attachments:
        for attachment in post.attachments:  # type: Attachment
            if not attachment.path:
                continue
            file_path_obj = Path(attachment.name) if is_valid_filename(attachment.name) else Path(
                urlparse(attachment.path).path
            )
            if not _filename_allowed(file_path_obj.name):
                continue
            # Check if file extension should be excluded from sequential naming
            should_use_sequential = (config.job.sequential_filename and
                                     file_path_obj.suffix.lower() not in config.job.sequential_filename_excludes)
            if should_use_sequential:
                basic_filename = f"{sequential_counter}{file_path_obj.suffix}"
                sequential_counter += 1
            else:
                basic_filename = file_path_obj.name
            alt_filename = generate_filename(post, basic_filename, config.job.filename_format)
            jobs.append(
                Job(
                    path=attachments_path,
                    alt_filename=alt_filename,
                    server_path=attachment.path,
                    type=PostFileTypeEnum.Attachment,
                    post=post
                )
            )

    # Filter and create jobs for ``Post.file``
    if config.job.download_file and post.file and post.file.path:
        post_file_name = Path(post.file.name) if is_valid_filename(post.file.name) else Path(
            urlparse(post.file.path).path
        )
        post_file_name = Path(generate_filename(post, post_file_name.name, config.job.post_structure.file))
        if _filename_allowed(post_file_name.name):
            jobs.append(
                Job(
                    path=post_path,
                    alt_filename=post_file_name.name,
                    server_path=post.file.path,
                    type=PostFileTypeEnum.File,
                    post=post
                )
            )

    # ``post.substring`` is used to determine if the post has content, but it's only partial
    if (post.content or post.substring) and post_dir and (
            config.job.extract_content or config.job.extract_external_links or config.job.extract_content_images
    ):
        # If post has no content, fetch it from get_post API
        if not post.content:
            get_post_ret = await get_post_api(
                service=post.service,
                creator_id=post.user,
                post_id=post.id,
                revision_id=post.revision_id if isinstance(post, Revision) else None
            )
            if get_post_ret:
                post = get_post_ret.data.post
            else:
                logger.error(
                    generate_msg(
                        "Failed to fetch post content",
                        post_name=post.title or "Unknown",
                        post_id=post.id,
                        creator_id=post.user,
                        service=post.service
                    )
                )
                raise FetchInterruptError(ret=get_post_ret)

        # If post content is still empty, skip content extraction
        if post.content:
            # Write content file
            if config.job.extract_content:
                async with aiofiles.open(content_path, "w", encoding=config.downloader.encoding) as f:
                    await f.write(post.content)

            # Extract and write external links file
            if config.job.extract_external_links:
                external_links = extract_external_links(post.content, config.job.external_link_patterns)
                if external_links:
                    async with aiofiles.open(external_links_path, "w", encoding=config.downloader.encoding) as f:
                        # Write each link on a separate line
                        for link in sorted(external_links):
                            await f.write(f"{link}\n")

            # Extract content images
            if config.job.extract_content_images:
                for image_src in extract_content_images(post.content):
                    if not image_src or not image_src.strip():
                        continue

                    # noinspection HttpUrlsUsage
                    if image_src.startswith('/') and not image_src.startswith('//'):
                        # Site-relative path - used as the server path as is
                        image_path = image_src
                    elif image_src.startswith(('http://', 'https://')):
                        # Absolute URL - extract path
                        image_path = urlparse(image_src).path
                    else:
                        # Skip data URLs, protocol-relative URLs, or other non-path sources
                        continue

                    if not image_path or not image_path.strip():
                        continue

                    # Generate filename from the image path
                    image_file_path = Path(image_path)

                    # Decide on the final name first (same rule as for attachments), so that
                    # excluded extensions keep their original name and the allow/block lists
                    # are checked against the name that is actually used
                    should_use_sequential = (config.job.sequential_filename and
                                             image_file_path.suffix.lower() not in
                                             config.job.sequential_filename_excludes)
                    if should_use_sequential:
                        basic_filename = f"{sequential_counter}{image_file_path.suffix}"
                    else:
                        basic_filename = image_file_path.name
                    alt_filename = generate_filename(post, basic_filename, config.job.filename_format)

                    if not _filename_allowed(alt_filename):
                        continue

                    # Only consume a sequence number for images that are really downloaded
                    if should_use_sequential:
                        sequential_counter += 1

                    jobs.append(
                        Job(
                            path=attachments_path,
                            alt_filename=alt_filename,
                            server_path=image_path,
                            type=PostFileTypeEnum.Attachment
                        )
                    )

    return jobs

search

__all__ = ['search_creator', 'search_creator_post'] module-attribute

search_creator(id=None, name=None, service=None) async

Search creator with multiple keywords support.

Parameters:

Name Type Description Default
id str

The ID of the creator

None
name str

The name of the creator

None
service str

The service for the creator

None
Source code in ktoolbox/action/search.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
async def search_creator(id: str = None, name: str = None, service: str = None) -> BaseRet[Iterator[Creator]]:
    """
    Search creator with multiple keywords support.

    Every criterion that is not ``None`` must match; ``name`` is matched as a substring.

    :param id: The ID of the creator
    :param name: The name of the creator
    :param service: The service for the creator
    :return: Result whose data is an iterator over the matching creators \
    (an empty iterator if fetching the creator list failed)
    """
    ret = await get_creators()
    if not ret:
        # Propagate the failed result, but keep ``data`` iterable for callers
        failed_ret = BaseRet.model_validate(ret.model_dump())
        failed_ret.data = iter([])
        return failed_ret

    def is_match(creator: Creator):
        """Filter creators with attributes"""
        return (
            (id is None or creator.id == id)
            and (name is None or name in creator.name)
            and (service is None or creator.service == service)
        )

    return ActionRet(data=filter(is_match, ret.data))

search_creator_post(id=None, name=None, service=None, q=None, o=None) async

Search posts from creator with multiple keywords support.

Parameters:

Name Type Description Default
id str

The ID of the creator

None
name str

The name of the creator

None
service str

The service for the creator

None
q str

Search query

None
o str

Result offset, stepping of 50 is enforced

None
Source code in ktoolbox/action/search.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
async def search_creator_post(
        id: str = None,
        name: str = None,
        service: str = None,
        q: str = None,
        o: str = None
) -> BaseRet[List[Post]]:
    """
    Search posts from creator with multiple keywords support.

    At least one of ``id``, ``name`` and ``service`` must be given.

    :param id: The ID of the creator
    :param name: The name of the creator
    :param service: The service for the creator
    :param q: Search query
    :param o: Result offset, stepping of 50 is enforced
    """

    async def run_search(**kwargs):
        # No criterion at all: report the missing parameters
        if not any([id, name, service]):
            return ActionRet(
                code=RetCodeEnum.MissingParameter,
                message=generate_msg(
                    "Missing `id`, `name`, `service` parameter, at least given one of them.",
                    **kwargs
                )
            )

        # Both ``id`` and ``service`` are known: query the creator directly
        if id is not None and service:
            ret = await get_creator_post(
                service=service,
                creator_id=id,
                q=q,
                o=o
            )
            if ret:
                return ActionRet(data=ret.data)
            return ret

        # Otherwise resolve ``id`` and ``service`` through the creator search first
        creators_ret = await search_creator(id=id, name=name, service=service)
        if not creators_ret:
            return ActionRet(**creators_ret.model_dump(mode="python"))

        found_posts: List[Post] = []
        for creator in creators_ret.data:
            ret = await get_creator_post(
                service=creator.service,
                creator_id=creator.id,
                q=q,
                o=o
            )
            if ret:
                found_posts += ret.data
        return ActionRet(data=found_posts)

    return await run_search(id=id, name=name, service=service, q=q, o=o)

utils

TIME_FORMAT = '%Y-%m-%d' module-attribute

__all__ = ['generate_post_path_name', 'generate_filename', 'generate_year_dirname', 'generate_month_dirname', 'generate_grouped_post_path', 'filter_posts_by_date', 'filter_posts_by_indices', 'match_post_keywords', 'filter_posts_by_keywords', 'filter_posts_by_keywords_exclude', 'extract_content_images'] module-attribute

extract_content_images(content)

Extract image sources from HTML content

Parameters:

Name Type Description Default
content str

HTML content string

required

Returns:

Type Description
List[str]

List of image source URLs/paths

Source code in ktoolbox/action/utils.py
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
def extract_content_images(content: str) -> List[str]:
    """
    Collect the image sources found in HTML content

    :param content: HTML content string
    :return: List of image source URLs/paths, empty if there is no content \
    or the content could not be parsed
    """
    sources: List[str] = []
    if content:
        image_parser = _ContentImageParser()
        try:
            image_parser.feed(content)
        except Exception as e:
            # Parsing is best effort: report the problem and return nothing
            logger.warning(f"Failed to parse HTML content for images: {e}")
        else:
            sources = image_parser.image_sources
    return sources

filter_posts_by_date(post_list, start_date, end_date)

Filter posts by publish date range

Parameters:

Name Type Description Default
post_list List[Post]

List of posts

required
start_date Optional[datetime]

Start time of the time range

required
end_date Optional[datetime]

End time of the time range

required
Source code in ktoolbox/action/utils.py
168
169
170
171
172
173
174
175
176
177
178
179
180
181
def filter_posts_by_date(
        post_list: List[Post],
        start_date: Optional[datetime],
        end_date: Optional[datetime]
) -> Generator[Post, Any, Any]:
    """
    Yield the posts that ``_match_post_date`` accepts for the given date range

    :param post_list: List of posts
    :param start_date: Start time of the time range
    :param end_date: End time of the time range
    """
    for post in post_list:
        if _match_post_date(post, start_date, end_date):
            yield post

filter_posts_by_indices(posts, indices)

Compare and filter posts by CreatorIndices data

Only keep posts that were edited after the last download.

Parameters:

Name Type Description Default
posts List[Post]

Posts to filter

required
indices CreatorIndices

CreatorIndices data to use

required

Returns:

Type Description
Tuple[List[Post], CreatorIndices]

An updated List[Post] and a new, updated CreatorIndices instance

Source code in ktoolbox/action/utils.py
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
def filter_posts_by_indices(posts: List[Post], indices: CreatorIndices) -> Tuple[List[Post], CreatorIndices]:
    """
    Compare and filter posts by ``CreatorIndices`` data

    Only keep posts that are not recorded yet or were edited after the last download.
    ``Post.edited`` is optional, so missing timestamps are handled explicitly
    instead of being compared (``None > datetime`` raises ``TypeError``):

    * post without ``edited`` that is already recorded -> treated as unchanged
    * post with ``edited`` whose record has none -> treated as edited

    :param posts: Posts to filter
    :param indices: ``CreatorIndices`` data to use
    :return: An updated ``List[Post]`` and an updated **new** ``CreatorIndices`` instance
    """

    def _needs_download(post: Post) -> bool:
        """Tell whether ``post`` is new or newer than its recorded version."""
        if post.id not in indices.posts:
            return True
        if post.edited is None:
            return False
        recorded_edited = indices.posts[post.id].edited
        return recorded_edited is None or post.edited > recorded_edited

    new_list = [post for post in posts if _needs_download(post)]
    # Work on a deep copy so the caller's indices stay untouched
    new_indices = indices.model_copy(deep=True)
    for post in new_list:
        new_indices.posts[post.id] = post
    return new_list, new_indices

filter_posts_by_keywords(post_list, keywords)

Filter posts by keywords in title

Parameters:

Name Type Description Default
post_list List[Post]

List of posts

required
keywords Optional[Set[str]]

Set of keywords to search for (case-insensitive), None means no filtering

required
Source code in ktoolbox/action/utils.py
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
def filter_posts_by_keywords(
        post_list: List[Post],
        keywords: Optional[Set[str]]
) -> Generator[Post, Any, Any]:
    """
    Lazily yield the posts whose title matches at least one keyword

    :param post_list: List of posts
    :param keywords: Set of keywords to search for (case-insensitive), \
    ``None`` or an empty set means no filtering
    """
    if keywords:
        for post in post_list:
            if match_post_keywords(post, keywords):
                yield post
    else:
        # Nothing to filter on: pass every post through unchanged
        yield from post_list

filter_posts_by_keywords_exclude(post_list, keywords_exclude)

Filter out posts that contain any of the specified keywords in title

Parameters:

Name Type Description Default
post_list List[Post]

List of posts

required
keywords_exclude Optional[Set[str]]

Set of keywords to exclude (case-insensitive), None means no filtering

required
Source code in ktoolbox/action/utils.py
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
def filter_posts_by_keywords_exclude(
        post_list: List[Post],
        keywords_exclude: Optional[Set[str]]
) -> Generator[Post, Any, Any]:
    """
    Lazily yield the posts whose title matches none of the excluded keywords

    :param post_list: List of posts
    :param keywords_exclude: Set of keywords to exclude (case-insensitive), \
    ``None`` or an empty set means no filtering
    """
    if keywords_exclude:
        for post in post_list:
            # Drop the post as soon as any excluded keyword matches
            if not match_post_keywords(post, keywords_exclude):
                yield post
    else:
        yield from post_list

generate_filename(post, basic_name, filename_format)

Generate download filename

Source code in ktoolbox/action/utils.py
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def generate_filename(post: Post, basic_name: str, filename_format: str) -> str:
    """
    Generate download filename

    :param post: Post whose fields are exposed to the format string
    :param basic_name: Original filename, its extension is kept on the result
    :param filename_format: ``str.format`` template; the positional field ``{}`` receives \
    ``basic_name`` without its extension, the keyword fields are ``id``, ``user``, ``service``, \
    ``title``, ``added``, ``published`` and ``edited``
    :return: Sanitized filename with the original extension appended
    """
    basic_name_path = Path(basic_name)
    suffix = basic_name_path.suffix
    if suffix and basic_name.endswith(suffix):
        # Strip only the trailing extension. ``str.replace`` would also delete every
        # earlier occurrence of it (``img.png.png`` -> ``img`` instead of ``img.png``).
        basic_name_filename = basic_name[:-len(suffix)]
    else:
        # No extension (or a name not ending with it): keep the previous behaviour
        basic_name_filename = basic_name.replace(suffix, "")
    try:
        return sanitize_filename(
            filename_format.format(
                basic_name_filename,
                id=post.id,
                user=post.user,
                service=post.service,
                title=post.title,
                added=post.added.strftime(TIME_FORMAT) if post.added else "",
                published=post.published.strftime(TIME_FORMAT) if post.published else "",
                edited=post.edited.strftime(TIME_FORMAT) if post.edited else ""
            ) + suffix
        )
    except KeyError as e:
        # An unknown keyword field in the template is a configuration error: report and quit
        logger.error(
            f"`JobConfiguration.filename_format` or `PostStructureConfiguration.file` contains invalid key: {e}")
        exit(1)

generate_grouped_post_path(post, base_path)

Generate the full path for a post considering year/month grouping.

Parameters:

Name Type Description Default
post Post

Post object

required
base_path Path

Base path (usually creator directory)

required

Returns:

Type Description
Path

Full path where the post should be saved

Source code in ktoolbox/action/utils.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
def generate_grouped_post_path(post: Post, base_path: Path) -> Path:
    """
    Build the directory of a post, honouring the year/month grouping options.

    Month grouping is only applied inside a year directory, so it has no effect
    unless ``config.job.group_by_year`` is enabled as well.

    :param post: Post object
    :param base_path: Base path (usually creator directory)
    :return: Full path where the post should be saved
    """
    if not config.job.group_by_year:
        return base_path

    grouped_path = base_path / generate_year_dirname(post)
    if config.job.group_by_month:
        grouped_path = grouped_path / generate_month_dirname(post)
    return grouped_path

generate_month_dirname(post)

Generate month directory name for post grouping.

Source code in ktoolbox/action/utils.py
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def generate_month_dirname(post: Post) -> str:
    """
    Generate month directory name for post grouping.

    :param post: Post object; its published date is used, falling back to the added date
    :return: Sanitized name built from ``config.job.month_dirname_format``, \
    or ``"unknown"`` when the post has neither date
    """
    reference_date = post.published or post.added
    if not reference_date:
        return "unknown"

    format_fields = {"year": reference_date.year, "month": reference_date.month}
    try:
        return sanitize_filename(config.job.month_dirname_format.format(**format_fields))
    except KeyError as e:
        # Unknown placeholder in the configured format: report and quit
        logger.error(f"`JobConfiguration.month_dirname_format` contains invalid key: {e}")
        exit(1)

generate_post_path_name(post)

Generate directory name for post to save.

Source code in ktoolbox/action/utils.py
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def generate_post_path_name(post: Post) -> str:
    """
    Generate directory name for post to save.

    :param post: Post object
    :return: The raw post ID when the post has no title, otherwise the sanitized \
    result of ``config.job.post_dirname_format``
    """
    if not post.title:
        return post.id

    # Missing timestamps are rendered as empty strings
    format_fields = dict(
        id=post.id,
        user=post.user,
        service=post.service,
        title=post.title,
        added=post.added.strftime(TIME_FORMAT) if post.added else "",
        published=post.published.strftime(TIME_FORMAT) if post.published else "",
        edited=post.edited.strftime(TIME_FORMAT) if post.edited else "",
    )
    try:
        return sanitize_filename(config.job.post_dirname_format.format(**format_fields))
    except KeyError as e:
        # Unknown placeholder in the configured format: report and quit
        logger.error(f"`JobConfiguration.post_dirname_format` contains invalid key: {e}")
        exit(1)

generate_year_dirname(post)

Generate year directory name for post grouping.

Source code in ktoolbox/action/utils.py
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
def generate_year_dirname(post: Post) -> str:
    """
    Generate year directory name for post grouping.

    :param post: Post object; its published date is used, falling back to the added date
    :return: Sanitized name built from ``config.job.year_dirname_format``, \
    or ``"unknown"`` when the post has neither date
    """
    reference_date = post.published or post.added
    if not reference_date:
        return "unknown"

    format_fields = {"year": reference_date.year}
    try:
        return sanitize_filename(config.job.year_dirname_format.format(**format_fields))
    except KeyError as e:
        # Unknown placeholder in the configured format: report and quit
        logger.error(f"`JobConfiguration.year_dirname_format` contains invalid key: {e}")
        exit(1)

match_post_keywords(post, keywords)

Check if the post contains any of the specified keywords.

Parameters:

Name Type Description Default
post Post

Target post object

required
keywords Set[str]

Set of keywords to search for (case-insensitive)

required

Returns:

Type Description
bool

Whether the post contains any of the keywords in title

Source code in ktoolbox/action/utils.py
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
def match_post_keywords(post: Post, keywords: Set[str]) -> bool:
    """
    Tell whether the title of a post contains at least one keyword.

    Matching is a case-insensitive substring search; only the title is inspected.

    :param post: Target post object
    :param keywords: Set of keywords to search for (case-insensitive)
    :return: ``True`` when no keywords are given or any keyword occurs in the title
    """
    if not keywords:
        return True

    # A missing title is searched as an empty string
    lowered_title = (post.title or "").lower()
    for keyword in keywords:
        if keyword.lower() in lowered_title:
            return True
    return False

api

  • Kemono API version: 1.0.0

  • current App commit hash: 7ee4a7b18ee92a442c13950c05dc8236cfb14a60

APIRet

Bases: BaseRet[_T]

Return data model of API call

Source code in ktoolbox/api/base.py
62
63
64
class APIRet(BaseRet[_T]):
    """Return data model of API call, adds nothing to ``BaseRet``"""

APITenacityStop

Bases: stop_base

APIs Stop strategies

Source code in ktoolbox/api/base.py
21
22
23
24
25
26
27
28
class APITenacityStop(stop_base):
    """Stop strategy for API retries, driven by ``config.api.retry_times``"""

    def __call__(self, retry_state: RetryCallState) -> bool:
        """
        Decide whether retrying should stop

        :param retry_state: State of the current retry call
        :return: Result of ``stop_never`` when no retry limit is configured, \
        otherwise the result of ``stop_after_attempt`` with the configured limit
        """
        retry_limit = config.api.retry_times
        if retry_limit is None:
            return stop_never(retry_state)
        return stop_after_attempt(retry_limit)(retry_state)

__call__(retry_state)

Source code in ktoolbox/api/base.py
24
25
26
27
28
def __call__(self, retry_state: RetryCallState) -> bool:
    """
    Decide whether retrying should stop

    :param retry_state: State of the current retry call
    :return: Result of ``stop_never`` when no retry limit is configured, \
    otherwise the result of ``stop_after_attempt`` with the configured limit
    """
    retry_limit = config.api.retry_times
    if retry_limit is None:
        return stop_never(retry_state)
    return stop_after_attempt(retry_limit)(retry_state)

BaseAPI

Bases: ABC, Generic[_T]

Source code in ktoolbox/api/base.py
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
class BaseAPI(ABC, Generic[_T]):
    """
    Abstract base of the API endpoint wrappers

    Subclasses set ``path`` / ``method`` / ``Response`` and implement ``__call__``.
    """
    path: str = "/"
    method: Literal["get", "post"]
    extra_validator: Optional[Callable[[str], BaseModel]] = None
    # Created once at class-definition time from ``config`` and shared through ``cls.client``
    client = httpx.AsyncClient(
        verify=config.ssl_verify,
        headers={"Accept": "text/css"},
        cookies={"session": config.api.session_key} if config.api.session_key else None
    )

    Response = BaseModel
    """API response model"""

    @classmethod
    def handle_res(cls, res: httpx.Response) -> APIRet[_T]:
        """
        Handle API response

        :param res: Raw HTTP response
        :return: ``APIRet`` holding the validated data (the ``root`` value for a ``RootModel``), \
        or an error ``APIRet`` with code ``JsonDecodeError`` / ``ValidationError``
        """
        try:
            if cls.extra_validator:
                res_model = cls.extra_validator(res.text)
            else:
                res_model = cls.Response.model_validate_json(res.text)
        except (ValueError, ValidationError) as e:
            # pydantic v2's ``ValidationError`` subclasses ``ValueError``, so it has to be
            # tested first; malformed JSON is reported by pydantic as a ``json_invalid`` error.
            if isinstance(e, ValidationError) and not any(
                    error.get("type") == "json_invalid" for error in e.errors()
            ):
                code = RetCodeEnum.ValidationError
            else:
                code = RetCodeEnum.JsonDecodeError
            return APIRet(
                code=code,
                message=generate_msg(url=res.url, status_code=res.status_code, response=res.text),
                exception=e
            )
        else:
            data = res_model.root if isinstance(res_model, RootModel) else res_model
            return APIRet(data=data)

    @classmethod
    @_retry
    async def request(cls, path: Optional[str] = None, **kwargs) -> APIRet[_T]:
        """
        Make a request to the API

        :param path: Fully initialized URL path, ``cls.path`` is used when omitted
        :param kwargs: Keyword arguments of ``httpx._client.AsyncClient.request``
        :return: Result of ``handle_res``, or an ``APIRet`` with code ``NetWorkError`` \
        when the request raises
        """
        if path is None:
            path = cls.path
        url_parts = [config.api.scheme, config.api.netloc, f"{config.api.path}{path}", '', '', '']
        url = str(urlunparse(url_parts))
        try:
            res = await cls.client.request(
                method=cls.method,
                url=url,
                timeout=config.api.timeout,
                follow_redirects=True,
                **kwargs
            )
        except Exception as e:
            # Any failure while requesting is turned into an error result instead of propagating
            return APIRet(
                code=RetCodeEnum.NetWorkError,
                message=generate_msg(url=url),
                exception=e
            )
        else:
            return cls.handle_res(res)

    @classmethod
    @abstractmethod
    async def __call__(cls, *args, **kwargs) -> APIRet[Response]:
        """Function to call API"""
        ...

Response = BaseModel class-attribute instance-attribute

API response model

client = httpx.AsyncClient(verify=config.ssl_verify, headers={'Accept': 'text/css'}, cookies={'session': config.api.session_key} if config.api.session_key else None) class-attribute instance-attribute

extra_validator: Optional[Callable[[str], BaseModel]] = None class-attribute instance-attribute

method: Literal['get', 'post'] instance-attribute

path: str = '/' class-attribute instance-attribute

__call__(*args, **kwargs) abstractmethod async classmethod

Function to call API

Source code in ktoolbox/api/base.py
127
128
129
130
131
@classmethod
@abstractmethod
async def __call__(cls, *args, **kwargs) -> APIRet[Response]:
    """
    Function to call API

    Abstract: every endpoint class provides its own implementation.
    """

handle_res(res) classmethod

Handle API response

Source code in ktoolbox/api/base.py
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
@classmethod
def handle_res(cls, res: httpx.Response) -> APIRet[_T]:
    """
    Handle API response

    :param res: Raw HTTP response
    :return: ``APIRet`` holding the validated data (the ``root`` value for a ``RootModel``), \
    or an error ``APIRet`` with code ``JsonDecodeError`` / ``ValidationError``
    """
    try:
        if cls.extra_validator:
            res_model = cls.extra_validator(res.text)
        else:
            res_model = cls.Response.model_validate_json(res.text)
    except (ValueError, ValidationError) as e:
        # pydantic v2's ``ValidationError`` subclasses ``ValueError``, so it has to be
        # tested first; malformed JSON is reported by pydantic as a ``json_invalid`` error.
        if isinstance(e, ValidationError) and not any(
                error.get("type") == "json_invalid" for error in e.errors()
        ):
            code = RetCodeEnum.ValidationError
        else:
            code = RetCodeEnum.JsonDecodeError
        return APIRet(
            code=code,
            message=generate_msg(url=res.url, status_code=res.status_code, response=res.text),
            exception=e
        )
    else:
        data = res_model.root if isinstance(res_model, RootModel) else res_model
        return APIRet(data=data)

request(path=None, **kwargs) async classmethod

Make a request to the API

Parameters:

Name Type Description Default
path str

Fully initialized URL path

None
kwargs

Keyword arguments of httpx._client.AsyncClient.request

{}
Source code in ktoolbox/api/base.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
@classmethod
@_retry
async def request(cls, path: str = None, **kwargs) -> APIRet[_T]:
    """
    Make a request to the API

    :param path: Fully initialized URL path, ``cls.path`` is used when omitted
    :param kwargs: Keyword arguments of ``httpx._client.AsyncClient.request``
    :return: Result of ``handle_res``, or an ``APIRet`` with code ``NetWorkError`` \
    when the request raises
    """
    path = cls.path if path is None else path
    url = str(urlunparse(
        (config.api.scheme, config.api.netloc, f"{config.api.path}{path}", '', '', '')
    ))
    try:
        res = await cls.client.request(
            method=cls.method,
            url=url,
            timeout=config.api.timeout,
            follow_redirects=True,
            **kwargs
        )
    except Exception as e:
        # Any failure while requesting is turned into an error result instead of propagating
        return APIRet(code=RetCodeEnum.NetWorkError, message=generate_msg(url=url), exception=e)
    return cls.handle_res(res)

base

__all__ = ['APITenacityStop', 'APIRet', 'BaseAPI'] module-attribute

APIRet

Bases: BaseRet[_T]

Return data model of API call

Source code in ktoolbox/api/base.py
62
63
64
class APIRet(BaseRet[_T]):
    """Return data model of API call, adds nothing to ``BaseRet``"""

APITenacityStop

Bases: stop_base

APIs Stop strategies

Source code in ktoolbox/api/base.py
21
22
23
24
25
26
27
28
class APITenacityStop(stop_base):
    """Stop strategy for API retries, driven by ``config.api.retry_times``"""

    def __call__(self, retry_state: RetryCallState) -> bool:
        """
        Decide whether retrying should stop

        :param retry_state: State of the current retry call
        :return: Result of ``stop_never`` when no retry limit is configured, \
        otherwise the result of ``stop_after_attempt`` with the configured limit
        """
        retry_limit = config.api.retry_times
        if retry_limit is None:
            return stop_never(retry_state)
        return stop_after_attempt(retry_limit)(retry_state)
__call__(retry_state)
Source code in ktoolbox/api/base.py
24
25
26
27
28
def __call__(self, retry_state: RetryCallState) -> bool:
    """
    Decide whether retrying should stop

    :param retry_state: State of the current retry call
    :return: Result of ``stop_never`` when no retry limit is configured, \
    otherwise the result of ``stop_after_attempt`` with the configured limit
    """
    retry_limit = config.api.retry_times
    if retry_limit is None:
        return stop_never(retry_state)
    return stop_after_attempt(retry_limit)(retry_state)

BaseAPI

Bases: ABC, Generic[_T]

Source code in ktoolbox/api/base.py
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
class BaseAPI(ABC, Generic[_T]):
    """
    Abstract base of the API endpoint wrappers

    Subclasses set ``path`` / ``method`` / ``Response`` and implement ``__call__``.
    """
    path: str = "/"
    method: Literal["get", "post"]
    extra_validator: Optional[Callable[[str], BaseModel]] = None
    # Created once at class-definition time from ``config`` and shared through ``cls.client``
    client = httpx.AsyncClient(
        verify=config.ssl_verify,
        headers={"Accept": "text/css"},
        cookies={"session": config.api.session_key} if config.api.session_key else None
    )

    Response = BaseModel
    """API response model"""

    @classmethod
    def handle_res(cls, res: httpx.Response) -> APIRet[_T]:
        """
        Handle API response

        :param res: Raw HTTP response
        :return: ``APIRet`` holding the validated data (the ``root`` value for a ``RootModel``), \
        or an error ``APIRet`` with code ``JsonDecodeError`` / ``ValidationError``
        """
        try:
            if cls.extra_validator:
                res_model = cls.extra_validator(res.text)
            else:
                res_model = cls.Response.model_validate_json(res.text)
        except (ValueError, ValidationError) as e:
            # pydantic v2's ``ValidationError`` subclasses ``ValueError``, so it has to be
            # tested first; malformed JSON is reported by pydantic as a ``json_invalid`` error.
            if isinstance(e, ValidationError) and not any(
                    error.get("type") == "json_invalid" for error in e.errors()
            ):
                code = RetCodeEnum.ValidationError
            else:
                code = RetCodeEnum.JsonDecodeError
            return APIRet(
                code=code,
                message=generate_msg(url=res.url, status_code=res.status_code, response=res.text),
                exception=e
            )
        else:
            data = res_model.root if isinstance(res_model, RootModel) else res_model
            return APIRet(data=data)

    @classmethod
    @_retry
    async def request(cls, path: Optional[str] = None, **kwargs) -> APIRet[_T]:
        """
        Make a request to the API

        :param path: Fully initialized URL path, ``cls.path`` is used when omitted
        :param kwargs: Keyword arguments of ``httpx._client.AsyncClient.request``
        :return: Result of ``handle_res``, or an ``APIRet`` with code ``NetWorkError`` \
        when the request raises
        """
        if path is None:
            path = cls.path
        url_parts = [config.api.scheme, config.api.netloc, f"{config.api.path}{path}", '', '', '']
        url = str(urlunparse(url_parts))
        try:
            res = await cls.client.request(
                method=cls.method,
                url=url,
                timeout=config.api.timeout,
                follow_redirects=True,
                **kwargs
            )
        except Exception as e:
            # Any failure while requesting is turned into an error result instead of propagating
            return APIRet(
                code=RetCodeEnum.NetWorkError,
                message=generate_msg(url=url),
                exception=e
            )
        else:
            return cls.handle_res(res)

    @classmethod
    @abstractmethod
    async def __call__(cls, *args, **kwargs) -> APIRet[Response]:
        """Function to call API"""
        ...
Response = BaseModel class-attribute instance-attribute

API response model

client = httpx.AsyncClient(verify=config.ssl_verify, headers={'Accept': 'text/css'}, cookies={'session': config.api.session_key} if config.api.session_key else None) class-attribute instance-attribute
extra_validator: Optional[Callable[[str], BaseModel]] = None class-attribute instance-attribute
method: Literal['get', 'post'] instance-attribute
path: str = '/' class-attribute instance-attribute
__call__(*args, **kwargs) abstractmethod async classmethod

Function to call API

Source code in ktoolbox/api/base.py
127
128
129
130
131
@classmethod
@abstractmethod
async def __call__(cls, *args, **kwargs) -> APIRet[Response]:
    """
    Function to call API

    Abstract: every endpoint class provides its own implementation.
    """
handle_res(res) classmethod

Handle API response

Source code in ktoolbox/api/base.py
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
@classmethod
def handle_res(cls, res: httpx.Response) -> APIRet[_T]:
    """
    Handle API response

    :param res: Raw HTTP response
    :return: ``APIRet`` holding the validated data (the ``root`` value for a ``RootModel``), \
    or an error ``APIRet`` with code ``JsonDecodeError`` / ``ValidationError``
    """
    try:
        if cls.extra_validator:
            res_model = cls.extra_validator(res.text)
        else:
            res_model = cls.Response.model_validate_json(res.text)
    except (ValueError, ValidationError) as e:
        # pydantic v2's ``ValidationError`` subclasses ``ValueError``, so it has to be
        # tested first; malformed JSON is reported by pydantic as a ``json_invalid`` error.
        if isinstance(e, ValidationError) and not any(
                error.get("type") == "json_invalid" for error in e.errors()
        ):
            code = RetCodeEnum.ValidationError
        else:
            code = RetCodeEnum.JsonDecodeError
        return APIRet(
            code=code,
            message=generate_msg(url=res.url, status_code=res.status_code, response=res.text),
            exception=e
        )
    else:
        data = res_model.root if isinstance(res_model, RootModel) else res_model
        return APIRet(data=data)
request(path=None, **kwargs) async classmethod

Make a request to the API

Parameters:

Name Type Description Default
path str

Fully initialized URL path

None
kwargs

Keyword arguments of httpx._client.AsyncClient.request

{}
Source code in ktoolbox/api/base.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
@classmethod
@_retry
async def request(cls, path: str = None, **kwargs) -> APIRet[_T]:
    """
    Make a request to the API

    :param path: Fully initialized URL path, ``cls.path`` is used when omitted
    :param kwargs: Keyword arguments of ``httpx._client.AsyncClient.request``
    :return: Result of ``handle_res``, or an ``APIRet`` with code ``NetWorkError`` \
    when the request raises
    """
    path = cls.path if path is None else path
    url = str(urlunparse(
        (config.api.scheme, config.api.netloc, f"{config.api.path}{path}", '', '', '')
    ))
    try:
        res = await cls.client.request(
            method=cls.method,
            url=url,
            timeout=config.api.timeout,
            follow_redirects=True,
            **kwargs
        )
    except Exception as e:
        # Any failure while requesting is turned into an error result instead of propagating
        return APIRet(code=RetCodeEnum.NetWorkError, message=generate_msg(url=url), exception=e)
    return cls.handle_res(res)

misc

get_app_version = GetAppVersion.__call__ module-attribute

Show current App commit hash

GetAppVersion

Bases: BaseAPI

Source code in ktoolbox/api/misc/get_app_version.py
 8
 9
10
11
12
13
14
15
16
17
18
19
class GetAppVersion(BaseAPI):
    """Endpoint wrapper for ``/app_version``"""
    path = "/app_version"
    method = "get"

    class Response(RootModel[str]):
        root: str

    # The response text is validated with ``model_validate_strings`` instead of being parsed as JSON
    extra_validator = Response.model_validate_strings

    @classmethod
    async def __call__(cls) -> APIRet[str]:
        """Request the endpoint and return the result of ``cls.request``"""
        result = await cls.request()
        return result
extra_validator = Response.model_validate_strings class-attribute instance-attribute
method = 'get' class-attribute instance-attribute
path = '/app_version' class-attribute instance-attribute
Response

Bases: RootModel[str]

Source code in ktoolbox/api/misc/get_app_version.py
12
13
class Response(RootModel[str]):
    """Root model whose payload is a single ``str``"""
    root: str
root: str instance-attribute
__call__() async classmethod
Source code in ktoolbox/api/misc/get_app_version.py
17
18
19
@classmethod
async def __call__(cls) -> APIRet[str]:
    """Request the endpoint and return the result of ``cls.request``"""
    result = await cls.request()
    return result

model

Announcement

Bases: BaseModel

Source code in ktoolbox/api/model/announcement.py
 9
10
11
12
13
14
15
16
17
class Announcement(BaseModel):
    """Data model of a creator announcement; every field is optional and defaults to ``None``"""
    service: Optional[str] = None
    user_id: Optional[str] = None
    hash: Optional[str] = None
    """sha256"""
    content: Optional[str] = None
    added: Optional[datetime] = None
    # noinspection SpellCheckingInspection
    """isoformat UTC"""
added: Optional[datetime] = None class-attribute instance-attribute

isoformat UTC

content: Optional[str] = None class-attribute instance-attribute
hash: Optional[str] = None class-attribute instance-attribute

sha256

service: Optional[str] = None class-attribute instance-attribute
user_id: Optional[str] = None class-attribute instance-attribute

Attachment

Bases: BaseModel

Source code in ktoolbox/api/model/post.py
16
17
18
class Attachment(BaseModel):
    """Attachment entry of a post (element type of ``Post.attachments``); both fields are optional"""
    name: Optional[str] = None
    path: Optional[str] = None
name: Optional[str] = None class-attribute instance-attribute
path: Optional[str] = None class-attribute instance-attribute

Creator

Bases: BaseModel

Source code in ktoolbox/api/model/creator.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
class Creator(BaseModel):
    # Data model of a creator; unlike the post models, every field is required.
    # NOTE(review): ``indexed`` / ``updated`` are annotated ``datetime`` while their
    # descriptions say "Unix time as integer" — presumably converted on validation; confirm.
    # noinspection SpellCheckingInspection
    favorited: int
    # noinspection SpellCheckingInspection
    """The number of times this creator has been favorited"""
    id: str
    """The ID of the creator"""
    indexed: datetime
    """Timestamp when the creator was indexed, Unix time as integer"""
    name: str
    """The name of the creator"""
    service: str
    """The service for the creator"""
    updated: datetime
    """Timestamp when the creator was last updated, Unix time as integer"""
favorited: int instance-attribute

The number of times this creator has been favorited

id: str instance-attribute

The ID of the creator

indexed: datetime instance-attribute

Timestamp when the creator was indexed, Unix time as integer

name: str instance-attribute

The name of the creator

service: str instance-attribute

The service for the creator

updated: datetime instance-attribute

Timestamp when the creator was last updated, Unix time as integer

File

Bases: BaseModel

Source code in ktoolbox/api/model/post.py
11
12
13
class File(BaseModel):
    """File entry of a post (type of ``Post.file``); both fields are optional"""
    name: Optional[str] = None
    path: Optional[str] = None
name: Optional[str] = None class-attribute instance-attribute
path: Optional[str] = None class-attribute instance-attribute

Post

Bases: BaseModel

Source code in ktoolbox/api/model/post.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
class Post(BaseModel):
    """Data model of a post; every field is optional and defaults to ``None``"""
    id: Optional[str] = None
    user: Optional[str] = None
    service: Optional[str] = None
    title: Optional[str] = None
    content: Optional[str] = None
    substring: Optional[str] = None
    embed: Optional[Dict[str, Any]] = None
    shared_file: Optional[bool] = None
    # Timestamps: any of them may be missing, so check for ``None`` before comparing or formatting
    added: Optional[datetime] = None
    published: Optional[datetime] = None
    edited: Optional[datetime] = None
    file: Optional[File] = None
    attachments: Optional[List[Attachment]] = None
added: Optional[datetime] = None class-attribute instance-attribute
attachments: Optional[List[Attachment]] = None class-attribute instance-attribute
content: Optional[str] = None class-attribute instance-attribute
edited: Optional[datetime] = None class-attribute instance-attribute
embed: Optional[Dict[str, Any]] = None class-attribute instance-attribute
file: Optional[File] = None class-attribute instance-attribute
id: Optional[str] = None class-attribute instance-attribute
published: Optional[datetime] = None class-attribute instance-attribute
service: Optional[str] = None class-attribute instance-attribute
shared_file: Optional[bool] = None class-attribute instance-attribute
substring: Optional[str] = None class-attribute instance-attribute
title: Optional[str] = None class-attribute instance-attribute
user: Optional[str] = None class-attribute instance-attribute

Revision

Bases: Post

Revision model that extends Post with revision_id field

Source code in ktoolbox/api/model/post.py
37
38
39
class Revision(Post):
    """Revision model that extends Post with revision_id field"""
    # Optional like the inherited ``Post`` fields; defaults to ``None``
    revision_id: Optional[int] = None
revision_id: Optional[int] = None class-attribute instance-attribute

announcement

__all__ = ['Announcement'] module-attribute
Announcement

Bases: BaseModel

Source code in ktoolbox/api/model/announcement.py
 9
10
11
12
13
14
15
16
17
class Announcement(BaseModel):
    """Data model of a creator announcement; every field is optional and defaults to ``None``"""
    service: Optional[str] = None
    user_id: Optional[str] = None
    hash: Optional[str] = None
    """sha256"""
    content: Optional[str] = None
    added: Optional[datetime] = None
    # noinspection SpellCheckingInspection
    """isoformat UTC"""
added: Optional[datetime] = None class-attribute instance-attribute

isoformat UTC

content: Optional[str] = None class-attribute instance-attribute
hash: Optional[str] = None class-attribute instance-attribute

sha256

service: Optional[str] = None class-attribute instance-attribute
user_id: Optional[str] = None class-attribute instance-attribute

creator

__all__ = ['Creator'] module-attribute
Creator

Bases: BaseModel

Source code in ktoolbox/api/model/creator.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
class Creator(BaseModel):
    # Data model of a creator; unlike the post models, every field is required.
    # NOTE(review): ``indexed`` / ``updated`` are annotated ``datetime`` while their
    # descriptions say "Unix time as integer" — presumably converted on validation; confirm.
    # noinspection SpellCheckingInspection
    favorited: int
    # noinspection SpellCheckingInspection
    """The number of times this creator has been favorited"""
    id: str
    """The ID of the creator"""
    indexed: datetime
    """Timestamp when the creator was indexed, Unix time as integer"""
    name: str
    """The name of the creator"""
    service: str
    """The service for the creator"""
    updated: datetime
    """Timestamp when the creator was last updated, Unix time as integer"""
favorited: int instance-attribute

The number of times this creator has been favorited

id: str instance-attribute

The ID of the creator

indexed: datetime instance-attribute

Timestamp when the creator was indexed, Unix time as integer

name: str instance-attribute

The name of the creator

service: str instance-attribute

The service for the creator

updated: datetime instance-attribute

Timestamp when the creator was last updated, Unix time as integer

post

__all__ = ['File', 'Attachment', 'Post', 'Revision'] module-attribute
Attachment

Bases: BaseModel

Source code in ktoolbox/api/model/post.py
16
17
18
class Attachment(BaseModel):
    """Attachment entry of a post (element type of ``Post.attachments``); both fields are optional"""
    name: Optional[str] = None
    path: Optional[str] = None
name: Optional[str] = None class-attribute instance-attribute
path: Optional[str] = None class-attribute instance-attribute
File

Bases: BaseModel

Source code in ktoolbox/api/model/post.py
11
12
13
class File(BaseModel):
    """File entry of a post (type of ``Post.file``); both fields are optional"""
    name: Optional[str] = None
    path: Optional[str] = None
name: Optional[str] = None class-attribute instance-attribute
path: Optional[str] = None class-attribute instance-attribute
Post

Bases: BaseModel

Source code in ktoolbox/api/model/post.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
class Post(BaseModel):
    """Data model of a post; every field is optional and defaults to ``None``"""
    id: Optional[str] = None
    user: Optional[str] = None
    service: Optional[str] = None
    title: Optional[str] = None
    content: Optional[str] = None
    substring: Optional[str] = None
    embed: Optional[Dict[str, Any]] = None
    shared_file: Optional[bool] = None
    # Timestamps: any of them may be missing, so check for ``None`` before comparing or formatting
    added: Optional[datetime] = None
    published: Optional[datetime] = None
    edited: Optional[datetime] = None
    file: Optional[File] = None
    attachments: Optional[List[Attachment]] = None
added: Optional[datetime] = None class-attribute instance-attribute
attachments: Optional[List[Attachment]] = None class-attribute instance-attribute
content: Optional[str] = None class-attribute instance-attribute
edited: Optional[datetime] = None class-attribute instance-attribute
embed: Optional[Dict[str, Any]] = None class-attribute instance-attribute
file: Optional[File] = None class-attribute instance-attribute
id: Optional[str] = None class-attribute instance-attribute
published: Optional[datetime] = None class-attribute instance-attribute
service: Optional[str] = None class-attribute instance-attribute
shared_file: Optional[bool] = None class-attribute instance-attribute
substring: Optional[str] = None class-attribute instance-attribute
title: Optional[str] = None class-attribute instance-attribute
user: Optional[str] = None class-attribute instance-attribute
Revision

Bases: Post

Revision model that extends Post with revision_id field

Source code in ktoolbox/api/model/post.py
37
38
39
class Revision(Post):
    """Revision model that extends Post with revision_id field"""
    # Optional like the inherited ``Post`` fields; defaults to ``None``
    revision_id: Optional[int] = None
revision_id: Optional[int] = None class-attribute instance-attribute

posts

get_announcement = GetAnnouncement.__call__ module-attribute

get_creator_post = GetCreatorPost.__call__ module-attribute

get_creators = GetCreators.__call__ module-attribute

get_post = GetPost.__call__ module-attribute

get_post_revisions = GetPostRevisions.__call__ module-attribute

GetAnnouncement

Bases: BaseAPI

Source code in ktoolbox/api/posts/get_announcement.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
class GetAnnouncement(BaseAPI):
    """Endpoint wrapper for the announcements of a creator"""
    path = "/{service}/user/{creator_id}/announcements"
    method = "get"

    class Response(RootModel[List[Announcement]]):
        root: List[Announcement]

    @classmethod
    async def __call__(cls, service: str, creator_id: str) -> APIRet[List[Announcement]]:
        """
        Get creator announcements

        :param service: The service name
        :param creator_id: The creator's ID
        :return: Result of ``cls.request`` for the formatted path
        """
        endpoint = cls.path.format(service=service, creator_id=creator_id)
        return await cls.request(path=endpoint)
method = 'get' class-attribute instance-attribute
path = '/{service}/user/{creator_id}/announcements' class-attribute instance-attribute
Response

Bases: RootModel[List[Announcement]]

Source code in ktoolbox/api/posts/get_announcement.py
15
16
class Response(RootModel[List[Announcement]]):
    # The root value is a list of Announcement items.
    root: List[Announcement]
root: List[Announcement] instance-attribute
__call__(service, creator_id) async classmethod

Get creator announcements

Parameters:

Name Type Description Default
service str

The service name

required
creator_id str

The creator's ID

required
Source code in ktoolbox/api/posts/get_announcement.py
18
19
20
21
22
23
24
25
26
@classmethod
async def __call__(cls, service: str, creator_id: str) -> APIRet[List[Announcement]]:
    """
    Fetch the announcements published by a creator

    :param service: The service name
    :param creator_id: The creator's ID
    """
    # Fill in the URL template first, then hand the finished path to the request helper.
    endpoint = cls.path.format(service=service, creator_id=creator_id)
    return await cls.request(path=endpoint)

GetCreatorPost

Bases: BaseAPI

Source code in ktoolbox/api/posts/get_creator_post.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
class GetCreatorPost(BaseAPI):
    # Endpoint wrapper that lists the posts of a single creator.
    path = "/{service}/user/{creator_id}/posts"
    method = "get"

    class Response(RootModel[List[Post]]):
        # The root value is a list of Post items.
        root: List[Post]

    @classmethod
    async def __call__(cls, service: str, creator_id: str, *, q: str = None, o: int = None) -> APIRet[List[Post]]:
        """
        List the posts of a creator

        :param service: The service where the post is located
        :param creator_id: The ID of the creator
        :param q: Search query
        :param o: Result offset, stepping of 50 is enforced
        """
        endpoint = cls.path.format(service=service, creator_id=creator_id)
        # Both query values are always handed over, even when they are left as ``None``.
        query = {"q": q, "o": o}
        return await cls.request(path=endpoint, params=query)

    @classmethod
    def handle_res(cls, res: httpx.Response) -> APIRet[_T]:
        """Map HTTP 404 to an empty result list; defer every other response to the base class."""
        if res.status_code == 404:
            return APIRet(data=[])
        return super().handle_res(res)
method = 'get' class-attribute instance-attribute
path = '/{service}/user/{creator_id}/posts' class-attribute instance-attribute
Response

Bases: RootModel[List[Post]]

Source code in ktoolbox/api/posts/get_creator_post.py
18
19
class Response(RootModel[List[Post]]):
    # The root value is a list of Post items.
    root: List[Post]
root: List[Post] instance-attribute
__call__(service, creator_id, *, q=None, o=None) async classmethod

Get a list of creator posts

Parameters:

Name Type Description Default
service str

The service where the post is located

required
creator_id str

The ID of the creator

required
q str

Search query

None
o int

Result offset, stepping of 50 is enforced

None
Source code in ktoolbox/api/posts/get_creator_post.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
@classmethod
async def __call__(cls, service: str, creator_id: str, *, q: str = None, o: int = None) -> APIRet[List[Post]]:
    """
    List the posts of a creator

    :param service: The service where the post is located
    :param creator_id: The ID of the creator
    :param q: Search query
    :param o: Result offset, stepping of 50 is enforced
    """
    endpoint = cls.path.format(service=service, creator_id=creator_id)
    # Both query values are always handed over, even when they are left as ``None``.
    query = {"q": q, "o": o}
    return await cls.request(path=endpoint, params=query)
handle_res(res) classmethod
Source code in ktoolbox/api/posts/get_creator_post.py
39
40
41
@classmethod
def handle_res(cls, res: httpx.Response) -> APIRet[_T]:
    """Map HTTP 404 to an empty result list; defer every other response to the base class."""
    if res.status_code == 404:
        return APIRet(data=[])
    return super().handle_res(res)

GetCreators

Bases: BaseAPI

List All Creators

Source code in ktoolbox/api/posts/get_creators.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
class GetCreators(BaseAPI):
    """List All Creators"""
    path = "/creators"
    method = "get"

    class Response(RootModel[List[Creator]]):
        # The root value is a list of Creator items.
        root: List[Creator]

    @classmethod
    async def __call__(cls) -> APIRet[List[Creator]]:
        """
        Fetch the list of all creators

        The request takes no parameters and uses the class defaults.
        """
        creators = await cls.request()
        return creators
method = 'get' class-attribute instance-attribute
path = '/creators' class-attribute instance-attribute
Response

Bases: RootModel[List[Creator]]

Source code in ktoolbox/api/posts/get_creators.py
16
17
class Response(RootModel[List[Creator]]):
    # The root value is a list of Creator items.
    root: List[Creator]
root: List[Creator] instance-attribute
__call__() async classmethod

List of all creators

List all creators with details. I blame DDG for .txt.

Source code in ktoolbox/api/posts/get_creators.py
19
20
21
22
23
24
25
26
@classmethod
async def __call__(cls) -> APIRet[List[Creator]]:
    """
    Fetch the list of all creators

    The request takes no parameters and uses the class defaults.
    """
    creators = await cls.request()
    return creators

GetPost

Bases: BaseAPI

Source code in ktoolbox/api/posts/get_post.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
class GetPost(BaseAPI):
    # Endpoint wrapper that fetches one post, or one revision of a post.
    path = "/{service}/user/{creator_id}/post/{post_id}"
    method = "get"

    class Response(BaseModel):
        post: Post
        props: Optional[PostProps] = None

    @classmethod
    async def __call__(cls, service: str, creator_id: str, post_id: str, revision_id: Optional[str] = None) -> APIRet[Response]:
        """
        Fetch a single post, or a single revision of it

        :param service: The service name
        :param creator_id: The creator's ID
        :param post_id: The post ID
        :param revision_id: The revision ID (optional, for revision posts)
        """
        # A truthy revision ID selects the revision endpoint; otherwise the class template is used.
        endpoint = (
            f"/{service}/user/{creator_id}/post/{post_id}/revision/{revision_id}"
            if revision_id
            else cls.path.format(service=service, creator_id=creator_id, post_id=post_id)
        )
        return await cls.request(path=endpoint)
method = 'get' class-attribute instance-attribute
path = '/{service}/user/{creator_id}/post/{post_id}' class-attribute instance-attribute
Response

Bases: BaseModel

Source code in ktoolbox/api/posts/get_post.py
20
21
22
class Response(BaseModel):
    # The post object carried by the response (required).
    post: Post
    # Additional properties of the post; optional, defaults to ``None``.
    props: Optional[PostProps] = None
post: Post instance-attribute
props: Optional[PostProps] = None class-attribute instance-attribute
__call__(service, creator_id, post_id, revision_id=None) async classmethod

Get a specific post or revision

Parameters:

Name Type Description Default
service str

The service name

required
creator_id str

The creator's ID

required
post_id str

The post ID

required
revision_id Optional[str]

The revision ID (optional, for revision posts)

None
Source code in ktoolbox/api/posts/get_post.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
@classmethod
async def __call__(cls, service: str, creator_id: str, post_id: str, revision_id: Optional[str] = None) -> APIRet[Response]:
    """
    Fetch a single post, or a single revision of it

    :param service: The service name
    :param creator_id: The creator's ID
    :param post_id: The post ID
    :param revision_id: The revision ID (optional, for revision posts)
    """
    # A truthy revision ID selects the revision endpoint; otherwise the class template is used.
    endpoint = (
        f"/{service}/user/{creator_id}/post/{post_id}/revision/{revision_id}"
        if revision_id
        else cls.path.format(service=service, creator_id=creator_id, post_id=post_id)
    )
    return await cls.request(path=endpoint)

GetPostRevisions

Bases: BaseAPI

Source code in ktoolbox/api/posts/get_post_revisions.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
class GetPostRevisions(BaseAPI):
    # Endpoint wrapper that lists every revision of a single post.
    path = "/{service}/user/{creator_id}/post/{post_id}/revisions"
    method = "get"

    # NOTE(review): sibling endpoints parametrize the base (``RootModel[List[...]]``) and annotate
    # the return as ``APIRet[List[...]]``; this class does neither - confirm whether that is intended.
    class Response(RootModel):
        root: List[Revision]

    @classmethod
    async def __call__(cls, service: str, creator_id: str, post_id: str) -> APIRet[Response]:
        """
        Fetch every revision of one post

        :param service: The service name
        :param creator_id: The creator's ID
        :param post_id: The post ID
        """
        endpoint = cls.path.format(service=service, creator_id=creator_id, post_id=post_id)
        return await cls.request(path=endpoint)

    @classmethod
    def handle_res(cls, res: httpx.Response) -> APIRet[_T]:
        """Map HTTP 404 to an empty result list; defer every other response to the base class."""
        if res.status_code == 404:
            return APIRet(data=[])
        return super().handle_res(res)
method = 'get' class-attribute instance-attribute
path = '/{service}/user/{creator_id}/post/{post_id}/revisions' class-attribute instance-attribute
Response

Bases: RootModel

Source code in ktoolbox/api/posts/get_post_revisions.py
18
19
class Response(RootModel):
    # The root value is a list of Revision items.
    root: List[Revision]
root: List[Revision] instance-attribute
__call__(service, creator_id, post_id) async classmethod

Get all revisions of a specific post

Parameters:

Name Type Description Default
service str

The service name

required
creator_id str

The creator's ID

required
post_id str

The post ID

required
Source code in ktoolbox/api/posts/get_post_revisions.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
@classmethod
async def __call__(cls, service: str, creator_id: str, post_id: str) -> APIRet[Response]:
    """
    Fetch every revision of one post

    :param service: The service name
    :param creator_id: The creator's ID
    :param post_id: The post ID
    """
    endpoint = cls.path.format(service=service, creator_id=creator_id, post_id=post_id)
    return await cls.request(path=endpoint)
handle_res(res) classmethod
Source code in ktoolbox/api/posts/get_post_revisions.py
38
39
40
@classmethod
def handle_res(cls, res: httpx.Response) -> APIRet[_T]:
    """Map HTTP 404 to an empty result list; defer every other response to the base class."""
    if res.status_code == 404:
        return APIRet(data=[])
    return super().handle_res(res)

utils

SEARCH_STEP = 50 module-attribute

Step size of the result offset used by the search APIs

__all__ = ['SEARCH_STEP', 'get_creator_icon', 'get_creator_banner'] module-attribute

get_creator_banner(creator_id, service)

Get the creator banner for a given creator ID and service.

Returns:

Type Description
str

The banner URL.

Source code in ktoolbox/api/utils.py
21
22
23
24
25
26
27
28
def get_creator_banner(creator_id: str, service: str) -> str:
    """
    Build the banner URL of a creator.

    :param creator_id: The ID of the creator
    :param service: The service the creator belongs to
    :return: The banner URL.
    """
    api = config.api
    # urlunparse expects six components: scheme, netloc, path, params, query, fragment.
    banner_url = urlunparse(
        (api.scheme, api.statics_netloc, f"/banners/{service}/{creator_id}", '', '', '')
    )
    return str(banner_url)

get_creator_icon(creator_id, service)

Get the creator icon for a given creator ID and service.

Returns:

Type Description
str

The icon URL.

Source code in ktoolbox/api/utils.py
11
12
13
14
15
16
17
18
def get_creator_icon(creator_id: str, service: str) -> str:
    """
    Build the icon URL of a creator.

    :param creator_id: The ID of the creator
    :param service: The service the creator belongs to
    :return: The icon URL.
    """
    api = config.api
    # urlunparse expects six components: scheme, netloc, path, params, query, fragment.
    icon_url = urlunparse(
        (api.scheme, api.statics_netloc, f"/icons/{service}/{creator_id}", '', '', '')
    )
    return str(icon_url)

cli

__all__ = ['KToolBoxCli'] module-attribute

KToolBoxCli

Source code in ktoolbox/cli.py
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
# Command collection handed to ``fire.Fire`` by the package entry point.
# Commands return a value (data or a message string) for the caller to display,
# or ``None`` when their work is done through side effects such as downloads.
class KToolBoxCli:
    _update_checked = False  # Class variable to track if update check was performed

    @classmethod
    async def _ensure_update_check(cls):
        """Ensure update check is performed once per session"""
        if not cls._update_checked:
            try:
                await check_for_updates()
            except Exception:
                # Silently fail update check to not interfere with main functionality
                pass
            finally:
                # Mark as done even on failure so the check is never retried in this session
                cls._update_checked = True

    @staticmethod
    async def version():
        """Show KToolBox version"""
        # Always check for updates when version is explicitly requested
        # NOTE(review): unlike ``_ensure_update_check`` this call is not guarded, so an
        # exception raised by ``check_for_updates`` would propagate - confirm this is intended.
        await check_for_updates()
        return __version__

    @staticmethod
    async def site_version():
        # noinspection SpellCheckingInspection
        """Show current Kemono site app commit hash"""
        logger.info(repr(config))
        ret = await get_app_version()
        # A truthy ``ret`` yields its data; otherwise its message is returned instead
        return ret.data if ret else ret.message

    @staticmethod
    async def config_editor():
        """Launch graphical KToolBox configuration editor"""
        try:
            # Imported lazily: a missing module is reported below instead of breaking the CLI
            from ktoolbox.editor import run_config_editor
            run_config_editor()
        except ModuleNotFoundError:
            logger.error(
                "You need to install extra dependencies to use the editor, "
                "run `pip install ktoolbox[urwid]` "
                "or `pipx install ktoolbox[urwid] --force` if you are using pipx"
            )

    @staticmethod
    async def example_env():
        """Generate an example configuration ``.env`` file."""
        # The rendered text is printed to standard output; nothing is written to disk here
        print(
            render(
                OutputFormat.DOTENV,
                class_path=("ktoolbox.configuration.Configuration",)
            )
        )

    # noinspection PyShadowingBuiltins
    @staticmethod
    async def search_creator(
            name: str = None,
            id: str = None,
            service: str = None,
            *,
            dump: Path = None
    ):
        """
        Search creator, you can use multiple parameters as keywords.

        :param id: The ID of the creator
        :param name: The name of the creator
        :param service: The service for the creator
        :param dump: Dump the result to a JSON file
        """
        logger.info(repr(config))
        ret = await search_creator_action(id=id, name=name, service=service)
        if ret:
            # Materialize the data once so the same list can be dumped and returned
            result_list = list(ret.data)
            if dump:
                await dump_search(result_list, dump)
            # An empty result is replaced by the "empty result" text
            return result_list or TextEnum.SearchResultEmpty.value
        else:
            return ret.message

    # noinspection PyShadowingBuiltins
    @staticmethod
    async def search_creator_post(
            id: str = None,
            name: str = None,
            service: str = None,
            q: str = None,
            o: int = None,
            *,
            dump: Path = None
    ):
        """
        Search posts from creator, you can use multiple parameters as keywords.

        :param id: The ID of the creator
        :param name: The name of the creator
        :param service: The service for the creator
        :param q: Search query
        :param o: Result offset, stepping of 50 is enforced
        :param dump: Dump the result to a JSON file
        """
        logger.info(repr(config))
        ret = await search_creator_post_action(id=id, name=name, service=service, q=q, o=o)
        if ret:
            if dump:
                await dump_search(ret.data, dump)
            # An empty result is replaced by the "empty result" text
            return ret.data or TextEnum.SearchResultEmpty.value
        else:
            return ret.message

    @staticmethod
    async def get_post(service: str, creator_id: str, post_id: str, revision_id: str = None, *, dump: Path = None):
        """
        Get a specific post or revision

        :param service: The service name
        :param creator_id: The creator's ID
        :param post_id: The post ID
        :param revision_id: The revision ID (optional, for revision posts)
        :param dump: Dump the result to a JSON file
        """
        logger.info(repr(config))
        ret = await get_post_api(
            service=service,
            creator_id=creator_id,
            post_id=post_id,
            revision_id=revision_id
        )
        if ret:
            if dump:
                # Only the post itself is serialized; indentation comes from the configuration
                async with aiofiles.open(str(dump), "w", encoding="utf-8") as f:
                    await f.write(
                        ret.data.post.model_dump_json(indent=config.json_dump_indent)
                    )
            return ret.data.post
        else:
            return ret.message

    # Typing-only overload: call with a post URL
    @staticmethod
    @overload
    async def download_post(
            url: str,
            path: Union[Path, str] = Path("."),
            *,
            dump_post_data=True
    ):
        ...

    # Typing-only overload: call with the individual identifiers
    @staticmethod
    @overload
    async def download_post(
            service: str,
            creator_id: str,
            post_id: str,
            revision_id: str = None,
            path: Union[Path, str] = Path("."),
            *,
            dump_post_data=True
    ):
        ...

    @staticmethod
    async def download_post(
            url: str = None,
            service: str = None,
            creator_id: str = None,
            post_id: str = None,
            revision_id: str = None,
            path: Union[Path, str] = Path("."),
            *,
            dump_post_data=True
    ):
        """
        Download a specific post or revision

        :param url: The post URL
        :param service: The service name
        :param creator_id: The creator's ID
        :param post_id: The post ID
        :param revision_id: The revision ID (optional, for revision posts)
        :param path: Download path, default is current directory
        :param dump_post_data: Whether to dump post data (post.json) in post directory
        """
        # Check for updates on first command run
        await KToolBoxCli._ensure_update_check()

        logger.info(repr(config))
        # Get service, creator_id, post_id, revision_id
        # A given URL overrides any identifiers that were passed individually
        if url:
            service, creator_id, post_id, revision_id = parse_webpage_url(url)
        if not all([service, creator_id, post_id]):
            return generate_msg(
                TextEnum.MissingParams.value,
                use_at_lease_one=[
                    ["url"],
                    ["service", "creator_id", "post_id"]
                ])

        path = path if isinstance(path, Path) else Path(path)
        ret = await get_post_api(
            service=service,
            creator_id=creator_id,
            post_id=post_id,
            revision_id=revision_id
        )
        if ret:
            post_path = path / generate_post_path_name(ret.data.post)

            # For revision posts, create a revision subfolder
            # NOTE(review): this folder name is the literal "revision", while the loop below
            # uses ``config.job.post_structure.revisions`` - confirm the difference is intended.
            if revision_id:
                post_path = post_path / "revision" / revision_id

            # Download the main post
            try:
                job_list = await create_job_from_post(
                    post=ret.data.post,
                    post_path=post_path,
                    dump_post_data=dump_post_data
                )
            except FetchInterruptError:
                # An interrupted fetch ends the command without running any job
                return None

            # If include_revisions is enabled and we have revisions data
            if (config.job.include_revisions and
                    ret.data.props and
                    ret.data.props.revisions and
                    not revision_id):  # Don't process revisions if we're already downloading a specific revision

                # Each entry unpacks into two values; the first one is not used here
                for revision_order, revision_data in ret.data.props.revisions:
                    if revision_data.revision_id:  # Only process actual revisions, not the main post
                        revision_path = post_path / config.job.post_structure.revisions / generate_post_path_name(
                            revision_data)
                        try:
                            revision_jobs = await create_job_from_post(
                                post=revision_data,
                                post_path=revision_path,
                                dump_post_data=dump_post_data
                            )
                        except FetchInterruptError:
                            return None
                        job_list.extend(revision_jobs)

            # All collected jobs (main post plus revisions) are run by a single runner
            job_runner = JobRunner(job_list=job_list)
            await job_runner.start()
            return None
        else:
            return ret.message

    # Typing-only overload: call with a URL
    @staticmethod
    @overload
    async def sync_creator(
            url: str,
            path: Union[Path, str] = Path("."),
            *,
            save_creator_indices: bool = False,
            mix_posts: bool = None,
            start_time: str = None,
            end_time: str = None,
            offset: int = 0,
            length: int = None,
            keywords: Tuple[str] = None,
            keywords_exclude: Tuple[str] = None
    ):
        ...

    # Typing-only overload: call with the individual identifiers
    @staticmethod
    @overload
    async def sync_creator(
            service: str,
            creator_id: str,
            path: Union[Path, str] = Path("."),
            *,
            save_creator_indices: bool = False,
            mix_posts: bool = None,
            start_time: str = None,
            end_time: str = None,
            offset: int = 0,
            length: int = None,
            keywords: Tuple[str] = None,
            keywords_exclude: Tuple[str] = None
    ):
        ...

    @staticmethod
    async def sync_creator(
            url: str = None,
            service: str = None,
            creator_id: str = None,
            path: Union[Path, str] = Path("."),
            *,
            save_creator_indices: bool = False,
            mix_posts: bool = None,
            start_time: str = None,
            end_time: str = None,
            offset: int = 0,
            length: int = None,
            keywords: Tuple[str] = None,
            keywords_exclude: Tuple[str] = None
    ):
        """
        Sync posts from a creator

        You can update the directory anytime after download finished, \
        such as to update after creator published new posts.

        * ``start_time`` & ``end_time`` example: ``2023-12-7``, ``2023-12-07``

        :param url: The post URL
        :param service: The service where the post is located
        :param creator_id: The ID of the creator
        :param path: Download path, default is current directory
        :param save_creator_indices: Record ``CreatorIndices`` data
        :param mix_posts: Save all_pages files from different posts at same path, \
            ``save_creator_indices`` will be ignored if enabled
        :param start_time: Start time of the published time range for posts downloading. \
            Set to ``0`` if ``None`` was given. \
            Time format: ``%Y-%m-%d``
        :param end_time: End time of the published time range for posts downloading. \
            Set to latest time (infinity) if ``None`` was given. \
            Time format: ``%Y-%m-%d``
        :param offset: Result offset (or start offset)
        :param length: The number of posts to fetch, defaults to fetching all posts after ``offset``.
        :param keywords: Comma-separated keywords to filter posts by title (case-insensitive)
        :param keywords_exclude: Comma-separated keywords to exclude posts by title (case-insensitive)
        """
        # Check for updates on first command run
        await KToolBoxCli._ensure_update_check()
        logger.info(repr(config))
        # Get service, creator_id
        # Only the first two parsed values are needed; the remaining two are discarded
        if url:
            service, creator_id, _, _ = parse_webpage_url(url)
        if not all([service, creator_id]):
            return generate_msg(
                TextEnum.MissingParams.value,
                use_at_lease_one=[
                    ["url"],
                    ["service", "creator_id"]
                ])

        path = path if isinstance(path, Path) else Path(path)

        # Get creator name
        # The ID doubles as the folder name when the search succeeds but yields no creator
        creator_name = creator_id
        creator_ret = await search_creator_action(id=creator_id, service=service)
        if creator_ret:
            # Take the first match, or ``None`` when there is none
            creator = next(creator_ret.data, None)
            if creator:
                creator_name = creator.name
                logger.info(
                    generate_msg(
                        "Got creator information",
                        name=creator.name,
                        id=creator.id
                    )
                )
        else:
            logger.error(
                generate_msg(
                    f"Failed to fetch the name of creator <{creator_id}>",
                    detail=creator_ret.message
                )
            )
            return creator_ret.message

        creator_path = path / sanitize_filename(creator_name)
        # Only the creator folder itself is created; ``parents`` is not set, so ``path`` must already exist
        creator_path.mkdir(exist_ok=True)

        # A single keyword given as a plain string is wrapped into a list first;
        # when no keywords are passed, the configured ones are used instead
        keywords = [keywords] if isinstance(keywords, str) else keywords
        keyword_set = set(keywords) if keywords else config.job.keywords
        if keywords:
            logger.info(f"Filtering posts by keywords: {', '.join(keyword_set)}")

        # Same normalization and configuration fallback for the exclusion keywords
        keywords_exclude = [keywords_exclude] if isinstance(keywords_exclude, str) else keywords_exclude
        keyword_exclude_set = set(keywords_exclude) if keywords_exclude else config.job.keywords_exclude
        if keywords_exclude:
            logger.info(f"Excluding posts by keywords: {', '.join(keyword_exclude_set)}")

        # ``all_pages`` is enabled whenever ``length`` is falsy (``None`` or ``0``);
        # the time bounds are parsed from ``%Y-%m-%d`` strings or passed as ``None``
        ret = await create_job_from_creator(
            service=service,
            creator_id=creator_id,
            path=creator_path,
            all_pages=not length,
            offset=offset,
            length=length,
            save_creator_indices=save_creator_indices,
            mix_posts=mix_posts,
            start_time=datetime.strptime(start_time, "%Y-%m-%d") if start_time else None,
            end_time=datetime.strptime(end_time, "%Y-%m-%d") if end_time else None,
            keywords=keyword_set,
            keywords_exclude=keyword_exclude_set
        )
        if ret:
            job_runner = JobRunner(job_list=ret.data)
            await job_runner.start()
            return None
        else:
            return ret.message

config_editor() async staticmethod

Launch graphical KToolBox configuration editor

Source code in ktoolbox/cli.py
53
54
55
56
57
58
59
60
61
62
63
64
@staticmethod
async def config_editor():
    """Open the graphical configuration editor of KToolBox"""
    try:
        # Lazy import: a missing module is reported below rather than raised
        from ktoolbox.editor import run_config_editor
        run_config_editor()
    except ModuleNotFoundError:
        install_hint = (
            "You need to install extra dependencies to use the editor, "
            "run `pip install ktoolbox[urwid]` "
            "or `pipx install ktoolbox[urwid] --force` if you are using pipx"
        )
        logger.error(install_hint)

download_post(url=None, service=None, creator_id=None, post_id=None, revision_id=None, path=Path('.'), *, dump_post_data=True) async staticmethod

Download a specific post or revision

Parameters:

Name Type Description Default
url str

The post URL

None
service str

The service name

None
creator_id str

The creator's ID

None
post_id str

The post ID

None
revision_id str

The revision ID (optional, for revision posts)

None
path Union[Path, str]

Download path, default is current directory

Path('.')
dump_post_data

Whether to dump post data (post.json) in post directory

True
Source code in ktoolbox/cli.py
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
@staticmethod
async def download_post(
        url: str = None,
        service: str = None,
        creator_id: str = None,
        post_id: str = None,
        revision_id: str = None,
        path: Union[Path, str] = Path("."),
        *,
        dump_post_data=True
):
    """
    Download one post, or one specific revision of a post

    :param url: The post URL
    :param service: The service name
    :param creator_id: The creator's ID
    :param post_id: The post ID
    :param revision_id: The revision ID (optional, for revision posts)
    :param path: Download path, default is current directory
    :param dump_post_data: Whether to dump post data (post.json) in post directory
    """
    # The update check is performed at most once per session
    await KToolBoxCli._ensure_update_check()

    logger.info(repr(config))

    # A given URL overrides any identifiers that were passed individually
    if url:
        service, creator_id, post_id, revision_id = parse_webpage_url(url)
    if not (service and creator_id and post_id):
        return generate_msg(
            TextEnum.MissingParams.value,
            use_at_lease_one=[["url"], ["service", "creator_id", "post_id"]]
        )

    if not isinstance(path, Path):
        path = Path(path)

    ret = await get_post_api(
        service=service,
        creator_id=creator_id,
        post_id=post_id,
        revision_id=revision_id
    )
    if not ret:
        return ret.message

    fetched = ret.data
    post_path = path / generate_post_path_name(fetched.post)
    if revision_id:
        # A specifically requested revision gets its own subfolder
        post_path = post_path / "revision" / revision_id

    # Jobs for the main post
    try:
        job_list = await create_job_from_post(
            post=fetched.post,
            post_path=post_path,
            dump_post_data=dump_post_data
        )
    except FetchInterruptError:
        return None

    # Revisions are added only when enabled, available, and no specific revision was requested
    with_revisions = (
        config.job.include_revisions
        and fetched.props
        and fetched.props.revisions
        and not revision_id
    )
    if with_revisions:
        for _order, revision in fetched.props.revisions:
            if not revision.revision_id:
                # Entries without a revision ID stand for the main post and are skipped
                continue
            revision_path = (
                post_path
                / config.job.post_structure.revisions
                / generate_post_path_name(revision)
            )
            try:
                revision_jobs = await create_job_from_post(
                    post=revision,
                    post_path=revision_path,
                    dump_post_data=dump_post_data
                )
            except FetchInterruptError:
                return None
            job_list.extend(revision_jobs)

    await JobRunner(job_list=job_list).start()
    return None

example_env() async staticmethod

Generate an example configuration .env file.

Source code in ktoolbox/cli.py
66
67
68
69
70
71
72
73
74
@staticmethod
async def example_env():
    """Print an example ``.env`` configuration file."""
    # Render the configuration class as dotenv text, then write it to standard output
    dotenv_text = render(
        OutputFormat.DOTENV,
        class_path=("ktoolbox.configuration.Configuration",)
    )
    print(dotenv_text)

get_post(service, creator_id, post_id, revision_id=None, *, dump=None) async staticmethod

Get a specific post or revision

Parameters:

Name Type Description Default
service str

The service name

required
creator_id str

The creator's ID

required
post_id str

The post ID

required
revision_id str

The revision ID (optional, for revision posts)

None
dump Path

Dump the result to a JSON file

None
Source code in ktoolbox/cli.py
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
@staticmethod
async def get_post(service: str, creator_id: str, post_id: str, revision_id: str = None, *, dump: Path = None):
    """
    Fetch a single post, or one specific revision of it

    :param service: The service name
    :param creator_id: The creator's ID
    :param post_id: The post ID
    :param revision_id: The revision ID (optional, for revision posts)
    :param dump: Dump the result to a JSON file
    :return: The fetched post on success, otherwise the failure message of the API call
    """
    logger.info(repr(config))
    ret = await get_post_api(
        service=service,
        creator_id=creator_id,
        post_id=post_id,
        revision_id=revision_id
    )
    # A falsy return object means the call failed; hand its message back to the caller
    if not ret:
        return ret.message

    if dump:
        # Serialize the post as JSON into the requested dump file
        async with aiofiles.open(str(dump), "w", encoding="utf-8") as dump_file:
            post_json = ret.data.post.model_dump_json(indent=config.json_dump_indent)
            await dump_file.write(post_json)
    return ret.data.post

search_creator(name=None, id=None, service=None, *, dump=None) async staticmethod

Search creator, you can use multiple parameters as keywords.

Parameters:

Name Type Description Default
id str

The ID of the creator

None
name str

The name of the creator

None
service str

The service for the creator

None
dump Path

Dump the result to a JSON file

None
Source code in ktoolbox/cli.py
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
@staticmethod
async def search_creator(
        name: str = None,
        id: str = None,
        service: str = None,
        *,
        dump: Path = None
):
    """
    Look up creators; any combination of the parameters may be used as keywords.

    :param id: The ID of the creator
    :param name: The name of the creator
    :param service: The service for the creator
    :param dump: Dump the result to a JSON file
    :return: The list of matches, the "empty result" text when nothing matched, \
    or the failure message when the search itself failed
    """
    logger.info(repr(config))
    ret = await search_creator_action(id=id, name=name, service=service)
    # Failed search: report the message carried by the return object
    if not ret:
        return ret.message

    # Materialize the results once so they can be both dumped and returned
    creators = list(ret.data)
    if dump:
        await dump_search(creators, dump)
    if creators:
        return creators
    return TextEnum.SearchResultEmpty.value

search_creator_post(id=None, name=None, service=None, q=None, o=None, *, dump=None) async staticmethod

Search posts from creator, you can use multiple parameters as keywords.

Parameters:

Name Type Description Default
id str

The ID of the creator

None
name str

The name of the creator

None
service str

The service for the creator

None
q str

Search query

None
o int

Result offset, stepping of 50 is enforced

None
dump Path

Dump the result to a JSON file

None
Source code in ktoolbox/cli.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
@staticmethod
async def search_creator_post(
        id: str = None,
        name: str = None,
        service: str = None,
        q: str = None,
        o: int = None,
        *,
        dump: Path = None
):
    """
    Look up posts of a creator; any combination of the parameters may be used as keywords.

    :param id: The ID of the creator
    :param name: The name of the creator
    :param service: The service for the creator
    :param q: Search query
    :param o: Result offset, stepping of 50 is enforced
    :param dump: Dump the result to a JSON file
    :return: The search result, the "empty result" text when nothing matched, \
    or the failure message when the search itself failed
    """
    logger.info(repr(config))
    ret = await search_creator_post_action(id=id, name=name, service=service, q=q, o=o)
    # Failed search: report the message carried by the return object
    if not ret:
        return ret.message

    if dump:
        await dump_search(ret.data, dump)
    # An empty result is replaced by a human-readable notice
    return ret.data or TextEnum.SearchResultEmpty.value

site_version() async staticmethod

Show current Kemono site app commit hash

Source code in ktoolbox/cli.py
45
46
47
48
49
50
51
@staticmethod
async def site_version():
    # noinspection SpellCheckingInspection
    """Show the commit hash of the currently deployed Kemono site app"""
    logger.info(repr(config))
    ret = await get_app_version()
    # Truthy return object -> the version data; otherwise the failure message
    if ret:
        return ret.data
    return ret.message

sync_creator(url=None, service=None, creator_id=None, path=Path('.'), *, save_creator_indices=False, mix_posts=None, start_time=None, end_time=None, offset=0, length=None, keywords=None, keywords_exclude=None) async staticmethod

Sync posts from a creator

You can update the directory anytime after download finished, such as to update after creator published new posts.

  • start_time & end_time example: 2023-12-7, 2023-12-07

Parameters:

Name Type Description Default
url str

The creator page URL (the service and creator ID are parsed from it)

None
service str

The service where the post is located

None
creator_id str

The ID of the creator

None
path Union[Path, str]

Download path, default is current directory

Path('.')
save_creator_indices bool

Record CreatorIndices data

False
mix_posts bool

Save all files from different posts at the same path; save_creator_indices will be ignored if enabled

None
start_time str

Start time of the published time range for posts downloading. Set to 0 if None was given. Time format: %Y-%m-%d

None
end_time str

End time of the published time range for posts downloading. Set to latest time (infinity) if None was given. Time format: %Y-%m-%d

None
offset int

Result offset (or start offset)

0
length int

The number of posts to fetch, defaults to fetching all posts after offset.

None
keywords Tuple[str]

Comma-separated keywords to filter posts by title (case-insensitive)

None
keywords_exclude Tuple[str]

Comma-separated keywords to exclude posts by title (case-insensitive)

None
Source code in ktoolbox/cli.py
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
@staticmethod
async def sync_creator(
        url: str = None,
        service: str = None,
        creator_id: str = None,
        path: Union[Path, str] = Path("."),
        *,
        save_creator_indices: bool = False,
        mix_posts: bool = None,
        start_time: str = None,
        end_time: str = None,
        offset: int = 0,
        length: int = None,
        keywords: Tuple[str] = None,
        keywords_exclude: Tuple[str] = None
):
    """
    Sync posts from a creator

    You can update the directory anytime after download finished, \
    such as to update after creator published new posts.

    * ``start_time`` & ``end_time`` example: ``2023-12-7``, ``2023-12-07``

    :param url: A page URL from which the service and the creator ID are parsed \
        (used instead of ``service`` and ``creator_id``)
    :param service: The service where the post is located
    :param creator_id: The ID of the creator
    :param path: Download path, default is current directory
    :param save_creator_indices: Record ``CreatorIndices`` data
    :param mix_posts: Save all files from different posts at same path, \
        ``save_creator_indices`` will be ignored if enabled
    :param start_time: Start time of the published time range for posts downloading. \
        Set to ``0`` if ``None`` was given. \
        Time format: ``%Y-%m-%d``
    :param end_time: End time of the published time range for posts downloading. \
        Set to latest time (infinity) if ``None`` was given. \
        Time format: ``%Y-%m-%d``
    :param offset: Result offset (or start offset)
    :param length: The number of posts to fetch, defaults to fetching all posts after ``offset``.
    :param keywords: Comma-separated keywords to filter posts by title (case-insensitive)
    :param keywords_exclude: Comma-separated keywords to exclude posts by title (case-insensitive)
    :return: ``None`` after the download jobs have run, otherwise a message describing the failure
    """

    def _to_keyword_set(values):
        """Coerce a keyword argument (scalar or collection, any element type) to a set of strings."""
        if values is None:
            return set()
        if not isinstance(values, (list, tuple, set, frozenset)):
            # A single value, e.g. ``--keywords=foo`` or a bare number parsed by the CLI
            values = [values]
        # The CLI may hand over non-string values (e.g. ints); ``str.join`` below needs strings
        return {str(value) for value in values}

    # Check for updates on first command run
    await KToolBoxCli._ensure_update_check()
    logger.info(repr(config))
    # Get service, creator_id
    if url:
        service, creator_id, _, _ = parse_webpage_url(url)
    if not all([service, creator_id]):
        return generate_msg(
            TextEnum.MissingParams.value,
            use_at_lease_one=[
                ["url"],
                ["service", "creator_id"]
            ])

    path = path if isinstance(path, Path) else Path(path)

    # Get creator name, falling back to the creator ID when no creator record is found
    creator_name = creator_id
    creator_ret = await search_creator_action(id=creator_id, service=service)
    if creator_ret:
        creator = next(creator_ret.data, None)
        if creator:
            creator_name = creator.name
            logger.info(
                generate_msg(
                    "Got creator information",
                    name=creator.name,
                    id=creator.id
                )
            )
    else:
        logger.error(
            generate_msg(
                f"Failed to fetch the name of creator <{creator_id}>",
                detail=creator_ret.message
            )
        )
        return creator_ret.message

    creator_path = path / sanitize_filename(creator_name)
    # ``parents=True`` so that a download path that does not exist yet is created
    # instead of failing with ``FileNotFoundError``
    creator_path.mkdir(parents=True, exist_ok=True)

    # Keywords given on the command line take precedence over the configured ones
    cli_keywords = _to_keyword_set(keywords)
    keyword_set = cli_keywords or config.job.keywords
    if cli_keywords:
        logger.info(f"Filtering posts by keywords: {', '.join(keyword_set)}")

    cli_keywords_exclude = _to_keyword_set(keywords_exclude)
    keyword_exclude_set = cli_keywords_exclude or config.job.keywords_exclude
    if cli_keywords_exclude:
        logger.info(f"Excluding posts by keywords: {', '.join(keyword_exclude_set)}")

    ret = await create_job_from_creator(
        service=service,
        creator_id=creator_id,
        path=creator_path,
        # Without an explicit ``length`` every page after ``offset`` is fetched
        all_pages=not length,
        offset=offset,
        length=length,
        save_creator_indices=save_creator_indices,
        mix_posts=mix_posts,
        start_time=datetime.strptime(start_time, "%Y-%m-%d") if start_time else None,
        end_time=datetime.strptime(end_time, "%Y-%m-%d") if end_time else None,
        keywords=keyword_set,
        keywords_exclude=keyword_exclude_set
    )
    if ret:
        job_runner = JobRunner(job_list=ret.data)
        await job_runner.start()
        return None
    else:
        return ret.message

version() async staticmethod

Show KToolBox version

Source code in ktoolbox/cli.py
38
39
40
41
42
43
@staticmethod
async def version():
    """Show the KToolBox version"""
    # An explicit version request always triggers the update check first
    await check_for_updates()
    current_version = __version__
    return current_version

configuration

__all__ = ['config', 'APIConfiguration', 'DownloaderConfiguration', 'PostStructureConfiguration', 'JobConfiguration', 'LoggerConfiguration', 'Configuration'] module-attribute

config = Configuration() module-attribute

APIConfiguration

Bases: BaseModel

Kemono API Configuration

Attributes:

Name Type Description Default
scheme Literal['http', 'https']

Kemono API URL scheme

'https'
netloc str

Kemono API URL netloc

'kemono.cr'
statics_netloc str

URL netloc of Kemono server for static files (e.g. images)

'img.kemono.cr'
files_netloc str

URL netloc of Kemono server for post files

'kemono.cr'
path str

Kemono API URL root path

'/api/v1'
timeout float

API request timeout

5.0
retry_times int

API request retry times (when request failed)

3
retry_interval float

Seconds of API request retry interval

2.0
session_key str

Session key that can be found in cookies after a successful login

''
Source code in ktoolbox/configuration.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
class APIConfiguration(BaseModel):
    """
    Kemono API Configuration

    A purely declarative model: every attribute is a plain field with a literal
    default, and the class defines no validators or methods of its own.

    :ivar scheme: Kemono API URL scheme
    :ivar netloc: Kemono API URL netloc
    :ivar statics_netloc: URL netloc of Kemono server for static files (e.g. images)
    :ivar files_netloc: URL netloc of Kemono server for post files
    :ivar path: Kemono API URL root path
    :ivar timeout: API request timeout
    :ivar retry_times: API request retry times (when request failed)
    :ivar retry_interval: Seconds of API request retry interval
    :ivar session_key: Session key that can be found in cookies after a successful login
    """
    # URL components of the API and file servers
    scheme: Literal["http", "https"] = "https"
    netloc: str = "kemono.cr"
    statics_netloc: str = "img.kemono.cr"
    files_netloc: str = "kemono.cr"
    path: str = "/api/v1"
    # Request timeout and retry policy
    # NOTE(review): ``timeout`` is presumably in seconds like ``retry_interval`` - confirm
    timeout: float = 5.0
    retry_times: int = 3
    retry_interval: float = 2.0
    # Empty by default, i.e. no session key is configured
    session_key: str = ""

files_netloc: str = 'kemono.cr' class-attribute instance-attribute

netloc: str = 'kemono.cr' class-attribute instance-attribute

path: str = '/api/v1' class-attribute instance-attribute

retry_interval: float = 2.0 class-attribute instance-attribute

retry_times: int = 3 class-attribute instance-attribute

scheme: Literal['http', 'https'] = 'https' class-attribute instance-attribute

session_key: str = '' class-attribute instance-attribute

statics_netloc: str = 'img.kemono.cr' class-attribute instance-attribute

timeout: float = 5.0 class-attribute instance-attribute

Configuration

Bases: BaseSettings

KToolBox Configuration

Attributes:

Name Type Description Default
api APIConfiguration

Kemono API Configuration

APIConfiguration()
downloader DownloaderConfiguration

File Downloader Configuration

DownloaderConfiguration()
job JobConfiguration

Download jobs Configuration

JobConfiguration()
logger LoggerConfiguration

Logger configuration

LoggerConfiguration()
ssl_verify bool

Enable SSL certificate verification for Kemono API server and download server

True
json_dump_indent int

Indent of JSON file dump

4
use_uvloop bool

Use uvloop/winloop for asyncio performance optimization. Uses winloop on Windows and uvloop on Unix-like systems for better concurrent performance. Install winloop on Windows with pip install ktoolbox[winloop] or uvloop on Unix with pip install ktoolbox[uvloop].

True
Source code in ktoolbox/configuration.py
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
class Configuration(BaseSettings):
    # noinspection SpellCheckingInspection,GrazieInspection
    """
    KToolBox Configuration

    :ivar api: Kemono API Configuration
    :ivar downloader: File Downloader Configuration
    :ivar job: Download jobs Configuration
    :ivar logger: Logger configuration
    :ivar ssl_verify: Enable SSL certificate verification for Kemono API server and download server
    :ivar json_dump_indent: Indent of JSON file dump
    :ivar use_uvloop: Use uvloop/winloop for asyncio performance optimization. \
    Uses winloop on Windows and uvloop on Unix-like systems for better concurrent performance. \
    Install winloop on Windows with `pip install ktoolbox[winloop]` \
    or uvloop on Unix with `pip install ktoolbox[uvloop]`.
    """
    # Nested configuration sections; each defaults to an instance built from that model's own defaults
    api: APIConfiguration = APIConfiguration()
    downloader: DownloaderConfiguration = DownloaderConfiguration()
    job: JobConfiguration = JobConfiguration()
    logger: LoggerConfiguration = LoggerConfiguration()

    # Top-level scalar options
    ssl_verify: bool = True
    json_dump_indent: int = 4
    use_uvloop: bool = True

    # Settings-source options: environment variable names carry the ``ktoolbox_`` prefix,
    # ``__`` separates nested section and field names, values are also read from the
    # ``.env`` and ``prod.env`` files (UTF-8), and unknown keys are ignored.
    # NOTE(review): exact precedence between the two env files follows pydantic-settings - confirm
    # noinspection SpellCheckingInspection
    model_config: ClassVar[SettingsConfigDict] = SettingsConfigDict(
        env_prefix='ktoolbox_',
        env_nested_delimiter='__',
        env_file=['.env', 'prod.env'],
        env_file_encoding='utf-8',
        extra='ignore'
    )

api: APIConfiguration = APIConfiguration() class-attribute instance-attribute

downloader: DownloaderConfiguration = DownloaderConfiguration() class-attribute instance-attribute

job: JobConfiguration = JobConfiguration() class-attribute instance-attribute

json_dump_indent: int = 4 class-attribute instance-attribute

logger: LoggerConfiguration = LoggerConfiguration() class-attribute instance-attribute

model_config: SettingsConfigDict = SettingsConfigDict(env_prefix='ktoolbox_', env_nested_delimiter='__', env_file=['.env', 'prod.env'], env_file_encoding='utf-8', extra='ignore') class-attribute

ssl_verify: bool = True class-attribute instance-attribute

use_uvloop: bool = True class-attribute instance-attribute

DownloaderConfiguration

Bases: BaseModel

File Downloader Configuration

Attributes:

Name Type Description Default
scheme Literal['http', 'https']

Downloader URL scheme

'https'
timeout float

Downloader request timeout

30.0
encoding str

Charset for filename parsing and post content, external_links saving

'utf-8'
buffer_size int

Number of bytes of file I/O buffer for each downloading file

20480
chunk_size int

Number of bytes of chunk of downloader stream

1024
temp_suffix str

Temp filename suffix of downloading files

'tmp'
retry_times int

Downloader retry times (when download failed)

10
retry_stop_never bool

Never stop downloader from retrying (when download failed) (retry_times will be ignored when enabled)

False
retry_interval float

Seconds of downloader retry interval

3.0
tps_limit float

Maximum connections established per second

5.0
use_bucket bool

Enable local storage bucket mode

False
bucket_path Path

Path of local storage bucket

Path('./.ktoolbox/bucket_storage')
reverse_proxy str

Reverse proxy format for download URL. Customize the download URL format by inserting an empty {} to represent the original URL. For example: https://example.com/{} will be https://example.com/https://n1.kemono.su/data/66/83/xxxxx.jpg; https://example.com/?url={} will be https://example.com/?url=https://n1.kemono.su/data/66/83/xxxxx.jpg

'{}'
keep_metadata bool

Keep the file metadata when downloading files (e.g. last modified time, etc.)

True
Source code in ktoolbox/configuration.py
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
class DownloaderConfiguration(BaseModel):
    """
    File Downloader Configuration

    :ivar scheme: Downloader URL scheme
    :ivar timeout: Downloader request timeout
    :ivar encoding: Charset for filename parsing and post ``content``, ``external_links`` saving
    :ivar buffer_size: Number of bytes of file I/O buffer for each downloading file
    :ivar chunk_size: Number of bytes of chunk of downloader stream
    :ivar temp_suffix: Temp filename suffix of downloading files
    :ivar retry_times: Downloader retry times (when download failed)
    :ivar retry_stop_never: Never stop downloader from retrying (when download failed) \
    (``retry_times`` will be ignored when enabled)
    :ivar retry_interval: Seconds of downloader retry interval
    :ivar tps_limit: Maximum connections established per second
    :ivar use_bucket: Enable local storage bucket mode
    :ivar bucket_path: Path of local storage bucket
    :ivar reverse_proxy: Reverse proxy format for download URL. \
    Customize the download URL format by inserting an empty ``{}`` to represent the original URL. \
    For example: ``https://example.com/{}`` will be ``https://example.com/https://n1.kemono.su/data/66/83/xxxxx.jpg``;  \
    ``https://example.com/?url={}`` will be ``https://example.com/?url=https://n1.kemono.su/data/66/83/xxxxx.jpg``
    :ivar keep_metadata: Keep the file metadata when downloading files (e.g. last modified time, etc.)
    """
    scheme: Literal["http", "https"] = "https"
    timeout: float = 30.0
    encoding: str = "utf-8"
    buffer_size: int = 20480
    chunk_size: int = 1024
    temp_suffix: str = "tmp"
    retry_times: int = 10
    retry_stop_never: bool = False
    retry_interval: float = 3.0
    tps_limit: float = 5.0
    use_bucket: bool = False
    bucket_path: Path = Path("./.ktoolbox/bucket_storage")
    reverse_proxy: str = "{}"
    keep_metadata: bool = True

    @model_validator(mode="after")
    def check_bucket_path(self) -> "DownloaderConfiguration":
        """
        Verify that ``bucket_path`` is usable when ``use_bucket`` is enabled.

        Creates the bucket directory, then hard-links a temporary file inside it and
        removes the link again. If any step fails, ``use_bucket`` is switched off and
        the exception is logged instead of being raised.

        :return: The model instance itself
        """
        if self.use_bucket:
            # noinspection PyBroadException
            try:
                bucket_path = Path(self.bucket_path)
                bucket_path.mkdir(parents=True, exist_ok=True)
                # ``NamedTemporaryFile`` is required: on POSIX the ``name`` of a plain
                # ``TemporaryFile`` is an integer file descriptor without a directory
                # entry, so it can neither be joined to a path nor hard-linked, and
                # the probe would always fail.
                with tempfile.NamedTemporaryFile(dir=bucket_path) as temp_file:
                    temp_link_file_path = f"{temp_file.name}.hlink"
                    os.link(temp_file.name, temp_link_file_path)
                    os.remove(temp_link_file_path)
            except Exception:
                # Best effort: fall back to non-bucket mode rather than failing configuration loading
                self.use_bucket = False
                logger.exception("`DownloaderConfiguration.bucket_path` is not available, "
                                 "`DownloaderConfiguration.use_bucket` has been disabled.")
        return self

bucket_path: Path = Path('./.ktoolbox/bucket_storage') class-attribute instance-attribute

buffer_size: int = 20480 class-attribute instance-attribute

chunk_size: int = 1024 class-attribute instance-attribute

encoding: str = 'utf-8' class-attribute instance-attribute

keep_metadata: bool = True class-attribute instance-attribute

retry_interval: float = 3.0 class-attribute instance-attribute

retry_stop_never: bool = False class-attribute instance-attribute

retry_times: int = 10 class-attribute instance-attribute

reverse_proxy: str = '{}' class-attribute instance-attribute

scheme: Literal['http', 'https'] = 'https' class-attribute instance-attribute

temp_suffix: str = 'tmp' class-attribute instance-attribute

timeout: float = 30.0 class-attribute instance-attribute

tps_limit: float = 5.0 class-attribute instance-attribute

use_bucket: bool = False class-attribute instance-attribute

check_bucket_path()

Source code in ktoolbox/configuration.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
@model_validator(mode="after")
def check_bucket_path(self) -> "DownloaderConfiguration":
    """
    Verify that ``bucket_path`` is usable when ``use_bucket`` is enabled.

    Creates the bucket directory, then hard-links a temporary file inside it and
    removes the link again. If any step fails, ``use_bucket`` is switched off and
    the exception is logged instead of being raised.

    :return: The model instance itself
    """
    if self.use_bucket:
        # noinspection PyBroadException
        try:
            bucket_path = Path(self.bucket_path)
            bucket_path.mkdir(parents=True, exist_ok=True)
            # ``NamedTemporaryFile`` is required: on POSIX the ``name`` of a plain
            # ``TemporaryFile`` is an integer file descriptor without a directory
            # entry, so it can neither be joined to a path nor hard-linked, and
            # the probe would always fail.
            with tempfile.NamedTemporaryFile(dir=bucket_path) as temp_file:
                temp_link_file_path = f"{temp_file.name}.hlink"
                os.link(temp_file.name, temp_link_file_path)
                os.remove(temp_link_file_path)
        except Exception:
            # Best effort: fall back to non-bucket mode rather than failing configuration loading
            self.use_bucket = False
            logger.exception("`DownloaderConfiguration.bucket_path` is not available, "
                             "`DownloaderConfiguration.use_bucket` has been disabled.")
    return self

JobConfiguration

Bases: BaseModel

Download jobs Configuration

  • Available properties for post_dirname_format and filename_format

    Property Type
    id String
    user String
    service String
    title String
    added Date
    published Date
    edited Date
  • Available properties for year_dirname_format and month_dirname_format

    Property Type
    year String
    month String
  • Python Format Specification Mini-Language reference:

    https://docs.python.org/3.13/library/string.html#format-specification-mini-language

Attributes:

Name Type Description Default
count int

Number of coroutines for concurrent download

4
include_revisions bool

Include and download revision posts when available

False
post_dirname_format str

Customize the post directory name format, you can use some of the properties in Post. e.g. [{published}]{id} could result dirname [2024-1-1]123123, {user}_{published}_{title} could result dirname like 234234_2024-1-1_TheTitle. Meanwhile, you can also use the formatting feature of the Python Format Specification Mini-Language, for example: {title:.6} could shorten the title length to 6 characters like HiEveryoneThisIsALongTitle to HiEver

'{title}'
post_structure PostStructureConfiguration

Post path structure

PostStructureConfiguration()
mix_posts bool

Save all files from different posts at the same path in the creator directory. It would not create any post directory, and CreatorIndices would not be recorded.

False
sequential_filename bool

Rename attachments in numerical order, e.g. 1.png, 2.png, ...

False
sequential_filename_excludes Set[str]

File extensions to exclude from sequential naming when sequential_filename is enabled. Files with these extensions will keep their original names. e.g. [".psd", ".zip", ".mp4"]

Field(default_factory=set)
filename_format str

Customize the filename format by inserting an empty {} to represent the basic filename. Similar to post_dirname_format, you can use some of the properties in Post. For example: {title}_{} could result in filenames like TheTitle_b4b41de2-8736-480d-b5c3-ebf0d917561b, TheTitle_af349b25-ac08-46d7-98fb-6ce99a237b90, etc. You can also use it with sequential_filename. For instance, [{published}]_{} could result in filenames like [2024-1-1]_1.png, [2024-1-1]_2.png, etc. Meanwhile, you can also use the formatting feature of the Python Format Specification Mini-Language, for example: {title:.6} could shorten the title length to 6 characters like HiEveryoneThisIsALongTitle to HiEver

'{}'
allow_list Set[str]

Download files which match these patterns (Unix shell-style), e.g. ["*.png"]

Field(default_factory=set)
block_list Set[str]

Not to download files which match these patterns (Unix shell-style), e.g. ["*.psd","*.zip"]

Field(default_factory=set)
extract_content bool

Extract post content and save to separate file (filename was defined in config.job.post_structure.content)

False
extract_content_images bool

Extract images from post content and download them.

False
extract_external_links bool

Extract external file sharing links from post content and save to separate file (filename was defined in config.job.post_structure.external_links)

False
external_link_patterns List[str]

Regex patterns for extracting external links.

['https?://drive\\.google\\.com/[^\\s]+', 'https?://docs\\.google\\.com/[^\\s]+', 'https?://mega\\.nz/[^\\s]+', 'https?://mega\\.co\\.nz/[^\\s]+', 'https?://(?:www\\.)?dropbox\\.com/[^\\s]+', 'https?://db\\.tt/[^\\s]+', 'https?://onedrive\\.live\\.com/[^\\s]+', 'https?://1drv\\.ms/[^\\s]+', 'https?://(?:www\\.)?mediafire\\.com/[^\\s]+', 'https?://(?:www\\.)?wetransfer\\.com/[^\\s]+', 'https?://we\\.tl/[^\\s]+', 'https?://(?:www\\.)?sendspace\\.com/[^\\s]+', 'https?://(?:www\\.)?4shared\\.com/[^\\s]+', 'https?://(?:www\\.)?zippyshare\\.com/[^\\s]+', 'https?://(?:www\\.)?uploadfiles\\.io/[^\\s]+', 'https?://(?:www\\.)?box\\.com/[^\\s]+', 'https?://(?:www\\.)?pcloud\\.com/[^\\s]+', 'https?://disk\\.yandex\\.[a-z]+/[^\\s]+', 'https?://[^\\s]*(?:file|upload|share|download|drive|storage)[^\\s]*\\.[a-z]{2,4}/[^\\s]+']
group_by_year bool

Group posts by year in separate directories based on published date

False
group_by_month bool

Group posts by month in separate directories based on published date (requires group_by_year)

False
year_dirname_format str

Customize the year directory name format. Available properties: year. e.g. {year} > 2024, Year_{year} > Year_2024

'{year}'
month_dirname_format str

Customize the month directory name format. Available properties: year, month. e.g. {year}-{month} > 2024-01, {year}_{month} > 2024_01

'{year}-{month:02d}'
keywords Set[str]

keywords to filter posts by title (case-insensitive)

Field(default_factory=set)
keywords_exclude Set[str]

keywords to exclude posts by title (case-insensitive)

Field(default_factory=set)
download_file bool

Download post file (usually cover image). Set to False to skip file downloads.

True
download_attachments bool

Download post attachments. Set to False to skip attachment downloads.

True
min_file_size Optional[int]

Minimum file size in bytes to download. Files smaller than this will be skipped. Set to None to disable minimum size filtering.

None
max_file_size Optional[int]

Maximum file size in bytes to download. Files larger than this will be skipped. Set to None to disable maximum size filtering.

None
Source code in ktoolbox/configuration.py
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
class JobConfiguration(BaseModel):
    """
    Configuration of download jobs

    - Available properties for ``post_dirname_format`` and ``filename_format``

        | Property      | Type   |
        |---------------|--------|
        | ``id``        | String |
        | ``user``      | String |
        | ``service``   | String |
        | ``title``     | String |
        | ``added``     | Date   |
        | ``published`` | Date   |
        | ``edited``    | Date   |

    - Available properties for ``year_dirname_format`` and ``month_dirname_format``

        | Property      | Type   |
        |---------------|--------|
        | ``year``      | String |
        | ``month``     | String |

    - Python Format Specification Mini-Language reference:

        https://docs.python.org/3.13/library/string.html#format-specification-mini-language

    :ivar count: Number of coroutines for concurrent download
    :ivar include_revisions: Include and download revision posts when available
    :ivar post_dirname_format: Customize the post directory name format, you can use some of the \
    [properties][ktoolbox.configuration.JobConfiguration] in ``Post``. \
    e.g. ``[{published}]{id}`` could result in a dirname like ``[2024-1-1]123123``, \
    ``{user}_{published}_{title}`` could result in a dirname like ``234234_2024-1-1_TheTitle``. \
    Meanwhile, you can also use the formatting feature of the Python Format Specification Mini-Language, for example: \
    ``{title:.6}`` could shorten the title length to 6 characters like ``HiEveryoneThisIsALongTitle`` to ``HiEver``
    :ivar post_structure: Post path structure
    :ivar mix_posts: Save all files from different posts at same path in creator directory. \
    It would not create any post directory, and ``CreatorIndices`` would not be recorded.
    :ivar sequential_filename: Rename attachments in numerical order, e.g. ``1.png``, ``2.png``, ...
    :ivar sequential_filename_excludes: File extensions to exclude from sequential naming when ``sequential_filename`` is enabled. \
    Files with these extensions will keep their original names. e.g. ``[".psd", ".zip", ".mp4"]``
    :ivar filename_format: Customize the filename format by inserting an empty ``{}`` to represent the basic filename. \
    Similar to ``post_dirname_format``, you can use some of the [properties][ktoolbox.configuration.JobConfiguration] \
    in Post. For example: ``{title}_{}`` could result in filenames like \
    ``TheTitle_b4b41de2-8736-480d-b5c3-ebf0d917561b``, ``TheTitle_af349b25-ac08-46d7-98fb-6ce99a237b90``, etc. \
    You can also use it with ``sequential_filename``. For instance, \
    ``[{published}]_{}`` could result in filenames like ``[2024-1-1]_1.png``, ``[2024-1-1]_2.png``, etc. \
    Meanwhile, you can also use the formatting feature of the Python Format Specification Mini-Language, for example: \
    ``{title:.6}`` could shorten the title length to 6 characters like ``HiEveryoneThisIsALongTitle`` to ``HiEver``
    :ivar allow_list: Download files which match these patterns (Unix shell-style), e.g. ``["*.png"]``
    :ivar block_list: Do not download files which match these patterns (Unix shell-style), e.g. ``["*.psd","*.zip"]``
    :ivar extract_content: Extract post content and save to separate file (filename is defined in ``config.job.post_structure.content``)
    :ivar extract_content_images: Extract images from post content and download them.
    :ivar extract_external_links: Extract external file sharing links from post content and save to separate file \
    (filename is defined in ``config.job.post_structure.external_links``)
    :ivar external_link_patterns: Regex patterns for extracting external links.
    :ivar group_by_year: Group posts by year in separate directories based on published date
    :ivar group_by_month: Group posts by month in separate directories based on published date (requires group_by_year)
    :ivar year_dirname_format: Customize the year directory name format. Available properties: ``year``. \
    e.g. ``{year}`` > ``2024``, ``Year_{year}`` > ``Year_2024``
    :ivar month_dirname_format: Customize the month directory name format. Available properties: ``year``, ``month``. \
    e.g. ``{year}-{month:02d}`` > ``2024-01``, ``{year}_{month:02d}`` > ``2024_01`` \
    (the ``:02d`` spec, as used by the default value, zero-pads the month)
    :ivar keywords: Keywords to filter posts by title (case-insensitive)
    :ivar keywords_exclude: Keywords to exclude posts by title (case-insensitive)
    :ivar download_file: Download post file (usually cover image). Set to False to skip file downloads.
    :ivar download_attachments: Download post attachments. Set to False to skip attachment downloads.
    :ivar min_file_size: Minimum file size in bytes to download. Files smaller than this will be skipped. \
    Set to None to disable minimum size filtering.
    :ivar max_file_size: Maximum file size in bytes to download. Files larger than this will be skipped. \
    Set to None to disable maximum size filtering.
    """
    count: int = 4
    include_revisions: bool = False
    post_dirname_format: str = "{title}"
    post_structure: PostStructureConfiguration = PostStructureConfiguration()
    mix_posts: bool = False
    sequential_filename: bool = False
    sequential_filename_excludes: Set[str] = Field(default_factory=set)
    filename_format: str = "{}"
    # noinspection PyDataclass
    allow_list: Set[str] = Field(default_factory=set)
    # noinspection PyDataclass
    block_list: Set[str] = Field(default_factory=set)
    extract_content: bool = False
    extract_content_images: bool = False
    extract_external_links: bool = False
    # Default patterns: one or more per known file hosting service, followed by a generic catch-all
    # noinspection SpellCheckingInspection
    external_link_patterns: List[str] = [
        # Google Drive
        r'https?://drive\.google\.com/[^\s]+',
        r'https?://docs\.google\.com/[^\s]+',

        # MEGA
        r'https?://mega\.nz/[^\s]+',
        r'https?://mega\.co\.nz/[^\s]+',

        # Dropbox
        r'https?://(?:www\.)?dropbox\.com/[^\s]+',
        r'https?://db\.tt/[^\s]+',

        # OneDrive
        r'https?://onedrive\.live\.com/[^\s]+',
        r'https?://1drv\.ms/[^\s]+',

        # MediaFire
        r'https?://(?:www\.)?mediafire\.com/[^\s]+',

        # WeTransfer
        r'https?://(?:www\.)?wetransfer\.com/[^\s]+',
        r'https?://we\.tl/[^\s]+',

        # SendSpace
        r'https?://(?:www\.)?sendspace\.com/[^\s]+',

        # 4shared
        r'https?://(?:www\.)?4shared\.com/[^\s]+',

        # Zippyshare
        r'https?://(?:www\.)?zippyshare\.com/[^\s]+',

        # Uploadfiles.io
        r'https?://(?:www\.)?uploadfiles\.io/[^\s]+',

        # Box
        r'https?://(?:www\.)?box\.com/[^\s]+',

        # pCloud
        r'https?://(?:www\.)?pcloud\.com/[^\s]+',

        # Yandex Disk
        r'https?://disk\.yandex\.[a-z]+/[^\s]+',

        # Generic patterns for other file hosting services
        # (any host containing one of the listed words, followed by a 2-4 letter TLD)
        r'https?://[^\s]*(?:file|upload|share|download|drive|storage)[^\s]*\.[a-z]{2,4}/[^\s]+',
    ]
    group_by_year: bool = False
    group_by_month: bool = False
    year_dirname_format: str = "{year}"
    month_dirname_format: str = "{year}-{month:02d}"
    keywords: Set[str] = Field(default_factory=set)
    keywords_exclude: Set[str] = Field(default_factory=set)
    download_file: bool = True
    download_attachments: bool = True
    min_file_size: Optional[int] = None
    max_file_size: Optional[int] = None

allow_list: Set[str] = Field(default_factory=set) class-attribute instance-attribute

block_list: Set[str] = Field(default_factory=set) class-attribute instance-attribute

count: int = 4 class-attribute instance-attribute

download_attachments: bool = True class-attribute instance-attribute

download_file: bool = True class-attribute instance-attribute

extract_content: bool = False class-attribute instance-attribute

extract_content_images: bool = False class-attribute instance-attribute

filename_format: str = '{}' class-attribute instance-attribute

group_by_month: bool = False class-attribute instance-attribute

group_by_year: bool = False class-attribute instance-attribute

include_revisions: bool = False class-attribute instance-attribute

keywords: Set[str] = Field(default_factory=set) class-attribute instance-attribute

keywords_exclude: Set[str] = Field(default_factory=set) class-attribute instance-attribute

max_file_size: Optional[int] = None class-attribute instance-attribute

min_file_size: Optional[int] = None class-attribute instance-attribute

mix_posts: bool = False class-attribute instance-attribute

month_dirname_format: str = '{year}-{month:02d}' class-attribute instance-attribute

post_dirname_format: str = '{title}' class-attribute instance-attribute

post_structure: PostStructureConfiguration = PostStructureConfiguration() class-attribute instance-attribute

sequential_filename: bool = False class-attribute instance-attribute

sequential_filename_excludes: Set[str] = Field(default_factory=set) class-attribute instance-attribute

year_dirname_format: str = '{year}' class-attribute instance-attribute

LoggerConfiguration

Bases: BaseModel

Logger configuration

Attributes:

Name Type Description Default
path Optional[Path]

Path to save logs, None to disable log file output

None
level Union[str, int]

Log filter level

logging.getLevelName(logging.DEBUG)
rotation Union[str, int, time, timedelta]

Log rotation

'1 week'
Source code in ktoolbox/configuration.py
308
309
310
311
312
313
314
315
316
317
318
class LoggerConfiguration(BaseModel):
    """
    Logger configuration

    :ivar path: Path to save logs, ``None`` to disable log file output
    :ivar level: Log filter level, given as a level name or a numeric level \
    (defaults to the name of ``logging.DEBUG``)
    :ivar rotation: Log rotation condition (defaults to ``"1 week"``)
    """
    path: Optional[Path] = None
    level: Union[str, int] = logging.getLevelName(logging.DEBUG)
    rotation: Union[str, int, datetime.time, datetime.timedelta] = "1 week"

level: Union[str, int] = logging.getLevelName(logging.DEBUG) class-attribute instance-attribute

path: Optional[Path] = None class-attribute instance-attribute

rotation: Union[str, int, datetime.time, datetime.timedelta] = '1 week' class-attribute instance-attribute

PostStructureConfiguration

Bases: BaseModel

Post path structure model

  • Default:

    ..
    ├─ content.txt
    ├─ external_links.txt
    ├─ {id}_{}.png (file)
    ├─ post.json (metadata)
    ├─ attachments
    │    ├─ 1.png
    │    └─ 2.png
    └─ revisions
         ├─ <PostStructure>
         │    ├─ ...
         │    └─ ...
         └─ <PostStructure>
              ├─ ...
              └─ ...
    

  • Available properties for file

    Property Type
    id String
    user String
    service String
    title String
    added Date
    published Date
    edited Date

Attributes:

Name Type Description Default
attachments Path

Sub path of attachment directory

Path('attachments')
content Path

Sub path of post content file

Path('content.txt')
external_links Path

Sub path of external links file (for cloud storage links found in content)

Path('external_links.txt')
file str

The format of the post file filename (file is not attachment; each post has only one file, usually the cover image). Customize the filename format by inserting an empty {} to represent the basic filename. You can use some of the properties in Post. For example: {title}_{} could result in filenames like TheTitle_Stelle_lv5_logo.gif, TheTitle_ScxHjZIdxt5cnjaAwf3ql2p7.jpg, etc. You can also use the formatting features of the Python Format Specification Mini-Language, for example: {title:.6}_{} shortens the title to 6 characters, turning HiEveryoneThisIsALongTitle_ScxHjZIdxt5cnjaAwf3ql2p7.jpg into HiEver_ScxHjZIdxt5cnjaAwf3ql2p7.jpg

'{id}_{}'
revisions Path

Sub path of revisions directory

Path('revisions')
Source code in ktoolbox/configuration.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
class PostStructureConfiguration(BaseModel):
    # noinspection SpellCheckingInspection,GrazieInspection
    """
    Post path structure model

    Every attribute is a path (or, for ``file``, a filename format) relative to the post directory.

    - Default:
    ```
    ..
    ├─ content.txt
    ├─ external_links.txt
    ├─ {id}_{}.png (file)
    ├─ post.json (metadata)
    ├─ attachments
    │    ├─ 1.png
    │    └─ 2.png
    └─ revisions
         ├─ <PostStructure>
         │    ├─ ...
         │    └─ ...
         └─ <PostStructure>
              ├─ ...
              └─ ...
    ```

    - Available properties for ``file``

        | Property      | Type   |
        |---------------|--------|
        | ``id``        | String |
        | ``user``      | String |
        | ``service``   | String |
        | ``title``     | String |
        | ``added``     | Date   |
        | ``published`` | Date   |
        | ``edited``    | Date   |

    :ivar attachments: Sub path of attachment directory
    :ivar content: Sub path of post content file
    :ivar external_links: Sub path of external links file (for cloud storage links found in content)
    :ivar file: The format of the post `file` filename (`file` is not `attachment`; each post has only one `file`, \
    usually the cover image). \
    Customize the filename format by inserting an empty ``{}`` to represent the basic filename. \
    You can use some of the [properties][ktoolbox.configuration.PostStructureConfiguration] \
    in Post. For example: ``{title}_{}`` could result in filenames like \
    ``TheTitle_Stelle_lv5_logo.gif``, ``TheTitle_ScxHjZIdxt5cnjaAwf3ql2p7.jpg``, etc. \
    Meanwhile, you can also use the formatting feature of the Python Format Specification Mini-Language, for example: \
    ``{title:.6}_{}`` could shorten the title length to 6 characters like \
    ``HiEveryoneThisIsALongTitle_ScxHjZIdxt5cnjaAwf3ql2p7.jpg`` to ``HiEver_ScxHjZIdxt5cnjaAwf3ql2p7.jpg``
    :ivar revisions: Sub path of revisions directory
    """
    attachments: Path = Path("attachments")
    content: Path = Path("content.txt")
    external_links: Path = Path("external_links.txt")
    file: str = "{id}_{}"
    revisions: Path = Path("revisions")

attachments: Path = Path('attachments') class-attribute instance-attribute

content: Path = Path('content.txt') class-attribute instance-attribute

file: str = '{id}_{}' class-attribute instance-attribute

revisions: Path = Path('revisions') class-attribute instance-attribute

downloader

Downloader

Attributes:

Name Type Description Default
_save_filename

The actual filename for saving.

designated_filename
Source code in ktoolbox/downloader/downloader.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
class Downloader:
    """
    Resumable asynchronous downloader for a single file

    Data is streamed into ``<save path>.<temp_suffix>`` and the temporary file is renamed to the final
    filename once the stream has been consumed. An existing temporary file is continued through an
    HTTP ``Range`` request. On a ``403`` response the download URL is switched to another ``n<index>``
    subdomain of ``config.api.files_netloc`` before the next retry.

    :ivar _save_filename: The actual filename for saving.
    """
    # Subdomain indices (the number in ``n<index>.<files_netloc>``) shared by ALL instances:
    # an index is recorded as succeeded after a 206 response and as failed after a 403 response.
    succeeded_servers: Set[int] = set()
    failure_servers: Set[int] = set()
    # Shared lock held while sleeping ``1 / tps_limit`` seconds, which spaces out request starts
    wait_lock = Lock()

    def __init__(
            self,
            url: str,
            path: Path,
            client: httpx.AsyncClient,
            *,
            buffer_size: Optional[int] = None,
            chunk_size: Optional[int] = None,
            designated_filename: Optional[str] = None,
            server_path: Optional[str] = None,
            post: Optional[Post] = None
    ):
        # noinspection GrazieInspection
        """
        Initialize a file downloader

        - About filename:
            1. If ``designated_filename`` parameter is set, use it.
            2. Else if ``Content-Disposition`` is set in headers, use filename from it.
            3. Else use filename from 'file' part of ``server_path``.

        :param url: Download URL
        :param path: Directory path to save the file, which needs to be sanitized
        :param client: HTTPX AsyncClient
        :param buffer_size: Number of bytes for file I/O buffer, \
        falls back to ``config.downloader.buffer_size`` when not set
        :param chunk_size: Number of bytes for chunk of download stream, \
        falls back to ``config.downloader.chunk_size`` when not set
        :param designated_filename: Manually specify the filename for saving, which needs to be sanitized
        :param server_path: Server path of the file. If ``DownloaderConfiguration.use_bucket`` is enabled, \
        it will be used as the save path. ``run()`` slices this value unconditionally, \
        so it has to be provided before ``run()`` is called.
        :param post: Post object, use for logging.
        """

        self._url = self._initial_url = url
        self._path = path
        self._client = client
        self._buffer_size = buffer_size or config.downloader.buffer_size
        self._chunk_size = chunk_size or config.downloader.chunk_size
        self._designated_filename = designated_filename
        self._server_path = server_path  # /hash[:1]/hash2[1:3]/hash
        self._save_filename = designated_filename  # Prioritize the manually specified filename
        self._post = post

        self._next_subdomain_index = 1
        self._finished_lock = asyncio.Lock()
        self._stop: bool = False

    @property
    def url(self) -> str:
        """
        Download URL

        Not cached: ``run()`` rewrites the URL when it falls back to another subdomain,
        and the retry log has to report the URL that will actually be requested.
        """
        return self._url

    @cached_property
    def path(self) -> Path:
        """Directory path to save the file"""
        return self._path

    @cached_property
    def client(self) -> httpx.AsyncClient:
        """HTTPX AsyncClient"""
        return self._client

    @cached_property
    def buffer_size(self) -> int:
        """Number of bytes for file I/O buffer"""
        return self._buffer_size

    @cached_property
    def chunk_size(self) -> int:
        """Number of bytes for chunk of download stream"""
        return self._chunk_size

    @cached_property
    def post(self) -> Optional[Post]:
        """Post that the file belongs to"""
        return self._post

    @property
    def filename(self) -> Optional[str]:
        """Actual filename of the download file"""
        return self._save_filename

    @property
    def finished(self) -> bool:
        """
        Check if the download finished

        :return: ``False`` if the download is **in progress**, ``True`` otherwise
        """
        return not self._finished_lock.locked()

    def cancel(self):
        """
        Cancel the download

        It will raise ``asyncio.CancelledError`` in ``chunk_iterator`` (writing chunk to file) iteration.
        """
        self._stop = True

    @staticmethod
    def _total_size_from_headers(headers, temp_size: int) -> Optional[int]:
        """
        Work out the complete file size from the response headers

        :param headers: Response headers of the ranged request
        :param temp_size: Number of bytes already present in the temporary file (start of the requested range)
        :return: Total size in bytes, ``None`` if the headers do not reveal it
        """
        range_str = headers.get("Content-Range")
        if range_str:
            # e.g. ``bytes 100-999/1000``; the part after ``/`` is ``*`` when the total is unknown
            try:
                return int(range_str.split("/")[-1])
            except ValueError:
                pass
        content_length = headers.get("Content-Length")
        if content_length:
            # ``Content-Length`` of a ranged response only covers the bytes that are still to be sent,
            # so the bytes already downloaded are added to get the size of the whole file
            try:
                return temp_size + int(content_length)
            except ValueError:
                # Invalid Content-Length, continue with download
                pass
        return None

    def _size_filter_ret(self, total_size: Optional[int], save_filepath: Path) -> Optional[DownloaderRet]:
        """
        Apply ``config.job.min_file_size`` / ``config.job.max_file_size`` to a file

        :param total_size: Total file size in bytes, ``None`` if unknown (the file is then never filtered out)
        :param save_filepath: Path the file would be saved to, only used in the returned message
        :return: ``DownloaderRet`` describing the skip, ``None`` if the file should be downloaded
        """
        if total_size is None:
            return None
        min_file_size = config.job.min_file_size
        max_file_size = config.job.max_file_size

        # Check minimum size
        if min_file_size is not None and total_size < min_file_size:
            logger.debug(f"Skipping file {self._save_filename} (size: {total_size} bytes) - below minimum size {min_file_size}")
            return DownloaderRet(
                code=RetCodeEnum.FileExisted,  # Use FileExisted to indicate it was skipped intentionally
                message=generate_msg(
                    f"File skipped due to size filtering (size: {total_size} bytes, below minimum: {min_file_size})",
                    path=save_filepath
                )
            )

        # Check maximum size
        if max_file_size is not None and total_size > max_file_size:
            logger.debug(f"Skipping file {self._save_filename} (size: {total_size} bytes) - above maximum size {max_file_size}")
            return DownloaderRet(
                code=RetCodeEnum.FileExisted,  # Use FileExisted to indicate it was skipped intentionally
                message=generate_msg(
                    f"File skipped due to size filtering (size: {total_size} bytes, above maximum: {max_file_size})",
                    path=save_filepath
                )
            )
        return None

    @tenacity.retry(
        stop=stop_never if config.downloader.retry_stop_never else stop_after_attempt(config.downloader.retry_times),
        wait=wait_fixed(config.downloader.retry_interval),
        retry=retry_if_result(
            lambda x: not x and x.code != RetCodeEnum.FileExisted
        ) | retry_if_exception(
            lambda x: isinstance(x, httpx.HTTPError)
        ),
        before_sleep=lambda x: logger.warning(
            generate_msg(
                f"Retrying ({x.attempt_number})",
                file=x.args[0].filename,
                post_name=x.args[0].post.title if x.args[0].post else None,
                post_id=x.args[0].post.id if x.args[0].post else None,
                message=x.outcome.result().message if not x.outcome.failed else None,
                exception=x.outcome.exception(),
                url=x.args[0].url
            )
        ),
        reraise=True
    )
    async def run(
            self,
            *,
            sync_callable: Optional[Callable[["Downloader"], Any]] = None,
            async_callable: Optional[Callable[["Downloader"], Coroutine]] = None,
            tqdm_class: Optional[Type[std_tqdm]] = None,
            progress: bool = False
    ) -> DownloaderRet[str]:
        """
        Start to download

        The call is wrapped by ``tenacity.retry``: it is repeated when it raises ``httpx.HTTPError`` or
        returns a falsy result whose code is not ``RetCodeEnum.FileExisted``.

        - Result codes produced here:
            1. ``RetCodeEnum.FileExisted`` if the duplicate file check matched, \
            or the file was skipped by the size filter (never retried).
            2. ``RetCodeEnum.GeneralFailure`` if the response status is not ``206 Partial Content``. \
            On ``403`` the URL is switched to another subdomain for the next attempt, \
            on any other status it is reset to the initial URL.

        :param sync_callable: Sync callable for download finished
        :param async_callable: Async callable for download finished
        :param tqdm_class: ``tqdm`` class to replace default ``tqdm.asyncio.tqdm``
        :param progress: Show progress bar
        :return: ``DownloaderRet`` which contain the actual output filename
        :raise CancelledError: Job cancelled
        """
        # Get filename to check if file exists (First-time duplicate file check)
        # Check it before request to make progress more efficiency
        server_relpath = self._server_path[1:]
        server_relpath_without_params = urlparse(server_relpath).path
        server_path_filename = unquote(Path(server_relpath_without_params).name)
        # Priority order can be referenced from the constructor's documentation
        save_filepath = self._path / (self._save_filename or server_path_filename)

        # Get bucket file path
        bucket_file_path: Optional[Path] = None
        if config.downloader.use_bucket:
            bucket_file_path = config.downloader.bucket_path / server_relpath

        # Check if the file exists
        file_existed, ret_msg = duplicate_file_check(save_filepath, bucket_file_path)
        if file_existed:
            return DownloaderRet(
                code=RetCodeEnum.FileExisted,
                message=generate_msg(
                    ret_msg,
                    path=save_filepath
                )
            )

        tqdm_class: Type[std_tqdm] = tqdm_class or tqdm.asyncio.tqdm
        # Rate limit: every download waits its turn on the shared lock before sending the request
        async with self.wait_lock:
            await asyncio.sleep(1 / config.downloader.tps_limit)
        async with self._finished_lock:
            # Resume from the size of an already existing temporary file
            temp_filepath = Path(f"{save_filepath}.{config.downloader.temp_suffix}")
            temp_size = temp_filepath.stat().st_size if temp_filepath.exists() else 0

            async with self._client.stream(
                    method="GET",
                    url=config.downloader.reverse_proxy.format(self._url),
                    follow_redirects=True,
                    timeout=config.downloader.timeout,
                    headers={"Range": f"bytes={temp_size}-"}
            ) as res:  # type: httpx.Response
                # Index of the subdomain that answered (``n<index>.…``), None if the host has no such prefix
                try:
                    subdomain_index = int(res.url.netloc.split(b".")[0][1:])
                except ValueError:
                    subdomain_index = None
                if res.status_code == 403:
                    if subdomain_index is not None:
                        self.succeeded_servers.discard(subdomain_index)
                        self.failure_servers.add(subdomain_index)
                    # try succeeded servers first
                    subdomain_index = next(iter(self.succeeded_servers), None)
                    if subdomain_index is None:
                        subdomain_index = self._next_subdomain_index
                        # Update self._next_subdomain_index
                        ## index fallback to 1 when a server after failure_servers has been tried
                        if self.failure_servers and self._next_subdomain_index > max(self.failure_servers):
                            self._next_subdomain_index = 1
                            self.failure_servers.clear()
                        ## otherwise, increment the index and avoid failure_servers
                        else:
                            self._next_subdomain_index += 1
                            while self._next_subdomain_index in self.failure_servers:
                                self._next_subdomain_index += 1
                        msg = "Download failed, trying next subdomain"
                    else:
                        msg = "Download failed, trying succeeded subdomains"
                    new_netloc = f"n{subdomain_index}.{config.api.files_netloc}"
                    # The next retry will request the same resource on the chosen subdomain
                    self._url = str(res.url.copy_with(netloc=new_netloc.encode()))
                    return DownloaderRet(
                        code=RetCodeEnum.GeneralFailure,
                        message=generate_msg(
                            msg,
                            nex_subdomain=new_netloc,
                            status_code=res.status_code,
                            filename=save_filepath
                        )
                    )
                elif res.status_code != httpx.codes.PARTIAL_CONTENT:
                    self._url = self._initial_url
                    return DownloaderRet(
                        code=RetCodeEnum.GeneralFailure,
                        message=generate_msg(
                            "Download failed",
                            status_code=res.status_code,
                            filename=save_filepath
                        )
                    )
                else:
                    if subdomain_index is not None:
                        self.failure_servers.discard(subdomain_index)
                        self.succeeded_servers.add(subdomain_index)

                # Get filename for saving and check if file exists (Second-time duplicate file check)
                # Priority order can be referenced from the constructor's documentation
                self._save_filename = self._designated_filename or sanitize_filename(
                    filename_from_headers(res.headers)
                ) or server_path_filename
                save_filepath = self._path / self._save_filename
                file_existed, ret_msg = duplicate_file_check(save_filepath, bucket_file_path)
                if file_existed:
                    return DownloaderRet(
                        code=RetCodeEnum.FileExisted,
                        message=generate_msg(
                            ret_msg,
                            path=save_filepath
                        )
                    )

                # Download
                total_size = self._total_size_from_headers(res.headers, temp_size)

                # Check file size filtering before anything is written to disk
                skipped_ret = self._size_filter_ret(total_size, save_filepath)
                if skipped_ret is not None:
                    return skipped_ret

                async with aiofiles.open(str(temp_filepath), "ab", self._buffer_size) as f:
                    chunk_iterator = res.aiter_bytes(self._chunk_size)
                    # The context manager closes the progress bar even if the download fails or is cancelled
                    with tqdm_class(
                        desc=self._save_filename,
                        total=total_size,
                        initial=temp_size,
                        disable=not progress,
                        unit="B",
                        unit_scale=True
                    ) as t:
                        async for chunk in chunk_iterator:
                            if self._stop:
                                raise CancelledError
                            await f.write(chunk)
                            t.update(len(chunk))  # Update progress bar

            # Download finished
            if config.downloader.use_bucket:
                # Hard-link the file into the bucket before it is moved to its final name
                bucket_file_path.parent.mkdir(parents=True, exist_ok=True)
                os.link(temp_filepath, bucket_file_path)
            final_filepath = self._path / self._save_filename
            temp_filepath.rename(final_filepath)

            # Set file time from headers
            if config.downloader.keep_metadata:
                try:
                    utime_from_headers(res.headers, final_filepath)
                except (OSError, ValueError, TypeError) as e:
                    # Best effort: a wrong timestamp must not fail a completed download
                    logger.warning(
                        generate_msg(
                            "Failed to set file time from headers",
                            file=self._save_filename,
                            exception=e
                        )
                    )

            # Callbacks
            if sync_callable:
                sync_callable(self)
            if async_callable:
                await async_callable(self)

            return DownloaderRet(
                data=self._save_filename
            ) if self._save_filename else DownloaderRet(
                code=RetCodeEnum.GeneralFailure,
                message=generate_msg(
                    "Download failed",
                    filename=self._designated_filename
                )
            )

    __call__ = run

__call__ = run class-attribute instance-attribute

buffer_size: int cached property

Number of bytes for file I/O buffer

chunk_size: int cached property

Number of bytes for chunk of download stream

client: httpx.AsyncClient cached property

HTTPX AsyncClient

failure_servers: Set[int] = set() class-attribute instance-attribute

filename: Optional[str] property

Actual filename of the download file

finished: bool property

Check if the download finished

Returns:

Type Description
bool

False if the download is in progress, True otherwise

path: Path cached property

Directory path to save the file

post: Post cached property

Post that the file belongs to

succeeded_servers: Set[int] = set() class-attribute instance-attribute

url: str cached property

Download URL

wait_lock = Lock() class-attribute instance-attribute

__init__(url, path, client, *, buffer_size=None, chunk_size=None, designated_filename=None, server_path=None, post=None)

Initialize a file downloader

  • About filename:
    1. If designated_filename parameter is set, use it.
    2. Else if Content-Disposition is set in headers, use filename from it.
    3. Else use filename from 'file' part of server_path.

Parameters:

Name Type Description Default
url str

Download URL

required
path Path

Directory path to save the file, which needs to be sanitized

required
client AsyncClient

HTTPX AsyncClient

required
buffer_size int

Number of bytes for file I/O buffer

None
chunk_size int

Number of bytes for chunk of download stream

None
designated_filename str

Manually specify the filename for saving, which needs to be sanitized

None
server_path str

Server path of the file. If DownloaderConfiguration.use_bucket is enabled, it will be used as the save path.

None
post Post

Post object, use for logging.

None
Source code in ktoolbox/downloader/downloader.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
def __init__(
        self,
        url: str,
        path: Path,
        client: httpx.AsyncClient,
        *,
        buffer_size: int = None,
        chunk_size: int = None,
        designated_filename: str = None,
        server_path: str = None,
        post: Post = None
):
    # noinspection GrazieInspection
    """
    Initialize a file downloader

    - About filename:
        1. If ``designated_filename`` parameter is set, use it.
        2. Else if ``Content-Disposition`` is set in headers, use filename from it.
        3. Else use filename from 'file' part of ``server_path``.

    :param url: Download URL
    :param path: Directory path to save the file, which needs to be sanitized
    :param client: HTTPX AsyncClient
    :param buffer_size: Number of bytes for file I/O buffer; a falsy value falls back to \
    ``config.downloader.buffer_size``
    :param chunk_size: Number of bytes for chunk of download stream; a falsy value falls back to \
    ``config.downloader.chunk_size``
    :param designated_filename: Manually specify the filename for saving, which needs to be sanitized
    :param server_path: Server path of the file. If ``DownloaderConfiguration.use_bucket`` is enabled, \
    it will be used as the save path.
    :param post: Post object, used for logging.
    """

    # ``run`` may point ``_url`` at another subdomain; ``_initial_url`` keeps the original to restore it
    self._url = self._initial_url = url
    self._path = path
    self._client = client
    # ``or`` means both ``None`` and ``0`` select the configured default
    self._buffer_size = buffer_size or config.downloader.buffer_size
    self._chunk_size = chunk_size or config.downloader.chunk_size
    self._designated_filename = designated_filename
    self._server_path = server_path  # /hash[:1]/hash2[1:3]/hash
    self._save_filename = designated_filename  # Prioritize the manually specified filename
    self._post = post

    # Index used by ``run`` to build the next ``n{index}`` subdomain after an HTTP 403
    self._next_subdomain_index = 1
    # Held by ``run`` while it requests and writes the file
    self._finished_lock = asyncio.Lock()
    # Set by ``cancel``; ``run`` checks it before writing each chunk
    self._stop: bool = False

cancel()

Cancel the download

This makes run raise asyncio.CancelledError while it iterates over chunk_iterator (writing chunks to the file).

Source code in ktoolbox/downloader/downloader.py
127
128
129
130
131
132
133
def cancel(self):
    """
    Cancel the download

    Only sets the stop flag. ``run`` checks the flag before writing each chunk from
    ``chunk_iterator`` and raises ``asyncio.CancelledError`` once it is set.
    """
    self._stop = True

run(*, sync_callable=None, async_callable=None, tqdm_class=None, progress=False) async

Start to download

Parameters:

Name Type Description Default
sync_callable Callable[[Downloader], Any]

Sync callable for download finished

None
async_callable Callable[[Downloader], Coroutine]

Async callable for download finished

None
tqdm_class Type[tqdm]

tqdm class to replace default tqdm.asyncio.tqdm

None
progress bool

Show progress bar

False

Returns:

Type Description
DownloaderRet[str]

DownloaderRet which contains the actual output filename

Raises:

Type Description
CancelledError

Job cancelled

Source code in ktoolbox/downloader/downloader.py
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
@tenacity.retry(
    stop=stop_never if config.downloader.retry_stop_never else stop_after_attempt(config.downloader.retry_times),
    wait=wait_fixed(config.downloader.retry_interval),
    # Retry on a falsy result whose code is not ``FileExisted`` (used for intentional skips),
    # and on any ``httpx.HTTPError``
    retry=retry_if_result(
        lambda x: not x and x.code != RetCodeEnum.FileExisted
    ) | retry_if_exception(
        lambda x: isinstance(x, httpx.HTTPError)
    ),
    before_sleep=lambda x: logger.warning(
        generate_msg(
            f"Retrying ({x.attempt_number})",
            file=x.args[0].filename,
            post_name=x.args[0].post.title if x.args[0].post else None,
            post_id=x.args[0].post.id if x.args[0].post else None,
            message=x.outcome.result().message if not x.outcome.failed else None,
            exception=x.outcome.exception(),
            url=x.args[0].url
        )
    ),
    reraise=True
)
async def run(
        self,
        *,
        sync_callable: Callable[["Downloader"], Any] = None,
        async_callable: Callable[["Downloader"], Coroutine] = None,
        tqdm_class: Type[std_tqdm] = None,
        progress: bool = False
) -> DownloaderRet[str]:
    """
    Start to download

    The download is resumable: bytes already present in the temporary file are skipped by
    sending a ``Range`` header, and only a ``206 Partial Content`` response is accepted.

    - ``RetCodeEnum.FileExisted`` is returned when the target already exists, and also when
      the file is skipped by the ``config.job.min_file_size`` / ``max_file_size`` filter.
    - ``RetCodeEnum.GeneralFailure`` is returned for a non-206 response; on HTTP 403 the URL
      is switched to another ``n{index}`` subdomain before returning, so the retry uses it.

    :param sync_callable: Sync callable for download finished
    :param async_callable: Async callable for download finished
    :param tqdm_class: ``tqdm`` class to replace default ``tqdm.asyncio.tqdm``
    :param progress: Show progress bar
    :return: ``DownloaderRet`` which contains the actual output filename
    :raise CancelledError: Job cancelled
    """
    # Get filename to check if file exists (First-time duplicate file check)
    # Check it before the request to avoid needless network traffic
    server_relpath = self._server_path[1:]  # drop the first character (presumably the leading "/")
    server_relpath_without_params = urlparse(server_relpath).path
    server_path_filename = unquote(Path(server_relpath_without_params).name)
    # Priority order can be referenced from the constructor's documentation
    save_filepath = self._path / (self._save_filename or server_path_filename)

    # Get bucket file path
    bucket_file_path: Optional[Path] = None
    if config.downloader.use_bucket:
        bucket_file_path = config.downloader.bucket_path / server_relpath

    # Check if the file exists
    file_existed, ret_msg = duplicate_file_check(save_filepath, bucket_file_path)
    if file_existed:
        return DownloaderRet(
            code=RetCodeEnum.FileExisted,
            message=generate_msg(
                ret_msg,
                path=save_filepath
            )
        )

    tqdm_class: Type[std_tqdm] = tqdm_class or tqdm.asyncio.tqdm
    # The class-level lock serializes this sleep across all downloaders (rate limiting)
    async with self.wait_lock:
        await asyncio.sleep(1 / config.downloader.tps_limit)
    async with self._finished_lock:
        temp_filepath = Path(f"{save_filepath}.{config.downloader.temp_suffix}")
        # Resume from the size of a previously written temporary file, if any
        temp_size = temp_filepath.stat().st_size if temp_filepath.exists() else 0

        async with self._client.stream(
                method="GET",
                url=config.downloader.reverse_proxy.format(self._url),
                follow_redirects=True,
                timeout=config.downloader.timeout,
                headers={"Range": f"bytes={temp_size}-"}
        ) as res:  # type: httpx.Response
            # Hosts shaped like ``n<index>.<...>`` carry the file-server index
            try:
                subdomain_index = int(res.url.netloc.split(b".")[0][1:])
            except ValueError:
                subdomain_index = None
            if res.status_code == 403:
                if subdomain_index is not None:
                    self.succeeded_servers.discard(subdomain_index)
                    self.failure_servers.add(subdomain_index)
                # try succeeded servers first
                subdomain_index = next(iter(self.succeeded_servers), None)
                if subdomain_index is None:
                    subdomain_index = self._next_subdomain_index
                    # Update self._next_subdomain_index
                    # index fallback to 1 when a server after failure_servers has been tried
                    if self.failure_servers and self._next_subdomain_index > max(self.failure_servers):
                        self._next_subdomain_index = 1
                        self.failure_servers.clear()
                    # otherwise, increment the index and avoid failure_servers
                    else:
                        self._next_subdomain_index += 1
                        while self._next_subdomain_index in self.failure_servers:
                            self._next_subdomain_index += 1
                    msg = "Download failed, trying next subdomain"
                else:
                    msg = "Download failed, trying succeeded subdomains"
                new_netloc = f"n{subdomain_index}.{config.api.files_netloc}"
                # The retry triggered by the failure result below will request this URL
                self._url = str(res.url.copy_with(netloc=new_netloc.encode()))
                return DownloaderRet(
                    code=RetCodeEnum.GeneralFailure,
                    message=generate_msg(
                        msg,
                        nex_subdomain=new_netloc,
                        status_code=res.status_code,
                        filename=save_filepath
                    )
                )
            elif res.status_code != httpx.codes.PARTIAL_CONTENT:
                self._url = self._initial_url
                return DownloaderRet(
                    code=RetCodeEnum.GeneralFailure,
                    message=generate_msg(
                        "Download failed",
                        status_code=res.status_code,
                        filename=save_filepath
                    )
                )
            else:
                if subdomain_index is not None:
                    self.failure_servers.discard(subdomain_index)
                    self.succeeded_servers.add(subdomain_index)

            # Get filename for saving and check if file exists (Second-time duplicate file check)
            # Priority order can be referenced from the constructor's documentation
            self._save_filename = self._designated_filename or sanitize_filename(
                filename_from_headers(res.headers)
            ) or server_path_filename
            save_filepath = self._path / self._save_filename
            file_existed, ret_msg = duplicate_file_check(save_filepath, bucket_file_path)
            if file_existed:
                return DownloaderRet(
                    code=RetCodeEnum.FileExisted,
                    message=generate_msg(
                        ret_msg,
                        path=save_filepath
                    )
                )

            # Determine the total file size: prefer the complete length from ``Content-Range``
            total_size: Optional[int] = None
            if range_str := res.headers.get("Content-Range"):
                try:
                    total_size = int(range_str.split("/")[-1])
                except ValueError:
                    # Complete length is unknown (e.g. ``bytes 0-99/*``), try ``Content-Length``
                    total_size = None
            if total_size is None and (content_length := res.headers.get("Content-Length")):
                try:
                    # The body of this 206 response only holds the bytes from ``temp_size`` on,
                    # so the bytes already on disk are added to get the total size
                    total_size = temp_size + int(content_length)
                except ValueError:
                    # Invalid Content-Length, continue with download
                    total_size = None

            # Check file size filtering if enabled and we have the total size
            if total_size is not None:
                min_file_size = config.job.min_file_size
                max_file_size = config.job.max_file_size
                skip_reason: Optional[str] = None
                if min_file_size is not None and total_size < min_file_size:
                    logger.debug(f"Skipping file {self._save_filename} (size: {total_size} bytes) - below minimum size {min_file_size}")
                    skip_reason = f"below minimum: {min_file_size}"
                elif max_file_size is not None and total_size > max_file_size:
                    logger.debug(f"Skipping file {self._save_filename} (size: {total_size} bytes) - above maximum size {max_file_size}")
                    skip_reason = f"above maximum: {max_file_size}"
                if skip_reason is not None:
                    return DownloaderRet(
                        code=RetCodeEnum.FileExisted,  # Use FileExisted to indicate it was skipped intentionally
                        message=generate_msg(
                            f"File skipped due to size filtering (size: {total_size} bytes, {skip_reason})",
                            path=save_filepath
                        )
                    )

            # Download: append to the temporary file so that an interrupted download can resume
            async with aiofiles.open(str(temp_filepath), "ab", self._buffer_size) as f:
                chunk_iterator = res.aiter_bytes(self._chunk_size)
                # The context manager closes the progress bar on success, failure and cancellation
                with tqdm_class(
                    desc=self._save_filename,
                    total=total_size,
                    initial=temp_size,
                    disable=not progress,
                    unit="B",
                    unit_scale=True
                ) as t:
                    async for chunk in chunk_iterator:
                        if self._stop:
                            raise CancelledError
                        await f.write(chunk)
                        t.update(len(chunk))  # Update progress bar

        # Download finished
        if config.downloader.use_bucket:
            bucket_file_path.parent.mkdir(parents=True, exist_ok=True)
            os.link(temp_filepath, bucket_file_path)
        final_filepath = self._path / self._save_filename
        temp_filepath.rename(final_filepath)

        # Set file time from headers
        if config.downloader.keep_metadata:
            try:
                utime_from_headers(res.headers, final_filepath)
            except (OSError, ValueError, TypeError) as e:
                # Best effort: a missing or malformed date must not fail a finished download
                logger.warning(
                    generate_msg(
                        "Failed to set file time from headers",
                        file=self._save_filename,
                        exception=e
                    )
                )

        # Callbacks
        if sync_callable:
            sync_callable(self)
        if async_callable:
            await async_callable(self)

        return DownloaderRet(
            data=self._save_filename
        ) if self._save_filename else DownloaderRet(
            code=RetCodeEnum.GeneralFailure,
            message=generate_msg(
                "Download failed",
                filename=self._designated_filename
            )
        )

DownloaderRet

Bases: BaseRet[_T]

Return data model of downloader call

Source code in ktoolbox/downloader/base.py
10
11
12
class DownloaderRet(BaseRet[_T]):
    """Return data model of downloader call"""

duplicate_file_check(local_file_path, bucket_file_path=None)

Check whether the file already exists, and link the bucket file to the local file path if DownloaderConfiguration.use_bucket is enabled.

Parameters:

Name Type Description Default
local_file_path Path

Download target path

required
bucket_file_path Path

The bucket filepath of the local download path

None

Returns:

Type Description
Tuple[bool, Optional[str]]

(if file existed, message)

Source code in ktoolbox/downloader/utils.py
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def duplicate_file_check(local_file_path: Path, bucket_file_path: Path = None) -> Tuple[bool, Optional[str]]:
    """
    Tell whether the download target already exists.

    The bucket path is checked when it is given, otherwise the local path. With
    ``DownloaderConfiguration.use_bucket`` enabled, a file that exists in the bucket but not
    locally is hard-linked to the local path.

    :param local_file_path: Download target path
    :param bucket_file_path: The bucket filepath of the local download path
    :return: ``(if file existed, message)``
    """
    checked_path = bucket_file_path or local_file_path
    if not checked_path.is_file():
        return False, None

    if not config.downloader.use_bucket:
        return True, "Download file already exists, skipping"

    if local_file_path.is_file():
        return True, "Download file already exists in both bucket and local, skipping"

    # Present in the bucket only: expose it at the local path through a hard link
    os.link(bucket_file_path, local_file_path)
    return True, "Download file already exists in bucket, linking to local path"

filename_from_headers(headers)

Get file name from headers.

Parse from Content-Disposition.

  • Example:

    filename_from_headers({'Content-Disposition': 'attachment;filename*=utf-8''README%2Emd;filename="README.md"'})
    

  • Return:

    README.md
    

Parameters:

Name Type Description Default
headers Dict[str, str]

HTTP headers

required

Returns:

Type Description
Optional[str]

File name

Source code in ktoolbox/downloader/utils.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def filename_from_headers(headers: Dict[str, str]) -> Optional[str]:
    """
    Extract the file name from the ``Content-Disposition`` header.

    The extended ``filename*`` option (``charset''percent-encoded-name``) takes precedence;
    the plain ``filename`` option is decoded with ``config.downloader.encoding``.

    - Example:
    ```
    filename_from_headers({'Content-Disposition': 'attachment;filename*=utf-8\'\'README%2Emd;filename="README.md"'})
    ```

    - Return:
    ```
    README.md
    ```

    :param headers: HTTP headers
    :return: File name, or ``None`` when the header or a usable file name is missing
    """
    # Look the header up under both spellings, for mappings that are case-sensitive
    disposition = headers.get("Content-Disposition") or headers.get("content-disposition")
    if not disposition:
        return None

    options = parse_header(disposition)  # alternative: `parse_header` in `utils.py`

    extended_name = options.get("filename*")
    if extended_name:
        pieces = extended_name.split("''")
        if len(pieces) == 2:
            charset, name = pieces
            return urllib.parse.unquote(name, charset)

    plain_name = options.get("filename")
    if plain_name:
        return urllib.parse.unquote(plain_name, config.downloader.encoding)
    return None

utime_from_headers(headers, path)

Run os.utime on specific file using Last-Modified or Date in HTTP headers.

Parameters:

Name Type Description Default
headers Dict[str, str]

HTTP Headers

required
path Union[Path, str]

File path

required
Source code in ktoolbox/downloader/utils.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
def utime_from_headers(headers: Dict[str, str], path: Union[Path, str]) -> None:
    """
    Run ``os.utime`` on specific file using ``Last-Modified`` or ``Date`` in HTTP headers.

    ``Last-Modified`` is preferred; ``Date`` is only used when ``Last-Modified`` is absent.
    Access time and modification time are both set to the chosen timestamp. Nothing is
    changed when neither header is present.

    :param headers: HTTP Headers
    :param path: File path
    :raise: OSError, ValueError, TypeError
    """
    last_modified = headers.get("Last-Modified")
    date_header = headers.get("Date")
    # Both headers are parsed up front, so a malformed value raises even if it would not be used
    modified_at = email.utils.parsedate_to_datetime(last_modified).timestamp() if last_modified else None
    served_at = email.utils.parsedate_to_datetime(date_header).timestamp() if date_header else None

    # Compare against ``None`` rather than truthiness: ``0.0`` (the epoch) is a valid timestamp
    timestamp = modified_at if modified_at is not None else served_at
    if timestamp is not None:
        os.utime(path, (timestamp, timestamp))

base

__all__ = ['DownloaderRet'] module-attribute

DownloaderRet

Bases: BaseRet[_T]

Return data model of downloader call

Source code in ktoolbox/downloader/base.py
10
11
12
class DownloaderRet(BaseRet[_T]):
    """Return data model of downloader call"""

downloader

__all__ = ['Downloader'] module-attribute

Downloader

Attributes:

Name Type Description Default
_save_filename

The actual filename for saving.

designated_filename
Source code in ktoolbox/downloader/downloader.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
class Downloader:
    """
    Asynchronous downloader for a single file, with resumable transfer and download-subdomain fallback.

    :ivar _save_filename: The actual filename for saving.
    """
    # NOTE: the three attributes below live on the class, so every ``Downloader`` instance shares them.
    # Subdomain indexes (the ``<index>`` of ``n<index>.<files_netloc>``) that answered with 206
    succeeded_servers: Set[int] = set()
    # Subdomain indexes that answered with 403
    failure_servers: Set[int] = set()
    # Serializes the ``1 / tps_limit`` pause that ``run`` takes before each request
    wait_lock = Lock()

    def __init__(
            self,
            url: str,
            path: Path,
            client: httpx.AsyncClient,
            *,
            buffer_size: int = None,
            chunk_size: int = None,
            designated_filename: str = None,
            server_path: str = None,
            post: Post = None
    ):
        # noinspection GrazieInspection
        """
        Initialize a file downloader

        - About filename:
            1. If ``designated_filename`` parameter is set, use it.
            2. Else if ``Content-Disposition`` is set in headers, use filename from it.
            3. Else use filename from 'file' part of ``server_path``.

        :param url: Download URL
        :param path: Directory path to save the file, which needs to be sanitized
        :param client: HTTPX AsyncClient
        :param buffer_size: Number of bytes for file I/O buffer, falls back to the configured value
        :param chunk_size: Number of bytes for chunk of download stream, falls back to the configured value
        :param designated_filename: Manually specify the filename for saving, which needs to be sanitized
        :param server_path: Server path of the file. ``run`` slices it to derive the fallback filename,
            so it has to be set before downloading. If ``DownloaderConfiguration.use_bucket`` is enabled,
            it is also used as the file path inside the bucket.
        :param post: Post object, use for logging.
        """

        # ``_url`` may be rewritten by ``run`` (subdomain fallback); ``_initial_url`` keeps the original
        self._url = self._initial_url = url
        self._path = path
        self._client = client
        self._buffer_size = buffer_size or config.downloader.buffer_size
        self._chunk_size = chunk_size or config.downloader.chunk_size
        self._designated_filename = designated_filename
        self._server_path = server_path  # /hash[:1]/hash2[1:3]/hash
        self._save_filename = designated_filename  # Prioritize the manually specified filename
        self._post = post

        self._next_subdomain_index = 1
        # Held by ``run`` for the whole transfer; ``finished`` reports whether it is free
        self._finished_lock = asyncio.Lock()
        self._stop: bool = False

    @property
    def url(self) -> str:
        """Download URL currently in use"""
        # Deliberately NOT a ``cached_property``: ``run`` reassigns ``self._url`` when it switches
        # subdomain, and a cached value would keep reporting a stale URL (e.g. in the retry log).
        return self._url

    @cached_property
    def path(self) -> Path:
        """Directory path to save the file"""
        return self._path

    @cached_property
    def client(self) -> httpx.AsyncClient:
        """HTTPX AsyncClient"""
        return self._client

    @cached_property
    def buffer_size(self) -> int:
        """Number of bytes for file I/O buffer"""
        return self._buffer_size

    @cached_property
    def chunk_size(self) -> int:
        """Number of bytes for chunk of download stream"""
        return self._chunk_size

    @cached_property
    def post(self) -> Post:
        """Post that the file belongs to"""
        return self._post

    @property
    def filename(self) -> Optional[str]:
        """Actual filename of the download file"""
        return self._save_filename

    @property
    def finished(self) -> bool:
        """
        Check if the download finished

        :return: ``False`` while the download is **in progress**, ``True`` otherwise
        """
        return not self._finished_lock.locked()

    def cancel(self):
        """
        Cancel the download

        It will raise ``asyncio.CancelledError`` in ``chunk_iterator`` (writing chunk to file) iteration.
        """
        self._stop = True

    @tenacity.retry(
        stop=stop_never if config.downloader.retry_stop_never else stop_after_attempt(config.downloader.retry_times),
        wait=wait_fixed(config.downloader.retry_interval),
        # Retry on a falsy result (presumably a failed ``DownloaderRet`` - confirm against ``BaseRet``)
        # unless the file was skipped on purpose, and on any HTTPX error.
        retry=retry_if_result(
            lambda x: not x and x.code != RetCodeEnum.FileExisted
        ) | retry_if_exception(
            lambda x: isinstance(x, httpx.HTTPError)
        ),
        before_sleep=lambda x: logger.warning(
            generate_msg(
                f"Retrying ({x.attempt_number})",
                file=x.args[0].filename,
                post_name=x.args[0].post.title if x.args[0].post else None,
                post_id=x.args[0].post.id if x.args[0].post else None,
                message=x.outcome.result().message if not x.outcome.failed else None,
                exception=x.outcome.exception(),
                url=x.args[0].url
            )
        ),
        reraise=True
    )
    async def run(
            self,
            *,
            sync_callable: Callable[["Downloader"], Any] = None,
            async_callable: Callable[["Downloader"], Coroutine] = None,
            tqdm_class: Type[std_tqdm] = None,
            progress: bool = False
    ) -> DownloaderRet[str]:
        """
        Start to download

        The data is appended to a temporary file (``<target>.<temp_suffix>``) that is requested with a
        ``Range`` header starting at its current size, and renamed to the final name once complete.

        :param sync_callable: Sync callable invoked after the download finished
        :param async_callable: Async callable awaited after the download finished
        :param tqdm_class: ``tqdm`` class to replace default ``tqdm.asyncio.tqdm``
        :param progress: Show progress bar
        :return: ``DownloaderRet`` which contains the actual output filename
        :raise CancelledError: Job cancelled
        """
        # Get filename to check if file exists (First-time duplicate file check)
        # Check it before request to make progress more efficiency
        server_relpath = self._server_path[1:]
        server_relpath_without_params = urlparse(server_relpath).path
        server_path_filename = unquote(Path(server_relpath_without_params).name)
        # Priority order can be referenced from the constructor's documentation
        save_filepath = self._path / (self._save_filename or server_path_filename)

        # Get bucket file path
        bucket_file_path: Optional[Path] = None
        if config.downloader.use_bucket:
            bucket_file_path = config.downloader.bucket_path / server_relpath

        # Check if the file exists
        file_existed, ret_msg = duplicate_file_check(save_filepath, bucket_file_path)
        if file_existed:
            return DownloaderRet(
                code=RetCodeEnum.FileExisted,
                message=generate_msg(
                    ret_msg,
                    path=save_filepath
                )
            )

        tqdm_class: Type[std_tqdm] = tqdm_class or tqdm.asyncio.tqdm
        # Rate limiting: the shared lock makes the pauses of all downloaders run one after another
        async with self.wait_lock:
            await asyncio.sleep(1 / config.downloader.tps_limit)
        async with self._finished_lock:
            temp_filepath = Path(f"{save_filepath}.{config.downloader.temp_suffix}")
            # Resume from the bytes already present in the temporary file
            temp_size = temp_filepath.stat().st_size if temp_filepath.exists() else 0

            async with self._client.stream(
                    method="GET",
                    url=config.downloader.reverse_proxy.format(self._url),
                    follow_redirects=True,
                    timeout=config.downloader.timeout,
                    headers={"Range": f"bytes={temp_size}-"}
            ) as res:  # type: httpx.Response
                # Subdomain index of the server that answered ("n3.example" -> 3), if the host has one
                try:
                    subdomain_index = int(res.url.netloc.split(b".")[0][1:])
                except ValueError:
                    subdomain_index = None
                if res.status_code == 403:
                    if subdomain_index is not None:
                        self.succeeded_servers.discard(subdomain_index)
                        self.failure_servers.add(subdomain_index)
                    # try succeeded servers first
                    subdomain_index = next(iter(self.succeeded_servers), None)
                    if subdomain_index is None:
                        subdomain_index = self._next_subdomain_index
                        # Update self._next_subdomain_index
                        ## index fallback to 1 when a server after failure_servers has been tried
                        if self.failure_servers and self._next_subdomain_index > max(self.failure_servers):
                            self._next_subdomain_index = 1
                            self.failure_servers.clear()
                        ## otherwise, increment the index and avoid failure_servers
                        else:
                            self._next_subdomain_index += 1
                            while self._next_subdomain_index in self.failure_servers:
                                self._next_subdomain_index += 1
                        msg = "Download failed, trying next subdomain"
                    else:
                        msg = "Download failed, trying succeeded subdomains"
                    new_netloc = f"n{subdomain_index}.{config.api.files_netloc}"
                    # The next retry attempt will request the file from the new subdomain
                    self._url = str(res.url.copy_with(netloc=new_netloc.encode()))
                    return DownloaderRet(
                        code=RetCodeEnum.GeneralFailure,
                        message=generate_msg(
                            msg,
                            nex_subdomain=new_netloc,
                            status_code=res.status_code,
                            filename=save_filepath
                        )
                    )
                elif res.status_code != httpx.codes.PARTIAL_CONTENT:
                    self._url = self._initial_url
                    return DownloaderRet(
                        code=RetCodeEnum.GeneralFailure,
                        message=generate_msg(
                            "Download failed",
                            status_code=res.status_code,
                            filename=save_filepath
                        )
                    )
                else:
                    if subdomain_index is not None:
                        self.failure_servers.discard(subdomain_index)
                        self.succeeded_servers.add(subdomain_index)

                # Get filename for saving and check if file exists (Second-time duplicate file check)
                # Priority order can be referenced from the constructor's documentation
                self._save_filename = self._designated_filename or sanitize_filename(
                    filename_from_headers(res.headers)
                ) or server_path_filename
                save_filepath = self._path / self._save_filename
                file_existed, ret_msg = duplicate_file_check(save_filepath, bucket_file_path)
                if file_existed:
                    return DownloaderRet(
                        code=RetCodeEnum.FileExisted,
                        message=generate_msg(
                            ret_msg,
                            path=save_filepath
                        )
                    )

                # Download
                # Total file size: prefer the complete length of ``Content-Range`` ("bytes a-b/total").
                # The total may be "*" (unknown) or malformed, which must not abort the download.
                total_size: Optional[int] = None
                if range_str := res.headers.get("Content-Range"):
                    try:
                        total_size = int(range_str.split("/")[-1])
                    except ValueError:
                        total_size = None
                # Otherwise fall back to ``Content-Length``. The response is a ranged one (206), so that
                # header only counts the bytes still to be sent; add what is already on disk.
                if total_size is None and (content_length := res.headers.get("Content-Length")):
                    try:
                        total_size = temp_size + int(content_length)
                    except ValueError:
                        # Invalid Content-Length, continue with download
                        total_size = None

                # Check file size filtering if enabled and we have the total size
                if total_size is not None:
                    min_file_size = config.job.min_file_size
                    max_file_size = config.job.max_file_size

                    # Check minimum size
                    if min_file_size is not None and total_size < min_file_size:
                        logger.debug(
                            f"Skipping file {self._save_filename} (size: {total_size} bytes) "
                            f"- below minimum size {min_file_size}"
                        )
                        return DownloaderRet(
                            code=RetCodeEnum.FileExisted,  # Use FileExisted to indicate it was skipped intentionally
                            message=generate_msg(
                                f"File skipped due to size filtering "
                                f"(size: {total_size} bytes, below minimum: {min_file_size})",
                                path=save_filepath
                            )
                        )

                    # Check maximum size
                    if max_file_size is not None and total_size > max_file_size:
                        logger.debug(
                            f"Skipping file {self._save_filename} (size: {total_size} bytes) "
                            f"- above maximum size {max_file_size}"
                        )
                        return DownloaderRet(
                            code=RetCodeEnum.FileExisted,  # Use FileExisted to indicate it was skipped intentionally
                            message=generate_msg(
                                f"File skipped due to size filtering "
                                f"(size: {total_size} bytes, above maximum: {max_file_size})",
                                path=save_filepath
                            )
                        )

                async with aiofiles.open(str(temp_filepath), "ab", self._buffer_size) as f:
                    chunk_iterator = res.aiter_bytes(self._chunk_size)
                    t = tqdm_class(
                        desc=self._save_filename,
                        total=total_size,
                        initial=temp_size,
                        disable=not progress,
                        unit="B",
                        unit_scale=True
                    )
                    try:
                        async for chunk in chunk_iterator:
                            if self._stop:
                                raise CancelledError
                            await f.write(chunk)
                            t.update(len(chunk))  # Update progress bar
                    finally:
                        # Always release the progress bar, also when cancelled or when the stream fails
                        t.close()

            # Download finished
            if config.downloader.use_bucket:
                bucket_file_path.parent.mkdir(parents=True, exist_ok=True)
                os.link(temp_filepath, bucket_file_path)
            final_filepath = self._path / self._save_filename
            temp_filepath.rename(final_filepath)

            # Set file time from headers
            if config.downloader.keep_metadata:
                try:
                    utime_from_headers(res.headers, final_filepath)
                except (OSError, ValueError, TypeError) as e:
                    # Best effort: a missing or unusable timestamp must not fail a completed download
                    logger.warning(
                        generate_msg(
                            "Failed to set file time from headers",
                            file=self._save_filename,
                            exception=e
                        )
                    )

            # Callbacks
            if sync_callable:
                sync_callable(self)
            if async_callable:
                await async_callable(self)

            return DownloaderRet(
                data=self._save_filename
            ) if self._save_filename else DownloaderRet(
                code=RetCodeEnum.GeneralFailure,
                message=generate_msg(
                    "Download failed",
                    filename=self._designated_filename
                )
            )

    # Calling the instance is the same as calling ``run``
    __call__ = run
__call__ = run class-attribute instance-attribute
buffer_size: int cached property

Number of bytes for file I/O buffer

chunk_size: int cached property

Number of bytes for chunk of download stream

client: httpx.AsyncClient cached property

HTTPX AsyncClient

failure_servers: Set[int] = set() class-attribute instance-attribute
filename: Optional[str] property

Actual filename of the download file

finished: bool property

Check if the download finished

Returns:

Type Description
bool

False while the download is in progress, True otherwise

path: Path cached property

Directory path to save the file

post: Post cached property

Post that the file belongs to

succeeded_servers: Set[int] = set() class-attribute instance-attribute
url: str cached property

Download URL

wait_lock = Lock() class-attribute instance-attribute
__init__(url, path, client, *, buffer_size=None, chunk_size=None, designated_filename=None, server_path=None, post=None)

Initialize a file downloader

  • About filename:
    1. If designated_filename parameter is set, use it.
    2. Else if Content-Disposition is set in headers, use filename from it.
    3. Else use filename from 'file' part of server_path.

Parameters:

Name Type Description Default
url str

Download URL

required
path Path

Directory path to save the file, which needs to be sanitized

required
client AsyncClient

HTTPX AsyncClient

required
buffer_size int

Number of bytes for file I/O buffer

None
chunk_size int

Number of bytes for chunk of download stream

None
designated_filename str

Manually specify the filename for saving, which needs to be sanitized

None
server_path str

Server path of the file. If DownloaderConfiguration.use_bucket is enabled, it will be used as the save path.

None
post Post

Post object, use for logging.

None
Source code in ktoolbox/downloader/downloader.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
def __init__(
        self,
        url: str,
        path: Path,
        client: httpx.AsyncClient,
        *,
        buffer_size: int = None,
        chunk_size: int = None,
        designated_filename: str = None,
        server_path: str = None,
        post: Post = None
):
    # noinspection GrazieInspection
    """
    Set up a downloader for a single file

    - How the filename for saving is chosen:
        1. ``designated_filename``, when it is given.
        2. Otherwise the filename from the ``Content-Disposition`` response header, when present.
        3. Otherwise the 'file' part of ``server_path``.

    :param url: Download URL
    :param path: Directory path to save the file, which needs to be sanitized
    :param client: HTTPX AsyncClient
    :param buffer_size: Number of bytes for file I/O buffer; the configured value is used when omitted
    :param chunk_size: Number of bytes for chunk of download stream; the configured value is used when omitted
    :param designated_filename: Manually specify the filename for saving, which needs to be sanitized
    :param server_path: Server path of the file. If ``DownloaderConfiguration.use_bucket`` is enabled,
        it will be used as the save path.
    :param post: Post object, use for logging.
    """
    # Fall back to the configured sizes when no (or a falsy) value was passed in
    if not buffer_size:
        buffer_size = config.downloader.buffer_size
    if not chunk_size:
        chunk_size = config.downloader.chunk_size

    # Where the file comes from and where it goes
    self._initial_url = url
    self._url = url
    self._client = client
    self._path = path
    self._server_path = server_path  # /hash[:1]/hash2[1:3]/hash
    self._buffer_size = buffer_size
    self._chunk_size = chunk_size

    # A manually specified filename takes priority as the save filename
    self._designated_filename = designated_filename
    self._save_filename = designated_filename
    self._post = post

    # Per-instance download state
    self._stop: bool = False
    self._finished_lock = asyncio.Lock()
    self._next_subdomain_index = 1
cancel()

Cancel the download

It will raise asyncio.CancelledError in chunk_iterator (writing chunk to file) iteration.

Source code in ktoolbox/downloader/downloader.py
127
128
129
130
131
132
133
def cancel(self):
    """
    Request cancellation of the download

    Only sets the stop flag; the chunk-writing loop (``chunk_iterator`` iteration) checks it and
    raises ``asyncio.CancelledError``.
    """
    self._stop = True
run(*, sync_callable=None, async_callable=None, tqdm_class=None, progress=False) async

Start to download

Parameters:

Name Type Description Default
sync_callable Callable[[Downloader], Any]

Sync callable for download finished

None
async_callable Callable[[Downloader], Coroutine]

Async callable for download finished

None
tqdm_class Type[tqdm]

tqdm class to replace default tqdm.asyncio.tqdm

None
progress bool

Show progress bar

False

Returns:

Type Description
DownloaderRet[str]

DownloaderRet which contains the actual output filename

Raises:

Type Description
CancelledError

Job cancelled

Source code in ktoolbox/downloader/downloader.py
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
@tenacity.retry(
    stop=stop_never if config.downloader.retry_stop_never else stop_after_attempt(config.downloader.retry_times),
    wait=wait_fixed(config.downloader.retry_interval),
    # Retry on a falsy result (presumably a failed ``DownloaderRet`` - confirm against ``BaseRet``)
    # unless the file was skipped on purpose, and on any HTTPX error.
    retry=retry_if_result(
        lambda x: not x and x.code != RetCodeEnum.FileExisted
    ) | retry_if_exception(
        lambda x: isinstance(x, httpx.HTTPError)
    ),
    before_sleep=lambda x: logger.warning(
        generate_msg(
            f"Retrying ({x.attempt_number})",
            file=x.args[0].filename,
            post_name=x.args[0].post.title if x.args[0].post else None,
            post_id=x.args[0].post.id if x.args[0].post else None,
            message=x.outcome.result().message if not x.outcome.failed else None,
            exception=x.outcome.exception(),
            url=x.args[0].url
        )
    ),
    reraise=True
)
async def run(
        self,
        *,
        sync_callable: Callable[["Downloader"], Any] = None,
        async_callable: Callable[["Downloader"], Coroutine] = None,
        tqdm_class: Type[std_tqdm] = None,
        progress: bool = False
) -> DownloaderRet[str]:
    """
    Start to download

    The data is appended to a temporary file (``<target>.<temp_suffix>``) that is requested with a
    ``Range`` header starting at its current size, and renamed to the final name once complete.

    :param sync_callable: Sync callable invoked after the download finished
    :param async_callable: Async callable awaited after the download finished
    :param tqdm_class: ``tqdm`` class to replace default ``tqdm.asyncio.tqdm``
    :param progress: Show progress bar
    :return: ``DownloaderRet`` which contains the actual output filename
    :raise CancelledError: Job cancelled
    """
    # Get filename to check if file exists (First-time duplicate file check)
    # Check it before request to make progress more efficiency
    server_relpath = self._server_path[1:]
    server_relpath_without_params = urlparse(server_relpath).path
    server_path_filename = unquote(Path(server_relpath_without_params).name)
    # Priority order can be referenced from the constructor's documentation
    save_filepath = self._path / (self._save_filename or server_path_filename)

    # Get bucket file path
    bucket_file_path: Optional[Path] = None
    if config.downloader.use_bucket:
        bucket_file_path = config.downloader.bucket_path / server_relpath

    # Check if the file exists
    file_existed, ret_msg = duplicate_file_check(save_filepath, bucket_file_path)
    if file_existed:
        return DownloaderRet(
            code=RetCodeEnum.FileExisted,
            message=generate_msg(
                ret_msg,
                path=save_filepath
            )
        )

    tqdm_class: Type[std_tqdm] = tqdm_class or tqdm.asyncio.tqdm
    # Rate limiting: the shared lock makes the pauses of all downloaders run one after another
    async with self.wait_lock:
        await asyncio.sleep(1 / config.downloader.tps_limit)
    async with self._finished_lock:
        temp_filepath = Path(f"{save_filepath}.{config.downloader.temp_suffix}")
        # Resume from the bytes already present in the temporary file
        temp_size = temp_filepath.stat().st_size if temp_filepath.exists() else 0

        async with self._client.stream(
                method="GET",
                url=config.downloader.reverse_proxy.format(self._url),
                follow_redirects=True,
                timeout=config.downloader.timeout,
                headers={"Range": f"bytes={temp_size}-"}
        ) as res:  # type: httpx.Response
            # Subdomain index of the server that answered ("n3.example" -> 3), if the host has one
            try:
                subdomain_index = int(res.url.netloc.split(b".")[0][1:])
            except ValueError:
                subdomain_index = None
            if res.status_code == 403:
                if subdomain_index is not None:
                    self.succeeded_servers.discard(subdomain_index)
                    self.failure_servers.add(subdomain_index)
                # try succeeded servers first
                subdomain_index = next(iter(self.succeeded_servers), None)
                if subdomain_index is None:
                    subdomain_index = self._next_subdomain_index
                    # Update self._next_subdomain_index
                    ## index fallback to 1 when a server after failure_servers has been tried
                    if self.failure_servers and self._next_subdomain_index > max(self.failure_servers):
                        self._next_subdomain_index = 1
                        self.failure_servers.clear()
                    ## otherwise, increment the index and avoid failure_servers
                    else:
                        self._next_subdomain_index += 1
                        while self._next_subdomain_index in self.failure_servers:
                            self._next_subdomain_index += 1
                    msg = "Download failed, trying next subdomain"
                else:
                    msg = "Download failed, trying succeeded subdomains"
                new_netloc = f"n{subdomain_index}.{config.api.files_netloc}"
                # The next retry attempt will request the file from the new subdomain
                self._url = str(res.url.copy_with(netloc=new_netloc.encode()))
                return DownloaderRet(
                    code=RetCodeEnum.GeneralFailure,
                    message=generate_msg(
                        msg,
                        nex_subdomain=new_netloc,
                        status_code=res.status_code,
                        filename=save_filepath
                    )
                )
            elif res.status_code != httpx.codes.PARTIAL_CONTENT:
                self._url = self._initial_url
                return DownloaderRet(
                    code=RetCodeEnum.GeneralFailure,
                    message=generate_msg(
                        "Download failed",
                        status_code=res.status_code,
                        filename=save_filepath
                    )
                )
            else:
                if subdomain_index is not None:
                    self.failure_servers.discard(subdomain_index)
                    self.succeeded_servers.add(subdomain_index)

            # Get filename for saving and check if file exists (Second-time duplicate file check)
            # Priority order can be referenced from the constructor's documentation
            self._save_filename = self._designated_filename or sanitize_filename(
                filename_from_headers(res.headers)
            ) or server_path_filename
            save_filepath = self._path / self._save_filename
            file_existed, ret_msg = duplicate_file_check(save_filepath, bucket_file_path)
            if file_existed:
                return DownloaderRet(
                    code=RetCodeEnum.FileExisted,
                    message=generate_msg(
                        ret_msg,
                        path=save_filepath
                    )
                )

            # Download
            # Total file size: prefer the complete length of ``Content-Range`` ("bytes a-b/total").
            # The total may be "*" (unknown) or malformed, which must not abort the download.
            total_size: Optional[int] = None
            if range_str := res.headers.get("Content-Range"):
                try:
                    total_size = int(range_str.split("/")[-1])
                except ValueError:
                    total_size = None
            # Otherwise fall back to ``Content-Length``. The response is a ranged one (206), so that
            # header only counts the bytes still to be sent; add what is already on disk.
            if total_size is None and (content_length := res.headers.get("Content-Length")):
                try:
                    total_size = temp_size + int(content_length)
                except ValueError:
                    # Invalid Content-Length, continue with download
                    total_size = None

            # Check file size filtering if enabled and we have the total size
            if total_size is not None:
                min_file_size = config.job.min_file_size
                max_file_size = config.job.max_file_size

                # Check minimum size
                if min_file_size is not None and total_size < min_file_size:
                    logger.debug(
                        f"Skipping file {self._save_filename} (size: {total_size} bytes) "
                        f"- below minimum size {min_file_size}"
                    )
                    return DownloaderRet(
                        code=RetCodeEnum.FileExisted,  # Use FileExisted to indicate it was skipped intentionally
                        message=generate_msg(
                            f"File skipped due to size filtering "
                            f"(size: {total_size} bytes, below minimum: {min_file_size})",
                            path=save_filepath
                        )
                    )

                # Check maximum size
                if max_file_size is not None and total_size > max_file_size:
                    logger.debug(
                        f"Skipping file {self._save_filename} (size: {total_size} bytes) "
                        f"- above maximum size {max_file_size}"
                    )
                    return DownloaderRet(
                        code=RetCodeEnum.FileExisted,  # Use FileExisted to indicate it was skipped intentionally
                        message=generate_msg(
                            f"File skipped due to size filtering "
                            f"(size: {total_size} bytes, above maximum: {max_file_size})",
                            path=save_filepath
                        )
                    )

            async with aiofiles.open(str(temp_filepath), "ab", self._buffer_size) as f:
                chunk_iterator = res.aiter_bytes(self._chunk_size)
                t = tqdm_class(
                    desc=self._save_filename,
                    total=total_size,
                    initial=temp_size,
                    disable=not progress,
                    unit="B",
                    unit_scale=True
                )
                try:
                    async for chunk in chunk_iterator:
                        if self._stop:
                            raise CancelledError
                        await f.write(chunk)
                        t.update(len(chunk))  # Update progress bar
                finally:
                    # Always release the progress bar, also when cancelled or when the stream fails
                    t.close()

        # Download finished
        if config.downloader.use_bucket:
            bucket_file_path.parent.mkdir(parents=True, exist_ok=True)
            os.link(temp_filepath, bucket_file_path)
        final_filepath = self._path / self._save_filename
        temp_filepath.rename(final_filepath)

        # Set file time from headers
        if config.downloader.keep_metadata:
            try:
                utime_from_headers(res.headers, final_filepath)
            except (OSError, ValueError, TypeError) as e:
                # Best effort: a missing or unusable timestamp must not fail a completed download
                logger.warning(
                    generate_msg(
                        "Failed to set file time from headers",
                        file=self._save_filename,
                        exception=e
                    )
                )

        # Callbacks
        if sync_callable:
            sync_callable(self)
        if async_callable:
            await async_callable(self)

        return DownloaderRet(
            data=self._save_filename
        ) if self._save_filename else DownloaderRet(
            code=RetCodeEnum.GeneralFailure,
            message=generate_msg(
                "Download failed",
                filename=self._designated_filename
            )
        )

utils

__all__ = ['filename_from_headers', 'duplicate_file_check', 'utime_from_headers'] module-attribute

duplicate_file_check(local_file_path, bucket_file_path=None)

Check whether the file already exists, and link the bucket file to the local file path if DownloaderConfiguration.use_bucket is enabled.

Parameters:

Name Type Description Default
local_file_path Path

Download target path

required
bucket_file_path Path

The bucket filepath of the local download path

None

Returns:

Type Description
Tuple[bool, Optional[str]]

(if file existed, message)

Source code in ktoolbox/downloader/utils.py
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def duplicate_file_check(local_file_path: Path, bucket_file_path: Path = None) -> Tuple[bool, Optional[str]]:
    """
    Tell whether the download target is already present.

    The bucket path is the one inspected when it is given, otherwise the local path.
    When ``DownloaderConfiguration.use_bucket`` is enabled and the file is missing
    locally, a hard link from the bucket file to the local path is created.

    :param local_file_path: Download target path
    :param bucket_file_path: The bucket filepath of the local download path
    :return: ``(if file existed, message)``, the message is ``None`` when nothing was found
    """
    check_target = bucket_file_path or local_file_path
    if not check_target.is_file():
        return False, None

    if not config.downloader.use_bucket:
        return True, "Download file already exists, skipping"

    if local_file_path.is_file():
        return True, "Download file already exists in both bucket and local, skipping"

    # Present in the bucket only: expose it at the local path through a hard link
    os.link(bucket_file_path, local_file_path)
    return True, "Download file already exists in bucket, linking to local path"

filename_from_headers(headers)

Get file name from headers.

Parse from Content-Disposition.

  • Example:

    filename_from_headers({'Content-Disposition': 'attachment;filename*=utf-8''README%2Emd;filename="README.md"'})
    

  • Return:

    README.md
    

Parameters:

Name Type Description Default
headers Dict[str, str]

HTTP headers

required

Returns:

Type Description
Optional[str]

File name

Source code in ktoolbox/downloader/utils.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def filename_from_headers(headers: Dict[str, str]) -> Optional[str]:
    """
    Extract the file name announced in the ``Content-Disposition`` header.

    The extended ``filename*`` parameter (``charset''percent-encoded-name``) wins over
    the plain ``filename`` parameter, which is decoded with the configured
    downloader encoding.

    - Example:
    ```
    filename_from_headers({'Content-Disposition': 'attachment;filename*=utf-8\'\'README%2Emd;filename="README.md"'})
    ```

    - Return:
    ```
    README.md
    ```

    :param headers: HTTP headers
    :return: File name, or ``None`` if the header or a usable parameter is missing
    """
    disposition = headers.get("Content-Disposition") or headers.get("content-disposition")
    if not disposition:
        return None

    params = parse_header(disposition)

    extended = params.get("filename*")
    if extended:
        parts = extended.split("''")
        if len(parts) == 2:
            # parts is [charset, percent-encoded name]
            return urllib.parse.unquote(parts[1], parts[0])

    plain = params.get("filename")
    if plain:
        return urllib.parse.unquote(plain, config.downloader.encoding)
    return None

parse_header(line)

Alternative resolution for parsing header line.

Used when cgi.parse_header is unavailable due to the deprecation of the cgi module.

https://peps.python.org/pep-0594/#cgi

  • Example:

    parse_header("text/html; charset=utf-8")
    

  • Return:

    {'text/html': None, 'charset': 'utf-8'}
    

Parameters:

Name Type Description Default
line str

Header line

required

Returns:

Type Description
Dict[str, Optional[str]]

Dict of header line

Source code in ktoolbox/downloader/utils.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
def parse_header(line: str) -> Dict[str, Optional[str]]:
    """
    Alternative resolution for parsing header line.

    Apply when ``cgi.parse_header`` is unable to use due to the deprecation of `cgi` module.

    https://peps.python.org/pep-0594/#cgi

    The line is split on ``;``. Whitespace around each key and value is removed, and
    only the first ``=`` of a segment separates key from value, so values that
    contain ``=`` are kept intact. Surrounding quotes of a value are left as they are.
    When a ``key=value`` pair is repeated, the first value wins.

    - Example:
    ```
    parse_header("text/html; charset=utf-8")
    ```

    - Return:
    ```
    {'text/html': None, 'charset': 'utf-8'}
    ```

    :param line: Header line
    :return: Dict of header line, segments without ``=`` map to ``None``
    """
    dict_value: Dict[str, Optional[str]] = {}
    for item in line.split(";"):
        # Partition on the first ``=`` only, a value may legitimately contain ``=``
        key, separator, value = item.partition("=")
        # Parameters are usually written as ``; key=value``, drop the padding
        key = key.strip()
        if separator:
            dict_value.setdefault(key, value.strip())
        else:
            dict_value[key] = None
    return dict_value

utime_from_headers(headers, path)

Run os.utime on specific file using Last-Modified or Date in HTTP headers.

Parameters:

Name Type Description Default
headers Dict[str, str]

HTTP Headers

required
path Union[Path, str]

File path

required
Source code in ktoolbox/downloader/utils.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
def utime_from_headers(headers: Dict[str, str], path: Union[Path, str]) -> None:
    """
    Run ``os.utime`` on specific file using ``Last-Modified`` or ``Date`` in HTTP headers.

    ``Last-Modified`` is preferred, ``Date`` is only the fallback when ``Last-Modified``
    is absent. Access time and modification time are both set to the chosen timestamp.
    Nothing is done when neither header is present.

    :param headers: HTTP Headers
    :param path: File path
    :raise: OSError, ValueError, TypeError
    """
    last_modified = headers.get("Last-Modified")
    date_header = headers.get("Date")
    # Both headers are parsed when present, so a malformed one raises even if unused
    mtime = email.utils.parsedate_to_datetime(last_modified).timestamp() if last_modified else None
    fallback = email.utils.parsedate_to_datetime(date_header).timestamp() if date_header else None
    # Compare against None explicitly: a timestamp of 0.0 (the epoch) is a valid value
    timestamp = mtime if mtime is not None else fallback
    if timestamp is not None:
        os.utime(path, (timestamp, timestamp))

editor

__all__ = ['EditWithSignalWidget', 'CascadingBoxes', 'run_config_editor'] module-attribute

default_config = Configuration(_env_file='') module-attribute

default_config_envs = set(dump_envs(default_config)) module-attribute

initial_envs = set(dump_envs(config)) module-attribute

menu_top = menu('KToolBox Configuration Editor', [sub_menu('Edit', [sub_menu('API', model_to_widgets(config.api)), sub_menu('Downloader', model_to_widgets(config.downloader)), sub_menu('Job', model_to_widgets(config.job)), sub_menu('Logger', model_to_widgets(config.logger)), urwid.Divider()] + list(model_to_widgets(config, ['ssl_verify', 'json_dump_indent', 'use_uvloop']))), urwid.Divider(), menu_option(urwid.Button('JSON Preview', lambda x: top.open_box(sub_menu_with_menu_widget('JSON Preview', [urwid.Text(config.model_dump_json(indent=4))])[1]))), menu_option(urwid.Button('JSON Preview (Python Mode)', lambda x: top.open_box(sub_menu_with_menu_widget('JSON Preview (Python Serialize Mode)', [urwid.Text(pprint.pformat(config.model_dump(mode='python'), sort_dicts=False))])[1]))), menu_option(urwid.Button('DotEnv Preview (.env / prod.env)', lambda x: top.open_box(sub_menu_with_menu_widget('DotEnv Preview (.env / prod.env)', [urwid.Text('\n'.join(dump_modified_envs(dump_envs(config))) or 'Same as the default configuration, DotEnv will be left empty.')])[1]))), urwid.Divider(), sub_menu('Save', [menu_option(urwid.Button("Save to '.env' / 'prod.env' file", on_save_dotenv))]), urwid.Divider(bottom=2), menu_option(urwid.Button('Help', lambda x: webbrowser.open('https://ktoolbox.readthedocs.io/latest/configuration/guide/'))), menu_option(urwid.Button('Exit', exit_program)), urwid.Divider(bottom=2), urwid.Text('For detailed information, please refer to https://ktoolbox.readthedocs.io', align=urwid.CENTER), urwid.Divider(), urwid.Text(__version__, align=urwid.CENTER)]) module-attribute

top = CascadingBoxes(menu_top) module-attribute

CascadingBoxes

Bases: WidgetPlaceholder

Source code in ktoolbox/editor.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
class CascadingBoxes(urwid.WidgetPlaceholder):
    """
    Placeholder widget that shows boxes as overlays stacked on top of each other.

    ``box_level`` is increased by ``open_box`` and decreased by ``back``; it is used
    to offset every new overlay relative to the previous one.
    """
    max_box_levels = 4

    def __init__(self, box: urwid.Widget) -> None:
        """
        :param box: Widget opened as the first box
        """
        super().__init__(urwid.SolidFill("/"))
        self.box_level = 0
        self.open_box(box)

    def open_box(self, box: urwid.Widget):
        """
        Display ``box`` in a new overlay above the widget currently shown

        :param box: Widget to put inside the new framed box
        """
        level = self.box_level
        # Levels still available below the maximum, used for the right / bottom margins
        remaining = self.max_box_levels - level - 1
        framed_box = urwid.LineBox(
            urwid.Padding(box, align=urwid.CENTER, left=2, right=2)
        )
        self.original_widget = urwid.Overlay(
            framed_box,
            self.original_widget,
            align=urwid.CENTER,
            width=(urwid.RELATIVE, 80),
            valign=urwid.MIDDLE,
            height=(urwid.RELATIVE, 80),
            min_width=24,
            min_height=8,
            left=level * 3,
            right=remaining * 3,
            top=level * 2,
            bottom=remaining * 2,
        )
        self.box_level = level + 1

    def back(self) -> Optional[NoReturn]:
        """Replace the current overlay with its item ``0`` and decrease ``box_level``"""
        # NOTE(review): item 0 of the overlay is presumably the widget underneath - confirm against urwid
        beneath = self.original_widget[0]
        self.original_widget = beneath
        self.box_level = self.box_level - 1
        return None

    def exit(self):
        """Raise ``urwid.ExitMainLoop``"""
        raise urwid.ExitMainLoop()

    def keypress(self, size, key: str) -> Union[str, NoReturn, None]:
        """
        Handle ``esc`` (go back one box, or ask to exit on the first box), \
        then forward the key to the parent implementation

        :param size: Widget size given by urwid
        :param key: Pressed key
        """
        escape_pressed = key == "esc"
        if escape_pressed and self.box_level > 1:
            self.back()
        elif escape_pressed:
            exit_program()
        return super().keypress(size, key)

box_level = 0 instance-attribute

max_box_levels = 4 class-attribute instance-attribute

__init__(box)

Source code in ktoolbox/editor.py
50
51
52
53
def __init__(self, box: urwid.Widget) -> None:
    """
    Start from a ``SolidFill("/")`` background and open ``box`` as the first box

    :param box: Widget passed to ``open_box``
    """
    super().__init__(urwid.SolidFill("/"))
    # ``open_box`` reads ``box_level``, so it has to exist before the first call
    self.box_level = 0
    self.open_box(box)

back()

Source code in ktoolbox/editor.py
74
75
76
77
def back(self) -> Optional[NoReturn]:
    """Replace the current overlay with its item ``0`` and decrease ``box_level``"""
    # NOTE(review): item 0 of the overlay is presumably the widget underneath - confirm against urwid
    beneath = self.original_widget[0]
    self.original_widget = beneath
    self.box_level = self.box_level - 1
    return None

exit()

Source code in ktoolbox/editor.py
79
80
def exit(self):
    """Raise ``urwid.ExitMainLoop``"""
    raise urwid.ExitMainLoop()

keypress(size, key)

Source code in ktoolbox/editor.py
82
83
84
85
86
87
88
def keypress(self, size, key: str) -> Union[str, NoReturn, None]:
    """
    Handle ``esc`` (go back one box, or ask to exit on the first box), \
    then forward the key to the parent implementation

    :param size: Widget size given by urwid
    :param key: Pressed key
    """
    escape_pressed = key == "esc"
    if escape_pressed and self.box_level > 1:
        self.back()
    elif escape_pressed:
        exit_program()
    return super().keypress(size, key)

open_box(box)

Source code in ktoolbox/editor.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def open_box(self, box: urwid.Widget):
    """
    Display ``box`` in a new overlay above the widget currently shown

    :param box: Widget to put inside the new framed box
    """
    level = self.box_level
    # Levels still available below the maximum, used for the right / bottom margins
    remaining = self.max_box_levels - level - 1
    framed_box = urwid.LineBox(
        urwid.Padding(box, align=urwid.CENTER, left=2, right=2)
    )
    self.original_widget = urwid.Overlay(
        framed_box,
        self.original_widget,
        align=urwid.CENTER,
        width=(urwid.RELATIVE, 80),
        valign=urwid.MIDDLE,
        height=(urwid.RELATIVE, 80),
        min_width=24,
        min_height=8,
        left=level * 3,
        right=remaining * 3,
        top=level * 2,
        bottom=remaining * 2,
    )
    self.box_level = level + 1

EditWithSignalWidget

Bases: Edit

Custom urwid.Edit, support callback when changed.

Source code in ktoolbox/editor.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
class EditWithSignalWidget(urwid.Edit):
    """
    Custom ``urwid.Edit``, support callback when changed.

    The callback is invoked after every key press forwarded to this widget, \
    with the widget itself and ``user_data`` as arguments.
    """

    def __init__(
            self,
            *args,
            on_state_change: Optional[Callable[[EditWithSignalWidget, _T], Any]],
            user_data: Optional[_T],
            **kwargs
    ) -> None:
        """
        :param args: Positional arguments for ``urwid.Edit``
        :param on_state_change: Callback ``(widget, user_data)``, ``None`` to disable
        :param user_data: Value handed to the callback unchanged
        :param kwargs: Keyword arguments for ``urwid.Edit``
        """
        self.__on_state_change = on_state_change
        self.__user_data = user_data
        super().__init__(*args, **kwargs)

    def keypress(self, size: Tuple[int], key: str) -> Union[str, None]:
        """
        Let ``urwid.Edit`` process the key, then notify the callback

        :param size: Widget size given by urwid
        :param key: Pressed key
        :return: Result of ``urwid.Edit.keypress``
        """
        ret = super().keypress(size, key)
        # The callback is declared optional, so skip it instead of calling ``None``
        if self.__on_state_change is not None:
            self.__on_state_change(self, self.__user_data)
        return ret

__on_state_change = on_state_change instance-attribute

__user_data = user_data instance-attribute

__init__(*args, on_state_change, user_data, **kwargs)

Source code in ktoolbox/editor.py
30
31
32
33
34
35
36
37
38
39
def __init__(
        self,
        *args,
        on_state_change: Optional[Callable[[EditWithSignalWidget, _T], Any]],
        user_data: Optional[_T],
        **kwargs
) -> None:
    """
    :param args: Positional arguments for ``urwid.Edit``
    :param on_state_change: Callback called as ``(widget, user_data)`` from ``keypress``
    :param user_data: Value handed to the callback unchanged
    :param kwargs: Keyword arguments for ``urwid.Edit``
    """
    self.__on_state_change = on_state_change
    self.__user_data = user_data
    super().__init__(*args, **kwargs)

keypress(size, key)

Source code in ktoolbox/editor.py
41
42
43
44
def keypress(self, size: Tuple[int], key: str) -> Union[str, None]:
    """
    Let ``urwid.Edit`` process the key, then notify the callback

    :param size: Widget size given by urwid
    :param key: Pressed key
    :return: Result of ``urwid.Edit.keypress``
    """
    ret = super().keypress(size, key)
    # The callback is declared optional, so skip it instead of calling ``None``
    if self.__on_state_change is not None:
        self.__on_state_change(self, self.__user_data)
    return ret

dump_envs(model)

Dump environment variables, with no Env prefix

Source code in ktoolbox/editor.py
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
def dump_envs(model: BaseModel) -> List[str]:
    """
    Dump environment variables, with no Env prefix

    Nested models are flattened with ``__`` between the field names. \
    Containers are written as JSON, other values through the model serializer.

    :param model: Pydantic model to dump
    :return: List of ``NAME=value`` strings
    """
    envs = []
    for field in model.model_fields:
        value = model.__getattribute__(field)
        name = field.upper()
        if isinstance(value, BaseModel):
            envs.extend(f"{name}__{env}" for env in dump_envs(value))
        elif isinstance(value, dict):
            # Dump the mapping itself; ``list(value)`` would keep the keys only
            envs.append(f"{name}={json.dumps(value)}")
        elif isinstance(value, (list, set, tuple)):
            # Sets and tuples are not JSON types, dump them as a JSON array
            envs.append(f"{name}={json.dumps(list(value))}")
        else:
            envs.append(f"{name}={model.__pydantic_serializer__.to_python(value)}")
    return envs

dump_modified_envs(envs)

Dump modified environment variables, with Env prefix

Parameters:

Name Type Description Default
envs List[str]

Current Envs

required
Source code in ktoolbox/editor.py
107
108
109
110
111
112
113
114
115
def dump_modified_envs(envs: List[str]) -> List[str]:
    """
    Dump modified environment variables, with Env prefix

    Entries that also appear in ``default_config_envs`` are left out.

    :param envs: Current Envs
    :return: Sorted entries, each prefixed with ``KTOOLBOX_``
    """
    modified = set(envs).difference(default_config_envs)
    return sorted(f"KTOOLBOX_{env}" for env in modified)

exit_program(_=None)

Source code in ktoolbox/editor.py
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
def exit_program(_: urwid.Button = None) -> Optional[NoReturn]:
    """
    Exit the editor, asking for confirmation first when ``has_changed()`` is true

    :param _: Widget, not used
    """
    if not has_changed():
        top.exit()
        return None

    decline_button = menu_option(urwid.Button(
        "NO", lambda x: top.back()
    ))
    confirm_button = menu_option(urwid.Button(
        "YES", lambda x: top.exit()
    ))
    confirmation = urwid.Pile([
        urwid.Text("Any unsaved changes will be lost. Are you sure you want to EXIT?"),
        urwid.Divider(),
        decline_button,
        confirm_button,
    ])
    top.open_box(urwid.Filler(confirmation))

get_item(model, field, get_value_callback, widget_list, list_walker)

Source code in ktoolbox/editor.py
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
def get_item(
        model: BaseModel,
        field: str,
        get_value_callback: Callable[[EditWithSignalWidget], Optional[Any]],
        widget_list: List[urwid.WidgetPlaceholder],
        list_walker: urwid.ListWalker
) -> Callable[[str], urwid.WidgetPlaceholder]:
    """
    Build a factory of row widgets (edit box plus remove button) for a collection field

    :param model: Pydantic model owning the field
    :param field: Field name
    :param get_value_callback: Passed to ``on_item_changed`` to read the edited value
    :param widget_list: Item widget list handed to the change / remove callbacks
    :param list_walker: List walker handed to the remove callback
    :return: ``(edit text) -> (row widget)``
    """
    def inner(edit_text: str = ""):
        # The placeholder is created first because both callbacks need a reference to the row
        item = urwid.WidgetPlaceholder(urwid.Widget())
        text_edit = EditWithSignalWidget(
            edit_text=edit_text,
            align=urwid.LEFT,
            on_state_change=on_item_changed,
            user_data=(model, field, get_value_callback, widget_list, item)
        )
        remove_button = urwid.Button(
            "Remove -",
            on_remove_item,
            (model, field, widget_list, item, list_walker)
        )
        item.original_widget = urwid.Columns([
            text_edit,
            urwid.Divider(),
            urwid.Divider(),
            remove_button
        ])
        return item

    return inner

get_value(item_types)

Source code in ktoolbox/editor.py
326
327
328
329
330
331
332
333
334
335
def get_value(item_types: Sequence[type]) -> Callable[[EditWithSignalWidget], Optional[Any]]:
    """
    Build a converter that turns the text of an edit widget into an item value

    The candidate types are tried in order; a candidate raising ``ValueError`` is skipped.

    :param item_types: Candidate types
    :return: ``(edit widget) -> (value)``, called without widget it returns ``t()`` \
    of the first candidate that works; ``None`` if no candidate works
    """
    def inner(w: EditWithSignalWidget = None):
        for candidate in item_types:
            try:
                if w is None:
                    return candidate()
                return candidate(w.get_edit_text())
            except ValueError:
                pass
        return None

    return inner

has_changed()

Source code in ktoolbox/editor.py
133
134
def has_changed() -> bool:
    """Tell whether the current configuration has an env entry that is not in ``initial_envs``"""
    current_envs = set(dump_envs(config))
    return not current_envs <= initial_envs

menu(title, choices)

Source code in ktoolbox/editor.py
170
171
172
173
174
175
def menu(
        title: Union[str, Tuple[Hashable, str], List[Union[str, Tuple[Hashable, str]]]],
        choices: Iterable[urwid.Widget],
) -> urwid.ListBox:
    """
    Build a list box made of a centered title, a divider and the given choices

    :param title: Text markup of the title
    :param choices: Widgets listed below the title
    """
    header = [urwid.Text(title, align=urwid.CENTER), urwid.Divider()]
    walker = urwid.SimpleFocusListWalker(header + list(choices))
    return urwid.ListBox(walker)

menu_option(widget)

Return focus_map="reversed" Widget

Source code in ktoolbox/editor.py
137
138
139
def menu_option(widget: urwid.Widget) -> urwid.AttrMap:
    """
    Wrap the widget in an ``urwid.AttrMap`` with ``focus_map="reversed"``

    :param widget: Widget to wrap
    """
    wrapped = urwid.AttrMap(widget, None, focus_map="reversed")
    return wrapped

model_to_widgets(model, fields=None)

Generate urwid widgets for Pydantic model

Parameters:

Name Type Description Default
model BaseModel

Pydantic model

required
fields Iterable[str]

Only generate for these fields, default to all fields.

None
Source code in ktoolbox/editor.py
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
def model_to_widgets(model: BaseModel, fields: Iterable[str] = None) -> Generator[urwid.Widget, Any, None]:
    """
    Generate urwid widgets for Pydantic model

    One widget is produced per field, chosen by the field annotation:

    - ``Literal``: sub menu of radio buttons
    - ``bool``: check box
    - ``str`` / ``int`` / ``float`` / ``Path``: text edit
    - ``list`` / ``set`` / ``tuple``: sub menu with one editable row per item
    - nested model: sub menu generated recursively
    - anything else: sub menu with a "not supported" notice

    A divider and a "View Document" button are generated at the end.

    :param model: Pydantic model
    :param fields: Only generate for these fields, default to all fields.
    """
    for field, field_info in model.model_fields.items():
        if fields is not None and field not in fields:
            continue
        origin_annotation = getattr(field_info.annotation, '__origin__', None)
        # Candidate types of the field: members of a ``Union``, or the annotation itself
        annotation = get_args(field_info.annotation) if origin_annotation is Union else [field_info.annotation]

        if origin_annotation is Literal:
            # ``urwid.RadioButton`` appends itself to ``radio_group``; that list only ties the
            # buttons together, the menu shows the ``menu_option`` wrappers so that the
            # focused entry is highlighted like every other option
            radio_group = []
            radio_options = [
                menu_option(urwid.RadioButton(
                    radio_group,
                    str(value),
                    model.__getattribute__(field) == value,
                    on_radio_button_change,
                    (model, field, value)
                ))
                for value in get_args(field_info.annotation)
            ]
            yield sub_menu(field, radio_options)
        elif bool in annotation:
            yield menu_option(urwid.CheckBox(
                field,
                model.__getattribute__(field),
                on_state_change=on_checkbox_change,
                user_data=(model, field)
            ))
        elif any(map(lambda x: x in annotation, [str, int, float, Path])):
            yield menu_option(urwid.Columns([
                urwid.Text(f"{' ' * 4}{field}", align=urwid.LEFT),
                EditWithSignalWidget(
                    edit_text=str(model.__getattribute__(field)),
                    align=urwid.RIGHT,
                    on_state_change=on_edit_change,
                    user_data=(model, field, annotation)
                )
            ]))
        elif origin_annotation in [list, set, tuple]:
            item_types = get_args(field_info.annotation)
            # ``widget_list`` is shared with the item callbacks, which look rows up by position
            widget_list = []
            widget, menu_widget = sub_menu_with_menu_widget(field, [])
            list_walker: urwid.SimpleFocusListWalker = menu_widget.body  # type: ignore
            widget_list.extend([
                get_item(model, field, get_value(item_types), widget_list, list_walker)
                (str(existed)) for existed in model.__getattribute__(field)
            ])
            # noinspection PyTypeChecker
            option_widget = menu_option(
                urwid.Button(
                    "Add +",
                    on_add_item,
                    (
                        model,
                        field,
                        get_value(item_types),
                        widget_list,
                        get_item(model, field, get_value(item_types), widget_list, list_walker),
                        list_walker
                    )
                )
            )
            list_walker.extend([urwid.Divider(), option_widget, urwid.Divider()])
            list_walker.extend(widget_list)
            yield widget
        elif isinstance(field_info.annotation, ModelMetaclass):
            yield sub_menu(field, model_to_widgets(model.__getattribute__(field)))
        else:
            yield sub_menu(
                field,
                [urwid.Text(
                    f"This option ({repr(field_info.annotation)}) is currently not supported for editing in "
                    "the graphical interface; please edit it in the '.env' or 'prod.env' file in the working directory."
                )]
            )
    yield urwid.Divider()
    yield menu_option(urwid.Button(
        f"View Document: {type(model).__name__}", lambda x: webbrowser.open(
            f"https://ktoolbox.readthedocs.io/latest/configuration/reference/#ktoolbox.configuration.{type(model).__name__}"
        )
    ))

on_add_item(_, user_data)

Called when an item is added to a List/Set/Tuple field

Parameters:

Name Type Description Default
_ Button

Widget

required
user_data Tuple[BaseModel, str, Callable[[], Optional[Any]], Union[List[_T], List[None]], Callable[[], _T], Union[MonitoredFocusList[_T], ListWalker]]

(model, field, () -> (default value), item list, () -> (new item), menu widget)

required
Source code in ktoolbox/editor.py
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
def on_add_item(
        _: urwid.Button,
        user_data: Tuple[
            BaseModel,
            str,
            Callable[[], Optional[Any]],
            Union[List[_T], List[None]],
            Callable[[], _T],
            Union[urwid.MonitoredFocusList[_T], urwid.ListWalker]
        ]
):
    """
    Called when an item is added to a List/Set/Tuple field

    A default value is appended to the field and a new row widget is appended \
    to both the item list and the menu widget.

    :param _: Widget
    :param user_data: (model, field, () -> (default value), item list, () -> (new item), menu widget)
    """
    model, field, make_default, tracked_items, make_row, menu_body = user_data
    # The field is reassigned as a new list instead of being mutated in place
    updated_values = [*getattr(model, field), make_default()]
    setattr(model, field, updated_values)
    row = make_row()
    tracked_items.append(row)
    menu_body.append(row)

on_checkbox_change(_, state, user_data)

Source code in ktoolbox/editor.py
184
185
186
def on_checkbox_change(_: urwid.CheckBox, state: bool, user_data: Tuple[BaseModel, str]):
    """
    Store the new check box state in the model field

    :param _: Widget
    :param state: New state
    :param user_data: (model, field)
    """
    target, attribute = user_data
    setattr(target, attribute, state)

on_edit_change(widget, user_data)

Source code in ktoolbox/editor.py
263
264
265
266
267
268
269
270
271
def on_edit_change(widget: EditWithSignalWidget, user_data: Tuple[BaseModel, str, Iterable[type]]):
    """
    Called when the text of a field edit widget changed

    The candidate types are tried in order and the first successful conversion is \
    stored in the model field; the field is left untouched when no candidate works.

    :param widget: Widget
    :param user_data: (model, field, candidate types)
    """
    model, field, annotation = user_data
    text = widget.get_edit_text()
    for field_type in annotation:
        try:
            model.__setattr__(field, field_type(text))
        except (ValueError, TypeError):
            # ``TypeError`` covers candidates that cannot be built from a string at all,
            # such as ``NoneType`` coming from an ``Optional[...]`` annotation
            continue
        else:
            break

on_item_changed(widget, user_data)

Called when an item of a List/Set/Tuple field is changed

Parameters:

Name Type Description Default
widget EditWithSignalWidget

Widget

required
user_data Tuple[BaseModel, str, Callable[[EditWithSignalWidget], Any], Union[List[_T], List[None]], _T]

(model, field, (edit widget) -> (value), item list, item)

required
Source code in ktoolbox/editor.py
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
def on_item_changed(
        widget: EditWithSignalWidget,
        user_data: Tuple[
            BaseModel,
            str,
            Callable[[EditWithSignalWidget], Any],
            Union[List[_T], List[None]],
            _T
        ]
):
    """
    Called when an item of a List/Set/Tuple field is changed

    The position of ``item`` in the item list selects which value of the field is replaced.

    :param widget: Widget
    :param user_data: (model, field, (edit widget) -> (value), item list, item)
    """
    model, field, read_value, tracked_items, item = user_data
    updated_values = list(getattr(model, field))
    position = tracked_items.index(item)
    updated_values[position] = read_value(widget)
    setattr(model, field, updated_values)

on_radio_button_change(_, state, user_data)

Source code in ktoolbox/editor.py
178
179
180
181
def on_radio_button_change(_: urwid.RadioButton, state: bool, user_data: Tuple[BaseModel, str, Any]):
    """
    Store the value of a radio button in the model field when the button gets selected

    :param _: Widget
    :param state: New state, nothing is done when it is false
    :param user_data: (model, field, value)
    """
    if not state:
        return
    target, attribute, selected = user_data
    setattr(target, attribute, selected)

on_remove_item(_, user_data)

Called when an item is removed from a List/Set/Tuple field

Parameters:

Name Type Description Default
_ Button

Widget

required
user_data Tuple[BaseModel, str, Union[List[_T], List[None]], _T, Union[MonitoredFocusList[_T], ListWalker]]

(model, field, item list, item, menu widget)

required
Source code in ktoolbox/editor.py
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
def on_remove_item(
        _: urwid.Button,
        user_data: Tuple[
            BaseModel,
            str,
            Union[List[_T], List[None]],
            _T,
            Union[urwid.MonitoredFocusList[_T], urwid.ListWalker]
        ]
):
    """
    Called when an item is removed from a List/Set/Tuple field

    The position of ``item`` in the item list selects which value of the field is \
    dropped; the row is then removed from the item list and from the menu widget.

    :param _: Widget
    :param user_data: (model, field, item list, item, menu widget)
    """
    model, field, tracked_items, item, menu_body = user_data
    remaining_values = list(getattr(model, field))
    position = tracked_items.index(item)
    del remaining_values[position]
    setattr(model, field, remaining_values)
    del tracked_items[position]
    menu_body.remove(item)

on_save_dotenv(_)

Source code in ktoolbox/editor.py
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
def on_save_dotenv(_: urwid.Button):
    """
    Save the configuration with ``save_dotenv`` when it has changed and report the outcome in a new box

    :param _: Widget
    """
    def build_dialog(*messages: str) -> urwid.Pile:
        """Stack the messages, each followed by a divider, above an "OK" button that goes back"""
        rows = []
        for message in messages:
            rows.append(urwid.Text(message))
            rows.append(urwid.Divider())
        rows.append(menu_option(urwid.Button(
            "OK", lambda x: top.back()
        )))
        return urwid.Pile(rows)

    if not has_changed():
        pile = build_dialog("Nothing has changed, no need to save.")
    else:
        try:
            save_dotenv()
        except Exception as e:
            pile = build_dialog("Unable to save changes!", f"{type(e).__name__}: {e}")
        else:
            pile = build_dialog("Your changes have been saved.")
    top.open_box(urwid.Filler(pile))

run_config_editor()

Source code in ktoolbox/editor.py
455
456
def run_config_editor():
    """Run the urwid main loop on ``top`` with the ``reversed`` palette entry"""
    palette = [("reversed", "standout", "")]
    main_loop = urwid.MainLoop(top, palette=palette)
    main_loop.run()

save_dotenv()

Source code in ktoolbox/editor.py
118
119
120
121
122
123
124
125
126
127
128
129
130
def save_dotenv():
    """
    Write the modified environment variables to a DotEnv file

    ``prod.env`` is written when it already exists as a file, ``.env`` otherwise. \
    Afterwards ``initial_envs`` is replaced by the current state.
    """
    current_envs = dump_envs(config)
    envs_to_dump = "\n".join(dump_modified_envs(current_envs))

    prod_dotenv_path = Path("prod.env")
    target_path = prod_dotenv_path if prod_dotenv_path.is_file() else Path(".env")
    with target_path.open("w") as f:
        f.write(envs_to_dump)

    # The saved state becomes the new baseline for ``has_changed``
    initial_envs.clear()
    initial_envs.update(current_envs)

sub_menu(caption, choices)

Source code in ktoolbox/editor.py
162
163
164
165
166
167
def sub_menu(
        caption: Union[str, Tuple[Hashable, str], List[Union[str, Tuple[Hashable, str]]]],
        choices: Iterable[urwid.Widget],
) -> urwid.AttrMap[urwid.Button]:
    """
    Same as ``sub_menu_with_menu_widget``, but only the button is returned

    :param caption: Text markup of the sub menu caption
    :param choices: Widgets listed in the sub menu
    """
    return sub_menu_with_menu_widget(caption, choices)[0]

sub_menu_with_menu_widget(caption, choices)

Source code in ktoolbox/editor.py
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
def sub_menu_with_menu_widget(
        caption: Union[str, Tuple[Hashable, str], List[Union[str, Tuple[Hashable, str]]]],
        choices: Iterable[urwid.Widget],
) -> Tuple[urwid.AttrMap[urwid.Button], urwid.ListBox]:
    """
    Build a sub menu and the button that opens it

    The sub menu lists the choices followed by a divider and a "Back" button.

    :param caption: Text markup used as the sub menu title and the button label
    :param choices: Widgets listed in the sub menu
    :return: (button opening the sub menu, sub menu widget)
    """
    entries = list(choices)
    entries.append(urwid.Divider(bottom=2))
    entries.append(menu_option(urwid.Button(
        "Back", lambda x: top.back()
    )))
    contents = menu(caption, entries)

    open_button = menu_option(urwid.Button(
        [caption, "..."],
        lambda x: top.open_box(contents)
    ))
    return open_button, contents

job

CreatorIndices

Bases: BaseKToolBoxData

Creator directory indices model

Record the path of each downloaded post.

Source code in ktoolbox/job/model.py
53
54
55
56
57
58
59
60
61
62
63
64
65
66
class CreatorIndices(BaseKToolBoxData):
    """
    Creator directory indices model

    Record the path of each downloaded post.

    A creator is identified by ``creator_id`` and ``service``. Both ``posts`` and
    ``posts_path`` are keyed by post ``id`` and default to empty dicts.
    """
    creator_id: str
    """Creator ID"""
    service: str
    """Creator service"""
    posts: Dict[str, Post] = {}
    """All posts, ``id`` -> ``Post``"""
    posts_path: Dict[str, Path] = {}
    """Posts and their path, ``id`` -> ``Path``"""

creator_id: str instance-attribute

Creator ID

posts: Dict[str, Post] = {} class-attribute instance-attribute

All posts, id -> Post

posts_path: Dict[str, Path] = {} class-attribute instance-attribute

Posts and their path, id -> Path

service: str instance-attribute

Creator service

Job

Bases: BaseModel

Download job model

Source code in ktoolbox/job/model.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
class Job(BaseModel):
    """
    Download job model

    Describes one file to download: the directory to save it in (``path``) and the
    ``path`` part of its download URL (``server_path``). The remaining fields are
    optional and default to ``None``.
    """
    path: Path
    """Directory path to save the file"""
    alt_filename: Optional[str] = None
    """Use this name if no filename given by the server"""
    server_path: str
    """The `path` part of download URL"""
    type: Optional[Literal[PostFileTypeEnum.Attachment, PostFileTypeEnum.File]] = None
    """Target file type"""
    post: Optional[Post] = None
    """Post object"""

alt_filename: Optional[str] = None class-attribute instance-attribute

Use this name if no filename given by the server

path: Path instance-attribute

Directory path to save the file

post: Optional[Post] = None class-attribute instance-attribute

Post object

server_path: str instance-attribute

The path part of download URL

type: Optional[Literal[PostFileTypeEnum.Attachment, PostFileTypeEnum.File]] = None class-attribute instance-attribute

Target file type

JobListData

Bases: BaseKToolBoxData

Download job list data model

For saving the list of jobs to disk.

Source code in ktoolbox/job/model.py
43
44
45
46
47
48
49
50
class JobListData(BaseKToolBoxData):
    """
    Download job list data model

    For saving the list of jobs to disk.

    ``jobs`` defaults to an empty list.
    """
    jobs: List[Job] = []
    """All jobs"""

jobs: List[Job] = [] class-attribute instance-attribute

All jobs

JobRunner

Source code in ktoolbox/job/runner.py
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
class JobRunner:
    """
    Concurrent runner of download jobs

    Jobs wait in an ``asyncio.Queue``. ``start()`` spawns ``config.job.count`` ``processor()``
    tasks; each of them takes jobs off the queue and downloads them one at a time.
    """

    def __init__(self, *, job_list: List[Job] = None, tqdm_class: std_tqdm = None, progress: bool = True,
                 centralized_progress: bool = True, use_colors: bool = True, use_emojis: bool = True):
        """
        Create a job runner

        :param job_list: Jobs used to initialize ``self._job_queue``
        :param tqdm_class: ``tqdm`` class to replace default ``tqdm.asyncio.tqdm``
        :param progress: Show progress bar
        :param centralized_progress: Use centralized progress manager to prevent display chaos \
        (only takes effect when ``progress`` is enabled)
        :param use_colors: Enable colorful progress bars (requires ANSI terminal support)
        :param use_emojis: Enable emoji indicators in progress bars
        """
        job_list = job_list or []
        self._job_queue: asyncio.Queue[Job] = asyncio.Queue()
        for job in job_list:
            self._job_queue.put_nowait(job)

        # Initialize progress management
        self._progress = progress
        self._centralized_progress = centralized_progress and progress

        if self._centralized_progress:
            # Use centralized progress manager with enhanced visuals
            self._progress_manager = ProgressManager(
                max_workers=config.job.count,
                use_colors=use_colors,
                use_emojis=use_emojis
            )
            self._tqdm_class = tqdm_class or create_managed_tqdm_class(self._progress_manager)
        else:
            # Use traditional tqdm
            self._progress_manager = None
            self._tqdm_class = tqdm_class

        self._downloaders_with_task: Dict[Downloader, asyncio.Task] = {}
        self._concurrent_tasks: Set[asyncio.Task] = set()
        self._lock = asyncio.Lock()
        self._total_jobs_count = len(job_list)
        # Runner-wide failure counter shared by all processors; the progress manager takes
        # absolute values, so a per-processor counter would under-report.
        self._failed_jobs_count = 0

    @property
    def finished(self):
        """
        Check if all jobs finished

        The runner holds ``self._lock`` for the whole duration of ``start()``.

        :return: ``False`` if **in process**, ``True`` otherwise
        """
        return not self._lock.locked()

    @cached_property
    def downloaders(self):
        """Get a read-only, live view of downloaders mapped to their tasks"""
        return MappingProxyType(self._downloaders_with_task)

    @property
    def waiting_size(self) -> int:
        """Get the number of jobs waiting to be processed"""
        return self._job_queue.qsize()

    @property
    def done_size(self) -> int:
        """Get the number of jobs that are done (downloader finished or its task done)"""
        return sum(
            1
            for downloader, task in self._downloaders_with_task.items()
            if downloader.finished or task.done()
        )

    @property
    def processing_size(self) -> int:
        """Get the number of jobs that are in process"""
        return len(self._downloaders_with_task) - self.done_size

    def _record_failure(self):
        """Count one failed job runner-wide and push the new total to the progress manager"""
        self._failed_jobs_count += 1
        if self._progress_manager:
            self._progress_manager.update_job_progress(
                failed=self._failed_jobs_count
            )

    async def processor(self) -> int:
        """
        Process each job in ``self._job_queue``

        Jobs are taken until the queue is empty. Each job is downloaded by a ``Downloader``
        running in its own task, and that task is awaited before the next job is taken.

        :return: Number of jobs that failed in this processor
        """
        failed_num = 0
        async with httpx.AsyncClient(
                verify=config.ssl_verify,
                cookies={"session": config.api.session_key} if config.api.session_key else None
        ) as client:
            while not self._job_queue.empty():
                job = await self._job_queue.get()

                # Create downloader
                url_parts = [config.downloader.scheme, config.api.files_netloc, job.server_path, '', '', '']
                url = str(urlunparse(url_parts))
                downloader = Downloader(
                    url=url,
                    path=job.path,
                    client=client,
                    designated_filename=job.alt_filename,
                    server_path=job.server_path,
                    post=job.post
                )

                # Create task
                task = asyncio.create_task(
                    downloader.run(
                        tqdm_class=self._tqdm_class,
                        progress=self._progress
                    )
                )
                # The entry is kept after the task finishes so that ``done_size`` can count it
                self._downloaders_with_task[downloader] = task

                # Run task
                task_done_set, _ = await asyncio.wait([task], return_when=asyncio.FIRST_EXCEPTION)
                task_done = task_done_set.pop()
                try:
                    exception = task_done.exception()
                except CancelledError as e:
                    # ``Task.exception()`` raises instead of returning when the task was cancelled
                    exception = e
                if not exception:
                    ret = task_done.result()
                    if ret.code == RetCodeEnum.FileExisted:
                        logger.debug(ret.message)
                        # Treat file existed as successful download but mark as existed
                        if self._progress_manager:
                            try:
                                self._progress_manager.increment_existed(1)
                            except AttributeError:
                                # Fallback if older ProgressManager doesn't have increment_existed
                                self._progress_manager.update_job_progress(
                                    existed=self._progress_manager._existed_jobs + 1
                                )
                            self._progress_manager.update_job_progress(
                                completed=self.done_size
                            )
                    elif ret.code != RetCodeEnum.Success:
                        logger.error(ret.message)
                        failed_num += 1
                        self._record_failure()
                    else:
                        # Update progress manager with completed job
                        if self._progress_manager:
                            self._progress_manager.update_job_progress(
                                completed=self.done_size
                            )
                elif isinstance(exception, CancelledError):
                    # A cancelled download is logged but not counted as a failure
                    logger.warning(
                        generate_msg(
                            "Download cancelled",
                            filename=job.alt_filename
                        )
                    )
                else:
                    logger.error(
                        generate_msg(
                            "Download failed",
                            filename=job.alt_filename,
                            exception=exception
                        )
                    )
                    failed_num += 1
                    self._record_failure()
                self._job_queue.task_done()
        await self._job_queue.join()
        return failed_num

    async def _watch_status(self):
        """
        Watch running, completed, failed jobs

        Logs a status line every 30 seconds for as long as jobs are waiting in the queue.
        """
        try:
            while not self._job_queue.empty():
                await asyncio.sleep(30)
                existed = self._progress_manager._existed_jobs if self._progress_manager else 0
                total = (self.waiting_size + self.processing_size + self.done_size)
                percent = (self.done_size / total) * 100 if total > 0 else 0
                logger.info(
                    f"Waiting: {self.waiting_size} / "
                    f"Running: {self.processing_size} / "
                    f"Completed: {self.done_size} "
                    f"({percent:.2f}%) | Existed: {existed}"
                )
        except asyncio.CancelledError:
            # Exit promptly when cancelled to allow fast shutdown
            return

    async def start(self) -> int:
        """
        Start processing jobs concurrently

        It will **block** until any other call of ``self.start()`` has finished

        :return: Number of jobs that failed
        """
        failed_num = 0

        async with self._lock:
            display_task = None
            watch_task = None

            # The display is set up only once the lock is held, so a second ``start()`` that is
            # still waiting cannot have its display torn down by the cleanup of the running one.
            if self._progress_manager:
                self._progress_manager.set_job_totals(self._total_jobs_count)
                self._progress_manager.start_display()

            try:
                if self._progress_manager:
                    # Setup logger integration to work with progress display
                    setup_logger_for_progress(self._progress_manager)

                self._concurrent_tasks.clear()
                for _ in range(config.job.count):
                    task = asyncio.create_task(self.processor())
                    self._concurrent_tasks.add(task)
                    task.add_done_callback(self._concurrent_tasks.discard)

                if self._progress_manager:
                    # Background display refresh and periodic status logging
                    display_task = asyncio.create_task(self._update_display_loop())
                    watch_task = asyncio.create_task(self._watch_status())

                # Wait for all concurrent processor tasks to finish
                task_done_set, _ = await asyncio.wait(self._concurrent_tasks)
            finally:
                # Always tear down, even when a processor setup step raised or this call was
                # cancelled; otherwise the display and the logger hook would stay installed.
                for background_task in (watch_task, display_task):
                    if background_task is None:
                        continue
                    background_task.cancel()
                    try:
                        await background_task
                    except asyncio.CancelledError:
                        pass

                if self._progress_manager:
                    self._progress_manager.stop_display()
                    # Remove logger integration
                    setup_logger_for_progress(None)

            for task in task_done_set:
                try:
                    failed_num += task.result()
                except CancelledError:
                    pass

        if failed_num:
            logger.warning(f"{failed_num} jobs failed, download finished")
        else:
            logger.success("All jobs in queue finished")
        return failed_num

    async def _update_display_loop(self):
        """Background task to update the progress display until cancelled"""
        try:
            while True:
                if self._progress_manager:
                    self._progress_manager.update_display()
                await asyncio.sleep(0.1)  # Update 10 times per second
        except asyncio.CancelledError:
            pass

    async def add_jobs(self, *jobs: Job):
        """Add jobs to ``self._job_queue`` and update the total job count"""
        for job in jobs:
            await self._job_queue.put(job)

        # Update total job count for progress tracking
        self._total_jobs_count += len(jobs)
        if self._progress_manager:
            self._progress_manager.set_job_totals(self._total_jobs_count)

    @staticmethod
    async def _force_cancel(target: asyncio.Task, wait_time: float = None) -> bool:
        """
        Force cancel ``asyncio.Task`` after ``wait_time`` seconds

        :param target: Target task
        :param wait_time: Seconds to wait before cancelling (``0`` yields to the event loop once, \
        ``None`` cancels immediately)
        :return: The result of ``target.cancel()``
        """
        if wait_time is not None:
            await asyncio.sleep(wait_time)
        return target.cancel()

    async def cancel_downloader(self, target: Downloader) -> bool:
        """
        Cancel downloader

        The downloader is asked to cancel first; its task is then force-cancelled after one
        event loop cycle.

        :param target: A downloader present in ``self.downloaders``
        :return: Whether cancelled successfully (``True`` as well if the task was already done)
        :raise KeyError: If ``target`` is not known to this runner
        """
        task = self._downloaders_with_task[target]
        if not task.done():
            target.cancel()
            return await self._force_cancel(task, 0) or task.done()
        return True

done_size: int property

Get the number of jobs that are done

downloaders cached property

Get downloaders with task

finished property

Check if all jobs finished

Returns:

Type Description

False if in process, True otherwise

processing_size: int property

Get the number of jobs that are in process

waiting_size: int property

Get the number of jobs waiting to be processed

__init__(*, job_list=None, tqdm_class=None, progress=True, centralized_progress=True, use_colors=True, use_emojis=True)

Create a job runner

Parameters:

Name Type Description Default
job_list List[Job]

Jobs used to initialize self._job_queue

None
tqdm_class tqdm

tqdm class to replace default tqdm.asyncio.tqdm

None
progress bool

Show progress bar

True
centralized_progress bool

Use centralized progress manager to prevent display chaos

True
use_colors bool

Enable colorful progress bars (requires ANSI terminal support)

True
use_emojis bool

Enable emoji indicators in progress bars

True
Source code in ktoolbox/job/runner.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def __init__(self, *, job_list: List[Job] = None, tqdm_class: std_tqdm = None, progress: bool = True,
             centralized_progress: bool = True, use_colors: bool = True, use_emojis: bool = True):
    """
    Build a job runner and pre-fill its queue

    :param job_list: Jobs placed into ``self._job_queue`` up front
    :param tqdm_class: ``tqdm`` class to replace default ``tqdm.asyncio.tqdm``
    :param progress: Show progress bar
    :param centralized_progress: Use centralized progress manager to prevent display chaos \
    (ignored when ``progress`` is disabled)
    :param use_colors: Enable colorful progress bars (requires ANSI terminal support)
    :param use_emojis: Enable emoji indicators in progress bars
    """
    initial_jobs = job_list or []
    queue: asyncio.Queue[Job] = asyncio.Queue()
    for pending in initial_jobs:
        queue.put_nowait(pending)
    self._job_queue = queue

    # Progress settings: the centralized manager is only used when progress is shown at all
    self._progress = progress
    self._centralized_progress = centralized_progress and progress

    manager = None
    if self._centralized_progress:
        # Centralized manager with enhanced visuals; the tqdm class defaults to one bound to it
        manager = ProgressManager(
            max_workers=config.job.count,
            use_colors=use_colors,
            use_emojis=use_emojis
        )
        tqdm_class = tqdm_class or create_managed_tqdm_class(manager)
    # Without a manager the caller-supplied ``tqdm_class`` (possibly ``None``) is kept as is
    self._progress_manager = manager
    self._tqdm_class = tqdm_class

    # Runtime bookkeeping
    self._downloaders_with_task: Dict[Downloader, asyncio.Task] = {}
    self._concurrent_tasks: Set[asyncio.Task] = set()
    self._lock = asyncio.Lock()
    self._total_jobs_count = len(initial_jobs)

add_jobs(*jobs) async

Add jobs to self._job_queue

Source code in ktoolbox/job/runner.py
295
296
297
298
299
300
301
302
303
async def add_jobs(self, *jobs: Job):
    """
    Enqueue more jobs into ``self._job_queue``

    The total job count is raised by the number of jobs given and, when a progress
    manager is in use, forwarded to it.
    """
    queue = self._job_queue
    for new_job in jobs:
        await queue.put(new_job)

    # Keep the total in sync for progress tracking
    updated_total = self._total_jobs_count + len(jobs)
    self._total_jobs_count = updated_total
    manager = self._progress_manager
    if manager:
        manager.set_job_totals(updated_total)

cancel_downloader(target) async

Cancel downloader

Returns:

Type Description
bool

Whether cancelled successfully

Source code in ktoolbox/job/runner.py
318
319
320
321
322
323
324
325
326
327
328
async def cancel_downloader(self, target: Downloader) -> bool:
    """
    Cancel a downloader and the task running it

    The downloader is asked to cancel first, then its task is force-cancelled
    through ``self._force_cancel`` with a wait time of ``0``.

    :return: Whether cancelled successfully (``True`` if the task was already done)
    """
    running_task = self._downloaders_with_task[target]
    if running_task.done():
        # Nothing left to cancel
        return True

    target.cancel()
    force_cancelled = await self._force_cancel(running_task, 0)
    return force_cancelled or running_task.done()

processor() async

Process each job in self._job_queue

Returns:

Type Description
int

Number of jobs that failed

Source code in ktoolbox/job/runner.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
async def processor(self) -> int:
    """
    Process each job in ``self._job_queue``

    Jobs are taken until the queue is empty. For each job a ``Downloader`` is created and
    run in its own task; that task is awaited before the next job is taken, so one call of
    this method downloads one job at a time. All downloads of one call share a single
    ``httpx.AsyncClient``.

    :return: Number of jobs that failed in this call
    """
    failed_num = 0
    async with httpx.AsyncClient(
            verify=config.ssl_verify,
            cookies={"session": config.api.session_key} if config.api.session_key else None
    ) as client:
        while not self._job_queue.empty():
            job = await self._job_queue.get()

            # Create downloader; the URL is built from the configured scheme and files netloc,
            # with ``job.server_path`` as its path component
            url_parts = [config.downloader.scheme, config.api.files_netloc, job.server_path, '', '', '']
            url = str(urlunparse(url_parts))
            downloader = Downloader(
                url=url,
                path=job.path,
                client=client,
                designated_filename=job.alt_filename,
                server_path=job.server_path,
                post=job.post
            )

            # Create task
            task = asyncio.create_task(
                downloader.run(
                    tqdm_class=self._tqdm_class,
                    progress=self._progress
                )
            )
            self._downloaders_with_task[downloader] = task
            # task.add_done_callback(lambda _: self._downloaders_with_task.pop(downloader))
            #   Delete this for counting finished job tasks

            # Run task and wait for it to finish
            task_done_set, _ = await asyncio.wait([task], return_when=asyncio.FIRST_EXCEPTION)
            task_done = task_done_set.pop()
            try:
                exception = task_done.exception()
            except CancelledError as e:
                # ``Task.exception()`` raises instead of returning when the task was cancelled
                exception = e
            if not exception:  # the task returned normally; inspect the returned code
                ret = task_done.result()
                if ret.code == RetCodeEnum.FileExisted:
                    logger.debug(ret.message)
                    # Treat file existed as successful download but mark as existed
                    if self._progress_manager:
                        # Increment existed count and update completed based on current done_size
                        try:
                            self._progress_manager.increment_existed(1)
                        except AttributeError:
                            # Fallback if older ProgressManager doesn't have increment_existed
                            self._progress_manager.update_job_progress(
                                existed=self._progress_manager._existed_jobs + 1
                            )
                        self._progress_manager.update_job_progress(
                            completed=self.done_size
                        )
                elif ret.code != RetCodeEnum.Success:
                    logger.error(ret.message)
                    failed_num += 1
                    # Update progress manager with failed job
                    # NOTE(review): ``failed_num`` is local to this call; if several processors
                    # run concurrently, each one reports only its own count here — confirm
                    # whether a runner-wide total was intended.
                    if self._progress_manager:
                        self._progress_manager.update_job_progress(
                            failed=failed_num
                        )
                else:
                    # Update progress manager with completed job
                    if self._progress_manager:
                        self._progress_manager.update_job_progress(
                            completed=self.done_size
                        )
            elif isinstance(exception, CancelledError):
                # A cancelled download is logged but not counted as a failure
                logger.warning(
                    generate_msg(
                        "Download cancelled",
                        filename=job.alt_filename
                    )
                )
            else:
                logger.error(
                    generate_msg(
                        "Download failed",
                        filename=job.alt_filename,
                        exception=exception
                    )
                )
                failed_num += 1
                # Update progress manager with failed job (same per-call count as above)
                if self._progress_manager:
                    self._progress_manager.update_job_progress(
                        failed=failed_num
                    )
            # Mark the job as processed so that ``Queue.join()`` below can return
            self._job_queue.task_done()
    # Wait until every job taken from the queue has been marked as done
    await self._job_queue.join()
    return failed_num

start() async

Start processing jobs concurrently

It will block until any other call of the self.start() method has finished

Returns:

Type Description
int

Number of jobs that failed

Source code in ktoolbox/job/runner.py
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
async def start(self) -> int:
    """
    Start processing jobs concurrently

    Spawns ``config.job.count`` ``self.processor()`` tasks and waits for all of them.

    It will **block** until any other call of ``self.start()`` has finished

    :return: Number of jobs that failed
    """
    failed_num = 0

    async with self._lock:
        display_task = None
        watch_task = None

        # The display is set up only once the lock is held, so a second ``start()`` that is
        # still waiting cannot have its display torn down by the cleanup of the running one.
        if self._progress_manager:
            self._progress_manager.set_job_totals(self._total_jobs_count)
            self._progress_manager.start_display()

        try:
            if self._progress_manager:
                # Setup logger integration to work with progress display
                setup_logger_for_progress(self._progress_manager)

            self._concurrent_tasks.clear()
            for _ in range(config.job.count):
                task = asyncio.create_task(self.processor())
                self._concurrent_tasks.add(task)
                task.add_done_callback(self._concurrent_tasks.discard)

            if self._progress_manager:
                # Background display refresh and periodic status logging; both are
                # cancelled as soon as the processors are done
                display_task = asyncio.create_task(self._update_display_loop())
                watch_task = asyncio.create_task(self._watch_status())

            # Wait for all concurrent processor tasks to finish
            task_done_set, _ = await asyncio.wait(self._concurrent_tasks)
        finally:
            # Always tear down, even when a setup step raised or this call was cancelled;
            # otherwise the display and the logger hook would stay installed.
            for background_task in (watch_task, display_task):
                if background_task is None:
                    continue
                background_task.cancel()
                try:
                    await background_task
                except asyncio.CancelledError:
                    pass

            if self._progress_manager:
                self._progress_manager.stop_display()
                # Remove logger integration
                setup_logger_for_progress(None)

        # Sum the per-processor failure counts; cancelled processors contribute nothing
        for task in task_done_set:
            try:
                failed_num += task.result()
            except CancelledError:
                pass

    if failed_num:
        logger.warning(f"{failed_num} jobs failed, download finished")
    else:
        logger.success("All jobs in queue finished")
    return failed_num

model

__all__ = ['Job', 'JobListData', 'CreatorIndices'] module-attribute

CreatorIndices

Bases: BaseKToolBoxData

Creator directory indices model

Record the path of each downloaded post.

Source code in ktoolbox/job/model.py
53
54
55
56
57
58
59
60
61
62
63
64
65
66
class CreatorIndices(BaseKToolBoxData):
    """
    Creator directory indices model

    Record the path of each downloaded post.
    """
    # Required fields (no default)
    creator_id: str
    """Creator ID"""
    service: str
    """Creator service"""
    # Both mappings are keyed by post ``id`` and default to empty.
    # NOTE(review): the defaults are mutable dict literals. That is only safe if
    # ``BaseKToolBoxData`` copies field defaults per instance (as pydantic models do) — confirm.
    posts: Dict[str, Post] = {}
    """All posts, ``id`` -> ``Post``"""
    posts_path: Dict[str, Path] = {}
    """Posts and their path, ``id`` -> ``Path``"""
creator_id: str instance-attribute

Creator ID

posts: Dict[str, Post] = {} class-attribute instance-attribute

All posts, id -> Post

posts_path: Dict[str, Path] = {} class-attribute instance-attribute

Posts and their path, id -> Path

service: str instance-attribute

Creator service

Job

Bases: BaseModel

Download job model

Source code in ktoolbox/job/model.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
class Job(BaseModel):
    """
    Download job model

    Describes one file to download: where to save it (``path``), which server path to
    fetch (``server_path``) and optional metadata about the file.
    """
    # Required: target directory of the download
    path: Path
    """Directory path to save the file"""
    # Optional: fallback file name
    alt_filename: Optional[str] = None
    """Use this name if no filename given by the server"""
    # Required: path component of the download URL (scheme and host are not part of the job)
    server_path: str
    """The `path` part of download URL"""
    # Optional: restricted to the ``Attachment`` and ``File`` members of ``PostFileTypeEnum``
    type: Optional[Literal[PostFileTypeEnum.Attachment, PostFileTypeEnum.File]] = None
    """Target file type"""
    # Optional: post associated with this job
    post: Optional[Post] = None
    """Post object"""
alt_filename: Optional[str] = None class-attribute instance-attribute

Use this name if no filename given by the server

path: Path instance-attribute

Directory path to save the file

post: Optional[Post] = None class-attribute instance-attribute

Post object

server_path: str instance-attribute

The path part of download URL

type: Optional[Literal[PostFileTypeEnum.Attachment, PostFileTypeEnum.File]] = None class-attribute instance-attribute

Target file type

JobListData

Bases: BaseKToolBoxData

Download job list data model

For saving the list of jobs to disk.

Source code in ktoolbox/job/model.py
43
44
45
46
47
48
49
50
class JobListData(BaseKToolBoxData):
    """
    Download job list data model

    Wraps a list of ``Job`` entries so that the list of jobs can be saved to disk.
    """
    # NOTE(review): the default is a mutable list literal. That is only safe if
    # ``BaseKToolBoxData`` copies field defaults per instance (as pydantic models do) — confirm.
    jobs: List[Job] = []
    """All jobs"""
jobs: List[Job] = [] class-attribute instance-attribute

All jobs

runner

__all__ = ['JobRunner'] module-attribute

JobRunner

Source code in ktoolbox/job/runner.py
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
class JobRunner:
    """
    Run queued download jobs with several concurrent processor tasks

    Each job taken from the internal ``asyncio.Queue`` is wrapped in a ``Downloader`` and
    executed as an ``asyncio.Task``. Progress can optionally be reported through a
    centralized ``ProgressManager``.
    """

    def __init__(self, *, job_list: List[Job] = None, tqdm_class: std_tqdm = None, progress: bool = True,
                 centralized_progress: bool = True, use_colors: bool = True, use_emojis: bool = True):
        """
        Create a job runner

        :param job_list: Jobs used to initially fill ``self._job_queue``
        :param tqdm_class: ``tqdm`` class to replace default ``tqdm.asyncio.tqdm``
        :param progress: Show progress bar
        :param centralized_progress: Use centralized progress manager to prevent display chaos \
        (only takes effect when ``progress`` is also enabled)
        :param use_colors: Enable colorful progress bars (requires ANSI terminal support)
        :param use_emojis: Enable emoji indicators in progress bars
        """
        job_list = job_list or []
        self._job_queue: asyncio.Queue[Job] = asyncio.Queue()
        for job in job_list:
            self._job_queue.put_nowait(job)

        # Initialize progress management
        self._progress = progress
        # Centralized display is meaningless without progress output, so require both flags
        self._centralized_progress = centralized_progress and progress

        if self._centralized_progress:
            # Use centralized progress manager with enhanced visuals
            self._progress_manager = ProgressManager(
                max_workers=config.job.count,
                use_colors=use_colors,
                use_emojis=use_emojis
            )
            # A caller-supplied ``tqdm_class`` takes precedence over the managed one
            self._tqdm_class = tqdm_class or create_managed_tqdm_class(self._progress_manager)
        else:
            # Use traditional tqdm (``tqdm_class`` may be ``None`` here and is passed on as-is)
            self._progress_manager = None
            self._tqdm_class = tqdm_class

        # Every downloader ever started, mapped to the task running it (entries are never removed)
        self._downloaders_with_task: Dict[Downloader, asyncio.Task] = {}
        # Processor tasks of the current ``start()`` call
        self._concurrent_tasks: Set[asyncio.Task] = set()
        # Held for the whole duration of ``start()``; also backs the ``finished`` property
        self._lock = asyncio.Lock()
        self._total_jobs_count = len(job_list)

    @property
    def finished(self):
        """
        Check if all jobs finished

        :return: ``False`` if **in process** (the internal lock is held by ``start()``), ``True`` otherwise
        """
        return not self._lock.locked()

    @cached_property
    def downloaders(self):
        """Get a read-only mapping view of downloaders to their tasks"""
        return MappingProxyType(self._downloaders_with_task)

    @property
    def waiting_size(self) -> int:
        """Get the number of jobs waiting to be processed"""
        return self._job_queue.qsize()

    @property
    def done_size(self) -> int:
        """Get the number of jobs that are done (downloader finished or its task is done)"""
        size = 0
        for downloader, task in self._downloaders_with_task.items():
            if downloader.finished or task.done():
                size += 1
        return size

    @property
    def processing_size(self) -> int:
        """Get the number of jobs that are in process (started but not yet done)"""
        return len(self._downloaders_with_task) - self.done_size

    async def processor(self) -> int:
        """
        Process each job in ``self._job_queue``

        Jobs are taken one at a time; each one is downloaded through a ``Downloader`` task that
        shares a single ``httpx.AsyncClient``. After the queue is drained, the coroutine waits
        on ``self._job_queue.join()`` before returning.

        :return: Number of jobs that failed (as counted by this processor only)
        """
        failed_num = 0
        async with httpx.AsyncClient(
                verify=config.ssl_verify,
                cookies={"session": config.api.session_key} if config.api.session_key else None
        ) as client:
            while not self._job_queue.empty():
                job = await self._job_queue.get()

                # Create downloader
                # Only scheme, netloc and path are filled; params, query and fragment stay empty
                url_parts = [config.downloader.scheme, config.api.files_netloc, job.server_path, '', '', '']
                url = str(urlunparse(url_parts))
                downloader = Downloader(
                    url=url,
                    path=job.path,
                    client=client,
                    designated_filename=job.alt_filename,
                    server_path=job.server_path,
                    post=job.post
                )

                # Create task
                task = asyncio.create_task(
                    downloader.run(
                        tqdm_class=self._tqdm_class,
                        progress=self._progress
                    )
                )
                self._downloaders_with_task[downloader] = task
                # task.add_done_callback(lambda _: self._downloaders_with_task.pop(downloader))
                #   Delete this for counting finished job tasks

                # Run task
                task_done_set, _ = await asyncio.wait([task], return_when=asyncio.FIRST_EXCEPTION)
                task_done = task_done_set.pop()
                try:
                    exception = task_done.exception()
                except CancelledError as e:
                    # ``Task.exception()`` raises instead of returning when the task was cancelled
                    exception = e
                if not exception:  # the task returned normally; inspect its return code
                    ret = task_done.result()
                    if ret.code == RetCodeEnum.FileExisted:
                        logger.debug(ret.message)
                        # Treat file existed as successful download but mark as existed
                        if self._progress_manager:
                            # Increment existed count atomically and update completed based on current done_size
                            try:
                                self._progress_manager.increment_existed(1)
                            except AttributeError:
                                # Fallback if older ProgressManager doesn't have increment_existed
                                self._progress_manager.update_job_progress(
                                    existed=self._progress_manager._existed_jobs + 1
                                )
                            self._progress_manager.update_job_progress(
                                completed=self.done_size
                            )
                    elif ret.code != RetCodeEnum.Success:
                        logger.error(ret.message)
                        failed_num += 1
                        # Update progress manager with failed job
                        # NOTE(review): ``failed_num`` is local to this processor; if
                        # ``update_job_progress`` treats ``failed`` as an absolute total, the
                        # displayed count may be off when several processors run - confirm
                        if self._progress_manager:
                            self._progress_manager.update_job_progress(
                                failed=failed_num
                            )
                    else:
                        # Update progress manager with completed job
                        if self._progress_manager:
                            self._progress_manager.update_job_progress(
                                completed=self.done_size
                            )
                elif isinstance(exception, CancelledError):
                    # Cancelled downloads are logged but not counted as failures
                    logger.warning(
                        generate_msg(
                            "Download cancelled",
                            filename=job.alt_filename
                        )
                    )
                else:
                    logger.error(
                        generate_msg(
                            "Download failed",
                            filename=job.alt_filename,
                            exception=exception
                        )
                    )
                    failed_num += 1
                    # Update progress manager with failed job
                    if self._progress_manager:
                        self._progress_manager.update_job_progress(
                            failed=failed_num
                        )
                # Balance the earlier ``get()`` so that ``join()`` below can return
                self._job_queue.task_done()
        await self._job_queue.join()
        return failed_num

    async def _watch_status(self):
        """
        Watch running, completed, failed jobs

        Logs a status summary every 30 seconds for as long as the job queue is not empty.
        """
        try:
            while not self._job_queue.empty():
                await asyncio.sleep(30)
                existed = self._progress_manager._existed_jobs if self._progress_manager else 0
                total = (self.waiting_size + self.processing_size + self.done_size)
                # Guard against division by zero when nothing has been queued or started
                percent = (self.done_size / total) * 100 if total > 0 else 0
                logger.info(
                    f"Waiting: {self.waiting_size} / "
                    f"Running: {self.processing_size} / "
                    f"Completed: {self.done_size} "
                    f"({percent:.2f}%) | Existed: {existed}"
                )
        except asyncio.CancelledError:
            # Exit promptly when cancelled to allow fast shutdown
            return

    async def start(self) -> int:
        """
        Start processing jobs concurrently

        It will **block** until any other running call of ``self.start()`` has finished,
        because the internal lock is held for the whole run.

        :return: Number of jobs that failed
        """
        failed_num = 0

        # Initialize progress manager if using centralized progress
        if self._progress_manager:
            self._progress_manager.set_job_totals(self._total_jobs_count)
            self._progress_manager.start_display()
            # Setup logger integration to work with progress display
            setup_logger_for_progress(self._progress_manager)

        async with self._lock:
            self._concurrent_tasks.clear()
            # Spawn ``config.job.count`` processors that all consume the same queue
            for _ in range(config.job.count):
                task = asyncio.create_task(self.processor())
                self._concurrent_tasks.add(task)
                task.add_done_callback(self._concurrent_tasks.discard)

            # Start background display update if using centralized progress
            display_task = None
            if self._progress_manager:
                display_task = asyncio.create_task(self._update_display_loop())

            # Start watcher as a background task so we can cancel it promptly when downloads finish
            watch_task = None
            if self._progress_manager:
                watch_task = asyncio.create_task(self._watch_status())

            # Wait for all concurrent processor tasks to finish
            task_done_set, _ = await asyncio.wait(self._concurrent_tasks)

            # Cancel watcher promptly to avoid waiting for its sleep interval
            if watch_task:
                watch_task.cancel()
                try:
                    await watch_task
                except asyncio.CancelledError:
                    pass

            if display_task:
                display_task.cancel()
                try:
                    await display_task
                except asyncio.CancelledError:
                    pass

            # Sum the per-processor failure counts; cancelled processors contribute nothing
            for task in task_done_set:
                try:
                    failed_num += task.result()
                except CancelledError:
                    pass

            # Clean up progress manager
            # NOTE(review): not reached if ``task.result()`` above raises a non-cancellation
            # exception - consider ``try``/``finally`` if that can happen
            if self._progress_manager:
                self._progress_manager.stop_display()
                # Remove logger integration
                setup_logger_for_progress(None)

        if failed_num:
            logger.warning(f"{failed_num} jobs failed, download finished")
        else:
            logger.success("All jobs in queue finished")
        return failed_num

    async def _update_display_loop(self):
        """Background task to update the progress display until it is cancelled"""
        try:
            while True:
                if self._progress_manager:
                    self._progress_manager.update_display()
                await asyncio.sleep(0.1)  # Update 10 times per second
        except asyncio.CancelledError:
            pass

    async def add_jobs(self, *jobs: Job):
        """
        Add jobs to ``self._job_queue``

        :param jobs: Jobs to append to the queue
        """
        for job in jobs:
            await self._job_queue.put(job)

        # Update total job count for progress tracking
        self._total_jobs_count += len(jobs)
        if self._progress_manager:
            self._progress_manager.set_job_totals(self._total_jobs_count)

    @staticmethod
    async def _force_cancel(target: asyncio.Task, wait_time: float = None) -> bool:
        """
        Force cancel ``asyncio.Task`` after ``wait_time`` seconds

        :param target: Target task
        :param wait_time: Seconds to wait before cancel (``0`` for skip one event loop run cycle, \
        ``None`` to cancel immediately without sleeping)
        :return: Whether cancelled successfully (the result of ``target.cancel()``)
        """
        if wait_time is not None:
            await asyncio.sleep(wait_time)
        return target.cancel()

    async def cancel_downloader(self, target: Downloader) -> bool:
        """
        Cancel downloader

        :param target: Downloader to cancel; must be a key of ``self._downloaders_with_task``
        :return: Whether cancelled successfully
        :raises KeyError: If ``target`` was never started by this runner
        """
        task = self._downloaders_with_task[target]
        if not task.done():
            # Ask the downloader to stop first, then cancel its task after one loop cycle
            target.cancel()
            return await self._force_cancel(task, 0) or task.done()
        return True
done_size: int property

Get the number of jobs that are done

downloaders cached property

Get downloaders with task

finished property

Check if all jobs finished

Returns:

Type Description

False if in process, True otherwise

processing_size: int property

Get the number of jobs that are in process

waiting_size: int property

Get the number of jobs waiting to be processed

__init__(*, job_list=None, tqdm_class=None, progress=True, centralized_progress=True, use_colors=True, use_emojis=True)

Create a job runner

Parameters:

Name Type Description Default
job_list List[Job]

Jobs used to initialize self._job_queue

None
tqdm_class tqdm

tqdm class to replace default tqdm.asyncio.tqdm

None
progress bool

Show progress bar

True
centralized_progress bool

Use centralized progress manager to prevent display chaos

True
use_colors bool

Enable colorful progress bars (requires ANSI terminal support)

True
use_emojis bool

Enable emoji indicators in progress bars

True
Source code in ktoolbox/job/runner.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def __init__(self, *, job_list: List[Job] = None, tqdm_class: std_tqdm = None, progress: bool = True,
             centralized_progress: bool = True, use_colors: bool = True, use_emojis: bool = True):
    """
    Create a job runner

    :param job_list: Jobs used to initially fill ``self._job_queue``
    :param tqdm_class: ``tqdm`` class to replace default ``tqdm.asyncio.tqdm``
    :param progress: Show progress bar
    :param centralized_progress: Use centralized progress manager to prevent display chaos \
    (only takes effect when ``progress`` is also enabled)
    :param use_colors: Enable colorful progress bars (requires ANSI terminal support)
    :param use_emojis: Enable emoji indicators in progress bars
    """
    initial_jobs = job_list or []

    # Fill the queue with the jobs supplied up front
    self._job_queue: asyncio.Queue[Job] = asyncio.Queue()
    for pending in initial_jobs:
        self._job_queue.put_nowait(pending)

    # Progress settings: the centralized display needs progress output to be enabled too
    self._progress = progress
    self._centralized_progress = centralized_progress and progress

    if not self._centralized_progress:
        # Traditional tqdm, no manager
        self._progress_manager = None
        self._tqdm_class = tqdm_class
    else:
        # Centralized progress manager with enhanced visuals
        self._progress_manager = ProgressManager(
            max_workers=config.job.count,
            use_colors=use_colors,
            use_emojis=use_emojis
        )
        self._tqdm_class = tqdm_class or create_managed_tqdm_class(self._progress_manager)

    # Bookkeeping for running downloads and processor tasks
    self._downloaders_with_task: Dict[Downloader, asyncio.Task] = {}
    self._concurrent_tasks: Set[asyncio.Task] = set()
    self._lock = asyncio.Lock()
    self._total_jobs_count = len(initial_jobs)
add_jobs(*jobs) async

Add jobs to self._job_queue

Source code in ktoolbox/job/runner.py
295
296
297
298
299
300
301
302
303
async def add_jobs(self, *jobs: Job):
    """
    Add jobs to ``self._job_queue``

    :param jobs: Jobs to append to the queue
    """
    for new_job in jobs:
        await self._job_queue.put(new_job)

    # Keep the total used for progress tracking in sync with the queue
    self._total_jobs_count = self._total_jobs_count + len(jobs)
    manager = self._progress_manager
    if manager:
        manager.set_job_totals(self._total_jobs_count)
cancel_downloader(target) async

Cancel downloader

Returns:

Type Description
bool

Whether cancelled successfully

Source code in ktoolbox/job/runner.py
318
319
320
321
322
323
324
325
326
327
328
async def cancel_downloader(self, target: Downloader) -> bool:
    """
    Cancel downloader

    :param target: Downloader to cancel; must be a key of ``self._downloaders_with_task``
    :return: Whether cancelled successfully
    """
    running_task = self._downloaders_with_task[target]
    if running_task.done():
        # Nothing left to cancel
        return True

    # Ask the downloader to stop, then cancel its task after one event loop cycle
    target.cancel()
    cancelled = await self._force_cancel(running_task, 0)
    return cancelled or running_task.done()
processor() async

Process each job in self._job_queue

Returns:

Type Description
int

Number of jobs that failed

Source code in ktoolbox/job/runner.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
async def processor(self) -> int:
    """
    Process each job in ``self._job_queue``

    Jobs are taken one at a time; each one is downloaded through a ``Downloader`` task that
    shares a single ``httpx.AsyncClient``. After the queue is drained, the coroutine waits
    on ``self._job_queue.join()`` before returning.

    :return: Number of jobs that failed (as counted by this processor only)
    """
    failed_num = 0
    async with httpx.AsyncClient(
            verify=config.ssl_verify,
            cookies={"session": config.api.session_key} if config.api.session_key else None
    ) as client:
        while not self._job_queue.empty():
            job = await self._job_queue.get()

            # Create downloader
            # Only scheme, netloc and path are filled; params, query and fragment stay empty
            url_parts = [config.downloader.scheme, config.api.files_netloc, job.server_path, '', '', '']
            url = str(urlunparse(url_parts))
            downloader = Downloader(
                url=url,
                path=job.path,
                client=client,
                designated_filename=job.alt_filename,
                server_path=job.server_path,
                post=job.post
            )

            # Create task
            task = asyncio.create_task(
                downloader.run(
                    tqdm_class=self._tqdm_class,
                    progress=self._progress
                )
            )
            self._downloaders_with_task[downloader] = task
            # task.add_done_callback(lambda _: self._downloaders_with_task.pop(downloader))
            #   Delete this for counting finished job tasks

            # Run task
            task_done_set, _ = await asyncio.wait([task], return_when=asyncio.FIRST_EXCEPTION)
            task_done = task_done_set.pop()
            try:
                exception = task_done.exception()
            except CancelledError as e:
                # ``Task.exception()`` raises instead of returning when the task was cancelled
                exception = e
            if not exception:  # the task returned normally; inspect its return code
                ret = task_done.result()
                if ret.code == RetCodeEnum.FileExisted:
                    logger.debug(ret.message)
                    # Treat file existed as successful download but mark as existed
                    if self._progress_manager:
                        # Increment existed count atomically and update completed based on current done_size
                        try:
                            self._progress_manager.increment_existed(1)
                        except AttributeError:
                            # Fallback if older ProgressManager doesn't have increment_existed
                            self._progress_manager.update_job_progress(
                                existed=self._progress_manager._existed_jobs + 1
                            )
                        self._progress_manager.update_job_progress(
                            completed=self.done_size
                        )
                elif ret.code != RetCodeEnum.Success:
                    logger.error(ret.message)
                    failed_num += 1
                    # Update progress manager with failed job
                    # NOTE(review): ``failed_num`` is local to this processor; if
                    # ``update_job_progress`` treats ``failed`` as an absolute total, the
                    # displayed count may be off when several processors run - confirm
                    if self._progress_manager:
                        self._progress_manager.update_job_progress(
                            failed=failed_num
                        )
                else:
                    # Update progress manager with completed job
                    if self._progress_manager:
                        self._progress_manager.update_job_progress(
                            completed=self.done_size
                        )
            elif isinstance(exception, CancelledError):
                # Cancelled downloads are logged but not counted as failures
                logger.warning(
                    generate_msg(
                        "Download cancelled",
                        filename=job.alt_filename
                    )
                )
            else:
                logger.error(
                    generate_msg(
                        "Download failed",
                        filename=job.alt_filename,
                        exception=exception
                    )
                )
                failed_num += 1
                # Update progress manager with failed job
                if self._progress_manager:
                    self._progress_manager.update_job_progress(
                        failed=failed_num
                    )
            # Balance the earlier ``get()`` so that ``join()`` below can return
            self._job_queue.task_done()
    await self._job_queue.join()
    return failed_num
start() async

Start processing jobs concurrently

It will block until any other running call of self.start() has finished

Returns:

Type Description
int

Number of jobs that failed

Source code in ktoolbox/job/runner.py
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
async def start(self) -> int:
    """
    Start processing jobs concurrently

    It will **block** until any other running call of ``self.start()`` has finished,
    because the internal lock is held for the whole run.

    :return: Number of jobs that failed
    """
    failed_num = 0

    # Initialize progress manager if using centralized progress
    if self._progress_manager:
        self._progress_manager.set_job_totals(self._total_jobs_count)
        self._progress_manager.start_display()
        # Setup logger integration to work with progress display
        setup_logger_for_progress(self._progress_manager)

    async with self._lock:
        self._concurrent_tasks.clear()
        # Spawn ``config.job.count`` processors that all consume the same queue
        for _ in range(config.job.count):
            task = asyncio.create_task(self.processor())
            self._concurrent_tasks.add(task)
            task.add_done_callback(self._concurrent_tasks.discard)

        # Start background display update if using centralized progress
        display_task = None
        if self._progress_manager:
            display_task = asyncio.create_task(self._update_display_loop())

        # Start watcher as a background task so we can cancel it promptly when downloads finish
        watch_task = None
        if self._progress_manager:
            watch_task = asyncio.create_task(self._watch_status())

        # Wait for all concurrent processor tasks to finish
        task_done_set, _ = await asyncio.wait(self._concurrent_tasks)

        # Cancel watcher promptly to avoid waiting for its sleep interval
        if watch_task:
            watch_task.cancel()
            try:
                await watch_task
            except asyncio.CancelledError:
                pass

        if display_task:
            display_task.cancel()
            try:
                await display_task
            except asyncio.CancelledError:
                pass

        # Sum the per-processor failure counts; cancelled processors contribute nothing
        for task in task_done_set:
            try:
                failed_num += task.result()
            except CancelledError:
                pass

        # Clean up progress manager
        # NOTE(review): not reached if ``task.result()`` above raises a non-cancellation
        # exception - consider ``try``/``finally`` if that can happen
        if self._progress_manager:
            self._progress_manager.stop_display()
            # Remove logger integration
            setup_logger_for_progress(None)

    if failed_num:
        logger.warning(f"{failed_num} jobs failed, download finished")
    else:
        logger.success("All jobs in queue finished")
    return failed_num

model

__all__ = ['BaseKToolBoxData', 'SearchResult'] module-attribute

BaseKToolBoxData

Bases: BaseModel

Base class for all KToolBox data models.

Source code in ktoolbox/model.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
class BaseKToolBoxData(BaseModel):
    """
    Base class for all KToolBox data models.

    Every instance records the package ``version`` and its own concrete class in ``type``.
    """

    def __init__(self, **data: Any):
        """Build the model from keyword fields, then store the concrete class in ``type``"""
        super().__init__(**data)
        # Assigned after validation, so it replaces whatever ``type`` value ``data`` carried
        self.type = type(self)

    # Defaults to the package ``__version__``
    version: str = __version__
    # ``None`` by default; set to the concrete model class by ``__init__``
    type: Union[Type["BaseKToolBoxData"], str] = None

    @field_serializer('type')
    def _(self, value: Type["BaseKToolBoxData"], _info):
        """Serialize the ``type`` field as ``str(value)``"""
        return str(value)

type: Union[Type[BaseKToolBoxData], str] = None class-attribute instance-attribute

version: str = __version__ class-attribute instance-attribute

_(value, _info)

Source code in ktoolbox/model.py
24
25
26
@field_serializer('type')
def _(self, value: Type["BaseKToolBoxData"], _info):
    """Serialize the ``type`` field as ``str(value)``"""
    return str(value)

__init__(**data)

Source code in ktoolbox/model.py
17
18
19
def __init__(self, **data: Any):
    """Build the model from keyword fields, then store the concrete class in ``type``"""
    super().__init__(**data)
    # Assigned after validation, so it replaces whatever ``type`` value ``data`` carried
    self.type = type(self)

SearchResult

Bases: BaseKToolBoxData, Generic[_T]

Cli search result

Source code in ktoolbox/model.py
29
30
31
class SearchResult(BaseKToolBoxData, Generic[_T]):
    """Cli search result, generic over the item type ``_T``"""
    # Matched items; defaults to an empty list
    result: List[_T] = []

result: List[_T] = [] class-attribute instance-attribute

progress

Centralized Progress Management for KToolBox

This module provides a centralized progress display system that prevents multiple concurrent progress bars from interfering with each other. Inspired by rclone's progress display approach with enhanced visual effects.

RICH_AVAILABLE = True module-attribute

__all__ = ['ProgressManager', 'ManagedTqdm', 'ColorTheme', 'setup_logger_for_progress', 'create_managed_tqdm_class'] module-attribute

ColorTheme

Rich-based color themes and styles for progress display

Source code in ktoolbox/progress.py
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
class ColorTheme:
    """
    Rich-based color themes and styles for progress display

    Holds ANSI color constants, their Rich style names, emoji indicators and spinner
    frames, plus helpers to colorize text and to detect terminal color support.
    """

    # Rich console for color support detection and rendering
    _console = Console() if RICH_AVAILABLE else None

    # ANSI Color codes (fallback when Rich not available)
    RESET = '\033[0m'
    BOLD = '\033[1m'

    # Basic colors
    RED = '\033[31m'
    GREEN = '\033[32m'
    YELLOW = '\033[33m'
    BLUE = '\033[34m'
    MAGENTA = '\033[35m'
    CYAN = '\033[36m'
    WHITE = '\033[37m'

    # Bright colors
    BRIGHT_RED = '\033[91m'
    BRIGHT_GREEN = '\033[92m'
    BRIGHT_YELLOW = '\033[93m'
    BRIGHT_BLUE = '\033[94m'
    BRIGHT_MAGENTA = '\033[95m'
    BRIGHT_CYAN = '\033[96m'
    BRIGHT_WHITE = '\033[97m'

    # Background colors
    BG_GREEN = '\033[42m'
    BG_RED = '\033[41m'
    BG_YELLOW = '\033[43m'

    # Rich style mappings (ANSI escape code -> Rich style name)
    if RICH_AVAILABLE:
        RICH_STYLES = {
            RED: "red",
            GREEN: "green",
            YELLOW: "yellow",
            BLUE: "blue",
            MAGENTA: "magenta",
            CYAN: "cyan",
            WHITE: "white",
            BRIGHT_RED: "bright_red",
            BRIGHT_GREEN: "bright_green",
            BRIGHT_YELLOW: "bright_yellow",
            BRIGHT_BLUE: "bright_blue",
            BRIGHT_MAGENTA: "bright_magenta",
            BRIGHT_CYAN: "bright_cyan",
            BRIGHT_WHITE: "bright_white",
        }
    else:
        RICH_STYLES = {}

    # Emojis
    DOWNLOAD = "📥"
    COMPLETED = "✅"
    FAILED = "❌"
    RUNNING = "🔄"
    WAITING = "⏳"
    SPEED = "⚡"
    ROCKET = "🚀"

    # Animation frames for spinner
    SPINNER_FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]

    @classmethod
    def colorize(cls, text: str, color: str, bold: bool = False) -> str:
        """
        Apply color to text with optional bold using Rich when available

        :param text: Text to colorize
        :param color: One of the ANSI color constants of this class; values without an entry \
        in ``RICH_STYLES`` are passed to Rich unchanged
        :param bold: Also render the text in bold
        :return: The styled string, or ``text`` unchanged when color is not supported
        """
        if not cls.supports_color():
            return text

        if RICH_AVAILABLE:
            # Use Rich for more robust color support
            rich_color = cls.RICH_STYLES.get(color, color)
            style = f"bold {rich_color}" if bold else rich_color

            # Create a Text object and render it to get the styled string
            from rich.text import Text
            from rich.console import Console

            text_obj = Text(text, style=style)
            # Render through a temporary console that always emits color codes; the large
            # width keeps the captured text on one line
            console = Console(force_terminal=True, width=1000)
            with console.capture() as capture:
                console.print(text_obj, end="")
            return capture.get()
        else:
            # Fallback to ANSI codes
            prefix = cls.BOLD + color if bold else color
            return f"{prefix}{text}{cls.RESET}"

    @classmethod
    def supports_color(cls) -> bool:
        """
        Check if terminal supports color using Rich when available

        :return: ``True`` when colored output can be used, ``False`` otherwise
        """
        if RICH_AVAILABLE and cls._console:
            # Rich handles color support detection better
            return cls._console.is_terminal and not cls._console.options.legacy_windows
        else:
            # Fallback to manual detection. The two conditions are evaluated separately:
            # ``and`` binds tighter than ``or``, so a single chained expression would
            # report color support on a non-TTY whenever ANSICON is set.
            is_tty = hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()
            ansi_capable = not sys.platform.startswith('win') or 'ANSICON' in os.environ
            return is_tty and ansi_capable

BG_GREEN = '\x1b[42m' class-attribute instance-attribute

BG_RED = '\x1b[41m' class-attribute instance-attribute

BG_YELLOW = '\x1b[43m' class-attribute instance-attribute

BLUE = '\x1b[34m' class-attribute instance-attribute

BOLD = '\x1b[1m' class-attribute instance-attribute

BRIGHT_BLUE = '\x1b[94m' class-attribute instance-attribute

BRIGHT_CYAN = '\x1b[96m' class-attribute instance-attribute

BRIGHT_GREEN = '\x1b[92m' class-attribute instance-attribute

BRIGHT_MAGENTA = '\x1b[95m' class-attribute instance-attribute

BRIGHT_RED = '\x1b[91m' class-attribute instance-attribute

BRIGHT_WHITE = '\x1b[97m' class-attribute instance-attribute

BRIGHT_YELLOW = '\x1b[93m' class-attribute instance-attribute

COMPLETED = '✅' class-attribute instance-attribute

CYAN = '\x1b[36m' class-attribute instance-attribute

DOWNLOAD = '📥' class-attribute instance-attribute

FAILED = '❌' class-attribute instance-attribute

GREEN = '\x1b[32m' class-attribute instance-attribute

MAGENTA = '\x1b[35m' class-attribute instance-attribute

RED = '\x1b[31m' class-attribute instance-attribute

RESET = '\x1b[0m' class-attribute instance-attribute

RICH_STYLES = {RED: 'red', GREEN: 'green', YELLOW: 'yellow', BLUE: 'blue', MAGENTA: 'magenta', CYAN: 'cyan', WHITE: 'white', BRIGHT_RED: 'bright_red', BRIGHT_GREEN: 'bright_green', BRIGHT_YELLOW: 'bright_yellow', BRIGHT_BLUE: 'bright_blue', BRIGHT_MAGENTA: 'bright_magenta', BRIGHT_CYAN: 'bright_cyan', BRIGHT_WHITE: 'bright_white'} class-attribute instance-attribute

ROCKET = '🚀' class-attribute instance-attribute

RUNNING = '🔄' class-attribute instance-attribute

SPEED = '⚡' class-attribute instance-attribute

SPINNER_FRAMES = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'] class-attribute instance-attribute

WAITING = '⏳' class-attribute instance-attribute

WHITE = '\x1b[37m' class-attribute instance-attribute

YELLOW = '\x1b[33m' class-attribute instance-attribute

colorize(text, color, bold=False) classmethod

Apply color to text with optional bold using Rich when available

Source code in ktoolbox/progress.py
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
@classmethod
def colorize(cls, text: str, color: str, bold: bool = False) -> str:
    """
    Return ``text`` wrapped in the given color (and bold, if requested)

    Rich is used for rendering when it is available, plain ANSI codes otherwise.
    The text is returned unchanged when color is not supported.
    """
    if not cls.supports_color():
        return text

    if not RICH_AVAILABLE:
        # Plain ANSI escape codes when Rich cannot be used
        ansi_prefix = cls.BOLD + color if bold else color
        return f"{ansi_prefix}{text}{cls.RESET}"

    # Translate the ANSI constant into a Rich style name (unknown values pass through)
    rich_color = cls.RICH_STYLES.get(color, color)
    if bold:
        style = f"bold {rich_color}"
    else:
        style = rich_color

    from rich.text import Text
    from rich.console import Console

    styled = Text(text, style=style)
    # A temporary console that always emits color codes renders the styled string
    renderer = Console(force_terminal=True, width=1000)
    with renderer.capture() as captured:
        renderer.print(styled, end="")
    return captured.get()

supports_color() classmethod

Check if terminal supports color using Rich when available

Source code in ktoolbox/progress.py
154
155
156
157
158
159
160
161
162
163
164
165
@classmethod
def supports_color(cls) -> bool:
    """
    Check if terminal supports color using Rich when available.

    With Rich available and ``cls._console`` set, the console's own terminal
    detection is used. Otherwise the answer is: stdout is a TTY, and the
    platform is either not Windows or has ``ANSICON`` set in the environment.

    :return: ``True`` when colored output should be emitted
    """
    if RICH_AVAILABLE and cls._console:
        # Rich handles color support detection better
        return cls._console.is_terminal and not cls._console.options.legacy_windows

    # Fallback to manual detection.
    # The platform test is grouped explicitly: without the parentheses,
    # ``a and b and not c or d`` parses as ``(a and b and not c) or d``, so a
    # set ANSICON variable reported color support even for non-TTY output.
    is_a_tty = hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()
    supported_platform = (
        not sys.platform.startswith('win') or 'ANSICON' in os.environ
    )
    return bool(is_a_tty and supported_platform)

ManagedTqdm

A tqdm-compatible progress bar that works with ProgressManager. This class mimics the tqdm interface but delegates to ProgressManager.

Source code in ktoolbox/progress.py
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
class ManagedTqdm:
    """
    A tqdm-compatible progress bar that works with ProgressManager.
    This class mimics the tqdm interface but delegates to ProgressManager.

    When no manager is given, or the bar is disabled, every call is forwarded
    to a standard tqdm bar stored in ``self._fallback`` instead.
    """

    def __init__(self, desc=None, total=None, initial=0, disable=False,
                 unit="it", unit_scale=False, manager=None, **kwargs):
        """
        Create the bar and register it with ``manager`` when one is given.

        :param desc: Description shown next to the bar
        :param total: Expected total, or ``None`` when unknown
        :param initial: Starting value of the counter
        :param disable: When true the standard tqdm fallback is used
        :param unit: Unit label passed to the progress state / tqdm
        :param unit_scale: Unit scaling flag passed to the progress state / tqdm
        :param manager: Progress manager to register with; ``None`` selects the fallback
        :param kwargs: Extra arguments, forwarded to the standard tqdm in fallback mode only
        """
        self._failed = False
        self._paused = False
        # If no manager provided or disabled, fall back to standard tqdm
        if manager is None or disable:
            self._fallback = std_tqdm(desc=desc, total=total, initial=initial,
                                      disable=disable, unit=unit,
                                      unit_scale=unit_scale, **kwargs)
            self.manager = None
            self.progress_id = None
        else:
            self.manager = manager
            self._fallback = None
            self._current = initial
            self._total = total
            self._desc = desc or ""
            self._disable = disable
            self._unit = unit
            self._unit_scale = unit_scale

            # Create progress bar in manager
            self.progress_id = f"{desc}_{id(self)}"
            with self.manager._lock:
                self.manager._progress_bars[self.progress_id] = ProgressState(
                    desc=self._desc, total=total, current=initial,
                    unit=unit, unit_scale=unit_scale
                )
                if self.progress_id not in self.manager._display_order:
                    self.manager._display_order.append(self.progress_id)

    # NOTE: every fallback check below compares against None instead of using
    # truthiness. bool() on a tqdm bar is defined by its total: it is False
    # for total == 0 and raises TypeError when neither total nor an iterable
    # is set, so ``if self._fallback:`` misroutes or crashes for such bars.

    def update(self, n: int = 1):
        """Update the progress bar by n units"""
        if self._fallback is not None:
            return self._fallback.update(n)

        self._current += n
        self.manager.update_progress(self.progress_id, self._current, self._desc)
        self.manager.update_display()
        return None

    def set_description(self, desc: str):
        """Set the description of the progress bar"""
        if self._fallback is not None:
            return self._fallback.set_description(desc)

        self._desc = desc
        self.manager.update_progress(self.progress_id, self._current, desc)
        return None

    def close(self):
        """Close/finish the progress bar"""
        if self._fallback is not None:
            return self._fallback.close()

        if self.manager and self.progress_id:
            self.manager.finish_progress(self.progress_id, failed=getattr(self, '_failed', False))
        return None

    def __enter__(self):
        """Enter the context; in fallback mode this yields whatever the wrapped tqdm yields"""
        if self._fallback is not None:
            return self._fallback.__enter__()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave the context, closing the bar"""
        if self._fallback is not None:
            return self._fallback.__exit__(exc_type, exc_val, exc_tb)
        self.close()
        return None

    def set_failed(self, failed: bool = True):
        """Mark the progress bar as failed"""
        if self._fallback is not None:
            return  # Not supported in fallback mode

        self._failed = failed
        if self.manager and self.progress_id:
            self.manager.update_progress(self.progress_id, self._current, self._desc, failed=failed)

    def set_paused(self, paused: bool = True):
        """Mark the progress bar as paused"""
        if self._fallback is not None:
            return  # Not supported in fallback mode

        self._paused = paused
        if self.manager and self.progress_id:
            with self.manager._lock:
                if self.progress_id in self.manager._progress_bars:
                    self.manager._progress_bars[self.progress_id].paused = paused

    # Properties to maintain compatibility with tqdm
    @property
    def total(self):
        """Expected total of the bar (``None`` when unknown)"""
        if self._fallback is not None:
            return self._fallback.total
        return self._total

    @total.setter
    def total(self, value):
        if self._fallback is not None:
            self._fallback.total = value
            return

        self._total = value
        # Keep the manager's state in sync: it is the copy registered for
        # display, so a total assigned after construction must reach it too.
        if self.manager and self.progress_id:
            with self.manager._lock:
                state = self.manager._progress_bars.get(self.progress_id)
                if state is not None:
                    state.total = value

    @property
    def n(self):
        """Current counter value"""
        if self._fallback is not None:
            return self._fallback.n
        return self._current

    @n.setter
    def n(self, value):
        if self._fallback is not None:
            self._fallback.n = value
        else:
            self._current = value
            if self.manager:
                self.manager.update_progress(self.progress_id, self._current, self._desc)

    def __bool__(self):
        """For compatibility with tqdm boolean checks"""
        return True

    def __del__(self):
        """Cleanup when object is garbage collected"""
        try:
            self.close()
        except Exception:
            # Best effort only: nothing can handle an error raised while the
            # object is being finalized.
            pass

manager = None instance-attribute

n property writable

progress_id = None instance-attribute

total property writable

__bool__()

For compatibility with tqdm boolean checks

Source code in ktoolbox/progress.py
849
850
851
def __bool__(self):
    """
    Always report the bar as truthy.

    Kept for compatibility with tqdm-style boolean checks such as ``if bar:``.
    """
    return True

__del__()

Cleanup when object is garbage collected

Source code in ktoolbox/progress.py
853
854
855
856
857
858
def __del__(self):
    """Best-effort cleanup: close the bar when the object is garbage collected"""
    try:
        self.close()
    except Exception:
        # Deliberately swallowed: a finalizer has no caller that could
        # handle a failure from close().
        pass

__enter__()

Source code in ktoolbox/progress.py
789
790
791
792
def __enter__(self):
    if self._fallback:
        return self._fallback.__enter__()
    return self

__exit__(exc_type, exc_val, exc_tb)

Source code in ktoolbox/progress.py
794
795
796
797
798
def __exit__(self, exc_type, exc_val, exc_tb):
    if self._fallback:
        return self._fallback.__exit__(exc_type, exc_val, exc_tb)
    self.close()
    return None

__init__(desc=None, total=None, initial=0, disable=False, unit='it', unit_scale=False, manager=None, **kwargs)

Source code in ktoolbox/progress.py
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
def __init__(self, desc=None, total=None, initial=0, disable=False,
             unit="it", unit_scale=False, manager=None, **kwargs):
    """
    Build the bar in one of two modes.

    Without a manager, or when ``disable`` is set, a standard tqdm bar is
    created and used as the fallback. Otherwise the bar keeps its own
    counters and registers a ``ProgressState`` with the manager.

    :param desc: Description shown next to the bar
    :param total: Expected total, or ``None`` when unknown
    :param initial: Starting value of the counter
    :param disable: When true the standard tqdm fallback is used
    :param unit: Unit label
    :param unit_scale: Unit scaling flag
    :param manager: Progress manager to register with
    :param kwargs: Extra arguments, only forwarded to the standard tqdm
    """
    self._failed = False
    self._paused = False

    if manager is None or disable:
        # Standalone mode: hand everything to a regular tqdm bar
        tqdm_options = dict(desc=desc, total=total, initial=initial,
                            disable=disable, unit=unit, unit_scale=unit_scale)
        self._fallback = std_tqdm(**tqdm_options, **kwargs)
        self.manager = None
        self.progress_id = None
        return

    # Managed mode: track the values locally and mirror them in the manager
    self.manager = manager
    self._fallback = None
    self._current, self._total = initial, total
    self._desc = desc or ""
    self._disable = disable
    self._unit, self._unit_scale = unit, unit_scale

    # The object's id makes the key unique per bar instance
    self.progress_id = f"{desc}_{id(self)}"
    registry = self.manager
    with registry._lock:
        registry._progress_bars[self.progress_id] = ProgressState(
            desc=self._desc, total=total, current=initial,
            unit=unit, unit_scale=unit_scale,
        )
        if self.progress_id not in registry._display_order:
            registry._display_order.append(self.progress_id)

close()

Close/finish the progress bar

Source code in ktoolbox/progress.py
780
781
782
783
784
785
786
787
def close(self):
    """Close/finish the progress bar"""
    if self._fallback:
        return self._fallback.close()

    if self.manager and self.progress_id:
        self.manager.finish_progress(self.progress_id, failed=getattr(self, '_failed', False))
    return None

set_description(desc)

Set the description of the progress bar

Source code in ktoolbox/progress.py
771
772
773
774
775
776
777
778
def set_description(self, desc: str):
    """Set the description of the progress bar"""
    if self._fallback:
        return self._fallback.set_description(desc)

    self._desc = desc
    self.manager.update_progress(self.progress_id, self._current, desc)
    return None

set_failed(failed=True)

Mark the progress bar as failed

Source code in ktoolbox/progress.py
800
801
802
803
804
805
806
807
def set_failed(self, failed: bool = True):
    """Mark the progress bar as failed"""
    if self._fallback:
        return  # Not supported in fallback mode

    self._failed = failed
    if self.manager and self.progress_id:
        self.manager.update_progress(self.progress_id, self._current, self._desc, failed=failed)

set_paused(paused=True)

Mark the progress bar as paused

Source code in ktoolbox/progress.py
809
810
811
812
813
814
815
816
817
818
def set_paused(self, paused: bool = True):
    """Mark the progress bar as paused"""
    if self._fallback:
        return  # Not supported in fallback mode

    self._paused = paused
    if self.manager and self.progress_id:
        with self.manager._lock:
            if self.progress_id in self.manager._progress_bars:
                self.manager._progress_bars[self.progress_id].paused = paused

update(n=1)

Update the progress bar by n units

Source code in ktoolbox/progress.py
761
762
763
764
765
766
767
768
769
def update(self, n: int = 1):
    """Update the progress bar by n units"""
    if self._fallback:
        return self._fallback.update(n)

    self._current += n
    self.manager.update_progress(self.progress_id, self._current, self._desc)
    self.manager.update_display()
    return None

ProgressAwareHandler

Custom loguru handler that works with progress manager

Source code in ktoolbox/progress.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
class ProgressAwareHandler:
    """
    Custom loguru handler that works with progress manager.

    Wraps another sink and, while a progress manager is active and running,
    clears the progress display before each message and redraws it afterwards.
    """

    def __init__(self, original_handler):
        """
        :param original_handler: Sink whose ``write`` (and ``flush``, when it \
        has one) this handler forwards to
        """
        self.original_handler = original_handler

    def write(self, message):
        """
        Forward ``message`` to the wrapped sink around a display clear/restore.

        :param message: Log message to write
        """
        # Read the module-level manager once so that the clear/restore pair
        # always targets the same object, even if it is replaced meanwhile.
        manager = _active_progress_manager
        if not (manager and manager._running):
            self.original_handler.write(message)
            return

        # Temporarily clear progress display
        manager.temporary_clear_for_log()
        try:
            self.original_handler.write(message)
            # Small delay to ensure message is visible
            time.sleep(0.05)
        finally:
            # Restore progress display even when the sink raised, so a failed
            # log write does not leave the display cleared
            manager.restore_display()

    def flush(self):
        """Flush the wrapped sink when it supports flushing"""
        if hasattr(self.original_handler, 'flush'):
            self.original_handler.flush()

original_handler = original_handler instance-attribute

__init__(original_handler)

Source code in ktoolbox/progress.py
41
42
def __init__(self, original_handler):
    """
    Remember the wrapped sink.

    :param original_handler: Object stored as ``self.original_handler``
    """
    self.original_handler = original_handler

flush()

Source code in ktoolbox/progress.py
57
58
59
def flush(self):
    if hasattr(self.original_handler, 'flush'):
        self.original_handler.flush()

write(message)

Source code in ktoolbox/progress.py
44
45
46
47
48
49
50
51
52
53
54
55
def write(self, message):
    global _active_progress_manager
    if _active_progress_manager and _active_progress_manager._running:
        # Temporarily clear progress display
        _active_progress_manager.temporary_clear_for_log()
        self.original_handler.write(message)
        # Small delay to ensure message is visible
        time.sleep(0.05)
        # Restore progress display
        _active_progress_manager.restore_display()
    else:
        self.original_handler.write(message)

ProgressManager

Centralized progress manager that coordinates multiple progress bars in a fixed terminal display area with enhanced visual effects.

Source code in ktoolbox/progress.py
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
class ProgressManager:
    """
    Centralized progress manager that coordinates multiple progress bars
    in a fixed terminal display area with enhanced visual effects.
    """

    def __init__(self, max_workers: int = 5, file: Optional[TextIO] = None,
                 use_colors: bool = True, use_emojis: bool = True,
                 update_interval: float = 0.1):
        """
        Initialize the progress manager.

        :param max_workers: Maximum number of concurrent progress bars to display
        :param file: Output stream (defaults to sys.stdout)
        :param use_colors: Enable color output
        :param use_emojis: Enable emoji indicators
        :param update_interval: Refresh interval in seconds, stored as a float
        """
        self.max_workers = max_workers
        self.file = file or sys.stdout
        # Colors are only used when requested AND ColorTheme reports support
        self.use_colors = use_colors and ColorTheme.supports_color()
        self.use_emojis = use_emojis
        # Progress states keyed by progress id, plus the order to show them in
        self._progress_bars: Dict[str, ProgressState] = {}
        self._display_order: List[str] = []
        self._lock = threading.RLock()
        self._display_task: Optional[asyncio.Task] = None
        self._running = False
        # Overall job counters
        self._total_jobs = 0
        self._completed_jobs = 0
        self._failed_jobs = 0
        self._existed_jobs = 0

        # Terminal control
        self._lines_written = 0
        self._last_display_time = 0
        # Update interval (seconds). When downloads change, refresh at most
        # once per `update_interval`. Default is 0.1s.
        self._update_interval = float(update_interval)

        # Display deduplication
        self._last_display_content = ""

    def set_job_totals(self, total: int, completed: int = 0, failed: int = 0, existed: int = 0):
        """Set the total number of jobs for overall progress tracking"""
        with self._lock:
            self._total_jobs = total
            self._completed_jobs = completed
            self._failed_jobs = failed
            self._existed_jobs = existed

    def update_job_progress(self, completed: int = None, failed: int = None, existed: int = None):
        """Update overall job progress"""
        with self._lock:
            if completed is not None:
                self._completed_jobs = completed
            if failed is not None:
                self._failed_jobs = failed
            if existed is not None:
                self._existed_jobs = existed

    def increment_existed(self, n: int = 1) -> int:
        """Atomically increment the existed count by n and return the new value"""
        with self._lock:
            self._existed_jobs += n
            return self._existed_jobs

    def create_progress_bar(self, desc: str, total: Optional[int] = None,
                          unit: str = "B", unit_scale: bool = True) -> 'ManagedTqdm':
        """
        Create a new managed progress bar bound to this manager.

        :param desc: Description of the bar
        :param total: Expected total, or ``None`` when unknown
        :param unit: Unit label
        :param unit_scale: Unit scaling flag
        :return: A ``ManagedTqdm`` constructed with ``manager=self``
        """
        # Don't create progress state here - let ManagedTqdm do it with proper unique ID
        return ManagedTqdm(desc=desc, total=total, unit=unit, unit_scale=unit_scale, manager=self)

    def update_progress(self, progress_id: str, current: int, desc: str = None, failed: bool = False):
        """Update progress for a specific progress bar"""
        with self._lock:
            if progress_id in self._progress_bars:
                state = self._progress_bars[progress_id]
                state.current = current
                state.failed = failed
                if desc:
                    state.desc = desc
                state.last_update = time.time()

                # Calculate rate
                if hasattr(state, '_last_current') and hasattr(state, '_last_time'):
                    time_diff = state.last_update - state._last_time
                    if time_diff > 0:
                        current_diff = current - state._last_current
                        state.rate = current_diff / time_diff

                state._last_current = current
                state._last_time = state.last_update

    def finish_progress(self, progress_id: str, failed: bool = False):
        """Mark a progress bar as finished"""
        with self._lock:
            if progress_id in self._progress_bars:
                self._progress_bars[progress_id].finished = True
                self._progress_bars[progress_id].failed = failed
                # Remove finished progress bar immediately in sync context
                # to avoid coroutine warnings
                try:
                    loop = asyncio.get_running_loop()
                    # Only create task if we're in an async context with a running loop
                    loop.create_task(self._remove_finished_after_delay(progress_id))
                except RuntimeError:
                    # No event loop running, remove immediately
                    if progress_id in self._progress_bars:
                        del self._progress_bars[progress_id]
                    if progress_id in self._display_order:
                        self._display_order.remove(progress_id)

    async def _remove_finished_after_delay(self, progress_id: str, delay: float = 1.0):
        """Remove a finished progress bar after a delay"""
        await asyncio.sleep(delay)
        with self._lock:
            if progress_id in self._progress_bars and self._progress_bars[progress_id].finished:
                del self._progress_bars[progress_id]
                if progress_id in self._display_order:
                    self._display_order.remove(progress_id)

    def start_display(self):
        """Start the progress display loop"""
        if not self._running and self.file.isatty():
            self._running = True
            # Hide cursor
            self.file.write('\033[?25l')
            self.file.flush()
            # Set as active progress manager for logger integration
            setup_logger_for_progress(self)

    def stop_display(self):
        """Stop the progress display loop"""
        if self._running:
            self._running = False
            # Clear display area and show cursor
            self._clear_display()
            self.file.write('\033[?25h\n')
            self.file.flush()
            # Remove from logger integration
            setup_logger_for_progress(None)

    def _clear_display(self):
        """Clear the current display area"""
        if self._lines_written > 0 and self.file.isatty():
            # Move cursor up to the start of our display area
            self.file.write(f'\033[{self._lines_written}A')
            # Clear each line and move to next
            for _ in range(self._lines_written):
                self.file.write('\033[2K\033[1B')  # Clear line and move down
            # Move cursor back to start of display area
            self.file.write(f'\033[{self._lines_written}A')
            self.file.flush()
            self._lines_written = 0

    def temporary_clear_for_log(self, log_message: str = None):
        """Temporarily clear display to allow log output"""
        if self._running and self.file.isatty():
            self._clear_display()
            # Reset last display content so we redraw after logging
            self._last_display_content = ""
            if log_message:
                self.file.write(log_message + '\n')
                self.file.flush()

    def restore_display(self):
        """Restore display after log output"""
        if self._running and self.file.isatty():
            # Force immediate display update
            self.update_display()

    @staticmethod
    def _format_size(size: Optional[int], unit_scale: bool = True) -> str:
        """Format a size value with appropriate units"""
        if size is None:
            return "?"

        if not unit_scale:
            return str(size)

        units = ['B', 'KB', 'MB', 'GB', 'TB']
        size_float = float(size)
        unit_index = 0

        while size_float >= 1024 and unit_index < len(units) - 1:
            size_float /= 1024
            unit_index += 1

        if unit_index == 0:
            return f"{size}B"
        else:
            return f"{size_float:.1f}{units[unit_index]}"

    def _format_rate(self, rate: Optional[float]) -> str:
        """Format a rate value"""
        if rate is None:
            return "?/s"
        return f"{self._format_size(int(rate))}/s"

    def _render_overall_progress(self) -> List[str]:
        """Render the overall job progress with colors and emojis, including visual progress bar"""
        lines = []

        if self._total_jobs > 0:
            running = len([p for p in self._progress_bars.values() if not p.finished])
            waiting = max(0, self._total_jobs - self._completed_jobs - self._failed_jobs - running)

            progress_pct = (self._completed_jobs / self._total_jobs) * 100 if self._total_jobs > 0 else 0
            progress_ratio = self._completed_jobs / self._total_jobs if self._total_jobs > 0 else 0

            # Determine overall status emoji and color
            if self.use_emojis:
                if running > 0:
                    status_emoji = f"{ColorTheme.RUNNING} "
                elif self._failed_jobs > 0:
                    status_emoji = f"{ColorTheme.FAILED} "
                else:
                    status_emoji = f"{ColorTheme.COMPLETED} "
            else:
                status_emoji = ""

            # Create visual progress bar
            bar_width = 30
            filled = int(bar_width * progress_ratio)

            if self.use_colors:
                if progress_pct >= 100:
                    bar_filled = ColorTheme.colorize('=' * filled, ColorTheme.BRIGHT_GREEN)
                    bar_empty = ColorTheme.colorize('-' * (bar_width - filled), ColorTheme.GREEN)
                elif progress_pct >= 75:
                    bar_filled = ColorTheme.colorize('=' * filled, ColorTheme.BRIGHT_CYAN)
                    bar_empty = ColorTheme.colorize('>' + '-' * (bar_width - filled), ColorTheme.CYAN)
                else:
                    bar_filled = ColorTheme.colorize('=' * filled, ColorTheme.BRIGHT_YELLOW)
                    bar_empty = ColorTheme.colorize('>' + '-' * (bar_width - filled), ColorTheme.YELLOW)

                bar_display = bar_filled + bar_empty
            else:
                bar_filled = '=' * filled
                bar_empty = '-' * (bar_width - filled)
                if filled < bar_width and progress_pct < 100:
                    bar_display = bar_filled + '>' + bar_empty[1:] if filled > 0 else '>' + bar_empty[1:]
                else:
                    bar_display = bar_filled + bar_empty

            # Color the progress percentage
            if self.use_colors:
                if progress_pct >= 100:
                    pct_colored = ColorTheme.colorize(f"{progress_pct:.0f}%", ColorTheme.BRIGHT_GREEN, bold=True)
                elif progress_pct >= 75:
                    pct_colored = ColorTheme.colorize(f"{progress_pct:.0f}%", ColorTheme.BRIGHT_CYAN, bold=True)
                elif progress_pct >= 50:
                    pct_colored = ColorTheme.colorize(f"{progress_pct:.0f}%", ColorTheme.BRIGHT_YELLOW, bold=True)
                else:
                    pct_colored = ColorTheme.colorize(f"{progress_pct:.0f}%", ColorTheme.BRIGHT_WHITE, bold=True)
            else:
                pct_colored = f"{progress_pct:.0f}%"

            # Color status numbers
            if self.use_colors:
                completed_colored = ColorTheme.colorize(str(self._completed_jobs), ColorTheme.BRIGHT_GREEN)
                total_colored = ColorTheme.colorize(str(self._total_jobs), ColorTheme.BRIGHT_WHITE)
                running_colored = ColorTheme.colorize(str(running), ColorTheme.BRIGHT_CYAN)
                waiting_colored = ColorTheme.colorize(str(waiting), ColorTheme.BRIGHT_YELLOW)
                existed_colored = ColorTheme.colorize(str(self._existed_jobs), ColorTheme.BRIGHT_WHITE)
            else:
                completed_colored = str(self._completed_jobs)
                total_colored = str(self._total_jobs)
                running_colored = str(running)
                waiting_colored = str(waiting)
                existed_colored = str(self._existed_jobs)

            # Calculate overall download speed from active progress bars
            total_rate = 0
            active_count = 0
            for state in self._progress_bars.values():
                if not state.finished and state.rate and state.rate > 0:
                    total_rate += state.rate
                    active_count += 1

            # Format overall speed
            if total_rate > 0:
                speed_str = self._format_rate(total_rate)
                if self.use_colors:
                    speed_str = ColorTheme.colorize(speed_str, ColorTheme.BRIGHT_MAGENTA)
            else:
                speed_str = ""

            # Build the main progress line in the requested format
            # [================>----------------] 23% | Jobs: 10/44 | 4 running | 30 waiting | 4.5 MB/s
            line_parts = []

            if status_emoji:  # Only add emoji if it's not empty
                line_parts.append(status_emoji)

            line_parts.extend([
                f"[{bar_display}]",
                pct_colored,
                f"| Jobs: {completed_colored}/{total_colored}",
                f"| {running_colored} running",
                f"| {waiting_colored} waiting",
                f"| {existed_colored} existed"
            ])

            if speed_str:
                line_parts.append(f"| {speed_str}")

            line = " ".join(line_parts)
            lines.append(line)

            # Show failed jobs on a separate line if any
            if self._failed_jobs > 0:
                failed_emoji = f"{ColorTheme.FAILED} " if self.use_emojis else ""
                if self.use_colors:
                    failed_colored = ColorTheme.colorize(str(self._failed_jobs), ColorTheme.BRIGHT_RED, bold=True)
                else:
                    failed_colored = str(self._failed_jobs)
                lines.append(f"{failed_emoji}Failed: {failed_colored}")

        return lines

    def _render_progress_bars(self) -> List[str]:
        """Render individual progress bars"""
        lines = []

        # Show only active progress bars in stable order (up to max_workers)
        # Use display_order to maintain consistent positioning instead of sorting by update time
        active_progress = []
        for progress_id in self._display_order:
            if (progress_id in self._progress_bars and 
                not self._progress_bars[progress_id].finished):
                active_progress.append((progress_id, self._progress_bars[progress_id]))

        for progress_id, state in active_progress[:self.max_workers]:
            line = self._render_single_progress_bar(state)
            lines.append(line)

        return lines

    def _render_single_progress_bar(self, state: ProgressState) -> str:
        """
        Render a single progress bar with colors and animations

        The returned line has the layout
        ``<desc> |<bar>| <current>/<total> <percentage> <rate>``.

        :param state: Progress state to render
        :return: One formatted line of text, without a trailing newline
        """
        # Progress bar width
        bar_width = 30

        # Determine status emoji
        if self.use_emojis:
            if state.failed:
                status_emoji = ColorTheme.FAILED
            elif state.finished:
                status_emoji = ColorTheme.COMPLETED
            elif state.paused:
                status_emoji = ColorTheme.WAITING
            else:
                # Animated spinner for active downloads
                # Advance spinner only when this progress state's content changed
                # since the last render to avoid continuous terminal refreshes
                # caused solely by animation frames.
                # ``_animation_state`` is not local to this method, so the frame
                # counter is shared by every bar rendered through it.
                current_time = time.time()
                last_seen = getattr(state, '_last_render_seen', 0.0)
                if state.last_update != last_seen:
                    _animation_state['frame'] = (_animation_state['frame'] + 1) % len(ColorTheme.SPINNER_FRAMES)
                    _animation_state['last_update'] = current_time
                    state._last_render_seen = state.last_update
                status_emoji = ColorTheme.SPINNER_FRAMES[_animation_state['frame']]
        else:
            # Without emojis no spinner is shown and the frame counter is not
            # advanced by this method.
            status_emoji = ""

        # Determinate progress: a positive total is known
        if state.total is not None and state.total > 0:
            # Clamp to 100% in case ``current`` exceeds ``total``
            progress = min(state.current / state.total, 1.0)
            filled = int(bar_width * progress)

            # Create colored progress bar with Unicode characters when Rich is available
            if self.use_colors and RICH_AVAILABLE:
                # Use Unicode characters: ━━━╺━━━━━━━ with Rich colors
                if state.failed:
                    # Red for failed
                    if filled > 0:
                        if filled < bar_width:
                            # One filled cell is given up for the '╺' head marker,
                            # so the two parts still add up to ``bar_width`` cells
                            bar_filled = ColorTheme.colorize('━' * (filled - 1), 'bright_red')
                            bar_empty = ColorTheme.colorize('╺' + '━' * (bar_width - filled), 'bright_black')
                        else:
                            bar_filled = ColorTheme.colorize('━' * filled, 'bright_red')
                            bar_empty = ''
                    else:
                        bar_filled = ''
                        bar_empty = ColorTheme.colorize('━' * bar_width, 'bright_black')
                elif state.finished:
                    # Green for completed (no '╺' head marker in this state)
                    bar_filled = ColorTheme.colorize('━' * filled, 'bright_green')
                    bar_empty = ColorTheme.colorize('━' * (bar_width - filled), 'bright_black')
                else:
                    # Pink/Magenta for in progress
                    if filled > 0:
                        if filled < bar_width:
                            # Same head-marker layout as the failed branch above
                            bar_filled = ColorTheme.colorize('━' * (filled - 1), 'bright_magenta')
                            bar_empty = ColorTheme.colorize('╺' + '━' * (bar_width - filled), 'bright_black')
                        else:
                            bar_filled = ColorTheme.colorize('━' * filled, 'bright_magenta')
                            bar_empty = ''
                    else:
                        bar_filled = ''
                        bar_empty = ColorTheme.colorize('━' * bar_width, 'bright_black')
                bar = bar_filled + bar_empty
            elif self.use_colors:
                # Fallback to original characters with ANSI colors when Rich not available
                if state.failed:
                    # Red for failed
                    bar_filled = ColorTheme.colorize('█' * filled, ColorTheme.BRIGHT_RED)
                    bar_empty = ColorTheme.colorize('░' * (bar_width - filled), ColorTheme.RED)
                elif state.finished:
                    # Green for completed
                    bar_filled = ColorTheme.colorize('█' * filled, ColorTheme.BRIGHT_GREEN)
                    bar_empty = ColorTheme.colorize('░' * (bar_width - filled), ColorTheme.GREEN)
                else:
                    # Cyan for in progress
                    bar_filled = ColorTheme.colorize('█' * filled, ColorTheme.BRIGHT_CYAN)
                    bar_empty = ColorTheme.colorize('░' * (bar_width - filled), ColorTheme.CYAN)
                bar = bar_filled + bar_empty
            else:
                # No colors - use original characters
                bar = '█' * filled + '░' * (bar_width - filled)

            # Color the percentage
            percentage_val = progress * 100
            if self.use_colors:
                if percentage_val >= 100:
                    percentage = ColorTheme.colorize(f"{percentage_val:5.1f}%", ColorTheme.BRIGHT_GREEN, bold=True)
                elif percentage_val >= 75:
                    percentage = ColorTheme.colorize(f"{percentage_val:5.1f}%", ColorTheme.BRIGHT_CYAN)
                else:
                    percentage = ColorTheme.colorize(f"{percentage_val:5.1f}%", ColorTheme.BRIGHT_WHITE)
            else:
                percentage = f"{percentage_val:5.1f}%"
        else:
            # Indeterminate progress with animated bar
            # (total is None or not positive, so no ratio can be computed)
            if self.use_colors and RICH_AVAILABLE:
                # Moving progress indicator with Unicode characters
                pos = _animation_state['frame'] % bar_width
                bar_chars = ['━'] * bar_width
                # NOTE(review): this loop writes '━' into cells that already hold
                # '━', so it has no visible effect; only the '╺' marker moves.
                for i in range(max(0, pos-2), min(bar_width, pos+3)):
                    bar_chars[i] = '━'
                bar_chars[pos] = '╺'  # Current position indicator
                bar = ColorTheme.colorize(''.join(bar_chars), 'bright_yellow')
            elif self.use_colors:
                # Fallback to original characters
                # A window of up to five '█' cells centred on ``pos``
                pos = _animation_state['frame'] % bar_width
                bar_chars = ['░'] * bar_width
                for i in range(max(0, pos-2), min(bar_width, pos+3)):
                    bar_chars[i] = '█'
                bar = ColorTheme.colorize(''.join(bar_chars), ColorTheme.BRIGHT_YELLOW)
            else:
                # Static placeholder: three filled cells, no animation
                bar = '█' * 3 + '░' * (bar_width - 3)
            percentage = ColorTheme.colorize("  ?  %", ColorTheme.BRIGHT_YELLOW) if self.use_colors else "  ?  %"

        # Format sizes with color
        current_str = self._format_size(state.current, state.unit_scale)
        # A total of None or 0 is shown as "?"
        total_str = self._format_size(state.total, state.unit_scale) if state.total else "?"

        if self.use_colors:
            current_str = ColorTheme.colorize(current_str, ColorTheme.BRIGHT_WHITE, bold=True)
            total_str = ColorTheme.colorize(total_str, ColorTheme.BRIGHT_WHITE)

        # Format rate with speed emoji
        # (color and emoji are only applied when a positive rate is known)
        rate_str = self._format_rate(state.rate)
        if self.use_colors and state.rate and state.rate > 0:
            rate_str = ColorTheme.colorize(rate_str, ColorTheme.BRIGHT_MAGENTA)

        if self.use_emojis and state.rate and state.rate > 0:
            rate_str = f"{ColorTheme.SPEED} {rate_str}"

        # Truncate description if too long and add color
        # (descriptions longer than 25 characters become 22 characters + "...")
        desc = state.desc
        if len(desc) > 25:
            desc = desc[:22] + "..."

        if self.use_colors:
            if state.failed:
                desc = ColorTheme.colorize(desc, ColorTheme.BRIGHT_RED)
            elif state.finished:
                desc = ColorTheme.colorize(desc, ColorTheme.BRIGHT_GREEN)
            else:
                desc = ColorTheme.colorize(desc, ColorTheme.BRIGHT_CYAN)

        # Add emoji prefix
        if self.use_emojis:
            desc = f"{status_emoji} {desc}"

        # NOTE(review): the width-30 padding is applied after colorizing and emoji
        # prefixing, so any characters colorize() adds count toward the width -
        # confirm that columns still line up when colors are enabled.
        return f"{desc:30} |{bar}| {current_str}/{total_str} {percentage} {rate_str}"

    def update_display(self):
        """
        Update the terminal display

        Does nothing unless the display is running and the output stream is a
        TTY. Redraws are throttled to one per ``_update_interval`` seconds and
        skipped when the rendered text equals the previously written text.
        """
        if not (self._running and self.file.isatty()):
            return

        now = time.time()
        if now - self._last_display_time < self._update_interval:
            return
        self._last_display_time = now

        with self._lock:
            frame = []

            # Overall progress first, followed by a blank separator line
            summary = self._render_overall_progress()
            if summary:
                frame.extend(summary)
                frame.append("")

            # Then the individual progress bars
            frame.extend(self._render_progress_bars())

            # Drop trailing blank lines; nothing is written for an empty frame
            while frame and frame[-1] == "":
                frame.pop()
            if not frame:
                return

            rendered = '\n'.join(frame)
            if rendered == self._last_display_content:
                # Same text as last time: avoid a needless redraw
                return

            # Erase the previous frame before writing the new one
            self._clear_display()
            self.file.write(rendered + '\n')
            self.file.flush()
            self._lines_written = len(frame)
            self._last_display_content = rendered

file = file or sys.stdout instance-attribute

max_workers = max_workers instance-attribute

use_colors = use_colors and ColorTheme.supports_color() instance-attribute

use_emojis = use_emojis instance-attribute

__init__(max_workers=5, file=None, use_colors=True, use_emojis=True, update_interval=0.1)

Initialize the progress manager.

Parameters:

Name Type Description Default
max_workers int

Maximum number of concurrent progress bars to display

5
file Optional[TextIO]

Output stream (defaults to sys.stdout)

None
use_colors bool

Enable color output

True
use_emojis bool

Enable emoji indicators

True
update_interval float

Minimum number of seconds between two display refreshes

0.1
Source code in ktoolbox/progress.py
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
def __init__(self, max_workers: int = 5, file: Optional[TextIO] = None,
             use_colors: bool = True, use_emojis: bool = True,
             update_interval: float = 0.1):
    """
    Initialize the progress manager.

    :param max_workers: Maximum number of concurrent progress bars to display
    :param file: Output stream (defaults to sys.stdout)
    :param use_colors: Enable color output (only takes effect when
        ``ColorTheme.supports_color()`` also reports support)
    :param use_emojis: Enable emoji indicators
    :param update_interval: Minimum number of seconds between two display refreshes
    """
    self.max_workers = max_workers
    self.file = file or sys.stdout
    self.use_colors = use_colors and ColorTheme.supports_color()
    self.use_emojis = use_emojis

    # Progress states keyed by progress ID, plus the order used for display
    self._progress_bars: Dict[str, ProgressState] = {}
    self._display_order: List[str] = []
    self._lock = threading.RLock()
    self._display_task: Optional[asyncio.Task] = None
    self._running = False

    # Overall job counters
    self._total_jobs = 0
    self._completed_jobs = 0
    self._failed_jobs = 0
    self._existed_jobs = 0

    # Terminal control
    self._lines_written = 0
    # Timestamp of the last refresh; a float like the ``time.time()`` values
    # it is compared against
    self._last_display_time = 0.0
    # Update interval (seconds). The display is refreshed at most once per
    # `update_interval` (0.1s by default) to avoid excessive redraws.
    self._update_interval = float(update_interval)

    # Display deduplication
    self._last_display_content = ""

create_progress_bar(desc, total=None, unit='B', unit_scale=True)

Create a new managed progress bar

Source code in ktoolbox/progress.py
256
257
258
259
260
def create_progress_bar(self, desc: str, total: Optional[int] = None,
                      unit: str = "B", unit_scale: bool = True) -> 'ManagedTqdm':
    """
    Create a new managed progress bar

    No progress state is registered here; the bar is constructed with this
    manager attached and handles the rest itself.

    :param desc: Description of the progress bar
    :param total: Expected total, ``None`` if unknown
    :param unit: Unit name
    :param unit_scale: Whether values should be scaled when formatted
    :return: A ``ManagedTqdm`` bound to this manager
    """
    bar_options = dict(desc=desc, total=total, unit=unit, unit_scale=unit_scale)
    return ManagedTqdm(manager=self, **bar_options)

finish_progress(progress_id, failed=False)

Mark a progress bar as finished

Source code in ktoolbox/progress.py
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
def finish_progress(self, progress_id: str, failed: bool = False):
    """
    Mark a progress bar as finished

    With a running event loop, removal of the bar is delegated to
    ``_remove_finished_after_delay``; without one, the bar is removed
    immediately. Unknown IDs are ignored.

    :param progress_id: ID of the progress bar to finish
    :param failed: Whether the tracked work ended in failure
    """
    with self._lock:
        if progress_id not in self._progress_bars:
            return

        state = self._progress_bars[progress_id]
        state.finished = True
        state.failed = failed

        try:
            loop = asyncio.get_running_loop()
            # Only create task if we're in an async context with a running loop
            task = loop.create_task(self._remove_finished_after_delay(progress_id))
        except RuntimeError:
            # No event loop running, remove immediately
            self._progress_bars.pop(progress_id, None)
            if progress_id in self._display_order:
                self._display_order.remove(progress_id)
        else:
            # The event loop keeps only weak references to tasks, so an
            # otherwise unreferenced task may be garbage-collected before it
            # finishes. Hold a strong reference until the task is done.
            pending = getattr(self, '_removal_tasks', None)
            if pending is None:
                pending = self._removal_tasks = set()
            pending.add(task)
            task.add_done_callback(pending.discard)

increment_existed(n=1)

Atomically increment the existed count by n and return the new value

Source code in ktoolbox/progress.py
250
251
252
253
254
def increment_existed(self, n: int = 1) -> int:
    """
    Add ``n`` to the existed-jobs counter while holding the manager lock.

    :param n: Amount to add
    :return: The counter value after the increment
    """
    with self._lock:
        updated = self._existed_jobs + n
        self._existed_jobs = updated
    return updated

restore_display()

Restore display after log output

Source code in ktoolbox/progress.py
355
356
357
358
359
def restore_display(self):
    """
    Restore display after log output

    Does nothing unless the display is running and the output stream is a TTY.
    """
    if self._running and self.file.isatty():
        # update_display() returns early when it is called again within the
        # update interval. Reset the throttle timestamp so this redraw is
        # really immediate instead of being silently skipped.
        self._last_display_time = 0.0
        self.update_display()

set_job_totals(total, completed=0, failed=0, existed=0)

Set the total number of jobs for overall progress tracking

Source code in ktoolbox/progress.py
232
233
234
235
236
237
238
def set_job_totals(self, total: int, completed: int = 0, failed: int = 0, existed: int = 0):
    """
    Overwrite all overall job counters in one step, under the manager lock.

    :param total: Total number of jobs
    :param completed: Number of completed jobs
    :param failed: Number of failed jobs
    :param existed: Number of jobs counted as already existing
    """
    with self._lock:
        (
            self._total_jobs,
            self._completed_jobs,
            self._failed_jobs,
            self._existed_jobs,
        ) = total, completed, failed, existed

start_display()

Start the progress display loop

Source code in ktoolbox/progress.py
311
312
313
314
315
316
317
318
319
def start_display(self):
    """
    Start the progress display

    Has no effect when the display is already running or the output stream
    is not a TTY.
    """
    if self._running or not self.file.isatty():
        return

    self._running = True

    # Hide the cursor while the display is active
    self.file.write('\033[?25l')
    self.file.flush()

    # Register this manager as the active one for logger integration
    setup_logger_for_progress(self)

stop_display()

Stop the progress display loop

Source code in ktoolbox/progress.py
321
322
323
324
325
326
327
328
329
330
def stop_display(self):
    """
    Stop the progress display

    Has no effect when the display is not running.
    """
    if not self._running:
        return

    self._running = False

    # Wipe the display area, then show the cursor again
    self._clear_display()
    self.file.write('\033[?25h\n')
    self.file.flush()

    # Unregister from logger integration
    setup_logger_for_progress(None)

temporary_clear_for_log(log_message=None)

Temporarily clear display to allow log output

Source code in ktoolbox/progress.py
345
346
347
348
349
350
351
352
353
def temporary_clear_for_log(self, log_message: str = None):
    """
    Temporarily clear display to allow log output

    Does nothing unless the display is running and the output stream is a TTY.

    :param log_message: Optional message written (with a newline) after clearing
    """
    if not self._running or not self.file.isatty():
        return

    self._clear_display()
    # Forget the cached frame so the next update redraws even if unchanged
    self._last_display_content = ""

    if not log_message:
        return
    self.file.write(log_message + '\n')
    self.file.flush()

update_display()

Update the terminal display

Source code in ktoolbox/progress.py
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
def update_display(self):
    """
    Update the terminal display

    Does nothing unless the display is running and the output stream is a
    TTY. Redraws are throttled to one per ``_update_interval`` seconds and
    skipped when the rendered text equals the previously written text.
    """
    if not (self._running and self.file.isatty()):
        return

    now = time.time()
    if now - self._last_display_time < self._update_interval:
        return
    self._last_display_time = now

    with self._lock:
        frame = []

        # Overall progress first, followed by a blank separator line
        summary = self._render_overall_progress()
        if summary:
            frame.extend(summary)
            frame.append("")

        # Then the individual progress bars
        frame.extend(self._render_progress_bars())

        # Drop trailing blank lines; nothing is written for an empty frame
        while frame and frame[-1] == "":
            frame.pop()
        if not frame:
            return

        rendered = '\n'.join(frame)
        if rendered == self._last_display_content:
            # Same text as last time: avoid a needless redraw
            return

        # Erase the previous frame before writing the new one
        self._clear_display()
        self.file.write(rendered + '\n')
        self.file.flush()
        self._lines_written = len(frame)
        self._last_display_content = rendered

update_job_progress(completed=None, failed=None, existed=None)

Update overall job progress

Source code in ktoolbox/progress.py
240
241
242
243
244
245
246
247
248
def update_job_progress(self, completed: int = None, failed: int = None, existed: int = None):
    """
    Update overall job progress

    Only counters whose argument is not ``None`` are overwritten.

    :param completed: New number of completed jobs
    :param failed: New number of failed jobs
    :param existed: New number of jobs counted as already existing
    """
    new_values = (
        ("_completed_jobs", completed),
        ("_failed_jobs", failed),
        ("_existed_jobs", existed),
    )
    with self._lock:
        for attr_name, value in new_values:
            if value is not None:
                setattr(self, attr_name, value)

update_progress(progress_id, current, desc=None, failed=False)

Update progress for a specific progress bar

Source code in ktoolbox/progress.py
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
def update_progress(self, progress_id: str, current: int, desc: str = None, failed: bool = False):
    """
    Update progress for a specific progress bar

    Unknown IDs are ignored. From the second update onwards the rate is
    recomputed from the change in ``current`` since the previous update.

    :param progress_id: ID of the progress bar to update
    :param current: New current value
    :param desc: New description; left unchanged when empty or ``None``
    :param failed: Failure flag stored on the state
    """
    with self._lock:
        if progress_id not in self._progress_bars:
            return

        state = self._progress_bars[progress_id]
        state.current = current
        state.failed = failed
        if desc:
            state.desc = desc

        now = time.time()
        state.last_update = now

        # The previous sample only exists after the first update
        has_previous_sample = hasattr(state, '_last_current') and hasattr(state, '_last_time')
        if has_previous_sample:
            elapsed = now - state._last_time
            if elapsed > 0:
                state.rate = (current - state._last_current) / elapsed

        # Remember this sample for the next rate calculation
        state._last_current = current
        state._last_time = now

ProgressState dataclass

Represents the state of a single progress bar

Source code in ktoolbox/progress.py
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
@dataclass
class ProgressState:
    """
    Represents the state of a single progress bar

    Field order is part of the generated ``__init__`` signature and must not
    be changed.
    """
    # Text describing the bar
    desc: str = ""
    # Expected total; ``None`` means the total is unknown
    total: Optional[int] = None
    # Progress made so far
    current: int = 0
    # Unit name
    unit: str = "it"
    # Whether values should be scaled when formatted
    unit_scale: bool = False
    # Most recently computed rate; ``None`` until one is available
    rate: Optional[float] = None
    # Timestamp (``time.time()``) taken when the state is created
    last_update: float = field(default_factory=time.time)
    # Tracks the last state timestamp that was actually rendered.
    # Used to avoid advancing animation frames or forcing redraws
    # when nothing meaningful has changed.
    _last_render_seen: float = 0.0
    # Status flags
    finished: bool = False
    failed: bool = False
    paused: bool = False

current: int = 0 class-attribute instance-attribute

desc: str = '' class-attribute instance-attribute

failed: bool = False class-attribute instance-attribute

finished: bool = False class-attribute instance-attribute

last_update: float = field(default_factory=time.time) class-attribute instance-attribute

paused: bool = False class-attribute instance-attribute

rate: Optional[float] = None class-attribute instance-attribute

total: Optional[int] = None class-attribute instance-attribute

unit: str = 'it' class-attribute instance-attribute

unit_scale: bool = False class-attribute instance-attribute

__init__(desc='', total=None, current=0, unit='it', unit_scale=False, rate=None, last_update=time.time(), _last_render_seen=0.0, finished=False, failed=False, paused=False)

create_managed_tqdm_class(progress_manager)

Create a tqdm class factory that uses the given ProgressManager. This allows us to create a drop-in replacement for tqdm.

Source code in ktoolbox/progress.py
861
862
863
864
865
866
867
868
869
870
871
def create_managed_tqdm_class(progress_manager: ProgressManager):
    """
    Build a ``ManagedTqdm`` subclass that is bound to ``progress_manager``.

    The returned class can be used as a drop-in replacement for tqdm: every
    instance is constructed with ``manager`` set to the given manager,
    overriding any ``manager`` keyword supplied by the caller.

    :param progress_manager: Manager that every created bar is attached to
    :return: The bound ``ManagedTqdm`` subclass
    """
    class ManagedTqdmClass(ManagedTqdm):
        def __init__(self, *args, **kwargs):
            # Always force the bound manager, whatever the caller passed
            kwargs.update(manager=progress_manager)
            super().__init__(*args, **kwargs)

    return ManagedTqdmClass

setup_logger_for_progress(progress_manager=None)

Setup logger to work with progress manager

Source code in ktoolbox/progress.py
32
33
34
35
def setup_logger_for_progress(progress_manager: 'ProgressManager' = None):
    """
    Setup logger to work with progress manager

    Stores the given manager in the module-level ``_active_progress_manager``
    variable; passing ``None`` clears it.

    :param progress_manager: Manager to make active, or ``None`` to deactivate
    """
    # Rebind the module-level variable rather than creating a local one
    global _active_progress_manager
    _active_progress_manager = progress_manager

utils

__all__ = ['BaseRet', 'generate_msg', 'logger_init', 'dump_search', 'parse_webpage_url', 'uvloop_init', 'extract_external_links', 'check_for_updates'] module-attribute

BaseRet

Bases: BaseModel, Generic[_T]

Base data model of function return value

Source code in ktoolbox/utils.py
32
33
34
35
36
37
38
39
40
41
42
class BaseRet(BaseModel, Generic[_T]):
    """
    Base data model of function return value

    An instance is truthy only when ``code`` equals the success code.
    """
    # Status code; defaults to the raw value of ``RetCodeEnum.Success``
    code: int = RetCodeEnum.Success.value
    # Human-readable message
    message: str = ''
    # Exception associated with the result, if any
    exception: Optional[Exception] = None
    # Payload of the call
    data: Optional[_T] = None

    # ``Exception`` is not a pydantic-native type, so arbitrary types are allowed
    model_config = ConfigDict(arbitrary_types_allowed=True)

    def __bool__(self):
        """Return ``True`` only when ``code`` is the success code"""
        # ``code`` is stored as a plain int (see its default), so compare it
        # with the enum member's raw value rather than the member itself;
        # this holds whether or not the enum is int-comparable.
        return self.code == RetCodeEnum.Success.value

code: int = RetCodeEnum.Success.value class-attribute instance-attribute

data: Optional[_T] = None class-attribute instance-attribute

exception: Optional[Exception] = None class-attribute instance-attribute

message: str = '' class-attribute instance-attribute

model_config = ConfigDict(arbitrary_types_allowed=True) class-attribute instance-attribute

__bool__()

Source code in ktoolbox/utils.py
41
42
def __bool__(self):
    """Return ``True`` only when ``code`` is the success code"""
    # ``code`` holds a plain int, so compare it with the enum member's raw
    # value rather than the member itself; this holds whether or not the
    # enum is int-comparable.
    return self.code == RetCodeEnum.Success.value

check_for_updates() async

Check for updates from GitHub and PyPI (backup). Show information if a newer version is available.

Source code in ktoolbox/utils.py
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
async def check_for_updates() -> None:
    """
    Check for updates from GitHub and PyPI (backup).
    Show information if a newer version is available.

    The remote version is compared with the installed one by plain string
    inequality after stripping leading ``v`` characters. PyPI is only queried
    when the GitHub request fails or does not answer with HTTP 200. Errors
    are logged and never propagated.
    """
    try:
        import httpx
        from ktoolbox import __version__

        installed = __version__.lstrip('v')  # Remove 'v' prefix if present

        # Primary source: latest GitHub release
        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                response = await client.get("https://api.github.com/repos/Ljzd-PRO/KToolBox/releases/latest")
                if response.status_code == 200:
                    release = response.json()
                    newest = release["tag_name"].lstrip('v')
                    if newest == installed:
                        logger.debug("You are using the latest version")
                    else:
                        logger.info(f"Update available: {newest} (current: {installed})")
                        logger.info(f"Release URL: {release['html_url']}")
                    # GitHub gave a usable answer; no need to ask PyPI
                    return
        except Exception as e:
            logger.debug(f"Failed to check GitHub for updates: {e}")

        # Backup source: PyPI project metadata
        try:
            async with httpx.AsyncClient(timeout=5.0) as client:
                response = await client.get("https://pypi.org/pypi/ktoolbox/json")
                if response.status_code == 200:
                    project = response.json()
                    newest = project["info"]["version"].lstrip('v')
                    if newest == installed:
                        logger.debug("You are using the latest version")
                    else:
                        logger.info(f"Update available: {newest} (current: {installed})")
                        logger.info("Run 'pip install --upgrade ktoolbox' or 'pipx upgrade ktoolbox' to update")
        except Exception as e:
            logger.debug(f"Failed to check PyPI for updates: {e}")

    except Exception as e:
        # Reached e.g. when the imports above fail
        logger.warning(f"Update check encountered an unexpected error: {e!r}")
dump_search(result, path) async

Source code in ktoolbox/utils.py
91
92
93
94
95
96
async def dump_search(result: List[BaseModel], path: Path):
    """
    Write search results to ``path`` as UTF-8 encoded JSON.

    The results are wrapped in a ``SearchResult`` model and serialized with
    the indentation configured in ``config.json_dump_indent``.

    :param result: Models to dump
    :param path: Destination file, overwritten if it exists
    """
    async with aiofiles.open(str(path), "w", encoding="utf-8") as f:
        wrapped = SearchResult(result=result)
        payload = wrapped.model_dump_json(indent=config.json_dump_indent)
        await f.write(payload)

extract_external_links(content, custom_patterns=None)

Extract external file sharing links from text content.

Targets common cloud storage and file sharing services like:

- Google Drive
- MEGA
- Dropbox
- OneDrive
- MediaFire
- And other common file hosting services

Parameters:

Name Type Description Default
content str

Text content to extract links from

required
custom_patterns Optional[List[str]]

Custom regex patterns to use.

None

Returns:

Type Description
Set[str]

Set of unique external links found

Source code in ktoolbox/utils.py
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
def extract_external_links(content: str, custom_patterns: Optional[List[str]] = None) -> Set[str]:
    """
    Extract external file sharing links from text content.

    Intended for links to common cloud storage and file sharing services like:
    - Google Drive
    - MEGA
    - Dropbox
    - OneDrive
    - MediaFire
    - And other common file hosting services

    The function has no built-in patterns: only the regexes passed in
    ``custom_patterns`` are searched for (case-insensitively). Each match is
    stripped of trailing HTML markup and punctuation and HTML-unescaped; it is
    kept only if the result is longer than 10 characters and contains a dot.

    :param content: Text content to extract links from
    :param custom_patterns: Custom regex patterns to use. When ``None`` or \
    empty, no links are extracted.
    :return: Set of unique external links found
    :raises re.error: If a pattern is not a valid regular expression
    """
    # Without patterns the combined regex would be empty and match at every
    # position of the text, doing a lot of work only to return nothing.
    if not content or not custom_patterns:
        return set()

    # Combine all patterns into one alternation
    combined_pattern = '|'.join(f'({pattern})' for pattern in custom_patterns)

    # Cleanup substitutions, applied to every match in this order
    cleanup_steps = (
        r'["\'>][^<]*$',          # Quote or '>' plus everything up to the end
        r'<[^>]*>.*$',            # Any HTML tag and everything after it
        r'"[^"]*$',               # Double quote and everything after it
        r'</[^>]*>?$',            # Closing tag or partial tag at the end
        r'[.,;!?)\]}>"\'\s]+$',   # Trailing punctuation and whitespace
    )

    links = set()
    for match in re.finditer(combined_pattern, content, re.IGNORECASE):
        # Start from the full matched URL
        url = match.group(0)
        for step in cleanup_steps:
            url = re.sub(step, '', url)

        # Decode HTML entities (like &amp; -> &, &lt; -> <, etc.)
        url = html.unescape(url)

        # Validate that it looks like a proper URL
        if len(url) > 10 and '.' in url:
            links.add(url)

    return links

generate_msg(title=None, **kwargs)

Generate message for BaseRet and logger

Parameters:

Name Type Description Default
title str

Message title

None
kwargs

Extra data

{}
Source code in ktoolbox/utils.py
45
46
47
48
49
50
51
52
53
54
55
56
57
def generate_msg(title: str = None, **kwargs):
    """
    Generate message for ``BaseRet`` and logger

    The extra data is rendered as ``key: value`` pairs joined by ``", "`` and,
    when a title is present as well, appended to it after ``" - "``.

    :param title: Message title
    :param kwargs: Extra data
    :return: The title, the extra data, both combined, or an empty string
    """
    details = ", ".join(f"{key}: {value}" for key, value in kwargs.items())
    if title and kwargs:
        return f"{title} - {details}"
    # At most one of the two parts is present here; with neither, ``details``
    # is the empty string
    return title or details

logger_init(cli_use=False, disable_stdout=False)

Initialize loguru logger

Parameters:

Name Type Description Default
cli_use bool

Set logger level INFO and filter out SUCCESS

False
disable_stdout bool

Disable default output stream

False
Source code in ktoolbox/utils.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def logger_init(cli_use: bool = False, disable_stdout: bool = False):
    """
    Initialize ``loguru`` logger

    ``disable_stdout`` takes precedence over ``cli_use``. Independently of
    both, a file sink is added when ``config.logger.path`` is set.

    :param cli_use: Set logger level ``INFO`` and filter out ``SUCCESS``
    :param disable_stdout: Disable default output stream
    """
    if disable_stdout:
        logger.remove()
    elif cli_use:
        logger.remove()
        logger.add(
            tqdm.write,
            colorize=True,
            format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | "
                   "<level>{level: <8}</level> | "
                   "<cyan>{name}</cyan> - <level>{message}</level>",
            level=logging.INFO,
            # SUCCESS ranks above INFO, so the level threshold alone lets it
            # through; it has to be excluded explicitly. (DEBUG is already
            # dropped by the INFO threshold.)
            filter=lambda record: record["level"].name != "SUCCESS"
        )
    if path := config.logger.path:
        # The walrus test above already guarantees ``path`` is not None
        path.mkdir(exist_ok=True)
        logger.add(
            path / DataStorageNameEnum.LogData.value,
            level=config.logger.level,
            rotation=config.logger.rotation,
            diagnose=True
        )

parse_webpage_url(url)

Fetch service, user_id, post_id, revision_id from webpage url

Each part can be None if not found in url.

Parameters:

Name Type Description Default
url str

Kemono Webpage url

required

Returns:

Type Description
Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]

Tuple of service, user_id, post_id, revision_id

Source code in ktoolbox/utils.py
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
def parse_webpage_url(url: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    # noinspection SpellCheckingInspection
    """
    Fetch **service**, **user_id**, **post_id**, **revision_id** from webpage url

    Each part can be ``None`` if not found in url. The url is split into path
    parts and read positionally as
    ``scheme / netloc / service / "user" / user_id / "post" / post_id / "revision" / revision_id``;
    any further parts are ignored.

    :param url: Kemono Webpage url
    :return: Tuple of **service**, **user_id**, **post_id**, **revision_id**
    """
    expected_parts = 9

    # Cut off extra trailing parts (they used to make the unpacking below
    # raise ValueError) and pad short urls with None up to the full size
    parts = Path(url).parts[:expected_parts]
    parts += (None,) * (expected_parts - len(parts))
    _scheme, _netloc, service, _user_key, user_id, _post_key, post_id, _revision_key, revision_id = parts

    # Only return revision_id if we have the revision keyword
    if _revision_key != "revision":
        revision_id = None

    return service, user_id, post_id, revision_id

uvloop_init()

Set event loop policy to uvloop or winloop if available.

Uses winloop on Windows and uvloop on Unix-like systems for performance optimization.

Returns:

Type Description
bool

If event loop policy was set successfully

Source code in ktoolbox/utils.py
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
def uvloop_init() -> bool:
    """
    Set event loop policy to uvloop or winloop if available.

    winloop is tried on Windows (``sys.platform == "win32"``), uvloop on every
    other platform. Nothing is changed when ``config.use_uvloop`` is disabled
    or the package for the current platform is not installed.

    :return: If event loop policy was set successfully
    """
    if not config.use_uvloop:
        return False

    if sys.platform == "win32":
        # Windows: winloop
        try:
            # noinspection PyUnresolvedReferences
            import winloop
        except ModuleNotFoundError:
            logger.debug(
                "winloop is not installed, but it's optional. "
                "You can install it with `pip install ktoolbox[winloop]`"
            )
            return False
        asyncio.set_event_loop_policy(winloop.EventLoopPolicy())
        logger.success("Set event loop policy to winloop successfully.")
        return True

    # Unix-like systems: uvloop
    try:
        # noinspection PyUnresolvedReferences
        import uvloop
    except ModuleNotFoundError:
        logger.debug(
            "uvloop is not installed, but it's optional. "
            "You can install it with `pip install ktoolbox[uvloop]`"
        )
        return False
    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
    logger.success("Set event loop policy to uvloop successfully.")
    return True