🛠️ToolsShed

句子分割器

将文本按句子、段落或自定义分隔符分割，并显示分割后的片段数量和长度统计。

常见问题

代码实现

import re

def split_sentences(text):
    """Split *text* into sentences using a simple punctuation heuristic.

    A boundary is any run of whitespace that comes directly after ``.``,
    ``!`` or ``?`` and directly before an ASCII uppercase letter or a quote
    character (double or single). The whitespace is discarded; the closing
    punctuation stays attached to its sentence.

    Args:
        text: The string to split.

    Returns:
        A list of the non-empty sentences, each with surrounding whitespace
        removed. Text without any boundary is returned as a single item.
    """
    # The character class holds both quote characters, so the pattern is a
    # triple-quoted raw string: a single-quoted literal would be terminated
    # early by the apostrophe and fail to parse.
    boundary = r"""(?<=[.!?])\s+(?=[A-Z"'])"""
    pieces = re.split(boundary, text.strip())
    return [piece.strip() for piece in pieces if piece.strip()]

def split_paragraphs(text):
    """Split *text* into paragraphs separated by one or more blank lines.

    A blank line is a line that is empty or contains only spaces/tabs.
    Both ``\\n`` and ``\\r\\n`` line endings are recognised, so text pasted
    from Windows sources is split the same way as Unix text.

    Args:
        text: The string to split.

    Returns:
        A list of the non-empty paragraphs, each with surrounding whitespace
        removed. Single line breaks inside a paragraph are preserved.
    """
    # One line break, then at least one more line that holds nothing but
    # optional horizontal whitespace before its own line break.
    blank_line_gap = r'\r?\n(?:[ \t]*\r?\n)+'
    chunks = re.split(blank_line_gap, text)
    return [chunk.strip() for chunk in chunks if chunk.strip()]

def split_custom(text, delimiter):
    """Split *text* on every occurrence of *delimiter*.

    Args:
        text: The string to split.
        delimiter: The separator handed to ``str.split``.

    Returns:
        A list of the pieces between delimiters, each with surrounding
        whitespace removed; pieces that are empty after trimming are dropped.
    """
    segments = []
    for piece in text.split(delimiter):
        trimmed = piece.strip()
        if trimmed:
            segments.append(trimmed)
    return segments

def stats(segments):
    """Summarise the lengths of the given text segments.

    Args:
        segments: A sequence of strings (for example the output of one of
            the split functions).

    Returns:
        An empty dict when *segments* is empty; otherwise a dict with the
        keys ``"total"`` (number of segments), ``"avg_length"`` (mean length
        rounded down to an integer), ``"longest"`` and ``"shortest"``
        (maximum and minimum length in characters).
    """
    summary = {}
    if segments:
        sizes = list(map(len, segments))
        summary["total"] = len(segments)
        # Floor division keeps the average an integer.
        summary["avg_length"] = sum(sizes) // len(sizes)
        summary["longest"] = max(sizes)
        summary["shortest"] = min(sizes)
    return summary

# Sample input: two lines of text holding five sentences in total.
text = """Hello world. How are you today? I am doing well!
This is a second group of sentences. They continue here."""

# Split the sample, print each sentence with a 1-based number, then print
# the length statistics for the resulting sentences.
sentences = split_sentences(text)
for number, sentence in enumerate(sentences, start=1):
    print(f"{number}. {sentence}")
print(stats(sentences))

Comments & Feedback

Comments are powered by Giscus. Sign in with GitHub to leave a comment.