文章スプリッター
テキストを文章、段落、またはカスタム区切り文字で分割します。セグメント数と長さの統計を表示します。
よくある質問
コード実装
import re
def split_sentences(text):
# Split on .!? followed by space+uppercase (basic sentence detection)
sentences = re.split(r'(?<=[.!?])\s+(?=[A-Z"'])', text.strip())
return [s.strip() for s in sentences if s.strip()]
def split_paragraphs(text):
return [p.strip() for p in re.split(r'\n{2,}', text) if p.strip()]
def split_custom(text, delimiter):
return [p.strip() for p in text.split(delimiter) if p.strip()]
def stats(segments):
if not segments:
return {}
lengths = [len(s) for s in segments]
return {
"total": len(segments),
"avg_length": sum(lengths) // len(lengths),
"longest": max(lengths),
"shortest": min(lengths),
}
text = """Hello world. How are you today? I am doing well!
This is a second group of sentences. They continue here."""
sentences = split_sentences(text)
for i, s in enumerate(sentences, 1):
print(f"{i}. {s}")
print(stats(sentences))Comments & Feedback
Comments are powered by Giscus. Sign in with GitHub to leave a comment.