🛠️ToolsShed

Duplicate Word Finder

Find repeated words in text with frequency counts and positions.

์ž์ฃผ ๋ฌป๋Š” ์งˆ๋ฌธ

코드 구현

import re
from collections import Counter

def find_duplicates(text: str, case_sensitive: bool = False) -> dict:
    """Find words that occur more than once in *text*.

    A word is a maximal run of letters delimited by word boundaries.
    Letters of any script are accepted, so accented or non-Latin words
    (e.g. "café") are counted instead of being silently dropped. Tokens
    that mix letters with digits or underscores (e.g. "abc123") are not
    treated as words.

    Args:
        text: The text to scan.
        case_sensitive: When False (the default) the text is lower-cased
            before matching, so "The" and "the" count as the same word.

    Returns:
        A dict with four keys:
            'duplicates': list of ``(word, count)`` tuples for every word
                seen more than once, ordered by descending count; ties
                keep first-occurrence order.
            'positions': dict mapping each duplicate word to the list of
                its 1-based indexes within the sequence of matched words.
            'total_words': number of words matched.
            'unique_words': number of distinct words matched.
    """
    processed = text if case_sensitive else text.lower()
    # [^\W\d_] is "any word character except digits and underscore", i.e.
    # letters in any script. On pure-ASCII input this matches exactly what
    # [a-zA-Z] does, but it no longer skips words such as "café".
    words = re.findall(r'\b[^\W\d_]+\b', processed)
    freq = Counter(words)
    duplicates = {word: count for word, count in freq.items() if count > 1}

    # Record positions as 1-based indexes into the list of matched words.
    positions = {word: [] for word in duplicates}
    for i, word in enumerate(words, 1):
        if word in positions:
            positions[word].append(i)

    return {
        # sorted() is stable, so equal counts stay in first-seen order.
        'duplicates': sorted(duplicates.items(), key=lambda x: -x[1]),
        'positions': positions,
        'total_words': len(words),
        'unique_words': len(freq),
    }

# Demo: scan a sample sentence case-insensitively and print one line per
# repeated word, in the order find_duplicates returns them.
text = "The cat sat on the mat and the cat was happy"
result = find_duplicates(text, case_sensitive=False)
for entry in result['duplicates']:
    # Each entry is a (word, count) pair.
    word, count = entry
    print(f"'{word}' appears {count} times at positions {result['positions'][word]}")

Comments & Feedback

Comments are powered by Giscus. Sign in with GitHub to leave a comment.