テキスト分析ツール
テキストを読み込んで単語頻度・文字数・センテンス数を分析しワードクラウドで可視化するツール。
1. アプリ概要
テキストを読み込んで単語頻度・文字数・センテンス数を分析しワードクラウドで可視化するツール。
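このアプリはdataカテゴリの実践的なPythonアプリです。使用ライブラリは tkinter(標準ライブラリ)・wordcloud・matplotlib、難易度は ★★★ です。なお、以下に掲載するコードは wordcloud を import しておらず、単語頻度は matplotlib の棒グラフで表示します(matplotlib が無い場合はグラフ以外の機能のみ動作します)。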
Pythonの豊富なライブラリを活用することで、実用的なアプリを短いコードで実装できます。ソースコードをコピーして実行し、仕組みを理解したうえでカスタマイズに挑戦してみてください。
GUIアプリ開発はプログラミングの楽しさを実感できる最も効果的な学習方法のひとつです。変数・関数・クラス・イベント処理などの重要な概念が自然と身につきます。
2. 機能一覧
- テキスト分析ツールのメイン機能
- 直感的なGUIインターフェース
- 入力値のバリデーション
- エラーハンドリング
- 結果の見やすい表示
- クリア機能付き
3. 事前準備・環境
Python 3.10 以上 / Windows・Mac・Linux すべて対応
以下の環境で動作確認しています。
- Python 3.10 以上
- OS: Windows 10/11・macOS 12+・Ubuntu 20.04+
インストールが必要なライブラリ
pip install matplotlib
4. 完全なソースコード
右上の「コピー」ボタンをクリックするとコードをクリップボードにコピーできます。
import tkinter as tk
from tkinter import ttk, messagebox, filedialog
import re
import math
import threading
import collections
try:
import matplotlib
matplotlib.use("TkAgg")
from matplotlib.figure import Figure
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
MATPLOTLIB_AVAILABLE = True
except ImportError:
MATPLOTLIB_AVAILABLE = False
class App080:
    """Tkinter GUI that analyses free text and visualises word frequencies.

    Layout: an editable text pane on the left and a notebook on the right
    with four tabs (basic statistics, word-frequency table, bar chart,
    character classes).  The chart tab is only functional when the
    module-level ``MATPLOTLIB_AVAILABLE`` flag is true.

    Words are extracted with a plain regex word match, so Japanese text
    written without spaces is split at punctuation/whitespace only, not into
    morphemes.
    """

    # Simplified Japanese/English stop-word list.
    STOP_WORDS = {
        "の", "に", "は", "を", "た", "が", "で", "て", "と", "し", "れ", "さ",
        "ある", "いる", "も", "する", "から", "な", "こと", "として", "い", "や",
        "れる", "など", "なっ", "ない", "この", "ため", "その", "あっ", "よう",
        "また", "もの", "という", "あり", "まで", "られ", "なる", "へ", "か",
        "だ", "これ", "によって", "により", "おり", "より", "による", "ず", "なり",
        "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
        "have", "has", "had", "do", "does", "did", "will", "would", "could",
        "should", "may", "might", "shall", "can", "to", "of", "in", "for",
        "on", "with", "at", "by", "from", "as", "or", "and", "but", "not",
        "this", "that", "it", "its", "i", "you", "he", "she", "we", "they",
    }

    # Encodings tried, in order, when decoding an opened file.
    _ENCODINGS = ("utf-8-sig", "utf-8", "shift-jis", "cp932", "euc-jp")
    # Status-bar text shown while no analysis result is displayed.
    _IDLE_STATUS = "テキストを入力または読み込んでください"
    # Sentence terminators / punctuation.  The full-width "!" and "?" are
    # written as escapes so they cannot be silently folded to ASCII.
    _SENTENCE_END_RE = re.compile(r"[。.!?\uff01\uff1f]")
    _PUNCT_RE = re.compile(r"[、。,.「」『』【】・…—\-!?\uff01\uff1f]")

    def __init__(self, root):
        """Configure the top-level window and build all widgets.

        Args:
            root: The Tk window that hosts the application.
        """
        self.root = root
        self.root.title("テキスト分析ツール")
        self.root.geometry("1050x660")
        self.root.configure(bg="#1e1e1e")
        self._text = ""  # snapshot of the text handed to the last analysis
        self._build_ui()

    def _build_ui(self):
        """Create the header, toolbar, input pane, result tabs and status bar."""
        header = tk.Frame(self.root, bg="#252526", pady=6)
        header.pack(fill=tk.X)
        tk.Label(header, text="📝 テキスト分析ツール",
                 font=("Noto Sans JP", 12, "bold"),
                 bg="#252526", fg="#4fc3f7").pack(side=tk.LEFT, padx=12)

        # Toolbar
        tb = tk.Frame(self.root, bg="#2d2d2d", pady=4)
        tb.pack(fill=tk.X)
        ttk.Button(tb, text="📂 ファイルを開く",
                   command=self._open_file).pack(side=tk.LEFT, padx=4)
        tk.Button(tb, text="▶ 分析", command=self._analyze,
                  bg="#1565c0", fg="white", relief=tk.FLAT,
                  font=("Arial", 10, "bold"), padx=12, pady=4,
                  activebackground="#0d47a1", bd=0).pack(side=tk.LEFT, padx=4)
        ttk.Button(tb, text="🗑 クリア",
                   command=self._clear).pack(side=tk.LEFT, padx=4)
        self.stop_var = tk.BooleanVar(value=True)
        tk.Checkbutton(tb, text="ストップワード除外",
                       variable=self.stop_var,
                       bg="#2d2d2d", fg="#ccc", selectcolor="#3c3c3c",
                       activebackground="#2d2d2d").pack(side=tk.LEFT, padx=8)

        # Main area (PanedWindow)
        paned = ttk.PanedWindow(self.root, orient=tk.HORIZONTAL)
        paned.pack(fill=tk.BOTH, expand=True, padx=4, pady=4)

        # Left: text input
        left = tk.Frame(paned, bg="#1e1e1e")
        paned.add(left, weight=1)
        tk.Label(left, text="テキスト入力", bg="#1e1e1e", fg="#888",
                 font=("Arial", 9)).pack(anchor="w")
        self.text_input = tk.Text(left, bg="#0d1117", fg="#c9d1d9",
                                  font=("Arial", 10), relief=tk.FLAT,
                                  insertbackground="white",
                                  wrap=tk.WORD)
        txsb = ttk.Scrollbar(left, command=self.text_input.yview)
        self.text_input.configure(yscrollcommand=txsb.set)
        txsb.pack(side=tk.RIGHT, fill=tk.Y)
        self.text_input.pack(fill=tk.BOTH, expand=True)
        self.text_input.bind("<<Modified>>", self._on_text_change)

        # Right: analysis results
        right = tk.Frame(paned, bg="#1e1e1e")
        paned.add(right, weight=1)
        nb = ttk.Notebook(right)
        nb.pack(fill=tk.BOTH, expand=True)

        # Tab 1: basic statistics
        tab_stat = tk.Frame(nb, bg="#1e1e1e")
        nb.add(tab_stat, text="基本統計")
        self.stat_text = tk.Text(tab_stat, bg="#0d1117", fg="#c9d1d9",
                                 font=("Courier New", 9), relief=tk.FLAT,
                                 state=tk.DISABLED)
        self.stat_text.pack(fill=tk.BOTH, expand=True)

        # Tab 2: word frequency
        tab_freq = tk.Frame(nb, bg="#1e1e1e")
        nb.add(tab_freq, text="単語頻度")
        cols = ("rank", "word", "count", "pct")
        self.freq_tree = ttk.Treeview(tab_freq, columns=cols,
                                      show="headings", height=18)
        self.freq_tree.heading("rank", text="#")
        self.freq_tree.heading("word", text="単語")
        self.freq_tree.heading("count", text="出現数")
        self.freq_tree.heading("pct", text="割合")
        self.freq_tree.column("rank", width=40, anchor="center")
        self.freq_tree.column("word", width=140, anchor="w")
        self.freq_tree.column("count", width=70, anchor="center")
        self.freq_tree.column("pct", width=70, anchor="center")
        fsb = ttk.Scrollbar(tab_freq, command=self.freq_tree.yview)
        self.freq_tree.configure(yscrollcommand=fsb.set)
        fsb.pack(side=tk.RIGHT, fill=tk.Y)
        self.freq_tree.pack(fill=tk.BOTH, expand=True)

        # Tab 3: chart (needs matplotlib)
        tab_chart = tk.Frame(nb, bg="#0d1117")
        nb.add(tab_chart, text="グラフ")
        if MATPLOTLIB_AVAILABLE:
            self.fig = Figure(figsize=(5, 4), facecolor="#0d1117")
            self.ax = self.fig.add_subplot(111)
            self.mpl_canvas = FigureCanvasTkAgg(self.fig, master=tab_chart)
            self.mpl_canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)
        else:
            tk.Label(tab_chart,
                     text="pip install matplotlib でグラフが表示されます",
                     bg="#0d1117", fg="#555").pack(expand=True)

        # Tab 4: character classes
        tab_char = tk.Frame(nb, bg="#1e1e1e")
        nb.add(tab_char, text="文字解析")
        self.char_text = tk.Text(tab_char, bg="#0d1117", fg="#c9d1d9",
                                 font=("Courier New", 9), relief=tk.FLAT,
                                 state=tk.DISABLED)
        self.char_text.pack(fill=tk.BOTH, expand=True)

        # Status bar
        self.status_var = tk.StringVar(value=self._IDLE_STATUS)
        tk.Label(self.root, textvariable=self.status_var,
                 bg="#252526", fg="#858585", font=("Arial", 9),
                 anchor="w", padx=8).pack(fill=tk.X, side=tk.BOTTOM)

    # ── File handling ────────────────────────────────────────
    @classmethod
    def _decode_bytes(cls, raw):
        """Decode *raw* with the first encoding in ``_ENCODINGS`` that fits.

        Args:
            raw: File content as ``bytes``.

        Returns:
            The decoded text with CRLF/CR normalised to LF (matching what
            text-mode ``open`` would have produced), or ``None`` when no
            candidate encoding can decode the data.
        """
        for enc in cls._ENCODINGS:
            try:
                text = raw.decode(enc)
            except UnicodeDecodeError:
                continue
            return text.replace("\r\n", "\n").replace("\r", "\n")
        return None

    def _open_file(self):
        """Ask for a file and load its content into the input pane.

        Shows an error dialog (instead of raising) when the file cannot be
        read or when none of the candidate encodings decodes it.
        """
        path = filedialog.askopenfilename(
            filetypes=[("テキスト", "*.txt *.md *.csv"), ("すべて", "*.*")])
        if not path:
            return
        try:
            with open(path, "rb") as f:
                raw = f.read()
        except OSError as exc:
            messagebox.showerror("エラー", f"ファイルを開けませんでした:\n{exc}")
            return
        text = self._decode_bytes(raw)
        if text is None:
            # Previously this path fell through with `text` unbound.
            messagebox.showerror("エラー", "ファイルの文字コードを判別できませんでした")
            return
        self.text_input.delete("1.0", tk.END)
        self.text_input.insert("1.0", text)
        self.status_var.set(f"読み込み: {path}")

    def _on_text_change(self, _=None):
        """Handle ``<<Modified>>`` by resetting the widget's modified flag."""
        self.text_input.edit_modified(False)

    @staticmethod
    def _set_readonly_text(widget, content):
        """Replace the content of a DISABLED ``tk.Text`` widget."""
        widget.configure(state=tk.NORMAL)
        widget.delete("1.0", tk.END)
        if content:
            widget.insert("1.0", content)
        widget.configure(state=tk.DISABLED)

    def _reset_chart(self):
        """Blank the chart so no bars from an earlier analysis remain."""
        if not MATPLOTLIB_AVAILABLE:
            return
        self.ax.clear()
        self.ax.set_facecolor("#0d1117")
        self.mpl_canvas.draw()

    def _clear(self):
        """Empty the input and every result view, and reset the status bar."""
        self.text_input.delete("1.0", tk.END)
        for widget in (self.stat_text, self.char_text):
            self._set_readonly_text(widget, "")
        self.freq_tree.delete(*self.freq_tree.get_children())
        self._reset_chart()
        self._text = ""
        self.status_var.set(self._IDLE_STATUS)

    # ── Analysis ─────────────────────────────────────────────
    def _analyze(self):
        """Validate the input and start the analysis on a worker thread."""
        text = self.text_input.get("1.0", tk.END).strip()
        if not text:
            messagebox.showwarning("警告", "テキストを入力してください")
            return
        self._text = text
        self.status_var.set("分析中...")
        # Read the Tk variable here, on the GUI thread, and hand the plain
        # bool to the worker so the worker never touches Tk state directly.
        use_stop = self.stop_var.get()
        threading.Thread(target=self._do_analyze, args=(use_stop,),
                         daemon=True).start()

    def _do_analyze(self, use_stop=None):
        """Worker-thread entry point: analyse ``self._text`` and post results.

        Args:
            use_stop: Whether to filter stop words.  ``None`` keeps the old
                call form working by reading the checkbox variable.
        """
        if use_stop is None:
            use_stop = self.stop_var.get()
        results = self._analyze_text(self._text, use_stop)
        # Widgets are only updated from the Tk event loop.
        self.root.after(0, self._update_ui, *results)

    @classmethod
    def _analyze_text(cls, text, use_stop=True):
        """Compute every statistic for *text* without touching any widget.

        Args:
            text: The text to analyse.
            use_stop: When true, drop stop words and one-character tokens
                from the frequency ranking.

        Returns:
            ``(stat_str, top50, total_t, char_str)``: the formatted basic
            statistics, the 50 most common ``(token, count)`` pairs, the
            number of tokens that were counted, and the formatted
            character-class table.
        """
        # --- basic statistics ---
        chars = len(text)
        # str.isspace also covers tabs, CR and the full-width space U+3000,
        # which the former replace(" ")/replace("\n") chain still counted.
        chars_ns = sum(1 for ch in text if not ch.isspace())
        lines = text.count("\n") + 1
        words_raw = re.findall(r"\w+", text, re.UNICODE)
        words = len(words_raw)
        # `or 1`: the sentence count is used as a divisor below.
        sents = len(cls._SENTENCE_END_RE.findall(text)) or 1
        paras = len([p for p in text.split("\n\n") if p.strip()])
        avg_wl = sum(len(w) for w in words_raw) / max(1, words)
        avg_sw = words / sents
        unique = len(set(w.lower() for w in words_raw))
        jp_chars = len(re.findall(r"[\u3040-\u9fff]", text))
        jp_rate = jp_chars / max(1, chars) * 100
        stat_str = (
            f"━━ 基本統計 ━━━━━━━━━━━━━━━━━━\n"
            f" 文字数 (全体): {chars:>8,}\n"
            f" 文字数 (空白除): {chars_ns:>8,}\n"
            f" 行数: {lines:>8,}\n"
            f" 段落数: {paras:>8,}\n"
            f" 単語数: {words:>8,}\n"
            f" ユニーク語数: {unique:>8,}\n"
            f" 文数 (句点等): {sents:>8,}\n"
            f" 平均語長: {avg_wl:>8.2f} 文字\n"
            f" 平均文長: {avg_sw:>8.1f} 語/文\n"
            f" 日本語文字率: {jp_rate:>7.1f}%\n"
            f"\n━━ 可読性 ━━━━━━━━━━━━━━━━━━━\n"
        )
        # Flesch Reading Ease, simplified and English-oriented: syllables are
        # approximated by the number of vowels (at least one per word).
        syllables = sum(max(1, len(re.findall(r"[aeiouAEIOU]", w)))
                        for w in words_raw)
        fk = max(0, 206.835 - 1.015 * (words / sents)
                 - 84.6 * (syllables / max(1, words)))
        stat_str += f" Flesch Reading Ease: {fk:.1f}\n"

        # --- word frequency ---
        tokens = [w.lower() for w in words_raw]
        if use_stop:
            tokens = [t for t in tokens
                      if t not in cls.STOP_WORDS and len(t) > 1]
        counter = collections.Counter(tokens)
        top50 = counter.most_common(50)
        total_t = sum(counter.values())

        # --- character classes ---
        hiragana = len(re.findall(r"[\u3041-\u3096]", text))
        katakana = len(re.findall(r"[\u30a1-\u30f6]", text))
        kanji = len(re.findall(r"[\u4e00-\u9fff]", text))
        ascii_a = len(re.findall(r"[a-zA-Z]", text))
        digits = len(re.findall(r"\d", text))
        punct = len(cls._PUNCT_RE.findall(text))
        char_str = (
            f"━━ 文字種別 ━━━━━━━━━━━━━━━━━\n"
            f" ひらがな: {hiragana:>7,}\n"
            f" カタカナ: {katakana:>7,}\n"
            f" 漢字: {kanji:>7,}\n"
            f" ASCII英字:{ascii_a:>7,}\n"
            f" 数字: {digits:>7,}\n"
            f" 句読点等: {punct:>7,}\n"
        )
        return stat_str, top50, total_t, char_str

    def _draw_chart(self, pairs):
        """Draw a horizontal bar chart for ``(word, count)`` *pairs*."""
        words = [w for w, _ in pairs]
        counts = [c for _, c in pairs]
        self.ax.clear()
        self.ax.set_facecolor("#0d1117")
        # Reversed so the most frequent word ends up at the top.
        self.ax.barh(words[::-1], counts[::-1], color="#4fc3f7")
        self.ax.set_title("単語頻度 Top 20", color="#c9d1d9", fontsize=10)
        self.ax.tick_params(colors="#8b949e", labelsize=7)
        for spine in self.ax.spines.values():
            spine.set_edgecolor("#30363d")
        self.ax.grid(True, axis="x", color="#21262d", linewidth=0.5)
        self.fig.tight_layout()
        self.mpl_canvas.draw()

    def _update_ui(self, stat_str, top50, total_t, char_str):
        """Show analysis results in all tabs (runs on the Tk thread).

        Args:
            stat_str: Formatted basic-statistics text.
            top50: Most common ``(token, count)`` pairs, most frequent first.
            total_t: Number of counted tokens (denominator for percentages).
            char_str: Formatted character-class text.
        """
        self._set_readonly_text(self.stat_text, stat_str)

        self.freq_tree.delete(*self.freq_tree.get_children())
        for i, (word, cnt) in enumerate(top50, 1):
            pct = cnt / max(1, total_t) * 100
            self.freq_tree.insert("", tk.END,
                                  values=(i, word, cnt, f"{pct:.2f}%"))

        if MATPLOTLIB_AVAILABLE:
            if top50:
                self._draw_chart(top50[:20])
            else:
                # No tokens left: do not keep the previous run's bars.
                self._reset_chart()

        self._set_readonly_text(self.char_text, char_str)

        top_word, top_count = top50[0] if top50 else ("-", 0)
        self.status_var.set(
            f"分析完了: {len(self._text):,} 文字 Top語: "
            f"{top_word} ({top_count} 回)")
# Script entry point: create the Tk root window, attach the application to it
# and run the Tk event loop until the window is closed.
if __name__ == "__main__":
    root = tk.Tk()
    app = App080(root)
    root.mainloop()
5. コード解説
テキスト分析ツールのコードを詳しく解説します。クラスベースの設計で各機能を整理して実装しています。
クラス設計とコンストラクタ
App080クラスにアプリの全機能をまとめています。__init__でウィンドウ設定、_build_ui()でUI構築、_analyze()・_do_analyze()・_update_ui()でメイン処理(分析と結果表示)を担当します。責任の分離により、コードが読みやすくなります。
import tkinter as tk
from tkinter import ttk, messagebox, filedialog
import re
import math
import threading
import collections
try:
import matplotlib
matplotlib.use("TkAgg")
from matplotlib.figure import Figure
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
MATPLOTLIB_AVAILABLE = True
except ImportError:
MATPLOTLIB_AVAILABLE = False
class App080:
    """Tkinter GUI that analyses free text and visualises word frequencies.

    Layout: an editable text pane on the left and a notebook on the right
    with four tabs (basic statistics, word-frequency table, bar chart,
    character classes).  The chart tab is only functional when the
    module-level ``MATPLOTLIB_AVAILABLE`` flag is true.

    Words are extracted with a plain regex word match, so Japanese text
    written without spaces is split at punctuation/whitespace only, not into
    morphemes.
    """

    # Simplified Japanese/English stop-word list.
    STOP_WORDS = {
        "の", "に", "は", "を", "た", "が", "で", "て", "と", "し", "れ", "さ",
        "ある", "いる", "も", "する", "から", "な", "こと", "として", "い", "や",
        "れる", "など", "なっ", "ない", "この", "ため", "その", "あっ", "よう",
        "また", "もの", "という", "あり", "まで", "られ", "なる", "へ", "か",
        "だ", "これ", "によって", "により", "おり", "より", "による", "ず", "なり",
        "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
        "have", "has", "had", "do", "does", "did", "will", "would", "could",
        "should", "may", "might", "shall", "can", "to", "of", "in", "for",
        "on", "with", "at", "by", "from", "as", "or", "and", "but", "not",
        "this", "that", "it", "its", "i", "you", "he", "she", "we", "they",
    }

    # Encodings tried, in order, when decoding an opened file.
    _ENCODINGS = ("utf-8-sig", "utf-8", "shift-jis", "cp932", "euc-jp")
    # Status-bar text shown while no analysis result is displayed.
    _IDLE_STATUS = "テキストを入力または読み込んでください"
    # Sentence terminators / punctuation.  The full-width "!" and "?" are
    # written as escapes so they cannot be silently folded to ASCII.
    _SENTENCE_END_RE = re.compile(r"[。.!?\uff01\uff1f]")
    _PUNCT_RE = re.compile(r"[、。,.「」『』【】・…—\-!?\uff01\uff1f]")

    def __init__(self, root):
        """Configure the top-level window and build all widgets.

        Args:
            root: The Tk window that hosts the application.
        """
        self.root = root
        self.root.title("テキスト分析ツール")
        self.root.geometry("1050x660")
        self.root.configure(bg="#1e1e1e")
        self._text = ""  # snapshot of the text handed to the last analysis
        self._build_ui()

    def _build_ui(self):
        """Create the header, toolbar, input pane, result tabs and status bar."""
        header = tk.Frame(self.root, bg="#252526", pady=6)
        header.pack(fill=tk.X)
        tk.Label(header, text="📝 テキスト分析ツール",
                 font=("Noto Sans JP", 12, "bold"),
                 bg="#252526", fg="#4fc3f7").pack(side=tk.LEFT, padx=12)

        # Toolbar
        tb = tk.Frame(self.root, bg="#2d2d2d", pady=4)
        tb.pack(fill=tk.X)
        ttk.Button(tb, text="📂 ファイルを開く",
                   command=self._open_file).pack(side=tk.LEFT, padx=4)
        tk.Button(tb, text="▶ 分析", command=self._analyze,
                  bg="#1565c0", fg="white", relief=tk.FLAT,
                  font=("Arial", 10, "bold"), padx=12, pady=4,
                  activebackground="#0d47a1", bd=0).pack(side=tk.LEFT, padx=4)
        ttk.Button(tb, text="🗑 クリア",
                   command=self._clear).pack(side=tk.LEFT, padx=4)
        self.stop_var = tk.BooleanVar(value=True)
        tk.Checkbutton(tb, text="ストップワード除外",
                       variable=self.stop_var,
                       bg="#2d2d2d", fg="#ccc", selectcolor="#3c3c3c",
                       activebackground="#2d2d2d").pack(side=tk.LEFT, padx=8)

        # Main area (PanedWindow)
        paned = ttk.PanedWindow(self.root, orient=tk.HORIZONTAL)
        paned.pack(fill=tk.BOTH, expand=True, padx=4, pady=4)

        # Left: text input
        left = tk.Frame(paned, bg="#1e1e1e")
        paned.add(left, weight=1)
        tk.Label(left, text="テキスト入力", bg="#1e1e1e", fg="#888",
                 font=("Arial", 9)).pack(anchor="w")
        self.text_input = tk.Text(left, bg="#0d1117", fg="#c9d1d9",
                                  font=("Arial", 10), relief=tk.FLAT,
                                  insertbackground="white",
                                  wrap=tk.WORD)
        txsb = ttk.Scrollbar(left, command=self.text_input.yview)
        self.text_input.configure(yscrollcommand=txsb.set)
        txsb.pack(side=tk.RIGHT, fill=tk.Y)
        self.text_input.pack(fill=tk.BOTH, expand=True)
        self.text_input.bind("<<Modified>>", self._on_text_change)

        # Right: analysis results
        right = tk.Frame(paned, bg="#1e1e1e")
        paned.add(right, weight=1)
        nb = ttk.Notebook(right)
        nb.pack(fill=tk.BOTH, expand=True)

        # Tab 1: basic statistics
        tab_stat = tk.Frame(nb, bg="#1e1e1e")
        nb.add(tab_stat, text="基本統計")
        self.stat_text = tk.Text(tab_stat, bg="#0d1117", fg="#c9d1d9",
                                 font=("Courier New", 9), relief=tk.FLAT,
                                 state=tk.DISABLED)
        self.stat_text.pack(fill=tk.BOTH, expand=True)

        # Tab 2: word frequency
        tab_freq = tk.Frame(nb, bg="#1e1e1e")
        nb.add(tab_freq, text="単語頻度")
        cols = ("rank", "word", "count", "pct")
        self.freq_tree = ttk.Treeview(tab_freq, columns=cols,
                                      show="headings", height=18)
        self.freq_tree.heading("rank", text="#")
        self.freq_tree.heading("word", text="単語")
        self.freq_tree.heading("count", text="出現数")
        self.freq_tree.heading("pct", text="割合")
        self.freq_tree.column("rank", width=40, anchor="center")
        self.freq_tree.column("word", width=140, anchor="w")
        self.freq_tree.column("count", width=70, anchor="center")
        self.freq_tree.column("pct", width=70, anchor="center")
        fsb = ttk.Scrollbar(tab_freq, command=self.freq_tree.yview)
        self.freq_tree.configure(yscrollcommand=fsb.set)
        fsb.pack(side=tk.RIGHT, fill=tk.Y)
        self.freq_tree.pack(fill=tk.BOTH, expand=True)

        # Tab 3: chart (needs matplotlib)
        tab_chart = tk.Frame(nb, bg="#0d1117")
        nb.add(tab_chart, text="グラフ")
        if MATPLOTLIB_AVAILABLE:
            self.fig = Figure(figsize=(5, 4), facecolor="#0d1117")
            self.ax = self.fig.add_subplot(111)
            self.mpl_canvas = FigureCanvasTkAgg(self.fig, master=tab_chart)
            self.mpl_canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)
        else:
            tk.Label(tab_chart,
                     text="pip install matplotlib でグラフが表示されます",
                     bg="#0d1117", fg="#555").pack(expand=True)

        # Tab 4: character classes
        tab_char = tk.Frame(nb, bg="#1e1e1e")
        nb.add(tab_char, text="文字解析")
        self.char_text = tk.Text(tab_char, bg="#0d1117", fg="#c9d1d9",
                                 font=("Courier New", 9), relief=tk.FLAT,
                                 state=tk.DISABLED)
        self.char_text.pack(fill=tk.BOTH, expand=True)

        # Status bar
        self.status_var = tk.StringVar(value=self._IDLE_STATUS)
        tk.Label(self.root, textvariable=self.status_var,
                 bg="#252526", fg="#858585", font=("Arial", 9),
                 anchor="w", padx=8).pack(fill=tk.X, side=tk.BOTTOM)

    # ── File handling ────────────────────────────────────────
    @classmethod
    def _decode_bytes(cls, raw):
        """Decode *raw* with the first encoding in ``_ENCODINGS`` that fits.

        Args:
            raw: File content as ``bytes``.

        Returns:
            The decoded text with CRLF/CR normalised to LF (matching what
            text-mode ``open`` would have produced), or ``None`` when no
            candidate encoding can decode the data.
        """
        for enc in cls._ENCODINGS:
            try:
                text = raw.decode(enc)
            except UnicodeDecodeError:
                continue
            return text.replace("\r\n", "\n").replace("\r", "\n")
        return None

    def _open_file(self):
        """Ask for a file and load its content into the input pane.

        Shows an error dialog (instead of raising) when the file cannot be
        read or when none of the candidate encodings decodes it.
        """
        path = filedialog.askopenfilename(
            filetypes=[("テキスト", "*.txt *.md *.csv"), ("すべて", "*.*")])
        if not path:
            return
        try:
            with open(path, "rb") as f:
                raw = f.read()
        except OSError as exc:
            messagebox.showerror("エラー", f"ファイルを開けませんでした:\n{exc}")
            return
        text = self._decode_bytes(raw)
        if text is None:
            # Previously this path fell through with `text` unbound.
            messagebox.showerror("エラー", "ファイルの文字コードを判別できませんでした")
            return
        self.text_input.delete("1.0", tk.END)
        self.text_input.insert("1.0", text)
        self.status_var.set(f"読み込み: {path}")

    def _on_text_change(self, _=None):
        """Handle ``<<Modified>>`` by resetting the widget's modified flag."""
        self.text_input.edit_modified(False)

    @staticmethod
    def _set_readonly_text(widget, content):
        """Replace the content of a DISABLED ``tk.Text`` widget."""
        widget.configure(state=tk.NORMAL)
        widget.delete("1.0", tk.END)
        if content:
            widget.insert("1.0", content)
        widget.configure(state=tk.DISABLED)

    def _reset_chart(self):
        """Blank the chart so no bars from an earlier analysis remain."""
        if not MATPLOTLIB_AVAILABLE:
            return
        self.ax.clear()
        self.ax.set_facecolor("#0d1117")
        self.mpl_canvas.draw()

    def _clear(self):
        """Empty the input and every result view, and reset the status bar."""
        self.text_input.delete("1.0", tk.END)
        for widget in (self.stat_text, self.char_text):
            self._set_readonly_text(widget, "")
        self.freq_tree.delete(*self.freq_tree.get_children())
        self._reset_chart()
        self._text = ""
        self.status_var.set(self._IDLE_STATUS)

    # ── Analysis ─────────────────────────────────────────────
    def _analyze(self):
        """Validate the input and start the analysis on a worker thread."""
        text = self.text_input.get("1.0", tk.END).strip()
        if not text:
            messagebox.showwarning("警告", "テキストを入力してください")
            return
        self._text = text
        self.status_var.set("分析中...")
        # Read the Tk variable here, on the GUI thread, and hand the plain
        # bool to the worker so the worker never touches Tk state directly.
        use_stop = self.stop_var.get()
        threading.Thread(target=self._do_analyze, args=(use_stop,),
                         daemon=True).start()

    def _do_analyze(self, use_stop=None):
        """Worker-thread entry point: analyse ``self._text`` and post results.

        Args:
            use_stop: Whether to filter stop words.  ``None`` keeps the old
                call form working by reading the checkbox variable.
        """
        if use_stop is None:
            use_stop = self.stop_var.get()
        results = self._analyze_text(self._text, use_stop)
        # Widgets are only updated from the Tk event loop.
        self.root.after(0, self._update_ui, *results)

    @classmethod
    def _analyze_text(cls, text, use_stop=True):
        """Compute every statistic for *text* without touching any widget.

        Args:
            text: The text to analyse.
            use_stop: When true, drop stop words and one-character tokens
                from the frequency ranking.

        Returns:
            ``(stat_str, top50, total_t, char_str)``: the formatted basic
            statistics, the 50 most common ``(token, count)`` pairs, the
            number of tokens that were counted, and the formatted
            character-class table.
        """
        # --- basic statistics ---
        chars = len(text)
        # str.isspace also covers tabs, CR and the full-width space U+3000,
        # which the former replace(" ")/replace("\n") chain still counted.
        chars_ns = sum(1 for ch in text if not ch.isspace())
        lines = text.count("\n") + 1
        words_raw = re.findall(r"\w+", text, re.UNICODE)
        words = len(words_raw)
        # `or 1`: the sentence count is used as a divisor below.
        sents = len(cls._SENTENCE_END_RE.findall(text)) or 1
        paras = len([p for p in text.split("\n\n") if p.strip()])
        avg_wl = sum(len(w) for w in words_raw) / max(1, words)
        avg_sw = words / sents
        unique = len(set(w.lower() for w in words_raw))
        jp_chars = len(re.findall(r"[\u3040-\u9fff]", text))
        jp_rate = jp_chars / max(1, chars) * 100
        stat_str = (
            f"━━ 基本統計 ━━━━━━━━━━━━━━━━━━\n"
            f" 文字数 (全体): {chars:>8,}\n"
            f" 文字数 (空白除): {chars_ns:>8,}\n"
            f" 行数: {lines:>8,}\n"
            f" 段落数: {paras:>8,}\n"
            f" 単語数: {words:>8,}\n"
            f" ユニーク語数: {unique:>8,}\n"
            f" 文数 (句点等): {sents:>8,}\n"
            f" 平均語長: {avg_wl:>8.2f} 文字\n"
            f" 平均文長: {avg_sw:>8.1f} 語/文\n"
            f" 日本語文字率: {jp_rate:>7.1f}%\n"
            f"\n━━ 可読性 ━━━━━━━━━━━━━━━━━━━\n"
        )
        # Flesch Reading Ease, simplified and English-oriented: syllables are
        # approximated by the number of vowels (at least one per word).
        syllables = sum(max(1, len(re.findall(r"[aeiouAEIOU]", w)))
                        for w in words_raw)
        fk = max(0, 206.835 - 1.015 * (words / sents)
                 - 84.6 * (syllables / max(1, words)))
        stat_str += f" Flesch Reading Ease: {fk:.1f}\n"

        # --- word frequency ---
        tokens = [w.lower() for w in words_raw]
        if use_stop:
            tokens = [t for t in tokens
                      if t not in cls.STOP_WORDS and len(t) > 1]
        counter = collections.Counter(tokens)
        top50 = counter.most_common(50)
        total_t = sum(counter.values())

        # --- character classes ---
        hiragana = len(re.findall(r"[\u3041-\u3096]", text))
        katakana = len(re.findall(r"[\u30a1-\u30f6]", text))
        kanji = len(re.findall(r"[\u4e00-\u9fff]", text))
        ascii_a = len(re.findall(r"[a-zA-Z]", text))
        digits = len(re.findall(r"\d", text))
        punct = len(cls._PUNCT_RE.findall(text))
        char_str = (
            f"━━ 文字種別 ━━━━━━━━━━━━━━━━━\n"
            f" ひらがな: {hiragana:>7,}\n"
            f" カタカナ: {katakana:>7,}\n"
            f" 漢字: {kanji:>7,}\n"
            f" ASCII英字:{ascii_a:>7,}\n"
            f" 数字: {digits:>7,}\n"
            f" 句読点等: {punct:>7,}\n"
        )
        return stat_str, top50, total_t, char_str

    def _draw_chart(self, pairs):
        """Draw a horizontal bar chart for ``(word, count)`` *pairs*."""
        words = [w for w, _ in pairs]
        counts = [c for _, c in pairs]
        self.ax.clear()
        self.ax.set_facecolor("#0d1117")
        # Reversed so the most frequent word ends up at the top.
        self.ax.barh(words[::-1], counts[::-1], color="#4fc3f7")
        self.ax.set_title("単語頻度 Top 20", color="#c9d1d9", fontsize=10)
        self.ax.tick_params(colors="#8b949e", labelsize=7)
        for spine in self.ax.spines.values():
            spine.set_edgecolor("#30363d")
        self.ax.grid(True, axis="x", color="#21262d", linewidth=0.5)
        self.fig.tight_layout()
        self.mpl_canvas.draw()

    def _update_ui(self, stat_str, top50, total_t, char_str):
        """Show analysis results in all tabs (runs on the Tk thread).

        Args:
            stat_str: Formatted basic-statistics text.
            top50: Most common ``(token, count)`` pairs, most frequent first.
            total_t: Number of counted tokens (denominator for percentages).
            char_str: Formatted character-class text.
        """
        self._set_readonly_text(self.stat_text, stat_str)

        self.freq_tree.delete(*self.freq_tree.get_children())
        for i, (word, cnt) in enumerate(top50, 1):
            pct = cnt / max(1, total_t) * 100
            self.freq_tree.insert("", tk.END,
                                  values=(i, word, cnt, f"{pct:.2f}%"))

        if MATPLOTLIB_AVAILABLE:
            if top50:
                self._draw_chart(top50[:20])
            else:
                # No tokens left: do not keep the previous run's bars.
                self._reset_chart()

        self._set_readonly_text(self.char_text, char_str)

        top_word, top_count = top50[0] if top50 else ("-", 0)
        self.status_var.set(
            f"分析完了: {len(self._text):,} 文字 Top語: "
            f"{top_word} ({top_count} 回)")
# Script entry point: create the Tk root window, attach the application to it
# and run the Tk event loop until the window is closed.
if __name__ == "__main__":
    root = tk.Tk()
    app = App080(root)
    root.mainloop()
UIレイアウトの構築
ttk.PanedWindowで左のテキスト入力エリアと右の結果エリアを分け、結果エリアはttk.Notebookのタブ(基本統計・単語頻度・グラフ・文字解析)で切り替えます。各ウィジェットはpack()で配置し、fill=tk.BOTHとexpand=Trueで領域いっぱいに広がるよう設定しています。
import tkinter as tk
from tkinter import ttk, messagebox, filedialog
import re
import math
import threading
import collections
try:
import matplotlib
matplotlib.use("TkAgg")
from matplotlib.figure import Figure
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
MATPLOTLIB_AVAILABLE = True
except ImportError:
MATPLOTLIB_AVAILABLE = False
class App080:
    """Tkinter GUI that analyses free text and visualises word frequencies.

    Layout: an editable text pane on the left and a notebook on the right
    with four tabs (basic statistics, word-frequency table, bar chart,
    character classes).  The chart tab is only functional when the
    module-level ``MATPLOTLIB_AVAILABLE`` flag is true.

    Words are extracted with a plain regex word match, so Japanese text
    written without spaces is split at punctuation/whitespace only, not into
    morphemes.
    """

    # Simplified Japanese/English stop-word list.
    STOP_WORDS = {
        "の", "に", "は", "を", "た", "が", "で", "て", "と", "し", "れ", "さ",
        "ある", "いる", "も", "する", "から", "な", "こと", "として", "い", "や",
        "れる", "など", "なっ", "ない", "この", "ため", "その", "あっ", "よう",
        "また", "もの", "という", "あり", "まで", "られ", "なる", "へ", "か",
        "だ", "これ", "によって", "により", "おり", "より", "による", "ず", "なり",
        "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
        "have", "has", "had", "do", "does", "did", "will", "would", "could",
        "should", "may", "might", "shall", "can", "to", "of", "in", "for",
        "on", "with", "at", "by", "from", "as", "or", "and", "but", "not",
        "this", "that", "it", "its", "i", "you", "he", "she", "we", "they",
    }

    # Encodings tried, in order, when decoding an opened file.
    _ENCODINGS = ("utf-8-sig", "utf-8", "shift-jis", "cp932", "euc-jp")
    # Status-bar text shown while no analysis result is displayed.
    _IDLE_STATUS = "テキストを入力または読み込んでください"
    # Sentence terminators / punctuation.  The full-width "!" and "?" are
    # written as escapes so they cannot be silently folded to ASCII.
    _SENTENCE_END_RE = re.compile(r"[。.!?\uff01\uff1f]")
    _PUNCT_RE = re.compile(r"[、。,.「」『』【】・…—\-!?\uff01\uff1f]")

    def __init__(self, root):
        """Configure the top-level window and build all widgets.

        Args:
            root: The Tk window that hosts the application.
        """
        self.root = root
        self.root.title("テキスト分析ツール")
        self.root.geometry("1050x660")
        self.root.configure(bg="#1e1e1e")
        self._text = ""  # snapshot of the text handed to the last analysis
        self._build_ui()

    def _build_ui(self):
        """Create the header, toolbar, input pane, result tabs and status bar."""
        header = tk.Frame(self.root, bg="#252526", pady=6)
        header.pack(fill=tk.X)
        tk.Label(header, text="📝 テキスト分析ツール",
                 font=("Noto Sans JP", 12, "bold"),
                 bg="#252526", fg="#4fc3f7").pack(side=tk.LEFT, padx=12)

        # Toolbar
        tb = tk.Frame(self.root, bg="#2d2d2d", pady=4)
        tb.pack(fill=tk.X)
        ttk.Button(tb, text="📂 ファイルを開く",
                   command=self._open_file).pack(side=tk.LEFT, padx=4)
        tk.Button(tb, text="▶ 分析", command=self._analyze,
                  bg="#1565c0", fg="white", relief=tk.FLAT,
                  font=("Arial", 10, "bold"), padx=12, pady=4,
                  activebackground="#0d47a1", bd=0).pack(side=tk.LEFT, padx=4)
        ttk.Button(tb, text="🗑 クリア",
                   command=self._clear).pack(side=tk.LEFT, padx=4)
        self.stop_var = tk.BooleanVar(value=True)
        tk.Checkbutton(tb, text="ストップワード除外",
                       variable=self.stop_var,
                       bg="#2d2d2d", fg="#ccc", selectcolor="#3c3c3c",
                       activebackground="#2d2d2d").pack(side=tk.LEFT, padx=8)

        # Main area (PanedWindow)
        paned = ttk.PanedWindow(self.root, orient=tk.HORIZONTAL)
        paned.pack(fill=tk.BOTH, expand=True, padx=4, pady=4)

        # Left: text input
        left = tk.Frame(paned, bg="#1e1e1e")
        paned.add(left, weight=1)
        tk.Label(left, text="テキスト入力", bg="#1e1e1e", fg="#888",
                 font=("Arial", 9)).pack(anchor="w")
        self.text_input = tk.Text(left, bg="#0d1117", fg="#c9d1d9",
                                  font=("Arial", 10), relief=tk.FLAT,
                                  insertbackground="white",
                                  wrap=tk.WORD)
        txsb = ttk.Scrollbar(left, command=self.text_input.yview)
        self.text_input.configure(yscrollcommand=txsb.set)
        txsb.pack(side=tk.RIGHT, fill=tk.Y)
        self.text_input.pack(fill=tk.BOTH, expand=True)
        self.text_input.bind("<<Modified>>", self._on_text_change)

        # Right: analysis results
        right = tk.Frame(paned, bg="#1e1e1e")
        paned.add(right, weight=1)
        nb = ttk.Notebook(right)
        nb.pack(fill=tk.BOTH, expand=True)

        # Tab 1: basic statistics
        tab_stat = tk.Frame(nb, bg="#1e1e1e")
        nb.add(tab_stat, text="基本統計")
        self.stat_text = tk.Text(tab_stat, bg="#0d1117", fg="#c9d1d9",
                                 font=("Courier New", 9), relief=tk.FLAT,
                                 state=tk.DISABLED)
        self.stat_text.pack(fill=tk.BOTH, expand=True)

        # Tab 2: word frequency
        tab_freq = tk.Frame(nb, bg="#1e1e1e")
        nb.add(tab_freq, text="単語頻度")
        cols = ("rank", "word", "count", "pct")
        self.freq_tree = ttk.Treeview(tab_freq, columns=cols,
                                      show="headings", height=18)
        self.freq_tree.heading("rank", text="#")
        self.freq_tree.heading("word", text="単語")
        self.freq_tree.heading("count", text="出現数")
        self.freq_tree.heading("pct", text="割合")
        self.freq_tree.column("rank", width=40, anchor="center")
        self.freq_tree.column("word", width=140, anchor="w")
        self.freq_tree.column("count", width=70, anchor="center")
        self.freq_tree.column("pct", width=70, anchor="center")
        fsb = ttk.Scrollbar(tab_freq, command=self.freq_tree.yview)
        self.freq_tree.configure(yscrollcommand=fsb.set)
        fsb.pack(side=tk.RIGHT, fill=tk.Y)
        self.freq_tree.pack(fill=tk.BOTH, expand=True)

        # Tab 3: chart (needs matplotlib)
        tab_chart = tk.Frame(nb, bg="#0d1117")
        nb.add(tab_chart, text="グラフ")
        if MATPLOTLIB_AVAILABLE:
            self.fig = Figure(figsize=(5, 4), facecolor="#0d1117")
            self.ax = self.fig.add_subplot(111)
            self.mpl_canvas = FigureCanvasTkAgg(self.fig, master=tab_chart)
            self.mpl_canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)
        else:
            tk.Label(tab_chart,
                     text="pip install matplotlib でグラフが表示されます",
                     bg="#0d1117", fg="#555").pack(expand=True)

        # Tab 4: character classes
        tab_char = tk.Frame(nb, bg="#1e1e1e")
        nb.add(tab_char, text="文字解析")
        self.char_text = tk.Text(tab_char, bg="#0d1117", fg="#c9d1d9",
                                 font=("Courier New", 9), relief=tk.FLAT,
                                 state=tk.DISABLED)
        self.char_text.pack(fill=tk.BOTH, expand=True)

        # Status bar
        self.status_var = tk.StringVar(value=self._IDLE_STATUS)
        tk.Label(self.root, textvariable=self.status_var,
                 bg="#252526", fg="#858585", font=("Arial", 9),
                 anchor="w", padx=8).pack(fill=tk.X, side=tk.BOTTOM)

    # ── File handling ────────────────────────────────────────
    @classmethod
    def _decode_bytes(cls, raw):
        """Decode *raw* with the first encoding in ``_ENCODINGS`` that fits.

        Args:
            raw: File content as ``bytes``.

        Returns:
            The decoded text with CRLF/CR normalised to LF (matching what
            text-mode ``open`` would have produced), or ``None`` when no
            candidate encoding can decode the data.
        """
        for enc in cls._ENCODINGS:
            try:
                text = raw.decode(enc)
            except UnicodeDecodeError:
                continue
            return text.replace("\r\n", "\n").replace("\r", "\n")
        return None

    def _open_file(self):
        """Ask for a file and load its content into the input pane.

        Shows an error dialog (instead of raising) when the file cannot be
        read or when none of the candidate encodings decodes it.
        """
        path = filedialog.askopenfilename(
            filetypes=[("テキスト", "*.txt *.md *.csv"), ("すべて", "*.*")])
        if not path:
            return
        try:
            with open(path, "rb") as f:
                raw = f.read()
        except OSError as exc:
            messagebox.showerror("エラー", f"ファイルを開けませんでした:\n{exc}")
            return
        text = self._decode_bytes(raw)
        if text is None:
            # Previously this path fell through with `text` unbound.
            messagebox.showerror("エラー", "ファイルの文字コードを判別できませんでした")
            return
        self.text_input.delete("1.0", tk.END)
        self.text_input.insert("1.0", text)
        self.status_var.set(f"読み込み: {path}")

    def _on_text_change(self, _=None):
        """Handle ``<<Modified>>`` by resetting the widget's modified flag."""
        self.text_input.edit_modified(False)

    @staticmethod
    def _set_readonly_text(widget, content):
        """Replace the content of a DISABLED ``tk.Text`` widget."""
        widget.configure(state=tk.NORMAL)
        widget.delete("1.0", tk.END)
        if content:
            widget.insert("1.0", content)
        widget.configure(state=tk.DISABLED)

    def _reset_chart(self):
        """Blank the chart so no bars from an earlier analysis remain."""
        if not MATPLOTLIB_AVAILABLE:
            return
        self.ax.clear()
        self.ax.set_facecolor("#0d1117")
        self.mpl_canvas.draw()

    def _clear(self):
        """Empty the input and every result view, and reset the status bar."""
        self.text_input.delete("1.0", tk.END)
        for widget in (self.stat_text, self.char_text):
            self._set_readonly_text(widget, "")
        self.freq_tree.delete(*self.freq_tree.get_children())
        self._reset_chart()
        self._text = ""
        self.status_var.set(self._IDLE_STATUS)

    # ── Analysis ─────────────────────────────────────────────
    def _analyze(self):
        """Validate the input and start the analysis on a worker thread."""
        text = self.text_input.get("1.0", tk.END).strip()
        if not text:
            messagebox.showwarning("警告", "テキストを入力してください")
            return
        self._text = text
        self.status_var.set("分析中...")
        # Read the Tk variable here, on the GUI thread, and hand the plain
        # bool to the worker so the worker never touches Tk state directly.
        use_stop = self.stop_var.get()
        threading.Thread(target=self._do_analyze, args=(use_stop,),
                         daemon=True).start()

    def _do_analyze(self, use_stop=None):
        """Worker-thread entry point: analyse ``self._text`` and post results.

        Args:
            use_stop: Whether to filter stop words.  ``None`` keeps the old
                call form working by reading the checkbox variable.
        """
        if use_stop is None:
            use_stop = self.stop_var.get()
        results = self._analyze_text(self._text, use_stop)
        # Widgets are only updated from the Tk event loop.
        self.root.after(0, self._update_ui, *results)

    @classmethod
    def _analyze_text(cls, text, use_stop=True):
        """Compute every statistic for *text* without touching any widget.

        Args:
            text: The text to analyse.
            use_stop: When true, drop stop words and one-character tokens
                from the frequency ranking.

        Returns:
            ``(stat_str, top50, total_t, char_str)``: the formatted basic
            statistics, the 50 most common ``(token, count)`` pairs, the
            number of tokens that were counted, and the formatted
            character-class table.
        """
        # --- basic statistics ---
        chars = len(text)
        # str.isspace also covers tabs, CR and the full-width space U+3000,
        # which the former replace(" ")/replace("\n") chain still counted.
        chars_ns = sum(1 for ch in text if not ch.isspace())
        lines = text.count("\n") + 1
        words_raw = re.findall(r"\w+", text, re.UNICODE)
        words = len(words_raw)
        # `or 1`: the sentence count is used as a divisor below.
        sents = len(cls._SENTENCE_END_RE.findall(text)) or 1
        paras = len([p for p in text.split("\n\n") if p.strip()])
        avg_wl = sum(len(w) for w in words_raw) / max(1, words)
        avg_sw = words / sents
        unique = len(set(w.lower() for w in words_raw))
        jp_chars = len(re.findall(r"[\u3040-\u9fff]", text))
        jp_rate = jp_chars / max(1, chars) * 100
        stat_str = (
            f"━━ 基本統計 ━━━━━━━━━━━━━━━━━━\n"
            f" 文字数 (全体): {chars:>8,}\n"
            f" 文字数 (空白除): {chars_ns:>8,}\n"
            f" 行数: {lines:>8,}\n"
            f" 段落数: {paras:>8,}\n"
            f" 単語数: {words:>8,}\n"
            f" ユニーク語数: {unique:>8,}\n"
            f" 文数 (句点等): {sents:>8,}\n"
            f" 平均語長: {avg_wl:>8.2f} 文字\n"
            f" 平均文長: {avg_sw:>8.1f} 語/文\n"
            f" 日本語文字率: {jp_rate:>7.1f}%\n"
            f"\n━━ 可読性 ━━━━━━━━━━━━━━━━━━━\n"
        )
        # Flesch Reading Ease, simplified and English-oriented: syllables are
        # approximated by the number of vowels (at least one per word).
        syllables = sum(max(1, len(re.findall(r"[aeiouAEIOU]", w)))
                        for w in words_raw)
        fk = max(0, 206.835 - 1.015 * (words / sents)
                 - 84.6 * (syllables / max(1, words)))
        stat_str += f" Flesch Reading Ease: {fk:.1f}\n"

        # --- word frequency ---
        tokens = [w.lower() for w in words_raw]
        if use_stop:
            tokens = [t for t in tokens
                      if t not in cls.STOP_WORDS and len(t) > 1]
        counter = collections.Counter(tokens)
        top50 = counter.most_common(50)
        total_t = sum(counter.values())

        # --- character classes ---
        hiragana = len(re.findall(r"[\u3041-\u3096]", text))
        katakana = len(re.findall(r"[\u30a1-\u30f6]", text))
        kanji = len(re.findall(r"[\u4e00-\u9fff]", text))
        ascii_a = len(re.findall(r"[a-zA-Z]", text))
        digits = len(re.findall(r"\d", text))
        punct = len(cls._PUNCT_RE.findall(text))
        char_str = (
            f"━━ 文字種別 ━━━━━━━━━━━━━━━━━\n"
            f" ひらがな: {hiragana:>7,}\n"
            f" カタカナ: {katakana:>7,}\n"
            f" 漢字: {kanji:>7,}\n"
            f" ASCII英字:{ascii_a:>7,}\n"
            f" 数字: {digits:>7,}\n"
            f" 句読点等: {punct:>7,}\n"
        )
        return stat_str, top50, total_t, char_str

    def _draw_chart(self, pairs):
        """Draw a horizontal bar chart for ``(word, count)`` *pairs*."""
        words = [w for w, _ in pairs]
        counts = [c for _, c in pairs]
        self.ax.clear()
        self.ax.set_facecolor("#0d1117")
        # Reversed so the most frequent word ends up at the top.
        self.ax.barh(words[::-1], counts[::-1], color="#4fc3f7")
        self.ax.set_title("単語頻度 Top 20", color="#c9d1d9", fontsize=10)
        self.ax.tick_params(colors="#8b949e", labelsize=7)
        for spine in self.ax.spines.values():
            spine.set_edgecolor("#30363d")
        self.ax.grid(True, axis="x", color="#21262d", linewidth=0.5)
        self.fig.tight_layout()
        self.mpl_canvas.draw()

    def _update_ui(self, stat_str, top50, total_t, char_str):
        """Show analysis results in all tabs (runs on the Tk thread).

        Args:
            stat_str: Formatted basic-statistics text.
            top50: Most common ``(token, count)`` pairs, most frequent first.
            total_t: Number of counted tokens (denominator for percentages).
            char_str: Formatted character-class text.
        """
        self._set_readonly_text(self.stat_text, stat_str)

        self.freq_tree.delete(*self.freq_tree.get_children())
        for i, (word, cnt) in enumerate(top50, 1):
            pct = cnt / max(1, total_t) * 100
            self.freq_tree.insert("", tk.END,
                                  values=(i, word, cnt, f"{pct:.2f}%"))

        if MATPLOTLIB_AVAILABLE:
            if top50:
                self._draw_chart(top50[:20])
            else:
                # No tokens left: do not keep the previous run's bars.
                self._reset_chart()

        self._set_readonly_text(self.char_text, char_str)

        top_word, top_count = top50[0] if top50 else ("-", 0)
        self.status_var.set(
            f"分析完了: {len(self._text):,} 文字 Top語: "
            f"{top_word} ({top_count} 回)")
# Script entry point: create the Tk root window, attach the application to it
# and run the Tk event loop until the window is closed.
if __name__ == "__main__":
    root = tk.Tk()
    app = App080(root)
    root.mainloop()
イベント処理
ボタンのcommand引数でクリックイベントを、bind('<<Modified>>', ...)でテキスト変更イベントを処理します。
import tkinter as tk
from tkinter import ttk, messagebox, filedialog
import re
import math
import threading
import collections
try:
import matplotlib
matplotlib.use("TkAgg")
from matplotlib.figure import Figure
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
MATPLOTLIB_AVAILABLE = True
except ImportError:
MATPLOTLIB_AVAILABLE = False
class App080:
"""テキスト分析ツール"""
# 日本語・英語ストップワード(簡易版)
STOP_WORDS = {
"の", "に", "は", "を", "た", "が", "で", "て", "と", "し", "れ", "さ",
"ある", "いる", "も", "する", "から", "な", "こと", "として", "い", "や",
"れる", "など", "なっ", "ない", "この", "ため", "その", "あっ", "よう",
"また", "もの", "という", "あり", "まで", "られ", "なる", "へ", "か",
"だ", "これ", "によって", "により", "おり", "より", "による", "ず", "なり",
"a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
"have", "has", "had", "do", "does", "did", "will", "would", "could",
"should", "may", "might", "shall", "can", "to", "of", "in", "for",
"on", "with", "at", "by", "from", "as", "or", "and", "but", "not",
"this", "that", "it", "its", "i", "you", "he", "she", "we", "they",
}
def __init__(self, root):
    """Set up the main window and build the widget tree.

    Args:
        root: The Tk window that hosts the application.
    """
    # Window chrome first, then keep the reference for later callbacks.
    root.title("テキスト分析ツール")
    root.geometry("1050x660")
    root.configure(bg="#1e1e1e")
    self.root = root
    # Text captured by the most recent analysis request.
    self._text = ""
    self._build_ui()
def _build_ui(self):
    """Create the header, toolbar, input pane, result tabs and status bar.

    Widgets that other methods need later are stored on ``self``:
    ``stop_var``, ``text_input``, ``stat_text``, ``freq_tree``,
    ``char_text``, ``status_var`` and, when matplotlib is available,
    ``fig``, ``ax`` and ``mpl_canvas``.
    """
    header = tk.Frame(self.root, bg="#252526", pady=6)
    header.pack(fill=tk.X)
    tk.Label(header, text="📝 テキスト分析ツール",
             font=("Noto Sans JP", 12, "bold"),
             bg="#252526", fg="#4fc3f7").pack(side=tk.LEFT, padx=12)
    # Toolbar
    tb = tk.Frame(self.root, bg="#2d2d2d", pady=4)
    tb.pack(fill=tk.X)
    ttk.Button(tb, text="📂 ファイルを開く",
               command=self._open_file).pack(side=tk.LEFT, padx=4)
    tk.Button(tb, text="▶ 分析", command=self._analyze,
              bg="#1565c0", fg="white", relief=tk.FLAT,
              font=("Arial", 10, "bold"), padx=12, pady=4,
              activebackground="#0d47a1", bd=0).pack(side=tk.LEFT, padx=4)
    ttk.Button(tb, text="🗑 クリア",
               command=self._clear).pack(side=tk.LEFT, padx=4)
    # Checkbox state: whether stop words are excluded (on by default).
    self.stop_var = tk.BooleanVar(value=True)
    tk.Checkbutton(tb, text="ストップワード除外",
                   variable=self.stop_var,
                   bg="#2d2d2d", fg="#ccc", selectcolor="#3c3c3c",
                   activebackground="#2d2d2d").pack(side=tk.LEFT, padx=8)
    # Main area (PanedWindow)
    paned = ttk.PanedWindow(self.root, orient=tk.HORIZONTAL)
    paned.pack(fill=tk.BOTH, expand=True, padx=4, pady=4)
    # Left: text input
    left = tk.Frame(paned, bg="#1e1e1e")
    paned.add(left, weight=1)
    tk.Label(left, text="テキスト入力", bg="#1e1e1e", fg="#888",
             font=("Arial", 9)).pack(anchor="w")
    self.text_input = tk.Text(left, bg="#0d1117", fg="#c9d1d9",
                              font=("Arial", 10), relief=tk.FLAT,
                              insertbackground="white",
                              wrap=tk.WORD)
    txsb = ttk.Scrollbar(left, command=self.text_input.yview)
    self.text_input.configure(yscrollcommand=txsb.set)
    # The scrollbar is packed before the text so it keeps the right edge.
    txsb.pack(side=tk.RIGHT, fill=tk.Y)
    self.text_input.pack(fill=tk.BOTH, expand=True)
    self.text_input.bind("<<Modified>>", self._on_text_change)
    # Right: analysis results
    right = tk.Frame(paned, bg="#1e1e1e")
    paned.add(right, weight=1)
    # Notebook holding the result tabs
    nb = ttk.Notebook(right)
    nb.pack(fill=tk.BOTH, expand=True)
    # Tab 1: basic statistics (read-only text)
    tab_stat = tk.Frame(nb, bg="#1e1e1e")
    nb.add(tab_stat, text="基本統計")
    self.stat_text = tk.Text(tab_stat, bg="#0d1117", fg="#c9d1d9",
                             font=("Courier New", 9), relief=tk.FLAT,
                             state=tk.DISABLED)
    self.stat_text.pack(fill=tk.BOTH, expand=True)
    # Tab 2: word frequency table
    tab_freq = tk.Frame(nb, bg="#1e1e1e")
    nb.add(tab_freq, text="単語頻度")
    cols = ("rank", "word", "count", "pct")
    self.freq_tree = ttk.Treeview(tab_freq, columns=cols,
                                  show="headings", height=18)
    self.freq_tree.heading("rank", text="#")
    self.freq_tree.heading("word", text="単語")
    self.freq_tree.heading("count", text="出現数")
    self.freq_tree.heading("pct", text="割合")
    self.freq_tree.column("rank", width=40, anchor="center")
    self.freq_tree.column("word", width=140, anchor="w")
    self.freq_tree.column("count", width=70, anchor="center")
    self.freq_tree.column("pct", width=70, anchor="center")
    fsb = ttk.Scrollbar(tab_freq, command=self.freq_tree.yview)
    self.freq_tree.configure(yscrollcommand=fsb.set)
    fsb.pack(side=tk.RIGHT, fill=tk.Y)
    self.freq_tree.pack(fill=tk.BOTH, expand=True)
    # Tab 3: chart (embedded matplotlib figure, or a hint label without it)
    tab_chart = tk.Frame(nb, bg="#0d1117")
    nb.add(tab_chart, text="グラフ")
    if MATPLOTLIB_AVAILABLE:
        self.fig = Figure(figsize=(5, 4), facecolor="#0d1117")
        self.ax = self.fig.add_subplot(111)
        self.mpl_canvas = FigureCanvasTkAgg(self.fig, master=tab_chart)
        self.mpl_canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)
    else:
        tk.Label(tab_chart,
                 text="pip install matplotlib でグラフが表示されます",
                 bg="#0d1117", fg="#555").pack(expand=True)
    # Tab 4: character-class breakdown (read-only text)
    tab_char = tk.Frame(nb, bg="#1e1e1e")
    nb.add(tab_char, text="文字解析")
    self.char_text = tk.Text(tab_char, bg="#0d1117", fg="#c9d1d9",
                             font=("Courier New", 9), relief=tk.FLAT,
                             state=tk.DISABLED)
    self.char_text.pack(fill=tk.BOTH, expand=True)
    # Status bar at the bottom of the window
    self.status_var = tk.StringVar(value="テキストを入力または読み込んでください")
    tk.Label(self.root, textvariable=self.status_var,
             bg="#252526", fg="#858585", font=("Arial", 9),
             anchor="w", padx=8).pack(fill=tk.X, side=tk.BOTTOM)
# ── ファイル ─────────────────────────────────────────────
def _open_file(self):
    """Ask the user for a text file and load its contents into the input box.

    The file is decoded by trying several encodings in order. If the file
    cannot be read, or none of the encodings decodes it, an error dialog is
    shown and the current input text is left untouched.
    """
    path = filedialog.askopenfilename(
        filetypes=[("テキスト", "*.txt *.md *.csv"), ("すべて", "*.*")])
    if not path:
        return
    # euc-jp is tried before the Shift_JIS family: EUC-JP bytes usually
    # decode without error as Shift_JIS/cp932 half-width kana (mojibake),
    # while Shift_JIS kana lead bytes (0x82/0x83) are rejected by euc-jp.
    encodings = ("utf-8-sig", "utf-8", "euc-jp", "shift-jis", "cp932")
    text = None
    try:
        for enc in encodings:
            try:
                with open(path, encoding=enc) as f:
                    text = f.read()
                break
            except UnicodeDecodeError:
                continue
    except OSError as exc:
        messagebox.showerror("エラー", f"ファイルを開けませんでした:\n{exc}")
        return
    if text is None:
        # Every candidate encoding failed; do not touch the input box.
        messagebox.showerror("エラー", "ファイルの文字コードを判別できませんでした")
        return
    self.text_input.delete("1.0", tk.END)
    self.text_input.insert("1.0", text)
    self.status_var.set(f"読み込み: {path}")
def _on_text_change(self, _=None):
self.text_input.edit_modified(False)
def _clear(self):
self.text_input.delete("1.0", tk.END)
for widget in (self.stat_text, self.char_text):
widget.configure(state=tk.NORMAL)
widget.delete("1.0", tk.END)
widget.configure(state=tk.DISABLED)
self.freq_tree.delete(*self.freq_tree.get_children())
# ── 分析 ─────────────────────────────────────────────────
def _analyze(self):
text = self.text_input.get("1.0", tk.END).strip()
if not text:
messagebox.showwarning("警告", "テキストを入力してください")
return
self._text = text
self.status_var.set("分析中...")
threading.Thread(target=self._do_analyze, daemon=True).start()
def _do_analyze(self):
text = self._text
use_stop = self.stop_var.get()
# --- 基本統計 ---
chars = len(text)
chars_ns = len(text.replace(" ", "").replace("\n", ""))
lines = text.count("\n") + 1
words_raw = re.findall(r"\w+", text, re.UNICODE)
words = len(words_raw)
sents = len(re.findall(r"[。.!?!?]", text)) or 1
paras = len([p for p in text.split("\n\n") if p.strip()])
avg_wl = sum(len(w) for w in words_raw) / max(1, words)
avg_sw = words / sents
# ユニーク語数
unique = len(set(w.lower() for w in words_raw))
# 日本語文字率
jp_chars = len(re.findall(r"[\u3040-\u9fff]", text))
jp_rate = jp_chars / max(1, chars) * 100
stat_str = (
f"━━ 基本統計 ━━━━━━━━━━━━━━━━━━\n"
f" 文字数 (全体): {chars:>8,}\n"
f" 文字数 (空白除): {chars_ns:>8,}\n"
f" 行数: {lines:>8,}\n"
f" 段落数: {paras:>8,}\n"
f" 単語数: {words:>8,}\n"
f" ユニーク語数: {unique:>8,}\n"
f" 文数 (句点等): {sents:>8,}\n"
f" 平均語長: {avg_wl:>8.2f} 文字\n"
f" 平均文長: {avg_sw:>8.1f} 語/文\n"
f" 日本語文字率: {jp_rate:>7.1f}%\n"
f"\n━━ 可読性 ━━━━━━━━━━━━━━━━━━━\n"
)
# Flesch-Kincaid (英語向け簡易計算)
syllables = sum(max(1, len(re.findall(r"[aeiouAEIOU]", w)))
for w in words_raw)
fk = max(0, 206.835 - 1.015 * (words / sents)
- 84.6 * (syllables / max(1, words)))
stat_str += f" Flesch Reading Ease: {fk:.1f}\n"
# --- 単語頻度 ---
tokens = [w.lower() for w in words_raw]
if use_stop:
tokens = [t for t in tokens if t not in self.STOP_WORDS and len(t) > 1]
counter = collections.Counter(tokens)
top50 = counter.most_common(50)
total_t = sum(counter.values())
# --- 文字解析 ---
hiragana = len(re.findall(r"[\u3041-\u3096]", text))
katakana = len(re.findall(r"[\u30a1-\u30f6]", text))
kanji = len(re.findall(r"[\u4e00-\u9fff]", text))
ascii_a = len(re.findall(r"[a-zA-Z]", text))
digits = len(re.findall(r"\d", text))
punct = len(re.findall(r"[、。,.「」『』【】・…—\-!?!?]", text))
char_str = (
f"━━ 文字種別 ━━━━━━━━━━━━━━━━━\n"
f" ひらがな: {hiragana:>7,}\n"
f" カタカナ: {katakana:>7,}\n"
f" 漢字: {kanji:>7,}\n"
f" ASCII英字:{ascii_a:>7,}\n"
f" 数字: {digits:>7,}\n"
f" 句読点等: {punct:>7,}\n"
)
self.root.after(0, self._update_ui, stat_str, top50, total_t, char_str)
def _update_ui(self, stat_str, top50, total_t, char_str):
    """Render analysis results into the result tabs and the status bar.

    Args:
        stat_str: Report text for the basic-statistics tab.
        top50: ``(word, count)`` pairs, most frequent first.
        total_t: Total token count; denominator of the share column.
        char_str: Report text for the character-analysis tab.
    """
    # Basic statistics (widget is read-only, so unlock while writing)
    self.stat_text.configure(state=tk.NORMAL)
    self.stat_text.delete("1.0", tk.END)
    self.stat_text.insert("1.0", stat_str)
    self.stat_text.configure(state=tk.DISABLED)

    # Word-frequency table
    self.freq_tree.delete(*self.freq_tree.get_children())
    for i, (word, cnt) in enumerate(top50, 1):
        pct = cnt / max(1, total_t) * 100
        self.freq_tree.insert("", tk.END,
                              values=(i, word, cnt, f"{pct:.2f}%"))

    # Chart: always cleared first so that a result without any tokens does
    # not leave the previous analysis' bars on screen.
    if MATPLOTLIB_AVAILABLE:
        self.ax.clear()
        self.ax.set_facecolor("#0d1117")
        if top50:
            top20 = top50[:20]
            # barh draws upwards from the bottom, so reverse the order to
            # put the most frequent word at the top.
            labels = [w for w, _ in reversed(top20)]
            values = [c for _, c in reversed(top20)]
            self.ax.barh(labels, values, color="#4fc3f7")
            self.ax.set_title("単語頻度 Top 20", color="#c9d1d9", fontsize=10)
            self.ax.tick_params(colors="#8b949e", labelsize=7)
            for spine in self.ax.spines.values():
                spine.set_edgecolor("#30363d")
            self.ax.grid(True, axis="x", color="#21262d", linewidth=0.5)
            self.fig.tight_layout()
        self.mpl_canvas.draw()

    # Character analysis
    self.char_text.configure(state=tk.NORMAL)
    self.char_text.delete("1.0", tk.END)
    self.char_text.insert("1.0", char_str)
    self.char_text.configure(state=tk.DISABLED)

    top_word, top_count = top50[0] if top50 else ("-", 0)
    self.status_var.set(
        f"分析完了: {len(self._text):,} 文字 Top語: "
        f"{top_word} ({top_count} 回)")
if __name__ == "__main__":
    # Script entry point: create the Tk root window, attach the app to it
    # and run the event loop until the window is closed.
    root = tk.Tk()
    app = App080(root)
    root.mainloop()
Textウィジェットでの結果表示
tk.Textウィジェットをstate=DISABLED(読み取り専用)で作成し、更新時はNORMALに変更してinsert()で内容を書き込み、再びDISABLEDに戻します。
import tkinter as tk
from tkinter import ttk, messagebox, filedialog
import re
import math
import threading
import collections
try:
import matplotlib
matplotlib.use("TkAgg")
from matplotlib.figure import Figure
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
MATPLOTLIB_AVAILABLE = True
except ImportError:
MATPLOTLIB_AVAILABLE = False
class App080:
    """Text analysis tool: a Tkinter GUI that reports statistics for a text.

    The window has a text input pane on the left and a notebook on the
    right with four tabs: basic statistics, word frequency, a bar chart
    (drawn only when matplotlib could be imported) and a character-class
    breakdown.
    """

    # Simplified Japanese / English stop-word list.
    STOP_WORDS = {
        "の", "に", "は", "を", "た", "が", "で", "て", "と", "し", "れ", "さ",
        "ある", "いる", "も", "する", "から", "な", "こと", "として", "い", "や",
        "れる", "など", "なっ", "ない", "この", "ため", "その", "あっ", "よう",
        "また", "もの", "という", "あり", "まで", "られ", "なる", "へ", "か",
        "だ", "これ", "によって", "により", "おり", "より", "による", "ず", "なり",
        "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
        "have", "has", "had", "do", "does", "did", "will", "would", "could",
        "should", "may", "might", "shall", "can", "to", "of", "in", "for",
        "on", "with", "at", "by", "from", "as", "or", "and", "but", "not",
        "this", "that", "it", "its", "i", "you", "he", "she", "we", "they",
    }

    def __init__(self, root):
        """Configure the root window and build all widgets.

        Args:
            root: The Tk root window the application is drawn into.
        """
        self.root = root
        self.root.title("テキスト分析ツール")
        self.root.geometry("1050x660")
        self.root.configure(bg="#1e1e1e")
        self._text = ""  # text captured by the most recent _analyze() call
        self._build_ui()

    def _build_ui(self):
        """Create the header, toolbar, input pane, result tabs and status bar."""
        header = tk.Frame(self.root, bg="#252526", pady=6)
        header.pack(fill=tk.X)
        tk.Label(header, text="📝 テキスト分析ツール",
                 font=("Noto Sans JP", 12, "bold"),
                 bg="#252526", fg="#4fc3f7").pack(side=tk.LEFT, padx=12)

        # Toolbar
        tb = tk.Frame(self.root, bg="#2d2d2d", pady=4)
        tb.pack(fill=tk.X)
        ttk.Button(tb, text="📂 ファイルを開く",
                   command=self._open_file).pack(side=tk.LEFT, padx=4)
        tk.Button(tb, text="▶ 分析", command=self._analyze,
                  bg="#1565c0", fg="white", relief=tk.FLAT,
                  font=("Arial", 10, "bold"), padx=12, pady=4,
                  activebackground="#0d47a1", bd=0).pack(side=tk.LEFT, padx=4)
        ttk.Button(tb, text="🗑 クリア",
                   command=self._clear).pack(side=tk.LEFT, padx=4)
        self.stop_var = tk.BooleanVar(value=True)
        tk.Checkbutton(tb, text="ストップワード除外",
                       variable=self.stop_var,
                       bg="#2d2d2d", fg="#ccc", selectcolor="#3c3c3c",
                       activebackground="#2d2d2d").pack(side=tk.LEFT, padx=8)

        # Main area (PanedWindow)
        paned = ttk.PanedWindow(self.root, orient=tk.HORIZONTAL)
        paned.pack(fill=tk.BOTH, expand=True, padx=4, pady=4)

        # Left: text input
        left = tk.Frame(paned, bg="#1e1e1e")
        paned.add(left, weight=1)
        tk.Label(left, text="テキスト入力", bg="#1e1e1e", fg="#888",
                 font=("Arial", 9)).pack(anchor="w")
        self.text_input = tk.Text(left, bg="#0d1117", fg="#c9d1d9",
                                  font=("Arial", 10), relief=tk.FLAT,
                                  insertbackground="white",
                                  wrap=tk.WORD)
        txsb = ttk.Scrollbar(left, command=self.text_input.yview)
        self.text_input.configure(yscrollcommand=txsb.set)
        txsb.pack(side=tk.RIGHT, fill=tk.Y)
        self.text_input.pack(fill=tk.BOTH, expand=True)
        self.text_input.bind("<<Modified>>", self._on_text_change)

        # Right: analysis results
        right = tk.Frame(paned, bg="#1e1e1e")
        paned.add(right, weight=1)

        # Result notebook
        nb = ttk.Notebook(right)
        nb.pack(fill=tk.BOTH, expand=True)

        # Tab 1: basic statistics
        tab_stat = tk.Frame(nb, bg="#1e1e1e")
        nb.add(tab_stat, text="基本統計")
        self.stat_text = tk.Text(tab_stat, bg="#0d1117", fg="#c9d1d9",
                                 font=("Courier New", 9), relief=tk.FLAT,
                                 state=tk.DISABLED)
        self.stat_text.pack(fill=tk.BOTH, expand=True)

        # Tab 2: word frequency
        tab_freq = tk.Frame(nb, bg="#1e1e1e")
        nb.add(tab_freq, text="単語頻度")
        # (column id, heading text, width, anchor)
        columns = (
            ("rank", "#", 40, "center"),
            ("word", "単語", 140, "w"),
            ("count", "出現数", 70, "center"),
            ("pct", "割合", 70, "center"),
        )
        self.freq_tree = ttk.Treeview(
            tab_freq, columns=tuple(col[0] for col in columns),
            show="headings", height=18)
        for col_id, title, width, anchor in columns:
            self.freq_tree.heading(col_id, text=title)
            self.freq_tree.column(col_id, width=width, anchor=anchor)
        fsb = ttk.Scrollbar(tab_freq, command=self.freq_tree.yview)
        self.freq_tree.configure(yscrollcommand=fsb.set)
        fsb.pack(side=tk.RIGHT, fill=tk.Y)
        self.freq_tree.pack(fill=tk.BOTH, expand=True)

        # Tab 3: chart (fig / ax / mpl_canvas exist only with matplotlib)
        tab_chart = tk.Frame(nb, bg="#0d1117")
        nb.add(tab_chart, text="グラフ")
        if MATPLOTLIB_AVAILABLE:
            self.fig = Figure(figsize=(5, 4), facecolor="#0d1117")
            self.ax = self.fig.add_subplot(111)
            self.mpl_canvas = FigureCanvasTkAgg(self.fig, master=tab_chart)
            self.mpl_canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)
        else:
            tk.Label(tab_chart,
                     text="pip install matplotlib でグラフが表示されます",
                     bg="#0d1117", fg="#555").pack(expand=True)

        # Tab 4: character analysis
        tab_char = tk.Frame(nb, bg="#1e1e1e")
        nb.add(tab_char, text="文字解析")
        self.char_text = tk.Text(tab_char, bg="#0d1117", fg="#c9d1d9",
                                 font=("Courier New", 9), relief=tk.FLAT,
                                 state=tk.DISABLED)
        self.char_text.pack(fill=tk.BOTH, expand=True)

        # Status bar
        self.status_var = tk.StringVar(value="テキストを入力または読み込んでください")
        tk.Label(self.root, textvariable=self.status_var,
                 bg="#252526", fg="#858585", font=("Arial", 9),
                 anchor="w", padx=8).pack(fill=tk.X, side=tk.BOTTOM)

    # ── File ────────────────────────────────────────────────
    def _open_file(self):
        """Ask the user for a text file and load its contents into the input box.

        The file is decoded by trying several encodings in order. If the
        file cannot be read, or none of the encodings decodes it, an error
        dialog is shown and the current input text is left untouched.
        """
        path = filedialog.askopenfilename(
            filetypes=[("テキスト", "*.txt *.md *.csv"), ("すべて", "*.*")])
        if not path:
            return
        # euc-jp is tried before the Shift_JIS family: EUC-JP bytes usually
        # decode without error as Shift_JIS/cp932 half-width kana
        # (mojibake), while Shift_JIS kana lead bytes (0x82/0x83) are
        # rejected by euc-jp.
        encodings = ("utf-8-sig", "utf-8", "euc-jp", "shift-jis", "cp932")
        text = None
        try:
            for enc in encodings:
                try:
                    with open(path, encoding=enc) as f:
                        text = f.read()
                    break
                except UnicodeDecodeError:
                    continue
        except OSError as exc:
            messagebox.showerror("エラー", f"ファイルを開けませんでした:\n{exc}")
            return
        if text is None:
            # Every candidate encoding failed; do not touch the input box.
            messagebox.showerror("エラー", "ファイルの文字コードを判別できませんでした")
            return
        self.text_input.delete("1.0", tk.END)
        self.text_input.insert("1.0", text)
        self.status_var.set(f"読み込み: {path}")

    def _on_text_change(self, _=None):
        """Handle ``<<Modified>>``: reset the input widget's modified flag.

        Resetting the flag lets the next edit raise the event again. The
        event argument is accepted but not used.
        """
        # Only reset when the flag is set; the handler is also invoked for
        # the flag change caused by the reset itself.
        if self.text_input.edit_modified():
            self.text_input.edit_modified(False)

    def _clear(self):
        """Erase the input text and every result view.

        Empties the input box, both read-only result Text widgets and the
        frequency table, blanks the chart when one exists, and restores the
        initial status-bar message.
        """
        self.text_input.delete("1.0", tk.END)
        for widget in (self.stat_text, self.char_text):
            # Result widgets are kept DISABLED (read-only); unlock to edit.
            widget.configure(state=tk.NORMAL)
            widget.delete("1.0", tk.END)
            widget.configure(state=tk.DISABLED)
        self.freq_tree.delete(*self.freq_tree.get_children())
        # mpl_canvas is only created when matplotlib could be imported.
        canvas = getattr(self, "mpl_canvas", None)
        if canvas is not None:
            self.ax.clear()
            self.ax.set_facecolor("#0d1117")
            canvas.draw()
        self.status_var.set("テキストを入力または読み込んでください")

    # ── Analysis ────────────────────────────────────────────
    def _analyze(self):
        """Start an analysis of the text currently in the input box.

        The stripped text is stored in ``self._text`` and ``_do_analyze``
        is started on a daemon thread. When the input is empty a warning
        dialog is shown instead and nothing is started.
        """
        text = self.text_input.get("1.0", tk.END).strip()
        if not text:
            messagebox.showwarning("警告", "テキストを入力してください")
            return
        self._text = text
        self.status_var.set("分析中...")
        threading.Thread(target=self._do_analyze, daemon=True).start()

    def _do_analyze(self):
        """Compute all statistics for ``self._text`` and schedule the UI update.

        Started on a worker thread by ``_analyze``. No widget is modified
        here: the finished report strings and the frequency list are handed
        to ``_update_ui`` through ``self.root.after``.
        """
        text = self._text
        use_stop = self.stop_var.get()

        # --- Basic statistics ---
        chars = len(text)
        # str.split() without arguments splits on every Unicode whitespace
        # character, so tabs, CRs and full-width spaces (U+3000) are
        # excluded as well, not just " " and "\n".
        chars_ns = len("".join(text.split()))
        lines = text.count("\n") + 1
        words_raw = re.findall(r"\w+", text, re.UNICODE)
        words = len(words_raw)
        # A run of terminators ("...", "?!") closes a single sentence, so
        # runs are counted once; text without a terminator counts as 1.
        sents = len(re.findall(r"[。.!?!?]+", text)) or 1
        paras = len([p for p in text.split("\n\n") if p.strip()])
        avg_wl = sum(len(w) for w in words_raw) / max(1, words)
        avg_sw = words / sents
        # Unique word count (case-insensitive)
        unique = len(set(w.lower() for w in words_raw))
        # Share of characters in the U+3040..U+9FFF range
        jp_chars = len(re.findall(r"[\u3040-\u9fff]", text))
        jp_rate = jp_chars / max(1, chars) * 100
        stat_str = (
            f"━━ 基本統計 ━━━━━━━━━━━━━━━━━━\n"
            f" 文字数 (全体): {chars:>8,}\n"
            f" 文字数 (空白除): {chars_ns:>8,}\n"
            f" 行数: {lines:>8,}\n"
            f" 段落数: {paras:>8,}\n"
            f" 単語数: {words:>8,}\n"
            f" ユニーク語数: {unique:>8,}\n"
            f" 文数 (句点等): {sents:>8,}\n"
            f" 平均語長: {avg_wl:>8.2f} 文字\n"
            f" 平均文長: {avg_sw:>8.1f} 語/文\n"
            f" 日本語文字率: {jp_rate:>7.1f}%\n"
            f"\n━━ 可読性 ━━━━━━━━━━━━━━━━━━━\n"
        )
        # Flesch Reading Ease (simplified, English-oriented): syllables are
        # approximated by the number of ASCII vowels, at least 1 per word.
        syllables = sum(max(1, len(re.findall(r"[aeiouAEIOU]", w)))
                        for w in words_raw)
        fk = max(0, 206.835 - 1.015 * (words / sents)
                 - 84.6 * (syllables / max(1, words)))
        stat_str += f" Flesch Reading Ease: {fk:.1f}\n"

        # --- Word frequency ---
        tokens = [w.lower() for w in words_raw]
        if use_stop:
            # Stop-word mode also drops single-character tokens.
            tokens = [t for t in tokens
                      if t not in self.STOP_WORDS and len(t) > 1]
        counter = collections.Counter(tokens)
        top50 = counter.most_common(50)
        total_t = sum(counter.values())

        # --- Character classes ---
        hiragana = len(re.findall(r"[\u3041-\u3096]", text))
        katakana = len(re.findall(r"[\u30a1-\u30f6]", text))
        kanji = len(re.findall(r"[\u4e00-\u9fff]", text))
        ascii_a = len(re.findall(r"[a-zA-Z]", text))
        digits = len(re.findall(r"\d", text))
        punct = len(re.findall(r"[、。,.「」『』【】・…—\-!?!?]", text))
        char_str = (
            f"━━ 文字種別 ━━━━━━━━━━━━━━━━━\n"
            f" ひらがな: {hiragana:>7,}\n"
            f" カタカナ: {katakana:>7,}\n"
            f" 漢字: {kanji:>7,}\n"
            f" ASCII英字:{ascii_a:>7,}\n"
            f" 数字: {digits:>7,}\n"
            f" 句読点等: {punct:>7,}\n"
        )
        # Hand the results to the Tk event loop instead of touching widgets
        # from this thread.
        self.root.after(0, self._update_ui, stat_str, top50, total_t, char_str)

    def _update_ui(self, stat_str, top50, total_t, char_str):
        """Render analysis results into the result tabs and the status bar.

        Args:
            stat_str: Report text for the basic-statistics tab.
            top50: ``(word, count)`` pairs, most frequent first.
            total_t: Total token count; denominator of the share column.
            char_str: Report text for the character-analysis tab.
        """
        # Basic statistics (widget is read-only, so unlock while writing)
        self.stat_text.configure(state=tk.NORMAL)
        self.stat_text.delete("1.0", tk.END)
        self.stat_text.insert("1.0", stat_str)
        self.stat_text.configure(state=tk.DISABLED)

        # Word-frequency table
        self.freq_tree.delete(*self.freq_tree.get_children())
        for i, (word, cnt) in enumerate(top50, 1):
            pct = cnt / max(1, total_t) * 100
            self.freq_tree.insert("", tk.END,
                                  values=(i, word, cnt, f"{pct:.2f}%"))

        # Chart: always cleared first so that a result without any tokens
        # does not leave the previous analysis' bars on screen.
        if MATPLOTLIB_AVAILABLE:
            self.ax.clear()
            self.ax.set_facecolor("#0d1117")
            if top50:
                top20 = top50[:20]
                # barh draws upwards from the bottom, so reverse the order
                # to put the most frequent word at the top.
                labels = [w for w, _ in reversed(top20)]
                values = [c for _, c in reversed(top20)]
                self.ax.barh(labels, values, color="#4fc3f7")
                self.ax.set_title("単語頻度 Top 20", color="#c9d1d9", fontsize=10)
                self.ax.tick_params(colors="#8b949e", labelsize=7)
                for spine in self.ax.spines.values():
                    spine.set_edgecolor("#30363d")
                self.ax.grid(True, axis="x", color="#21262d", linewidth=0.5)
                self.fig.tight_layout()
            self.mpl_canvas.draw()

        # Character analysis
        self.char_text.configure(state=tk.NORMAL)
        self.char_text.delete("1.0", tk.END)
        self.char_text.insert("1.0", char_str)
        self.char_text.configure(state=tk.DISABLED)

        top_word, top_count = top50[0] if top50 else ("-", 0)
        self.status_var.set(
            f"分析完了: {len(self._text):,} 文字 Top語: "
            f"{top_word} ({top_count} 回)")
if __name__ == "__main__":
    # Script entry point: create the Tk root window, attach the app to it
    # and run the event loop until the window is closed.
    root = tk.Tk()
    app = App080(root)
    root.mainloop()
例外処理とエラーハンドリング
このアプリでは try-except で ImportError(matplotlib が未インストールの場合)と UnicodeDecodeError(ファイルの文字コードを順に試す場合)を捕捉し、入力が空のときは messagebox.showwarning() で警告を表示します。想定されるエラーを個別に処理することで、アプリの堅牢性が向上します。
import tkinter as tk
from tkinter import ttk, messagebox, filedialog
import re
import math
import threading
import collections
try:
import matplotlib
matplotlib.use("TkAgg")
from matplotlib.figure import Figure
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
MATPLOTLIB_AVAILABLE = True
except ImportError:
MATPLOTLIB_AVAILABLE = False
class App080:
    """Text analysis tool: a Tkinter GUI that reports statistics for a text.

    The window has a text input pane on the left and a notebook on the
    right with four tabs: basic statistics, word frequency, a bar chart
    (drawn only when matplotlib could be imported) and a character-class
    breakdown.
    """

    # Simplified Japanese / English stop-word list.
    STOP_WORDS = {
        "の", "に", "は", "を", "た", "が", "で", "て", "と", "し", "れ", "さ",
        "ある", "いる", "も", "する", "から", "な", "こと", "として", "い", "や",
        "れる", "など", "なっ", "ない", "この", "ため", "その", "あっ", "よう",
        "また", "もの", "という", "あり", "まで", "られ", "なる", "へ", "か",
        "だ", "これ", "によって", "により", "おり", "より", "による", "ず", "なり",
        "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
        "have", "has", "had", "do", "does", "did", "will", "would", "could",
        "should", "may", "might", "shall", "can", "to", "of", "in", "for",
        "on", "with", "at", "by", "from", "as", "or", "and", "but", "not",
        "this", "that", "it", "its", "i", "you", "he", "she", "we", "they",
    }

    def __init__(self, root):
        """Configure the root window and build all widgets.

        Args:
            root: The Tk root window the application is drawn into.
        """
        self.root = root
        self.root.title("テキスト分析ツール")
        self.root.geometry("1050x660")
        self.root.configure(bg="#1e1e1e")
        self._text = ""  # text captured by the most recent _analyze() call
        self._build_ui()

    def _build_ui(self):
        """Create the header, toolbar, input pane, result tabs and status bar."""
        header = tk.Frame(self.root, bg="#252526", pady=6)
        header.pack(fill=tk.X)
        tk.Label(header, text="📝 テキスト分析ツール",
                 font=("Noto Sans JP", 12, "bold"),
                 bg="#252526", fg="#4fc3f7").pack(side=tk.LEFT, padx=12)

        # Toolbar
        tb = tk.Frame(self.root, bg="#2d2d2d", pady=4)
        tb.pack(fill=tk.X)
        ttk.Button(tb, text="📂 ファイルを開く",
                   command=self._open_file).pack(side=tk.LEFT, padx=4)
        tk.Button(tb, text="▶ 分析", command=self._analyze,
                  bg="#1565c0", fg="white", relief=tk.FLAT,
                  font=("Arial", 10, "bold"), padx=12, pady=4,
                  activebackground="#0d47a1", bd=0).pack(side=tk.LEFT, padx=4)
        ttk.Button(tb, text="🗑 クリア",
                   command=self._clear).pack(side=tk.LEFT, padx=4)
        self.stop_var = tk.BooleanVar(value=True)
        tk.Checkbutton(tb, text="ストップワード除外",
                       variable=self.stop_var,
                       bg="#2d2d2d", fg="#ccc", selectcolor="#3c3c3c",
                       activebackground="#2d2d2d").pack(side=tk.LEFT, padx=8)

        # Main area (PanedWindow)
        paned = ttk.PanedWindow(self.root, orient=tk.HORIZONTAL)
        paned.pack(fill=tk.BOTH, expand=True, padx=4, pady=4)

        # Left: text input
        left = tk.Frame(paned, bg="#1e1e1e")
        paned.add(left, weight=1)
        tk.Label(left, text="テキスト入力", bg="#1e1e1e", fg="#888",
                 font=("Arial", 9)).pack(anchor="w")
        self.text_input = tk.Text(left, bg="#0d1117", fg="#c9d1d9",
                                  font=("Arial", 10), relief=tk.FLAT,
                                  insertbackground="white",
                                  wrap=tk.WORD)
        txsb = ttk.Scrollbar(left, command=self.text_input.yview)
        self.text_input.configure(yscrollcommand=txsb.set)
        txsb.pack(side=tk.RIGHT, fill=tk.Y)
        self.text_input.pack(fill=tk.BOTH, expand=True)
        self.text_input.bind("<<Modified>>", self._on_text_change)

        # Right: analysis results
        right = tk.Frame(paned, bg="#1e1e1e")
        paned.add(right, weight=1)

        # Result notebook
        nb = ttk.Notebook(right)
        nb.pack(fill=tk.BOTH, expand=True)

        # Tab 1: basic statistics
        tab_stat = tk.Frame(nb, bg="#1e1e1e")
        nb.add(tab_stat, text="基本統計")
        self.stat_text = tk.Text(tab_stat, bg="#0d1117", fg="#c9d1d9",
                                 font=("Courier New", 9), relief=tk.FLAT,
                                 state=tk.DISABLED)
        self.stat_text.pack(fill=tk.BOTH, expand=True)

        # Tab 2: word frequency
        tab_freq = tk.Frame(nb, bg="#1e1e1e")
        nb.add(tab_freq, text="単語頻度")
        # (column id, heading text, width, anchor)
        columns = (
            ("rank", "#", 40, "center"),
            ("word", "単語", 140, "w"),
            ("count", "出現数", 70, "center"),
            ("pct", "割合", 70, "center"),
        )
        self.freq_tree = ttk.Treeview(
            tab_freq, columns=tuple(col[0] for col in columns),
            show="headings", height=18)
        for col_id, title, width, anchor in columns:
            self.freq_tree.heading(col_id, text=title)
            self.freq_tree.column(col_id, width=width, anchor=anchor)
        fsb = ttk.Scrollbar(tab_freq, command=self.freq_tree.yview)
        self.freq_tree.configure(yscrollcommand=fsb.set)
        fsb.pack(side=tk.RIGHT, fill=tk.Y)
        self.freq_tree.pack(fill=tk.BOTH, expand=True)

        # Tab 3: chart (fig / ax / mpl_canvas exist only with matplotlib)
        tab_chart = tk.Frame(nb, bg="#0d1117")
        nb.add(tab_chart, text="グラフ")
        if MATPLOTLIB_AVAILABLE:
            self.fig = Figure(figsize=(5, 4), facecolor="#0d1117")
            self.ax = self.fig.add_subplot(111)
            self.mpl_canvas = FigureCanvasTkAgg(self.fig, master=tab_chart)
            self.mpl_canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)
        else:
            tk.Label(tab_chart,
                     text="pip install matplotlib でグラフが表示されます",
                     bg="#0d1117", fg="#555").pack(expand=True)

        # Tab 4: character analysis
        tab_char = tk.Frame(nb, bg="#1e1e1e")
        nb.add(tab_char, text="文字解析")
        self.char_text = tk.Text(tab_char, bg="#0d1117", fg="#c9d1d9",
                                 font=("Courier New", 9), relief=tk.FLAT,
                                 state=tk.DISABLED)
        self.char_text.pack(fill=tk.BOTH, expand=True)

        # Status bar
        self.status_var = tk.StringVar(value="テキストを入力または読み込んでください")
        tk.Label(self.root, textvariable=self.status_var,
                 bg="#252526", fg="#858585", font=("Arial", 9),
                 anchor="w", padx=8).pack(fill=tk.X, side=tk.BOTTOM)

    # ── File ────────────────────────────────────────────────
    def _open_file(self):
        """Ask the user for a text file and load its contents into the input box.

        The file is decoded by trying several encodings in order. If the
        file cannot be read, or none of the encodings decodes it, an error
        dialog is shown and the current input text is left untouched.
        """
        path = filedialog.askopenfilename(
            filetypes=[("テキスト", "*.txt *.md *.csv"), ("すべて", "*.*")])
        if not path:
            return
        # euc-jp is tried before the Shift_JIS family: EUC-JP bytes usually
        # decode without error as Shift_JIS/cp932 half-width kana
        # (mojibake), while Shift_JIS kana lead bytes (0x82/0x83) are
        # rejected by euc-jp.
        encodings = ("utf-8-sig", "utf-8", "euc-jp", "shift-jis", "cp932")
        text = None
        try:
            for enc in encodings:
                try:
                    with open(path, encoding=enc) as f:
                        text = f.read()
                    break
                except UnicodeDecodeError:
                    continue
        except OSError as exc:
            messagebox.showerror("エラー", f"ファイルを開けませんでした:\n{exc}")
            return
        if text is None:
            # Every candidate encoding failed; do not touch the input box.
            messagebox.showerror("エラー", "ファイルの文字コードを判別できませんでした")
            return
        self.text_input.delete("1.0", tk.END)
        self.text_input.insert("1.0", text)
        self.status_var.set(f"読み込み: {path}")

    def _on_text_change(self, _=None):
        """Handle ``<<Modified>>``: reset the input widget's modified flag.

        Resetting the flag lets the next edit raise the event again. The
        event argument is accepted but not used.
        """
        # Only reset when the flag is set; the handler is also invoked for
        # the flag change caused by the reset itself.
        if self.text_input.edit_modified():
            self.text_input.edit_modified(False)

    def _clear(self):
        """Erase the input text and every result view.

        Empties the input box, both read-only result Text widgets and the
        frequency table, blanks the chart when one exists, and restores the
        initial status-bar message.
        """
        self.text_input.delete("1.0", tk.END)
        for widget in (self.stat_text, self.char_text):
            # Result widgets are kept DISABLED (read-only); unlock to edit.
            widget.configure(state=tk.NORMAL)
            widget.delete("1.0", tk.END)
            widget.configure(state=tk.DISABLED)
        self.freq_tree.delete(*self.freq_tree.get_children())
        # mpl_canvas is only created when matplotlib could be imported.
        canvas = getattr(self, "mpl_canvas", None)
        if canvas is not None:
            self.ax.clear()
            self.ax.set_facecolor("#0d1117")
            canvas.draw()
        self.status_var.set("テキストを入力または読み込んでください")

    # ── Analysis ────────────────────────────────────────────
    def _analyze(self):
        """Start an analysis of the text currently in the input box.

        The stripped text is stored in ``self._text`` and ``_do_analyze``
        is started on a daemon thread. When the input is empty a warning
        dialog is shown instead and nothing is started.
        """
        text = self.text_input.get("1.0", tk.END).strip()
        if not text:
            messagebox.showwarning("警告", "テキストを入力してください")
            return
        self._text = text
        self.status_var.set("分析中...")
        threading.Thread(target=self._do_analyze, daemon=True).start()

    def _do_analyze(self):
        """Compute all statistics for ``self._text`` and schedule the UI update.

        Started on a worker thread by ``_analyze``. No widget is modified
        here: the finished report strings and the frequency list are handed
        to ``_update_ui`` through ``self.root.after``.
        """
        text = self._text
        use_stop = self.stop_var.get()

        # --- Basic statistics ---
        chars = len(text)
        # str.split() without arguments splits on every Unicode whitespace
        # character, so tabs, CRs and full-width spaces (U+3000) are
        # excluded as well, not just " " and "\n".
        chars_ns = len("".join(text.split()))
        lines = text.count("\n") + 1
        words_raw = re.findall(r"\w+", text, re.UNICODE)
        words = len(words_raw)
        # A run of terminators ("...", "?!") closes a single sentence, so
        # runs are counted once; text without a terminator counts as 1.
        sents = len(re.findall(r"[。.!?!?]+", text)) or 1
        paras = len([p for p in text.split("\n\n") if p.strip()])
        avg_wl = sum(len(w) for w in words_raw) / max(1, words)
        avg_sw = words / sents
        # Unique word count (case-insensitive)
        unique = len(set(w.lower() for w in words_raw))
        # Share of characters in the U+3040..U+9FFF range
        jp_chars = len(re.findall(r"[\u3040-\u9fff]", text))
        jp_rate = jp_chars / max(1, chars) * 100
        stat_str = (
            f"━━ 基本統計 ━━━━━━━━━━━━━━━━━━\n"
            f" 文字数 (全体): {chars:>8,}\n"
            f" 文字数 (空白除): {chars_ns:>8,}\n"
            f" 行数: {lines:>8,}\n"
            f" 段落数: {paras:>8,}\n"
            f" 単語数: {words:>8,}\n"
            f" ユニーク語数: {unique:>8,}\n"
            f" 文数 (句点等): {sents:>8,}\n"
            f" 平均語長: {avg_wl:>8.2f} 文字\n"
            f" 平均文長: {avg_sw:>8.1f} 語/文\n"
            f" 日本語文字率: {jp_rate:>7.1f}%\n"
            f"\n━━ 可読性 ━━━━━━━━━━━━━━━━━━━\n"
        )
        # Flesch Reading Ease (simplified, English-oriented): syllables are
        # approximated by the number of ASCII vowels, at least 1 per word.
        syllables = sum(max(1, len(re.findall(r"[aeiouAEIOU]", w)))
                        for w in words_raw)
        fk = max(0, 206.835 - 1.015 * (words / sents)
                 - 84.6 * (syllables / max(1, words)))
        stat_str += f" Flesch Reading Ease: {fk:.1f}\n"

        # --- Word frequency ---
        tokens = [w.lower() for w in words_raw]
        if use_stop:
            # Stop-word mode also drops single-character tokens.
            tokens = [t for t in tokens
                      if t not in self.STOP_WORDS and len(t) > 1]
        counter = collections.Counter(tokens)
        top50 = counter.most_common(50)
        total_t = sum(counter.values())

        # --- Character classes ---
        hiragana = len(re.findall(r"[\u3041-\u3096]", text))
        katakana = len(re.findall(r"[\u30a1-\u30f6]", text))
        kanji = len(re.findall(r"[\u4e00-\u9fff]", text))
        ascii_a = len(re.findall(r"[a-zA-Z]", text))
        digits = len(re.findall(r"\d", text))
        punct = len(re.findall(r"[、。,.「」『』【】・…—\-!?!?]", text))
        char_str = (
            f"━━ 文字種別 ━━━━━━━━━━━━━━━━━\n"
            f" ひらがな: {hiragana:>7,}\n"
            f" カタカナ: {katakana:>7,}\n"
            f" 漢字: {kanji:>7,}\n"
            f" ASCII英字:{ascii_a:>7,}\n"
            f" 数字: {digits:>7,}\n"
            f" 句読点等: {punct:>7,}\n"
        )
        # Hand the results to the Tk event loop instead of touching widgets
        # from this thread.
        self.root.after(0, self._update_ui, stat_str, top50, total_t, char_str)

    def _update_ui(self, stat_str, top50, total_t, char_str):
        """Render analysis results into the result tabs and the status bar.

        Args:
            stat_str: Report text for the basic-statistics tab.
            top50: ``(word, count)`` pairs, most frequent first.
            total_t: Total token count; denominator of the share column.
            char_str: Report text for the character-analysis tab.
        """
        # Basic statistics (widget is read-only, so unlock while writing)
        self.stat_text.configure(state=tk.NORMAL)
        self.stat_text.delete("1.0", tk.END)
        self.stat_text.insert("1.0", stat_str)
        self.stat_text.configure(state=tk.DISABLED)

        # Word-frequency table
        self.freq_tree.delete(*self.freq_tree.get_children())
        for i, (word, cnt) in enumerate(top50, 1):
            pct = cnt / max(1, total_t) * 100
            self.freq_tree.insert("", tk.END,
                                  values=(i, word, cnt, f"{pct:.2f}%"))

        # Chart: always cleared first so that a result without any tokens
        # does not leave the previous analysis' bars on screen.
        if MATPLOTLIB_AVAILABLE:
            self.ax.clear()
            self.ax.set_facecolor("#0d1117")
            if top50:
                top20 = top50[:20]
                # barh draws upwards from the bottom, so reverse the order
                # to put the most frequent word at the top.
                labels = [w for w, _ in reversed(top20)]
                values = [c for _, c in reversed(top20)]
                self.ax.barh(labels, values, color="#4fc3f7")
                self.ax.set_title("単語頻度 Top 20", color="#c9d1d9", fontsize=10)
                self.ax.tick_params(colors="#8b949e", labelsize=7)
                for spine in self.ax.spines.values():
                    spine.set_edgecolor("#30363d")
                self.ax.grid(True, axis="x", color="#21262d", linewidth=0.5)
                self.fig.tight_layout()
            self.mpl_canvas.draw()

        # Character analysis
        self.char_text.configure(state=tk.NORMAL)
        self.char_text.delete("1.0", tk.END)
        self.char_text.insert("1.0", char_str)
        self.char_text.configure(state=tk.DISABLED)

        top_word, top_count = top50[0] if top50 else ("-", 0)
        self.status_var.set(
            f"分析完了: {len(self._text):,} 文字 Top語: "
            f"{top_word} ({top_count} 回)")
if __name__ == "__main__":
    # Script entry point: create the Tk root window, attach the app to it
    # and run the event loop until the window is closed.
    root = tk.Tk()
    app = App080(root)
    root.mainloop()
6. ステップバイステップガイド
このアプリをゼロから自分で作る手順を解説します。コードをコピーするだけでなく、実際に手順を追って自分で書いてみましょう。
1. ファイルを作成する
   新しいファイルを作成して app080.py と保存します。
2. クラスの骨格を作る
   App080クラスを定義し、__init__とmainloop()の最小構成を作ります。
3. タイトルバーを作る
   Frameを使ってカラーバー付きのタイトルエリアを作ります。
4. 入力フォームを実装する
   FrameとTextウィジェット(スクロールバー付き)で入力エリアを作ります。
5. 処理ロジックを実装する
   _analyze()と_do_analyze()メソッドにメインロジックを実装します。
6. 結果表示を実装する
   Textウィジェット・Treeview・グラフに結果を表示する_update_ui()を実装します。
7. エラー処理を追加する
   try-exceptとmessageboxでエラーハンドリングを追加します。
7. カスタマイズアイデア
基本機能を習得したら、以下のカスタマイズに挑戦してみましょう。
💡 ダークモードを追加する
bg色・fg色を辞書で管理し、ボタン1つでダークモード・ライトモードを切り替えられるようにしましょう。
💡 データの保存機能
処理結果をCSV・TXTファイルに保存する機能を追加しましょう。filedialog.asksaveasfilename()でファイル保存ダイアログが使えます。
💡 設定ダイアログ
フォントサイズや色などの設定をユーザーが変更できるオプションダイアログを追加しましょう。
8. よくある問題と解決法
❌ 日本語フォントが表示されない
原因:システムに日本語フォントが見つからない場合があります。
解決法:font引数を省略するかシステムに合ったフォントを指定してください。
❌ ライブラリのインポートエラー
原因:必要なライブラリがインストールされていません。
解決法:pip install コマンドで必要なライブラリをインストールしてください。このソースコードで必要な外部ライブラリは matplotlib のみです。 (pip install matplotlib)
❌ ウィンドウサイズが合わない
原因:画面解像度や表示スケールによって異なる場合があります。
解決法:root.geometry()で適切なサイズに調整してください。
9. 練習問題
アプリの理解を深めるための練習問題です。
- 課題1:機能拡張
  テキスト分析ツールに新しい機能を1つ追加してみましょう。
- 課題2:UIの改善
  色・フォント・レイアウトを変更して、より使いやすいUIにカスタマイズしましょう。
- 課題3:保存機能の追加
  処理結果をファイルに保存する機能を追加しましょう。