dotenv>=0.9.9
pydantic
pygithub>=2.6.1
from gradio_ui import define_gradio_interface
def main():
    """Build the Gradio interface and launch it, passing ``pwa=True``."""
    define_gradio_interface().launch(pwa=True)
# Start the UI only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()
import gradio as gr
from output_handler import output_json
from utils import get_github_client
from repo_processor import process_repo, RepoResult
import functools
from argparse import Namespace
from concurrent.futures import ThreadPoolExecutor
import json
def start_process_handler(api_token: str, search_user: str, no_forks: bool):
    """Scan a GitHub user's repositories and return the e-mail addresses found.

    Args:
        api_token: GitHub API token used for the requests.
        search_user: Login name of the GitHub user whose repositories are scanned.
        no_forks: Forwarded to ``process_repo`` as ``args.no_forks``.

    Returns:
        A prompt message (``str``) when either text input is blank; otherwise
        the ``"emails"`` entry of the JSON document built by ``output_json``.
    """
    # Strip once and use the cleaned values everywhere: the blank checks
    # already ignore surrounding whitespace, so a token or username pasted
    # with a stray space/newline must not reach the GitHub API unstripped.
    token = api_token.strip()
    username = search_user.strip()
    if not token:
        return "トークンを入力してください。"
    if not username:
        return "スキャンするGitHubユーザー名を入力してください。"

    main_client = get_github_client(token)
    repos = main_client.get_user(username).get_repos()
    args = Namespace(
        username=username,
        no_forks=no_forks,  # set to True to exclude forks
    )

    # Repositories are processed one after another, without a ThreadPoolExecutor.
    results: list[RepoResult] = []
    for repo in repos:
        results.append(process_repo(repo, args=args, token=token))

    json_data = json.loads(output_json(results))
    return json_data["emails"]
def define_gradio_interface():
    """Assemble the Gradio Blocks UI and return it without launching.

    The layout is a heading, a password-style token field, a username field,
    a fork-exclusion checkbox, a start button and a read-only result box.
    Clicking the button calls ``start_process_handler`` with the three inputs
    and writes its return value into the result box.
    """
    with gr.Blocks(theme=gr.themes.Base()) as app:
        gr.Markdown(
            """
# GitHub Email Screener
情報は収集しません。安心してご利用ください。
"""
        )

        # Inputs, one row each.
        with gr.Row():
            token_input = gr.Textbox(
                label="GitHub API Token",
                placeholder="GitHubのトークンを入力",
                type="password",
            )
        with gr.Row():
            username_input = gr.Textbox(
                label="GitHub Username",
                placeholder="スキャンするGitHubユーザー名を入力",
            )
        with gr.Row():
            skip_forks_toggle = gr.Checkbox(
                label="フォークを除外",
                value=True,
                info="フォークされたリポジトリを除外する場合はチェックしてください。",
            )

        # Trigger and output.
        with gr.Row():
            scan_button = gr.Button("スキャン開始")
        with gr.Row():
            result_box = gr.Textbox(
                label="Response",
                placeholder="スキャン結果",
                lines=10,
                interactive=False,
            )

        # Wire the button to the handler while still inside the Blocks context.
        scan_button.click(
            start_process_handler,
            inputs=[token_input, username_input, skip_forks_toggle],
            outputs=result_box,
        )
    return app
import json
from dataclasses import asdict
from pydantic import BaseModel
from repo_processor import RepoResult
class OutputJson(BaseModel):
    """Top-level document that ``output_json`` fills in and serializes."""

    # Per-repository results, keyed by each result's ``name``.
    repositories: dict[str, RepoResult]
    # Sorted, de-duplicated addresses gathered from the non-fork results.
    emails: list[str]
def output_json(results: list[RepoResult], raw=True):
    """Serialize scan results into a JSON string.

    Args:
        results: One ``RepoResult`` per processed repository.
        raw: When truthy (default) the JSON is emitted compactly; otherwise
            it is pretty-printed with a two-space indent.

    Returns:
        JSON text with two keys: ``repositories`` (results keyed by repository
        name) and ``emails`` (sorted unique addresses from non-fork
        repositories). ``None`` values are omitted.
    """
    output = OutputJson(
        repositories={},
        emails=[],
    )
    emails = set()
    for result in results:
        # A fork with no error and no e-mail set was skipped on purpose
        # (fork exclusion); leave it out of the report entirely.
        if not result.error and result.fork_of is not None and result.emails is None:
            continue
        output.repositories[result.name] = result
        # Only repositories that are not forks contribute to the summary list.
        if not result.fork_of and result.emails:
            emails.update(result.emails)
    output.emails = sorted(emails)
    return output.model_dump_json(
        indent=None if raw else 2,
        exclude_none=True,
        # ``set("name")`` would be {'n', 'a', 'm', 'e'} and exclude nothing.
        # The repository name is already the dict key, so drop the duplicate
        # ``name`` field from every entry under ``repositories``.
        exclude={"repositories": {"__all__": {"name"}}},
    )
from argparse import Namespace
from dataclasses import dataclass
from github import GithubException
from github.Repository import Repository
from pydantic import BaseModel
from utils import get_github_client
class RepoResult(BaseModel):
    """Outcome of scanning one repository, as built by ``process_repo``."""

    # Full repository name (taken from ``repo.full_name``).
    name: str
    # Full name of the parent repository when the repo is a fork, else None.
    fork_of: str | None
    # Commit author e-mail addresses; None when the fork was skipped unscanned.
    emails: frozenset[str] | None
    # Text of the GithubException raised while reading commits, if any.
    error: str | None
def process_repo(repo: Repository, args: Namespace, token: str) -> RepoResult:
    """Collect the commit author e-mail addresses of one repository.

    Args:
        repo: Repository to scan.
        args: Options namespace; only ``args.no_forks`` is read here.
        token: GitHub API token handed to ``get_github_client``.

    Returns:
        A ``RepoResult``. For a fork skipped because ``args.no_forks`` is set,
        ``emails`` and ``error`` are both ``None``. Otherwise ``emails`` holds
        the addresses gathered (possibly partial) and ``error`` holds the text
        of any ``GithubException`` raised while reading commits.
    """
    client = get_github_client(token)
    full_name = repo.full_name

    parent_name = None
    if repo.fork and repo.parent:
        parent_name = repo.parent.full_name

    # Forks are reported but not scanned when the caller asked to skip them.
    if args.no_forks and parent_name:
        return RepoResult(name=full_name, fork_of=parent_name, emails=None, error=None)

    addresses: set[str] = set()
    failure = None
    try:
        for entry in client.get_repo(full_name).get_commits():
            author_email = entry.commit.author.email
            if author_email:
                addresses.add(author_email)
    except GithubException as exc:
        # Keep whatever was gathered before the failure and report its text.
        failure = str(exc)

    return RepoResult(
        name=full_name,
        fork_of=parent_name,
        emails=frozenset(addresses),
        error=failure,
    )
import os
import sys
import threading
from github import Github
# Thread-local storage; get_github_client keeps each thread's Github client on it.
thread_local = threading.local()
def sort_emails_by_domain(emails: set[str]) -> list[str]:
    """Return the e-mail addresses as a list sorted by domain name.

    The sort key is the address with its ``@``-separated parts in reverse
    order, so the domain is compared first and the local part second.
    """

    def domain_first(address: str) -> str:
        return "@".join(address.split("@")[::-1])

    return sorted(emails, key=domain_first)
def get_github_client(token: str) -> Github:
    """Return the calling thread's GitHub API client for ``token``.

    A client is created on first use in each thread and then reused. If the
    thread is later asked for a different token, a new client is built for it:
    reusing the cached one would keep authenticating with the earlier token
    (e.g. a previous user's token when a server worker thread is reused).

    Args:
        token: GitHub API token the returned client must authenticate with.

    Returns:
        The ``Github`` client stored on ``thread_local`` for this thread.
    """
    cached_token = getattr(thread_local, 'client_token', None)
    if not hasattr(thread_local, 'client') or cached_token != token:
        thread_local.client = Github(token)
        # Remember which token the cached client belongs to.
        thread_local.client_token = token
    return thread_local.client