| author | Max Resnick <max@ofmax.li> | 2025-02-28 22:59:23 -0800 |
|---|---|---|
| committer | Max Resnick <max@ofmax.li> | 2025-02-28 22:59:23 -0800 |
| commit | 5680113281aa58b63b1bdd7445a17e281007df23 (patch) | |
| tree | a9e6438086c2fe8f61d44f5d46b358ca9cddabe4 /main.go | |
| download | unbound-adblock-config-5680113281aa58b63b1bdd7445a17e281007df23.tar.gz | |
feat: ad domain parser
Diffstat (limited to 'main.go')
| Mode | File | Lines |
|---|---|---|
| -rw-r--r-- | main.go | 114 |

1 file changed, 114 insertions, 0 deletions
```diff
@@ -0,0 +1,114 @@
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"log/slog"
+	"net/http"
+	"os"
+	"strings"
+)
+
+func main() {
+	if len(os.Args) != 3 {
+		slog.Error("usage: program <url-list> <output-file>")
+		os.Exit(1)
+	}
+
+	urls, err := fetchURLList(os.Args[1])
+	if err != nil {
+		slog.Error("failed to fetch URL list", "error", err)
+		os.Exit(1)
+	}
+
+	f, err := os.Create(os.Args[2])
+	if err != nil {
+		slog.Error("failed to create output file", "error", err)
+		os.Exit(1)
+	}
+	defer f.Close()
+
+	w := bufio.NewWriter(f)
+	defer w.Flush()
+
+	domains := make(map[string]struct{})
+	for i, url := range urls {
+		slog.Info("fetching domains", "url", url, "progress", fmt.Sprintf("%d/%d", i+1, len(urls)))
+		if err := fetchDomainsAndWrite(url, w, domains); err != nil {
+			slog.Warn("failed to process url", "url", url, "error", err)
+			continue
+		}
+	}
+	slog.Info("completed", "total_domains", len(domains))
+}
+
+func fetchURLList(url string) ([]string, error) {
+	resp, err := http.Get(url)
+	if err != nil {
+		return nil, fmt.Errorf("http get failed: %w", err)
+	}
+	defer resp.Body.Close()
+
+	var urls []string
+	scanner := bufio.NewScanner(resp.Body)
+	for scanner.Scan() {
+		line := strings.TrimSpace(scanner.Text())
+		if line == "" || strings.HasPrefix(line, "#") {
+			continue
+		}
+		urls = append(urls, line)
+	}
+	return urls, scanner.Err()
+}
+
+func fetchDomainsAndWrite(url string, w *bufio.Writer, seen map[string]struct{}) error {
+	resp, err := http.Get(url)
+	if err != nil {
+		return fmt.Errorf("http get failed: %w", err)
+	}
+	defer resp.Body.Close()
+
+	var count int
+	scanner := bufio.NewScanner(resp.Body)
+	for scanner.Scan() {
+		line := strings.TrimSpace(scanner.Text())
+		if line == "" || strings.HasPrefix(line, "#") {
+			continue
+		}
+
+		var domain string
+		// Handle "0.0.0.0 domain.com" format
+		if strings.Contains(line, " ") {
+			parts := strings.Fields(line)
+			if len(parts) >= 2 {
+				domain = parts[1]
+			}
+		} else {
+			domain = line
+		}
+
+		// Basic domain validation and normalization
+		domain = strings.ToLower(strings.TrimSpace(domain))
+		if domain == "" || !strings.Contains(domain, ".") || strings.HasPrefix(domain, ".") || strings.HasSuffix(domain, ".") {
+			continue
+		}
+
+		// Skip if we've seen this domain before
+		if _, exists := seen[domain]; exists {
+			continue
+		}
+		seen[domain] = struct{}{}
+		count++
+
+		if _, err := fmt.Fprintf(w, "local-zone: %q refuse\n", domain); err != nil {
+			return fmt.Errorf("failed to write domain: %w", err)
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		return fmt.Errorf("scanner error: %w", err)
+	}
+
+	slog.Info("processed url", "url", url, "new_domains", count)
+	return nil
+}
```
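A rough usage sketch for reference (the blocklist index URL, output path, and domains below are hypothetical, not part of the commit): the first argument is a URL pointing at a plain-text list of blocklist URLs (blank lines and `#` comments are skipped), each list may contain bare domains or hosts-file entries like `0.0.0.0 ads.example.com`, and every unique domain is written to the output file as an unbound `local-zone` directive:

```
$ go run main.go https://example.org/blocklists.txt adblock.conf
$ head -n 2 adblock.conf
local-zone: "ads.example.com" refuse
local-zone: "tracker.example.net" refuse
```

The generated file can then be pulled into an unbound setup with an `include:` line in unbound.conf; the `refuse` zone type makes unbound answer queries for those names with rcode REFUSED.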