// Command main downloads a list of blocklist URLs, extracts the domains named
// by each list, and writes one `local-zone: "<domain>" refuse` line per unique
// domain to an output file.
//
// Usage:
//
//	program <url-list-url> <output-file>
package main

import (
	"bufio"
	"fmt"
	"log/slog"
	"net"
	"net/http"
	"os"
	"strings"
	"time"
)

// fetchTimeout bounds a single download (connect, headers, and body) so one
// stalled server cannot hang the whole run.
const fetchTimeout = 2 * time.Minute

// httpClient is shared by every download so connections can be reused and the
// timeout is applied uniformly.
var httpClient = &http.Client{Timeout: fetchTimeout}

// main validates the command line, fetches the list of source URLs, and
// streams the de-duplicated domains of every source into the output file.
// A source that fails is logged and skipped; failures to fetch the URL list
// or to create, flush, or close the output file terminate with exit status 1.
func main() {
	if len(os.Args) != 3 {
		slog.Error("usage: program <url-list-url> <output-file>")
		os.Exit(1)
	}

	urls, err := fetchURLList(os.Args[1])
	if err != nil {
		slog.Error("failed to fetch URL list", "error", err)
		os.Exit(1)
	}

	f, err := os.Create(os.Args[2])
	if err != nil {
		slog.Error("failed to create output file", "error", err)
		os.Exit(1)
	}

	w := bufio.NewWriter(f)

	// domains is shared across all sources so a domain is written only once.
	domains := make(map[string]struct{})
	for i, url := range urls {
		slog.Info("fetching domains", "url", url, "progress", fmt.Sprintf("%d/%d", i+1, len(urls)))
		if err := fetchDomainsAndWrite(url, w, domains); err != nil {
			slog.Warn("failed to process url", "url", url, "error", err)
		}
	}

	// Flush and close explicitly instead of deferring: a deferred call would
	// discard the error, and a truncated output file must not look like success.
	if err := w.Flush(); err != nil {
		_ = f.Close() // the flush error is the one worth reporting
		slog.Error("failed to flush output file", "error", err)
		os.Exit(1)
	}
	if err := f.Close(); err != nil {
		slog.Error("failed to close output file", "error", err)
		os.Exit(1)
	}

	slog.Info("completed", "total_domains", len(domains))
}

// httpGet issues a GET request for url through the shared client and returns
// the response only when the server answered 200 OK. On success the caller
// owns resp.Body and must close it; on any error the body is already closed.
func httpGet(url string) (*http.Response, error) {
	resp, err := httpClient.Get(url)
	if err != nil {
		return nil, fmt.Errorf("http get failed: %w", err)
	}
	if resp.StatusCode != http.StatusOK {
		// An error page would otherwise be parsed as if it were list content.
		resp.Body.Close()
		return nil, fmt.Errorf("http get failed: unexpected status %q", resp.Status)
	}
	return resp, nil
}

// fetchURLList downloads url and returns its non-empty lines, trimmed of
// surrounding whitespace, skipping lines that start with "#". It returns an
// error (and a nil slice) if the request fails, the status is not 200 OK, or
// the body cannot be read completely.
func fetchURLList(url string) ([]string, error) {
	resp, err := httpGet(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	var urls []string
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		urls = append(urls, line)
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("scanner error: %w", err)
	}
	return urls, nil
}

// parseDomain extracts the domain named by one line of a blocklist.
//
// Two line shapes are understood: a bare domain ("example.com") and a
// hosts-style entry ("0.0.0.0 example.com"), whose second whitespace-separated
// field is taken. Any text from "#" onward is treated as a comment. The result
// is lower-cased. ok is false when the line yields no usable domain: it is
// empty or comment-only, the candidate has no dot, starts or ends with a dot,
// or is an IP address literal.
func parseDomain(line string) (domain string, ok bool) {
	line, _, _ = strings.Cut(line, "#")

	// Fields splits on any whitespace, so tab-separated hosts files work too.
	fields := strings.Fields(line)
	switch len(fields) {
	case 0:
		return "", false
	case 1:
		domain = fields[0]
	default:
		domain = fields[1]
	}

	domain = strings.ToLower(domain)
	if !strings.Contains(domain, ".") || strings.HasPrefix(domain, ".") || strings.HasSuffix(domain, ".") {
		return "", false
	}
	// Entries such as "0.0.0.0 0.0.0.0" name an address, not a domain.
	if net.ParseIP(domain) != nil {
		return "", false
	}
	return domain, true
}

// fetchDomainsAndWrite downloads the blocklist at url and writes a
// `local-zone: "<domain>" refuse` line to w for every domain that is not
// already present in seen, recording each newly written domain in seen.
//
// It returns an error if the request fails, the status is not 200 OK, a write
// to w fails, or the body cannot be read completely. Lines written before an
// error remain in w, and their domains remain in seen. The caller is
// responsible for flushing w.
func fetchDomainsAndWrite(url string, w *bufio.Writer, seen map[string]struct{}) error {
	resp, err := httpGet(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	var count int
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		domain, ok := parseDomain(scanner.Text())
		if !ok {
			continue
		}

		// Skip domains already emitted for this or an earlier source.
		if _, exists := seen[domain]; exists {
			continue
		}
		seen[domain] = struct{}{}
		count++

		if _, err := fmt.Fprintf(w, "local-zone: %q refuse\n", domain); err != nil {
			return fmt.Errorf("failed to write domain: %w", err)
		}
	}

	if err := scanner.Err(); err != nil {
		return fmt.Errorf("scanner error: %w", err)
	}

	slog.Info("processed url", "url", url, "new_domains", count)
	return nil
}