author    Max Resnick <max@ofmax.li>  2025-02-28 22:59:23 -0800
committer Max Resnick <max@ofmax.li>  2025-02-28 22:59:23 -0800
commit    5680113281aa58b63b1bdd7445a17e281007df23 (patch)
tree      a9e6438086c2fe8f61d44f5d46b358ca9cddabe4
download  unbound-adblock-config-5680113281aa58b63b1bdd7445a17e281007df23.tar.gz
feat: ad domain parser
Diffstat
-rw-r--r--  README.md      66
-rw-r--r--  main.go       114
-rw-r--r--  main_test.go  161
3 files changed, 341 insertions, 0 deletions
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b06b0c2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,66 @@
+# Unbound Ads Generator
+
+A tool to generate Unbound DNS server configuration for blocking advertising domains.
+
+## Overview
+
+This program fetches lists of advertising domains from multiple sources and generates a configuration file for the Unbound DNS server to block these domains. It supports multiple input formats and automatically deduplicates domains.
+
+## Usage
+
+```bash
+unbound-ads-generator <url-list> <output-file>
+```
+
+Where:
+- `url-list` is a URL pointing to a text file containing a list of URLs (one per line) that provide domain lists
+- `output-file` is the path where the Unbound configuration will be written
+
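+For example, a hypothetical invocation might look like this (the list URL and output path below are placeholders, not defaults):
+
+```bash
+unbound-ads-generator https://example.com/lists.txt /var/unbound/etc/adblock.conf
+```
+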
+### Input Format
+
+The URL list file should contain URLs (one per line) pointing to domain lists. Lines starting with `#` are treated as comments.
+
+Example URL list file:
+```
+# Ad blocking lists
+https://raw.githubusercontent.com/PolishFiltersTeam/KADhosts/master/KADhosts.txt
+https://raw.githubusercontent.com/FadeMind/hosts.extras/master/add.Spam/hosts
+```
+
+The domain lists can be in either of these formats:
+```
+# IP and domain format
+0.0.0.0 advertising.example.com
+
+# Plain domain format
+advertising.example.com
+```
+
+### Output Format
+
+The program generates Unbound configuration in this format:
+```
+local-zone: "advertising.example.com" refuse
+local-zone: "another-ad.example.com" refuse
+```
+
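+One way to use the generated file (the paths here are illustrative) is to include it from the `server:` clause of `unbound.conf`:
+
+```
+# /etc/unbound/unbound.conf
+server:
+    include: "/etc/unbound/adblock.conf"
+```
+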
+## Features
+
+- Supports multiple domain list formats
+- Automatically deduplicates domains (see the example below)
+- Case-insensitive domain matching
+- Progress logging
+- Comment handling
+- Basic domain validation
+
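+For example, because domains are lower-cased and deduplicated across every source list, input lines such as:
+
+```
+0.0.0.0 Ads.Example.com
+ads.example.com
+0.0.0.0 ADS.EXAMPLE.COM
+```
+
+produce a single output line:
+
+```
+local-zone: "ads.example.com" refuse
+```
+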
+## Building
+
+```bash
+go build
+```
+
+## Testing
+
+```bash
+go test -v
+```
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..5bb387d
--- /dev/null
+++ b/main.go
@@ -0,0 +1,114 @@
+package main
+
+import (
+ "bufio"
+ "fmt"
+ "log/slog"
+ "net/http"
+ "os"
+ "strings"
+)
+
+func main() {
+ if len(os.Args) != 3 {
+ slog.Error("usage: program <url-list> <output-file>")
+ os.Exit(1)
+ }
+
+ urls, err := fetchURLList(os.Args[1])
+ if err != nil {
+ slog.Error("failed to fetch URL list", "error", err)
+ os.Exit(1)
+ }
+
+ f, err := os.Create(os.Args[2])
+ if err != nil {
+ slog.Error("failed to create output file", "error", err)
+ os.Exit(1)
+ }
+ defer f.Close()
+
+ w := bufio.NewWriter(f)
+ defer w.Flush()
+
+ domains := make(map[string]struct{})
+ for i, url := range urls {
+ slog.Info("fetching domains", "url", url, "progress", fmt.Sprintf("%d/%d", i+1, len(urls)))
+ if err := fetchDomainsAndWrite(url, w, domains); err != nil {
+ slog.Warn("failed to process url", "url", url, "error", err)
+ continue
+ }
+ }
+ slog.Info("completed", "total_domains", len(domains))
+}
+
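+// fetchURLList fetches the text file at url and returns its non-empty,
+// non-comment lines as a slice of URLs.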
+func fetchURLList(url string) ([]string, error) {
+ resp, err := http.Get(url)
+ if err != nil {
+ return nil, fmt.Errorf("http get failed: %w", err)
+ }
+ defer resp.Body.Close()
+
+ var urls []string
+ scanner := bufio.NewScanner(resp.Body)
+ for scanner.Scan() {
+ line := strings.TrimSpace(scanner.Text())
+ if line == "" || strings.HasPrefix(line, "#") {
+ continue
+ }
+ urls = append(urls, line)
+ }
+ return urls, scanner.Err()
+}
+
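+// fetchDomainsAndWrite fetches a domain list from url, normalizes each entry
+// to a lower-cased domain, skips domains already present in seen, and writes a
+// `local-zone: "<domain>" refuse` line to w for every new domain.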
+func fetchDomainsAndWrite(url string, w *bufio.Writer, seen map[string]struct{}) error {
+ resp, err := http.Get(url)
+ if err != nil {
+ return fmt.Errorf("http get failed: %w", err)
+ }
+ defer resp.Body.Close()
+
+ var count int
+ scanner := bufio.NewScanner(resp.Body)
+ for scanner.Scan() {
+ line := strings.TrimSpace(scanner.Text())
+ if line == "" || strings.HasPrefix(line, "#") {
+ continue
+ }
+
+ var domain string
+ // Handle "0.0.0.0 domain.com" format
+ if strings.Contains(line, " ") {
+ parts := strings.Fields(line)
+ if len(parts) >= 2 {
+ domain = parts[1]
+ }
+ } else {
+ domain = line
+ }
+
+ // Basic domain validation and normalization
+ domain = strings.ToLower(strings.TrimSpace(domain))
+ if domain == "" || !strings.Contains(domain, ".") || strings.HasPrefix(domain, ".") || strings.HasSuffix(domain, ".") {
+ continue
+ }
+
+ // Skip if we've seen this domain before
+ if _, exists := seen[domain]; exists {
+ continue
+ }
+ seen[domain] = struct{}{}
+ count++
+
+ if _, err := fmt.Fprintf(w, "local-zone: %q refuse\n", domain); err != nil {
+ return fmt.Errorf("failed to write domain: %w", err)
+ }
+ }
+
+ if err := scanner.Err(); err != nil {
+ return fmt.Errorf("scanner error: %w", err)
+ }
+
+ slog.Info("processed url", "url", url, "new_domains", count)
+ return nil
+}
diff --git a/main_test.go b/main_test.go
new file mode 100644
index 0000000..29a8341
--- /dev/null
+++ b/main_test.go
@@ -0,0 +1,161 @@
+package main
+
+import (
+ "bufio"
+ "bytes"
+ "net/http"
+ "net/http/httptest"
+ "testing"
+)
+
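+// TestFetchURLList checks that comment lines, blank lines, and surrounding
+// whitespace are stripped from the fetched URL list.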
+func TestFetchURLList(t *testing.T) {
+ tests := []struct {
+ name string
+ content string
+ want []string
+ wantErr bool
+ }{
+ {
+ name: "basic list with comments",
+ content: `# comment
+https://example.com/1
+https://example.com/2
+# another comment
+https://example.com/3`,
+ want: []string{
+ "https://example.com/1",
+ "https://example.com/2",
+ "https://example.com/3",
+ },
+ },
+ {
+ name: "empty lines and whitespace",
+ content: ` https://example.com/1
+
+https://example.com/2`,
+ want: []string{
+ "https://example.com/1",
+ "https://example.com/2",
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.Write([]byte(tt.content))
+ }))
+ defer ts.Close()
+
+ got, err := fetchURLList(ts.URL)
+ if (err != nil) != tt.wantErr {
+ t.Errorf("fetchURLList() error = %v, wantErr %v", err, tt.wantErr)
+ return
+ }
+ if !stringSliceEqual(got, tt.want) {
+ t.Errorf("fetchURLList() = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
+
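+// TestFetchDomainsAndWrite checks both supported input formats and verifies
+// that duplicates are collapsed case-insensitively in both the written output
+// and the seen map.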
+func TestFetchDomainsAndWrite(t *testing.T) {
+ tests := []struct {
+ name string
+ content string
+ want string
+ wantSeen map[string]struct{}
+ }{
+ {
+ name: "ip and domain format",
+ content: `# comment
+0.0.0.0 domain1.com
+0.0.0.0 domain2.com`,
+ want: `local-zone: "domain1.com" refuse
+local-zone: "domain2.com" refuse
+`,
+ wantSeen: map[string]struct{}{
+ "domain1.com": {},
+ "domain2.com": {},
+ },
+ },
+ {
+ name: "plain domain format",
+ content: `# comment
+domain1.com
+domain2.com`,
+ want: `local-zone: "domain1.com" refuse
+local-zone: "domain2.com" refuse
+`,
+ wantSeen: map[string]struct{}{
+ "domain1.com": {},
+ "domain2.com": {},
+ },
+ },
+ {
+ name: "mixed format with duplicates",
+ content: `domain1.com
+0.0.0.0 domain1.com
+0.0.0.0 DOMAIN1.COM
+domain2.com`,
+ want: `local-zone: "domain1.com" refuse
+local-zone: "domain2.com" refuse
+`,
+ wantSeen: map[string]struct{}{
+ "domain1.com": {},
+ "domain2.com": {},
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.Write([]byte(tt.content))
+ }))
+ defer ts.Close()
+
+ var buf bytes.Buffer
+ w := bufio.NewWriter(&buf)
+ seen := make(map[string]struct{})
+
+ err := fetchDomainsAndWrite(ts.URL, w, seen)
+ if err != nil {
+ t.Fatalf("fetchDomainsAndWrite() error = %v", err)
+ }
+ w.Flush()
+
+ if got := buf.String(); got != tt.want {
+ t.Errorf("fetchDomainsAndWrite() output = %q, want %q", got, tt.want)
+ }
+
+ if !mapEqual(seen, tt.wantSeen) {
+ t.Errorf("fetchDomainsAndWrite() seen = %v, want %v", seen, tt.wantSeen)
+ }
+ })
+ }
+}
+
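+// stringSliceEqual reports whether a and b contain the same elements in the same order.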
+func stringSliceEqual(a, b []string) bool {
+ if len(a) != len(b) {
+ return false
+ }
+ for i := range a {
+ if a[i] != b[i] {
+ return false
+ }
+ }
+ return true
+}
+
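+// mapEqual reports whether a and b contain the same set of keys.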
+func mapEqual(a, b map[string]struct{}) bool {
+ if len(a) != len(b) {
+ return false
+ }
+ for k := range a {
+ if _, ok := b[k]; !ok {
+ return false
+ }
+ }
+ return true
+}