wip - surf-adblock - Surf adblock web extension
 (HTM) git clone git://git.codemadness.org/surf-adblock
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit b8f2056f0393290600d79f69c0ceb0ab3eb29072
 (DIR) parent 13facacc65a3b7895f171618c0e031a437ec53b3
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Mon,  5 Jun 2017 12:11:28 +0200
       
       wip
       
       Diffstat:
         M TODO                                |       9 +++++++++
         M adblock.c                           |      92 +++++++++++--------------------
         M adblock.h                           |       2 +-
         M surf-adblock.c                      |      14 ++++++--------
         M tests/tests.c                       |      21 +++++++++++++++++++++
       
       5 files changed, 69 insertions(+), 69 deletions(-)
       ---
 (DIR) diff --git a/TODO b/TODO
       @@ -1,4 +1,13 @@
       +- optimization: for simple patterns use: strstr, strcasestr, strcmp, strcasecmp
       +
       +- combine rules in groups: display: none: display: initial, saves memory:
       +  rule1,rule2 { display: none; } vs rule 1{...}rule2{...}
       +
       +- checkrequest -> allowrequest.
       +
        - add test-case for global exception rules (no domains), see globalcss init code.
       +  also support inverse element hide rule: "~example.com##div.textad".
       +- support domain name anchor separately: "||".
        - test blocking websocket connections in surf.
        - skip protocol part when matching for now, (later add support for websockets).
        - for f->matchbegin, matchend etc: make sure to match domain properly, check part of
 (DIR) diff --git a/adblock.c b/adblock.c
       @@ -27,17 +27,31 @@ struct filterdomain {
                struct filterdomain *next;
        };
        
       -struct filterrule {
       +struct elementhiderule {
       +        /* is exception rule: #@# */
       +        int isexception;
       +        char *css; /* if non-NULL is CSS rule / hide element rule */
       +        struct filterdomain *domains;
       +
       +        struct elementhiderule *next;
       +};
       +
       +struct blockrule {
                /* type: match mask, must be atleast 32-bit, see FilterType enum */
       -        unsigned long block;
       +        unsigned long mask;
       +        /* is exception rule: prefix @@ */
       +        int isexception;
                int matchbegin;
                int matchend;
       -        /* is exception rule: prefix @@ for ABP or #@# for CSS */
       -        int isexception;
       -        char *css; /* if non-NULL is CSS rule / hide element rule */
                char *uri;
                struct filterdomain *domains;
       -        struct filterrule *next;
       +
       +        struct blockrule *next;
       +};
       +
       +struct rules {
       +        struct blockrule *blockrules;
       +        struct elementhiderule *elementhiderules;
        };
        
        enum {
       @@ -147,27 +161,6 @@ westrdup(const char *s)
                return p;
        }
        
       -void
       -cleanup(void)
       -{
       -        struct filterrule *r;
       -        struct filterdomain *d;
       -
       -        free(globalcss.data);
       -
       -        for (r = rules; r; r = rules) {
       -                for (d = r->domains; d; d = r->domains) {
       -                        free(d->domain);
       -                        r->domains = d->next;
       -                        free(d);
       -                }
       -                free(r->css);
       -                free(r->uri);
       -                rules = r->next;
       -                free(r);
       -        }
       -}
       -
        static size_t
        string_buffer_realloc(String *s, size_t newsz)
        {
       @@ -541,36 +534,6 @@ matchrule(struct filterrule *f, const char *uri, const char *type,
                        return 0;
        #endif
        
       -#if 0
       -        /* DEBUG: test, match if it is a simple pattern */
       -        char *p;
       -        p = strchr(f->uri, '*');
       -        if (!p)
       -                p = strchr(f->uri, '^');
       -        if (!p) {
       -                /* TODO: write a test-case */
       -                if (f->block & FilterTypeMatchCase) {
       -                        if (f->matchbegin)
       -                                m = strncmp(uri, f->uri, strlen(f->uri)) == 0;
       -                        else if (f->matchend)
       -                                m = strlen(f->uri) <= strlen(uri) &&
       -                                        strcmp(&uri[strlen(uri) - strlen(f->uri)], f->uri) == 0;
       -                        else
       -                                m = strstr(uri, f->uri) ? 1 : 0;
       -                } else {
       -                        if (f->matchbegin)
       -                                m = strncasecmp(uri, f->uri, strlen(f->uri)) == 0;
       -                        else if (f->matchend)
       -                                m = strlen(f->uri) <= strlen(uri) &&
       -                                        strcasecmp(&uri[strlen(uri) - strlen(f->uri)], f->uri) == 0;
       -                        else
       -                                m = strcasestr(uri, f->uri) ? 1 : 0;
       -                }
       -                /*m = r ? !m : m;*/
       -                return m;
       -        }
       -#endif
       -
                r = snprintf(pat, sizeof(pat), "%s%s%s",
                        f->matchbegin ? "" : "*",
                        f->uri,
       @@ -580,6 +543,11 @@ matchrule(struct filterrule *f, const char *uri, const char *type,
                        return 0;
                }
        
       +        /* DEBUG */
       +        if (f->matchbegin) {
       +                printf("pat: %s,    uri: %s,    domain: %s\n", pat, uri, domain);
       +        }
       +
                m = 0;
                if (!match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) {
        #if 0
       @@ -629,7 +597,7 @@ parserule(struct filterrule *f, char *s)
                }
        
                /* element hiding rule, NOTE: no wildcards are supported,
       -        "Simplified element hiding syntax" is not supported. */
       +           "Simplified element hiding syntax" (legacy) is not supported. */
                if ((p = strstr(s, "##"))) {
                        *p = '\0';
                        if (parsedomainselement(f, s) < 0)
       @@ -734,8 +702,8 @@ loadrules(FILE *fp)
                size_t linesiz = 0;
                ssize_t n;
                int ret;
       +        struct rules *rules;
        
       -        /* TODO: handle ferror() */
                /* load rules */
                while ((n = getline(&line, &linesiz, fp)) > 0) {
                        if (line[n - 1] == '\n')
       @@ -755,6 +723,10 @@ loadrules(FILE *fp)
                                return -1;
                        }
                }
       +        if (ferror(fp)) {
       +                weprintf("getline: %s\n", strerror(errno));
       +                return -1;
       +        }
                return (rules != NULL);
        }
        
       @@ -838,7 +810,7 @@ err:
        }
        
        int
       -checkrequest(const char *uri, const char *requri)
       +allowrequest(const char *uri, const char *requri)
        {
                char domain[256];
                struct filterrule *r;
 (DIR) diff --git a/adblock.h b/adblock.h
       @@ -1,4 +1,4 @@
       -int  checkrequest(const char *, const char *);
       +int  allowrequest(const char *, const char *);
        void cleanup(void);
        char *getdocumentcss(const char *);
        char *getglobalcss(void);
 (DIR) diff --git a/surf-adblock.c b/surf-adblock.c
       @@ -25,11 +25,11 @@ newpage(WebKitWebPage *page)
                        fprintf(stderr, "surf-adblock: calloc: %s\n", strerror(errno));
                        return NULL;
                }
       -        p->next = pages;
       -        pages = p;
       -
                p->id = webkit_web_page_get_id(page);
                p->webpage = page;
       +        p->next = pages;
       +
       +        pages = p;
        
                return p;
        }
       @@ -60,7 +60,7 @@ documentloaded(WebKitWebPage *wp, Page *p)
                }
        
                free(css);
       -        /* NOTE: globalcss free'd at cleanup() */
       +        /* NOTE: globalcss should not be free'd */
        }
        
        static gboolean
       @@ -68,15 +68,13 @@ sendrequest(WebKitWebPage *wp, WebKitURIRequest *req,
                           WebKitURIResponse *res, Page *p)
        {
                const char *uri, *requri;
       -        gboolean status = FALSE;
        
                if (!webkit_uri_request_get_http_method(req))
       -                return status;
       +                return TRUE; /* TRUE = don't handle any more events */
                uri = webkit_web_page_get_uri(p->webpage);
                requri = webkit_uri_request_get_uri(req);
       -        status = checkrequest(uri, requri) ? FALSE : TRUE;
        
       -        return status;
       +        return allowrequest(uri, requri) ? FALSE : TRUE;
        }
        
        static void
 (DIR) diff --git a/tests/tests.c b/tests/tests.c
       @@ -1,5 +1,26 @@
        #include "../adblock.c"
        
       +void
       +cleanup(void)
       +{
       +        struct filterrule *r;
       +        struct filterdomain *d;
       +
       +        free(globalcss.data);
       +
       +        for (r = rules; r; r = rules) {
       +                for (d = r->domains; d; d = r->domains) {
       +                        free(d->domain);
       +                        r->domains = d->next;
       +                        free(d);
       +                }
       +                free(r->css);
       +                free(r->uri);
       +                rules = r->next;
       +                free(r);
       +        }
       +}
       +
        int
        main(void)
        {