commit 2afe840edee2af591d55be73797dd23132a26027 Author: ganome Date: Thu Dec 4 18:34:35 2025 -0700 Initial Commit diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7349ef0 --- /dev/null +++ b/Makefile @@ -0,0 +1,13 @@ +CC=gcc +CFLAGS=-Iinclude -Wall $(shell pkg-config --cflags libxml-2.0 libcurl) +LDFLAGS=$(shell pkg-config --libs libxml-2.0 libcurl) +SRCS=src/main.c src/scraper.c +TARGET=gscrape + +all: $(TARGET) + +$(TARGET): $(SRCS) + $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) + +clean: + rm -f $(TARGET) *.o src/*.o diff --git a/gscrape b/gscrape new file mode 100755 index 0000000..f6ecf0c Binary files /dev/null and b/gscrape differ diff --git a/include/scraper.h b/include/scraper.h new file mode 100644 index 0000000..88d23be --- /dev/null +++ b/include/scraper.h @@ -0,0 +1,33 @@ +#ifndef SCRAPER_H +#define SCRAPER_H + +#include + +int fetch_url(const char *url, char **out_buf, size_t *out_len); +char *extract_title(const char *html, size_t len); + +/* Extract all and + * Returns 0 on success and allocates *out with a newline-separated list + * of "key: value" lines. Caller must free(*out). + */ +int extract_meta(const char *html, size_t len, char **out); + +/* Extract Open Graph tags (meta property="og:...") similarly. */ +int extract_og(const char *html, size_t len, char **out); + +/* Extract the first