gscrape/include/scraper.h
2025-12-04 18:37:33 -07:00

34 lines
1.2 KiB
C

#ifndef SCRAPER_H
#define SCRAPER_H
#include <stdlib.h>
/* Fetch the resource at `url`.
 *
 * Only the prototype is visible in this header; the notes below are
 * inferred from the signature and must be checked against the definition.
 *
 *   url      - NUL-terminated URL string (not modified by the callee).
 *   out_buf  - out-parameter; presumably receives a newly allocated buffer
 *              holding the response body -- TODO confirm, including whether
 *              the buffer is NUL-terminated and that the caller must free() it.
 *   out_len  - out-parameter; presumably receives the number of bytes
 *              stored in *out_buf -- TODO confirm.
 *
 * NOTE(review): the return convention is not documented here. The other
 * int-returning functions in this header use 0 for success; verify that
 * this one does too, and what it returns on failure.
 */
int fetch_url(const char *url, char **out_buf, size_t *out_len);
/* Extract a title string from an HTML document.
 *
 *   html - the HTML text to scan (not modified by the callee).
 *   len  - length of `html` (presumably in bytes -- confirm).
 *
 * NOTE(review): the contract is not documented in this header. Judging by
 * the name, this presumably returns the text of the <title> element, or
 * NULL when none is found, in a heap-allocated string the caller must
 * free() -- TODO confirm against the definition.
 */
char *extract_title(const char *html, size_t len);
/* Extract all <meta name=... content=...> and <meta property=... content=...>
 * tags from an HTML document.
 *
 *   html - the HTML text to scan (not modified by the callee).
 *   len  - length of `html` (presumably in bytes -- confirm).
 *   out  - out-parameter; on success *out is allocated and filled with a
 *          newline-separated list of "key: value" lines.
 *          The caller must free(*out).
 *
 * Returns 0 on success.
 * NOTE(review): the failure return value, and the state of *out on
 * failure, are not specified here -- confirm against the definition.
 */
int extract_meta(const char *html, size_t len, char **out);
/* Extract Open Graph tags (meta property="og:...") from an HTML document.
 *
 *   html - the HTML text to scan (not modified by the callee).
 *   len  - length of `html` (presumably in bytes -- confirm).
 *   out  - out-parameter for the result.
 *
 * This is described as working "similarly" to extract_meta, which returns 0
 * on success and allocates *out as a newline-separated list of "key: value"
 * lines that the caller must free().
 * NOTE(review): presumably the same output format, return convention and
 * ownership rules apply here -- confirm against the definition.
 */
int extract_og(const char *html, size_t len, char **out);
/* Extract the first <script type="application/ld+json"> block that looks
 * like a Product schema.
 *
 *   html     - the HTML text to scan (not modified by the callee).
 *   len      - length of `html` (presumably in bytes -- confirm).
 *   out_json - out-parameter; on success *out_json is allocated and the
 *              caller must free it.
 *
 * Returns 0 when a matching block was found and *out_json was allocated,
 * or -1 if no such block was found.
 * NOTE(review): the criterion for "looks like a Product schema" and the
 * return value for other failures (e.g. allocation failure) are not stated
 * here -- check the definition.
 */
int extract_jsonld_product(const char *html, size_t len, char **out_json);
/* Extract the text content of the first <h1> element.
 *
 *   html - the HTML text to scan (not modified by the callee).
 *   len  - length of `html` (presumably in bytes -- confirm).
 *
 * Returns the extracted text, or NULL if the document has no <h1>.
 * NOTE(review): ownership of the returned string is not documented;
 * presumably it is heap-allocated and the caller must free() it -- confirm.
 */
char *extract_h1(const char *html, size_t len);
/* Extract product listings from an HTML document, one product per line,
 * formatted as "name | price | url".
 *
 *   html     - the HTML text to scan (not modified by the callee).
 *   len      - length of `html` (presumably in bytes -- confirm).
 *   base_url - NOTE(review): role not documented; presumably used to resolve
 *              relative product links into absolute URLs -- confirm.
 *   out      - out-parameter; on success *out is allocated and the caller
 *              must free it.
 *
 * Returns 0 on success, or -1 otherwise.
 * Each line represents one product; fields are separated by '|'.
 * NOTE(review): the example format shows " | " (with surrounding spaces)
 * while the separator is described as a bare '|' -- confirm which one the
 * implementation emits before parsing the output.
 */
int extract_products(const char *html, size_t len, const char *base_url, char **out);
#endif /* SCRAPER_H */