generated from obsproject/obs-plugintemplate
-
-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add HTML parsing and CSS Selectors (#48)
* initial * plain text * add lexbor * fix win32 cmake * fix headers * lint * cleanup
- Loading branch information
Showing
11 changed files
with
1,089 additions
and
373 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# Builds the lexbor library from source as a static library through
# ExternalProject and exposes it to the rest of the build as the
# `liblexbor_internal` interface target.
include(ExternalProject)

# Platform-specific arguments forwarded to lexbor's own CMake configure step.
if(APPLE)
  # $<SEMICOLON> keeps the two architectures together as a single argument
  # instead of letting CMake split them as a list.
  set(LEXBOR_CMAKE_PLATFORM_OPTIONS -DCMAKE_OSX_ARCHITECTURES=x86_64$<SEMICOLON>arm64)
elseif(WIN32)
  # Defined for the targets of this project (not for the external build).
  add_compile_definitions(LEXBOR_STATIC=1)
  set(LEXBOR_CMAKE_PLATFORM_OPTIONS "-DCMAKE_C_FLAGS=/W3 /utf-8 /MP" "-DCMAKE_CXX_FLAGS=/W3 /utf-8 /MP")
else()
  set(LEXBOR_CMAKE_PLATFORM_OPTIONS -DCMAKE_SYSTEM_NAME=Linux)
endif()

# File name of the static archive produced by the lexbor build on this platform.
set(lexbor_lib_filename ${CMAKE_STATIC_LIBRARY_PREFIX}lexbor_static${CMAKE_STATIC_LIBRARY_SUFFIX})

# Fetch, configure, build and install lexbor with the same generator, build
# type and toolchain as the enclosing project.
ExternalProject_Add(
  lexbor_build
  GIT_REPOSITORY https://github.com/lexbor/lexbor.git
  GIT_TAG v2.3.0
  CMAKE_GENERATOR ${CMAKE_GENERATOR}
  # Declares the installed archive as an output of the install step.
  INSTALL_BYPRODUCTS <INSTALL_DIR>/lib/${lexbor_lib_filename}
  CMAKE_ARGS
    -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
    -DLEXBOR_BUILD_SHARED=OFF
    -DLEXBOR_BUILD_STATIC=ON
    -DLEXBOR_BUILD_TESTS_CPP=OFF
    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
    -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
    -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
    -DCMAKE_LINKER=${CMAKE_LINKER}
    ${LEXBOR_CMAKE_PLATFORM_OPTIONS})

# Resolve the install prefix chosen by ExternalProject into INSTALL_DIR.
ExternalProject_Get_Property(lexbor_build INSTALL_DIR)
message(STATUS "lexbor will be installed to ${INSTALL_DIR}")

# Path at which the static archive will exist once lexbor_build has run.
set(lexbor_lib_location ${INSTALL_DIR}/lib/${lexbor_lib_filename})
message(STATUS "lexbor library expected at ${lexbor_lib_location}")

# Imported target that points at the (not yet built) static archive.
add_library(lexbor_internal STATIC IMPORTED)
set_property(TARGET lexbor_internal PROPERTY IMPORTED_LOCATION ${lexbor_lib_location})

# Interface target consumers link against: it orders the external build
# before its dependents, links the archive and publishes the include path.
add_library(liblexbor_internal INTERFACE)
add_dependencies(liblexbor_internal lexbor_build)
target_link_libraries(liblexbor_internal INTERFACE lexbor_internal)
set_property(TARGET liblexbor_internal PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INSTALL_DIR}/include)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
#include "request-data.h" | ||
#include "plugin-support.h" | ||
#include "errors.h" | ||
|
||
#include <lexbor/html/parser.h> | ||
#include <lexbor/html/html.h> | ||
#include <lexbor/dom/interfaces/element.h> | ||
#include <lexbor/css/css.h> | ||
#include <lexbor/selectors/selectors.h> | ||
|
||
#include <obs-module.h> | ||
|
||
/**
 * Serialization sink: appends a chunk of serialized bytes to a std::string.
 *
 * @param data Pointer to the chunk to append.
 * @param len  Number of bytes in the chunk.
 * @param ctx  Pointer to the destination std::string, passed as void*.
 * @return Always LXB_STATUS_OK.
 */
lxb_inline lxb_status_t serializer_callback(const lxb_char_t *data, size_t len, void *ctx)
{
	std::string *const sink = static_cast<std::string *>(ctx);
	sink->append(reinterpret_cast<const char *>(data), len);
	return LXB_STATUS_OK;
}
|
||
/**
 * Per-match callback: serializes the matched node with
 * lxb_html_serialize_deep_cb and appends the resulting text to the
 * caller's result list.
 *
 * @param node Node that matched the selector.
 * @param spec Selector specificity; unused.
 * @param data Pointer to the std::vector<std::string> collecting results.
 * @return Always LXB_STATUS_OK; the serializer's status is intentionally
 *         ignored, so whatever text was produced is still recorded.
 */
lxb_status_t find_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t spec, void *data)
{
	UNUSED_PARAMETER(spec);

	auto *const matches = static_cast<std::vector<std::string> *>(data);

	std::string serialized;
	(void)lxb_html_serialize_deep_cb(node, serializer_callback, &serialized);

	matches->push_back(serialized);
	return LXB_STATUS_OK;
}
|
||
/**
 * Run a CSS selector query against the body of a parsed HTML document.
 *
 * Each node matched by `slctrs` is handed to find_callback, which appends
 * its serialized text to `found`.
 *
 * All lexbor objects created here (CSS parser, selectors engine, parsed
 * selector list) are released on every exit path, including failures.
 *
 * @param slctrs   CSS selector list text.
 * @param document Parsed HTML document to search; not modified or freed here.
 * @param found    Receives one string per matched node (appended).
 * @return LXB_STATUS_OK on success, LXB_STATUS_ERROR on any failure
 *         (the failure reason is written to the OBS log).
 */
lxb_status_t find_with_selectors(const std::string &slctrs, lxb_html_document_t *document,
				 std::vector<std::string> &found)
{
	lxb_css_parser_t *parser = nullptr;
	lxb_selectors_t *selectors = nullptr;
	lxb_css_selector_list_t *list = nullptr;

	/* Releases whatever has been created so far; safe to call at any point. */
	const auto release_all = [&]() {
		if (selectors != nullptr) {
			(void)lxb_selectors_destroy(selectors, true);
			selectors = nullptr;
		}
		if (parser != nullptr) {
			(void)lxb_css_parser_destroy(parser, true);
			parser = nullptr;
		}
		if (list != nullptr) {
			/* Destroys the memory backing the whole selector list. */
			lxb_css_selector_list_destroy_memory(list);
			list = nullptr;
		}
	};

	/* Create CSS parser. */
	parser = lxb_css_parser_create();
	if (lxb_css_parser_init(parser, NULL) != LXB_STATUS_OK) {
		obs_log(LOG_ERROR, "Failed to setup CSS parser");
		release_all();
		return LXB_STATUS_ERROR;
	}

	/* Selectors. */
	selectors = lxb_selectors_create();
	if (lxb_selectors_init(selectors) != LXB_STATUS_OK) {
		obs_log(LOG_ERROR, "Failed to setup Selectors");
		release_all();
		return LXB_STATUS_ERROR;
	}

	/* Parse the selector text; the parser reports failure through its status. */
	list = lxb_css_selectors_parse(parser, (const lxb_char_t *)slctrs.c_str(), slctrs.length());
	if (parser->status != LXB_STATUS_OK) {
		obs_log(LOG_ERROR, "Failed to parse CSS selectors");
		release_all();
		return LXB_STATUS_ERROR;
	}

	/* Find HTML nodes by CSS Selectors, starting from the body element. */
	lxb_dom_node_t *body = lxb_dom_interface_node(lxb_html_document_body_element(document));
	if (body == nullptr) {
		/* Nothing to search; avoid handing a null root to the selectors engine. */
		obs_log(LOG_ERROR, "HTML document has no body element");
		release_all();
		return LXB_STATUS_ERROR;
	}

	const lxb_status_t find_status =
		lxb_selectors_find(selectors, body, list, find_callback, &found);
	if (find_status != LXB_STATUS_OK) {
		obs_log(LOG_ERROR, "Failed to find HTML nodes by CSS Selectors");
		release_all();
		return LXB_STATUS_ERROR;
	}

	release_all();
	return LXB_STATUS_OK;
}
|
||
struct request_data_handler_response parse_html(struct request_data_handler_response response, | ||
const url_source_request_data *request_data) | ||
{ | ||
lxb_status_t status; | ||
lxb_html_document_t *document; | ||
|
||
document = lxb_html_document_create(); | ||
if (document == NULL) { | ||
return make_fail_parse_response("Failed to setup HTML parser"); | ||
} | ||
|
||
status = lxb_html_document_parse(document, (const lxb_char_t *)response.body.c_str(), | ||
response.body.length()); | ||
if (status != LXB_STATUS_OK) { | ||
return make_fail_parse_response("Failed to parse HTML"); | ||
} | ||
|
||
std::string parsed_output = response.body; | ||
// Get the output value | ||
if (request_data->output_cssselector != "") { | ||
std::vector<std::string> found; | ||
if (find_with_selectors(request_data->output_cssselector, document, found) != | ||
LXB_STATUS_OK) { | ||
return make_fail_parse_response("Failed to find element with CSS selector"); | ||
} else { | ||
if (found.size() > 0) { | ||
std::copy(found.begin(), found.end(), | ||
std::back_inserter(response.body_parts_parsed)); | ||
} | ||
} | ||
} else { | ||
// Return the whole HTML object | ||
response.body_parts_parsed.push_back(parsed_output); | ||
} | ||
|
||
return response; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.