Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Small fixes for readable #1

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion main.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,21 @@ int
main(int argc, char *argv[])
{
char *contents = NULL;
FILE *fp = fopen(argv[1], "r");

char *filename;
if (argc > 1) {
filename = argv[1];
} else {
printf("Usage: %s <filename>\n", argv[0]);
return -1;
}

FILE *fp = fopen(filename, "r");
if (!fp) {
printf("Error opening %s\n", filename);
return -1;
}

fseek(fp, 0, SEEK_END);
long len = ftell(fp);
rewind(fp);
Expand Down
46 changes: 26 additions & 20 deletions readable.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,14 @@


KHASH_MAP_INIT_STR(str, int);
#define __pointer_hash(x) (uintptr_t)(x)
#define __pointer_hash(x) (khint_t)(x)
#define __pointer_equal(x, y) (x == y)
KHASH_INIT(score, htmlNodePtr, float, 1, __pointer_hash, __pointer_equal);

#ifdef READABLE_USE_LIBICU
UChar *uastrdup(const char *s)
{
int len = strlen(s);
int len = (int)strlen(s);
UChar *us = malloc(sizeof(UChar) * (len + 1));
u_uastrcpy(us, s);
return us;
Expand Down Expand Up @@ -302,18 +302,20 @@ node_inner_html(htmlDocPtr doc, htmlNodePtr node)
char *html = NULL;
for (htmlNodePtr cur = node->children; cur; cur = cur->next) {
char *cur_html = node_html(doc, cur);
int len = strlen(cur_html);
ssize_t len = (ssize_t)strlen(cur_html);
ssize_t available_size = allocated_size - data_size - 1;
if (len > available_size) {
while (len > available_size) {
allocated_size = MAX(allocated_size * 1.2, 512);
allocated_size = MAX(allocated_size * 1.2f, 512);
available_size = allocated_size - data_size - 1;
}
html = realloc(html, allocated_size);
}
strncpy(html + data_size, cur_html, len);
data_size += len;
html[data_size] = '\0';
if (html) { // This satisfies the static analyzer
html[data_size] = '\0';
}
free(cur_html);
}
return html;
Expand All @@ -337,7 +339,7 @@ node_text_len(htmlNodePtr node)
int len = 0;
char *inner_text = node_inner_text(node);
if (inner_text) {
len = strlen(inner_text);
len = (int)strlen(inner_text);
free(inner_text);
}
return len;
Expand Down Expand Up @@ -425,10 +427,10 @@ float
name_score(const xmlChar *name)
{
float score = 0;
if (matches(POSITIVE_SCORE, name)) {
if (matches(POSITIVE_SCORE, (const char *)name)) {
score += 25;
}
if (matches(NEGATIVE_SCORE, name)) {
if (matches(NEGATIVE_SCORE, (const char *)name)) {
score -= 25;
}
return score;
Expand Down Expand Up @@ -587,7 +589,7 @@ clean_node_conditionally(htmlNodePtr node, kh_score_t *scores,
char *node_text = node_inner_text(node);
if (node_text) {
commas = number_of_commas(node_text);
text_len = strlen(node_text);
text_len = (int)strlen(node_text);
free(node_text);
}
if (commas < 10) {
Expand Down Expand Up @@ -707,8 +709,8 @@ clean_node(htmlDocPtr doc, htmlNodePtr node, kh_score_t *scores, int options,
}
xmlChar *alt = xmlGetProp(node, BAD_CAST "alt");
xmlChar *title = xmlGetProp(node, BAD_CAST "title");
#define xlen(x) (x ? strlen((char *)x) : 0)
int len = xlen(alt) + xlen(title) + strlen((char *)src);
#define xlen(x) (x ? (int)strlen((char *)x) : 0)
int len = xlen(alt) + xlen(title) + (int)strlen((char *)src);
char *test = malloc(len + 1);
strcpy(test, (char *)src);
free(src);
Expand Down Expand Up @@ -797,7 +799,7 @@ clean_node(htmlDocPtr doc, htmlNodePtr node, kh_score_t *scores, int options,
}
}
}
if (node->name[0] == 'p') {
if (node->name && node->name[0] == 'p') {
int nospaces = node_nospaces_len(node);
if (!nospaces) {
kh_str_t *tags = node_tags_count(node);
Expand Down Expand Up @@ -885,7 +887,7 @@ search_article_image(htmlNodePtr node, htmlNodePtr prev)
xmlChar *width = xmlGetProp(image, BAD_CAST "width");
xmlChar *height = xmlGetProp(image, BAD_CAST "height");

if (matches(UNLIKELY_ARTICLE_IMAGE, src)) {
if (matches(UNLIKELY_ARTICLE_IMAGE, (const char *)src)) {
score -= 20;
}
char *dot = strrchr((char *)src, '.');
Expand Down Expand Up @@ -1097,8 +1099,8 @@ style_px_dimensions(xmlChar *style, int *width, int *height)
if (*wp && *hp) {
char *wep = NULL;
char *hep = NULL;
*width = strtol(wp, &wep, 10);
*height = strtol(hp, &hep, 10);
*width = (int)strtol(wp, &wep, 10);
*height = (int)strtol(hp, &hep, 10);
if (wep && hep && *wep == 'p' && *hep == 'p' &&
*width && *height) {

Expand Down Expand Up @@ -1220,7 +1222,7 @@ readable(const char *html, const char *url, const char *encoding, int options)
if (!inner_text) {
continue;
}
int text_length = strlen(inner_text);
int text_length = (int)strlen(inner_text);
if (text_length < 25) {
free(inner_text);
continue;
Expand All @@ -1238,7 +1240,7 @@ readable(const char *html, const char *url, const char *encoding, int options)
grand_parent_score = initialize_node_score(scores, grand_parent, options);
candidates = rd_list_append(candidates, grand_parent);
/* Look up the parent score again, since the hash
table might have grown and rehased, invalidating
table might have grown and rehashed, invalidating
the pointer
*/
parent_score = lookup_score_ptr(scores, parent);
Expand Down Expand Up @@ -1296,6 +1298,10 @@ readable(const char *html, const char *url, const char *encoding, int options)
kh_value(scores, iter) = 0;
top_candidate_score = &(kh_value(scores, iter));
}
if (!top_candidate) {
xmlFreeDoc(doc);
return NULL;
}
#ifdef READABLE_DEBUG
char *debug_name = node_test_name(top_candidate);
DEBUG_LOG("Top candidate %s with score %f\n", debug_name, *top_candidate_score);
Expand All @@ -1322,7 +1328,7 @@ readable(const char *html, const char *url, const char *encoding, int options)
xmlChar *top_candidate_class = xmlGetProp(top_candidate, BAD_CAST "class");
DEBUG_LOG("Threshold %f\n", threshold);
/* Insert nodes in the article */
htmlNodePtr start = top_candidate->parent ? : top_candidate;
htmlNodePtr start = top_candidate->parent ? top_candidate->parent : top_candidate;
htmlNodePtr next;
for (htmlNodePtr cur = start->children; cur; cur = next) {
next = cur->next;
Expand Down Expand Up @@ -1355,7 +1361,7 @@ readable(const char *html, const char *url, const char *encoding, int options)
if (xmlStrEqual(cur->name, BAD_CAST "p")) {
float link_density = node_link_density(cur);
char *inner_text = node_inner_text(cur);
int text_len = inner_text ? strlen(inner_text) : 0;
int text_len = inner_text ? (int)strlen(inner_text) : 0;

if (text_len > 80 && link_density < 0.25) {
#ifdef READABLE_DEBUG
Expand Down Expand Up @@ -1487,7 +1493,7 @@ find_next_link(htmlDocPtr doc, xmlNodePtr node, const char *url)
return NULL;
}

if (xmlStrstr(href, BAD_CAST "http://") == href ||
if (xmlStrstr(BAD_CAST href, BAD_CAST "http://") == href ||
xmlStrstr(BAD_CAST url, BAD_CAST "https://") == href) {

return href;
Expand Down