Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bbc parser optimization #38

Merged
merged 20 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
bef1c3d
perf(bbc_parser): don't block with receive when starting bbcode parsers
unenglishable Dec 12, 2024
e4e269f
refactor(bbc_parser): split out parse with proc functionality
unenglishable Dec 12, 2024
6ef431a
feat(bbc_parser): use parse_with_proc for parsing list and list tuple
unenglishable Dec 12, 2024
f27dbfc
refactor(bbc_parser): remove timeout error handling from parse conven…
unenglishable Dec 12, 2024
81b1bf7
feat(post_json): implement format_proxy_posts_for_by_thread
unenglishable Dec 12, 2024
76adcf6
feat(bbc_parser): implement call handler for parsing list tuple
unenglishable Dec 12, 2024
81469e5
feat(bbc_parser): implement list tuple parser batch handler
unenglishable Dec 12, 2024
c3836c9
feat(post_json): use BBCParser.parse_list_tuple on body and signature…
unenglishable Dec 12, 2024
0712421
feat(post_json): zip parsed body/signature data back into posts
unenglishable Dec 12, 2024
43c0102
feat(post_json): add logging to batch processing/zip
unenglishable Dec 12, 2024
e90317d
perf(post_json): use batch processor for formatting proxy posts
unenglishable Dec 12, 2024
4a5e52f
refactor(post_json): remove unused proxy format function
unenglishable Dec 12, 2024
a4991fa
style(post_json,bbc_parser): mix format
unenglishable Dec 12, 2024
885396a
refactor(bbc_parser): update timeouts
unenglishable Dec 12, 2024
80a30e5
fix(bbc_parser): add :timeout atom to genserver tuple timeout return
unenglishable Dec 12, 2024
3b8b579
perf(bbc_parser): reduce genserver tuple call timeout
unenglishable Dec 12, 2024
c921a26
refactor(post_json): log unparsed tuple instead of post on error
unenglishable Dec 12, 2024
cf4bd49
style(bbc_parser): mix format
unenglishable Dec 13, 2024
cfefcd6
refactor(post_json): extract zip posts function
unenglishable Dec 17, 2024
857f275
style(post_json): mix format
unenglishable Dec 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 93 additions & 21 deletions lib/epochtalk_server/bbc_parser.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,15 @@ defmodule EpochtalkServer.BBCParser do
require Logger
alias Porcelain.Process, as: Proc

# poolboy genserver call timeout (ms)
# should be greater than internal porcelain php call
@call_timeout 500
# genserver call timeouts (ms)
@genserver_parse_timeout 5000
@genserver_parse_tuple_timeout 500

# poolboy timeout (ms)
@poolboy_transaction_timeout 200

# porcelain php parser call timeout (ms)
@receive_timeout 400
@receive_timeout 20

@moduledoc """
`BBCParser` genserver, runs interactive php shell to call bbcode parser
Expand All @@ -18,24 +22,70 @@ defmodule EpochtalkServer.BBCParser do
@impl true
def init(:ok), do: {:ok, load()}

# Handles `{pid, :data, :out, data}` messages that reach the genserver loop
# (i.e. were not consumed by a `receive` inside a parse call): the payload is
# logged at debug level and the state is left untouched.
@impl true
def handle_info({_sender, :data, :out, output}, state) do
  Logger.debug(fn -> "#{__MODULE__}(info): #{inspect(output)}" end)
  {:noreply, state}
end

# Empty input short-circuit: replies `{:ok, ""}` without touching the php
# process and keeps the `{proc, pid}` state as-is.
@impl true
def handle_call({:parse, ""}, _from, {_proc, _pid} = state) do
  {:reply, {:ok, ""}, state}
end

def handle_call({:parse, bbcode_data}, _from, {proc, pid}) when is_binary(bbcode_data) do
Proc.send_input(proc, "echo parse_bbc('#{bbcode_data}');\n")
Logger.debug(
"#{__MODULE__}(start parse): #{String.first(bbcode_data)} #{NaiveDateTime.utc_now()}"
)

parsed = parse_with_proc(bbcode_data, {proc, pid})

parsed =
receive do
{^pid, :data, :out, data} -> {:ok, data}
after
# time out after not receiving any data
@receive_timeout -> {:timeout, bbcode_data}
end
Logger.debug(
"#{__MODULE__}(finish parse): #{String.first(bbcode_data)} #{NaiveDateTime.utc_now()}"
)

{:reply, parsed, {proc, pid}}
end

# Parses both lists of a `{left_list, right_list}` tuple within a single call
# and replies `{:ok, {parsed_left, parsed_right}}`; start/finish timestamps are
# logged at debug level.
def handle_call(
      {:parse_list_tuple, {_left_list, _right_list} = list_tuple},
      _from,
      {_proc, _pid} = state
    ) do
  Logger.debug("#{__MODULE__}(start parse list tuple): #{NaiveDateTime.utc_now()}")
  result = parse_list_tuple_with_proc(list_tuple, state)
  Logger.debug("#{__MODULE__}(finish parse list tuple): #{NaiveDateTime.utc_now()}")
  {:reply, {:ok, result}, state}
end

# Parses the left list, then the right list, with the same php process and
# returns the two result lists as a tuple.
defp parse_list_tuple_with_proc({left, right}, {_proc, _pid} = proc_state) do
  # left is fully parsed before right is started
  parsed_left = parse_list_with_proc(left, proc_state)
  parsed_right = parse_list_with_proc(right, proc_state)
  {parsed_left, parsed_right}
end

# Parses every element of the list in order with the given php process,
# returning one `parse_with_proc/2` result per element.
defp parse_list_with_proc(bbcode_data_list, {proc, pid}) do
  for bbcode_data <- bbcode_data_list do
    parse_with_proc(bbcode_data, {proc, pid})
  end
end

# Parses one piece of bbcode with the php process.
#
# `nil` and `""` are returned as `{:ok, nil}` / `{:ok, ""}` without calling php.
# Otherwise the text is sent to the php shell as the argument of `parse_bbc`
# and the first `{pid, :data, :out, data}` message is returned as `{:ok, data}`.
# If nothing arrives within `@receive_timeout` ms, returns
# `{:timeout, unparsed}` where `unparsed` is the original text prefixed with a
# visible timeout notice.
#
# Pass raw (unescaped) text: escaping for the php string literal is done here.
defp parse_with_proc(nil, {_proc, _pid}), do: {:ok, nil}
defp parse_with_proc("", {_proc, _pid}), do: {:ok, ""}

defp parse_with_proc(bbcode_data, {proc, pid}) do
  # the text is embedded in a php single-quoted literal; without escaping, a
  # quote or backslash in the text could end the literal early and run
  # arbitrary php in the interactive shell
  Proc.send_input(proc, "echo parse_bbc('#{escape_php_single_quoted(bbcode_data)}');\n")

  receive do
    {^pid, :data, :out, data} ->
      {:ok, data}
  after
    # time out after not receiving any data
    @receive_timeout ->
      Logger.error("#{__MODULE__}(parse timeout): #{inspect(pid)}, #{inspect(bbcode_data)}")

      # hand back the original (unescaped) text so it can still be displayed
      unparsed =
        "<p style=\"color:red;font-weight:bold\">((bbcode parse timeout))</p></br>" <>
          bbcode_data

      {:timeout, unparsed}
  end
end

# Escapes text for use inside a php single-quoted string literal, where only
# `\\` and `\'` are escape sequences. Backslashes are escaped first so the
# backslashes added for quotes are not doubled.
defp escape_php_single_quoted(text) do
  text
  |> to_string()
  |> String.replace("\\", "\\\\")
  |> String.replace("'", "\\'")
end

## === parser api functions ====

@doc """
Expand All @@ -46,25 +96,50 @@ defmodule EpochtalkServer.BBCParser do
@doc """
Uses poolboy to parse a tuple of two bbcode lists with a single parser worker.

Takes `{left_bbcode_data, right_bbcode_data}`, each a list of bbcode strings
(elements may be `nil`).

Returns the worker's reply, `{:ok, {left, right}}`, where every list element is
`{:ok, parsed}` or `{:timeout, unparsed}`.

If no worker can be checked out in time, or the call to the worker fails,
returns `{:error, {left, right}}` with every input element wrapped as
`{:timeout, unparsed}`.
"""
def parse_list_tuple({left_bbcode_data, right_bbcode_data}) do
  :poolboy.transaction(
    :bbc_parser,
    fn pid ->
      try do
        Logger.debug("#{__MODULE__}(parse): #{inspect(pid)}")

        GenServer.call(
          pid,
          {:parse_list_tuple, {left_bbcode_data, right_bbcode_data}},
          @genserver_parse_tuple_timeout
        )
      catch
        e, r ->
          # something went wrong, log the error
          Logger.error(
            "#{__MODULE__}(parse poolboy): #{inspect(pid)}, #{inspect(e)}, #{inspect(r)}"
          )

          {:error, mark_list_tuple_unparsed(left_bbcode_data, right_bbcode_data)}
      end
    end,
    @poolboy_transaction_timeout
  )
catch
  # the checkout itself exits when no worker frees up within the transaction
  # timeout; that happens outside the fun above, so it has to be caught here
  # to give callers the same fallback instead of crashing them
  kind, reason ->
    Logger.error("#{__MODULE__}(parse poolboy checkout): #{inspect(kind)}, #{inspect(reason)}")

    {:error, mark_list_tuple_unparsed(left_bbcode_data, right_bbcode_data)}
end

# Wraps every element of both lists as `{:timeout, element}`, the shape used
# for data that could not be parsed.
defp mark_list_tuple_unparsed(left_bbcode_data, right_bbcode_data) do
  {
    Enum.map(left_bbcode_data, &{:timeout, &1}),
    Enum.map(right_bbcode_data, &{:timeout, &1})
  }
end

def parse(bbcode_data) do
:poolboy.transaction(
:bbc_parser,
fn pid ->
try do
Logger.debug("#{__MODULE__}(parse): #{inspect(pid)}")

GenServer.call(pid, {:parse, bbcode_data}, @call_timeout)
GenServer.call(pid, {:parse, bbcode_data}, @genserver_parse_timeout)
|> case do
# on success, return parsed data
{:ok, parsed} ->
parsed

# on parse timeout, log and return unparsed data
{:timeout, unparsed} ->
Logger.error("#{__MODULE__}(parse timeout): #{inspect(pid)}, #{inspect(unparsed)}")

"<p style=\"color:red;font-weight:bold\">((bbcode parse timeout))</p></br>" <>
unparsed
unparsed
end
catch
e, r ->
Expand All @@ -76,7 +151,7 @@ defmodule EpochtalkServer.BBCParser do
bbcode_data
end
end,
@call_timeout
@poolboy_transaction_timeout
)
end

Expand All @@ -88,9 +163,6 @@ defmodule EpochtalkServer.BBCParser do
Proc.send_input(proc, "require 'parsing.php';\n")
Logger.debug("#{__MODULE__}(LOAD): #{inspect(pid)}")
# clear initial php interactive shell message
receive do
{^pid, :data, :out, data} -> Logger.debug("#{__MODULE__}: #{inspect(data)}")
end

{proc, pid}
end
Expand Down
91 changes: 72 additions & 19 deletions lib/epochtalk_server_web/json/post_json.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ defmodule EpochtalkServerWeb.Controllers.PostJSON do
alias EpochtalkServerWeb.Controllers.BoardJSON
alias EpochtalkServerWeb.Controllers.ThreadJSON
alias EpochtalkServerWeb.Helpers.ACL
require Logger

@moduledoc """
Renders and formats `Post` data, in JSON format for frontend
Expand Down Expand Up @@ -140,7 +141,7 @@ defmodule EpochtalkServerWeb.Controllers.PostJSON do
# format post data
posts =
posts
|> Enum.map(&format_proxy_post_data_for_by_thread(&1))
|> format_proxy_posts_for_by_thread()

# build by_thread results
%{
Expand Down Expand Up @@ -192,7 +193,7 @@ defmodule EpochtalkServerWeb.Controllers.PostJSON do
when is_list(posts) do
posts =
posts
|> Enum.map(&format_proxy_post_data_for_by_thread(&1))
|> format_proxy_posts_for_by_thread()

%{
posts: posts,
Expand Down Expand Up @@ -444,27 +445,79 @@ defmodule EpochtalkServerWeb.Controllers.PostJSON do
|> Map.delete(:role_name)
end

defp format_proxy_post_data_for_by_thread(post) do
body = String.replace(Map.get(post, :body) || Map.get(post, :body_html), "'", "\'")
defp format_proxy_posts_for_by_thread(posts) do
# extract body/signature lists from posts
{body_list, signature_list} =
posts
|> Enum.reduce({[], []}, fn post, {body_list, signature_list} ->
body = String.replace(Map.get(post, :body) || Map.get(post, :body_html), "'", "\'")

# add space to end if the last character is a backslash (fix for parser)
body_len = String.length(body)
last_char = String.slice(body, (body_len - 1)..body_len)
body = if last_char == "\\", do: body <> " ", else: body

signature =
if Map.get(post.user, :signature),
do: String.replace(post.user.signature, "'", "\'"),
else: nil

# return body/signature lists in reverse order
{[body | body_list], [signature | signature_list]}
end)

# add space to end if the last character is a backslash (fix for parser)
body_len = String.length(body)
last_char = String.slice(body, (body_len - 1)..body_len)
body = if last_char == "\\", do: body <> " ", else: body
# reverse body/signature lists
{body_list, signature_list} = {Enum.reverse(body_list), Enum.reverse(signature_list)}

parsed_body = EpochtalkServer.BBCParser.parse(body)
# parse body/signature lists
{parsed_body_list, parsed_signature_list} =
{body_list, signature_list}
|> EpochtalkServer.BBCParser.parse_list_tuple()
|> case do
{:ok, parsed_tuple} ->
parsed_tuple

signature =
if Map.get(post.user, :signature),
do: String.replace(post.user.signature, "'", "\'"),
else: nil
{:error, unparsed_tuple} ->
Logger.error("#{__MODULE__}(tuple parse): #{inspect(unparsed_tuple)}")
unparsed_tuple
end

parsed_signature =
if signature,
do: EpochtalkServer.BBCParser.parse(signature),
else: nil
zip_posts(posts, parsed_body_list, parsed_signature_list)
end

user = post.user |> Map.put(:signature, parsed_signature)
post |> Map.put(:body_html, parsed_body) |> Map.put(:user, user)
# Merges the parse results back into the posts, position by position: each
# post gets `:body_html` set from its body result and `user.signature` set from
# its signature result. Results are `{:ok, parsed}` or `{:timeout, unparsed}`;
# in both cases the carried value is used.
defp zip_posts(posts, parsed_body_list, parsed_signature_list) do
  [posts, parsed_body_list, parsed_signature_list]
  |> Enum.zip()
  |> Enum.map(fn {post, body_result, signature_result} ->
    body_html = unwrap_parsed_body(body_result, post)
    signature = unwrap_parsed_signature(signature_result, post)
    updated_user = Map.put(post.user, :signature, signature)

    post
    |> Map.put(:body_html, body_html)
    |> Map.put(:user, updated_user)
  end)
end

# Returns the body carried by a parse result, logging the outcome by post id.
defp unwrap_parsed_body(body_result, post) do
  case body_result do
    {:ok, html} ->
      Logger.debug("#{__MODULE__}(body): post_id #{inspect(post.id)}")
      html

    {:timeout, raw_body} ->
      Logger.error("#{__MODULE__}(body timeout): post_id #{inspect(post.id)}")
      raw_body
  end
end

# Returns the signature carried by a parse result, logging the outcome by user id.
defp unwrap_parsed_signature(signature_result, post) do
  case signature_result do
    {:ok, html} ->
      Logger.debug("#{__MODULE__}(signature): user_id #{inspect(post.user.id)}")
      html

    {:timeout, raw_signature} ->
      Logger.error("#{__MODULE__}(signature timeout): user_id #{inspect(post.user.id)}")
      raw_signature
  end
end
end
Loading