Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nchan messages intermittent with redis (AWS Elasticache for redis) #631

Open
Chris-Kozanecki opened this issue Jan 13, 2022 · 2 comments
Open

Comments

@Chris-Kozanecki
Copy link

Chris-Kozanecki commented Jan 13, 2022

Hello,
I think there might be a bug in the code, although I can't confirm it. We are looking to horizontally scale our nginx servers, so we tried to attach Redis. Unfortunately, it seems that messages are either lost or intermittently not understood. When using in-memory storage there are no issues. I'm not sure if this is because of ElastiCache or something else. I'm willing to help troubleshoot.

My Config files look like this:
Server Include:

# Listeners: plain HTTP on port 80 and TLS on port 443.
# The IPv6 listener is left disabled.
listen 80;
#listen [::]:80 ;
listen 443 ssl;

ssl_protocols TLSv1 TLSv1.1 TLSv1.2;
# OpenSSL cipher strings are separated by ':'; the previous value ended in
# "!MD5!SHA1", i.e. the separator between the last two exclusions was missing.
ssl_ciphers DEFAULT:+MEDIUM:!RC4:!3DES:!MD5:!SHA1;

root /var/www/html;

autoindex off;
index index.html index.htm;

# Client certificates are requested but optional (optional_no_ca); the
# verification result is handed to the backend through $ssl_client_verify
# in the /a/<function> location.
ssl_trusted_certificate /etc/nginx/iftapi_client_ca.crt;
ssl_verify_client optional_no_ca;


# /a/<function>: hand the request to the FastCGI backend on the local unix
# socket. The lowercase path segment captured as $1 becomes CGI_FUNCTION.
location ~ ^/a/([a-z]*)$ {
    fastcgi_pass unix:/var/www/sockets/fcgiserv-socket;

    # What is being requested
    fastcgi_param CGI_FUNCTION $1;
    fastcgi_param REQUEST_METHOD $request_method;
    fastcgi_param QUERY_STRING $query_string;

    # Who is requesting it
    fastcgi_param CGI_HTTP_AUTHORIZATION $http_authorization;
    fastcgi_param IP_ADDR $remote_addr;
    fastcgi_param USERAGENT $http_user_agent;

    # TLS client-certificate state
    fastcgi_param CLIENT_CERT_VERIFY $ssl_client_verify;
    fastcgi_param CLIENT_CERT_FINGERPRINT $ssl_client_fingerprint;
}
# /a/<id>/applongpoll: nchan subscriber for channel "ch_s_<id>", stored in the
# primary_redis_cluster upstream. nchan_authorize_request points at
# /a/<id>/appauth.
location ~ ^/a/([A-Z0-9]*)/applongpoll$ {
    nchan_subscriber;
    nchan_channel_id "ch_s_$1";
    nchan_authorize_request "/a/$1/appauth";
    nchan_redis_pass primary_redis_cluster;

    # Message retention: one buffered message, kept for 30s.
    nchan_message_buffer_length 1;
    nchan_message_timeout 30s;

    # Subscriber behaviour
    nchan_subscriber_timeout 57;
    nchan_subscriber_compound_etag_message_id on;
}
# /a/<serial>/<function>: FastCGI call without an auth token in the path.
# $1 is passed as CGI_SERIAL, $2 as CGI_FUNCTION, and CGI_AUTH is the literal
# string "none".
# NOTE(review): the first capture accepts A-Z, a-f and 0-9 (not a-z) —
# presumably intentional; confirm.
location ~ ^/a/([A-Za-f0-9]*)/([a-z]*)$ {
    fastcgi_pass unix:/var/www/sockets/fcgiserv-socket;

    # Values taken from the URL
    fastcgi_param CGI_SERIAL $1;
    fastcgi_param CGI_FUNCTION $2;
    fastcgi_param CGI_AUTH none;

    # Values taken from the request
    fastcgi_param REQUEST_METHOD $request_method;
    fastcgi_param QUERY_STRING $query_string;
    fastcgi_param CGI_HTTP_AUTHORIZATION $http_authorization;
    fastcgi_param IP_ADDR $remote_addr;
    fastcgi_param USERAGENT $http_user_agent;
}
# /a/<id>/<auth>/longpoll: nchan subscriber for channel "ch_<id>" with a 295
# subscriber timeout. The second path segment ($2) is used only to build the
# authorize-request URL /a/<id>/<auth>/auth.
location ~ ^/a/([A-Z0-9]*)/([A-Z0-9]*)/longpoll$ {
    nchan_subscriber;
    nchan_channel_id "ch_$1";
    nchan_authorize_request "/a/$1/$2/auth";
    nchan_redis_pass primary_redis_cluster;

    # Message retention: up to 10 buffered messages, kept for 60s.
    nchan_message_buffer_length 10;
    nchan_message_timeout 60s;

    # Subscriber behaviour
    nchan_subscriber_timeout 295;
    nchan_subscriber_compound_etag_message_id on;
}
# /a/<id>/<auth>/longpoll2: nchan subscriber for channel "ch_<id>" with a
# subscriber timeout of 25; the channel id, message settings and
# authorize-request URL are otherwise written the same way as in the
# .../longpoll location.
location ~ ^/a/([A-Z0-9]*)/([A-Z0-9]*)/longpoll2$ {
    nchan_subscriber;
    nchan_channel_id "ch_$1";
    nchan_authorize_request "/a/$1/$2/auth";
    nchan_redis_pass primary_redis_cluster;

    # Message retention: up to 10 buffered messages, kept for 60s.
    nchan_message_buffer_length 10;
    nchan_message_timeout 60s;

    # Subscriber behaviour
    nchan_subscriber_timeout 25;
    nchan_subscriber_compound_etag_message_id on;
}
# /a/<serial>/<auth>/<function>: FastCGI call with an auth token in the path.
# $1 is passed as CGI_SERIAL, $2 as CGI_AUTH and $3 as CGI_FUNCTION.
# Unlike the other FastCGI locations, CGI_HTTP_AUTHORIZATION is not set here.
location ~ ^/a/([A-Za-f0-9]*)/([A-Za-f0-9]*)/([a-z]*)$ {
    fastcgi_pass unix:/var/www/sockets/fcgiserv-socket;

    # Values taken from the URL
    fastcgi_param CGI_SERIAL $1;
    fastcgi_param CGI_AUTH $2;
    fastcgi_param CGI_FUNCTION $3;

    # Values taken from the request
    fastcgi_param REQUEST_METHOD $request_method;
    fastcgi_param QUERY_STRING $query_string;
    fastcgi_param IP_ADDR $remote_addr;
    fastcgi_param USERAGENT $http_user_agent;
}
# Everything else is served as static content: "expires epoch", us-ascii
# charset, and a 404 when neither the file nor the directory exists.
location / {
    expires epoch;
    charset us-ascii;
    try_files $uri $uri/ =404;

    # Directory listings are switched on for /fw/ only.
    location /fw/ {
        autoindex on;
    }
}

Publisher:

# Publisher endpoints. This server listens only on a local unix socket and
# publishes into the same channel ids ("ch_s_<id>" and "ch_<id>") that the
# subscriber locations read from.
server {
    listen unix:/var/www/sockets/pubsocket;
    server_name pub.iftapi.net;
    root /doesnotexist;

    # Server-level values; every location below sets its own
    # nchan_message_timeout and nchan_message_buffer_length.
    nchan_message_timeout 10s;
    nchan_message_buffer_length 10;

    # Publishes to "ch_s_<id>": one buffered message, kept for 30s.
    location ~ ^/a/([A-Z0-9]*)/applongpost$ {
        nchan_publisher;
        nchan_channel_id "ch_s_$1";
        nchan_redis_pass primary_redis_cluster;
        nchan_message_buffer_length 1;
        nchan_message_timeout 30s;
    }

    #deprecated - delete this one
    # Two-segment form; publishes to "ch_<id>" and ignores the second segment.
    location ~ ^/a/([A-Z0-9]*)/([A-Z0-9]*)/longpost$ {
        nchan_publisher;
        nchan_channel_id "ch_$1";
        nchan_redis_pass primary_redis_cluster;
        nchan_message_buffer_length 10;
        nchan_message_timeout 60s;
    }

    # Publishes to "ch_<id>": up to 10 buffered messages, kept for 60s.
    location ~ ^/a/([A-Z0-9]*)/longpost$ {
        nchan_publisher;
        nchan_channel_id "ch_$1";
        nchan_redis_pass primary_redis_cluster;
        nchan_message_buffer_length 10;
        nchan_message_timeout 60s;
    }
}

nginx.conf:

# Main-context settings: worker user, one worker per CPU core ("auto"),
# pid file location, and the dynamically loaded modules.
user www-data;
worker_processes auto;
pid /run/nginx.pid;
include /etc/nginx/modules-enabled/*.conf;

# Connection limit per worker process.
events {
    worker_connections 768;
}

# HTTP context: shared defaults, the nchan Redis upstream, and the includes
# that pull in the individual virtual hosts.
http {
    # --- Basic settings ---
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    keepalive_timeout 65;
    types_hash_max_size 2048;
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # --- SSL settings ---
    ssl_protocols TLSv1 TLSv1.1 TLSv1.2; # Dropping SSLv3, ref: POODLE
    ssl_prefer_server_ciphers on;

    # --- Logging settings ---
    access_log /var/log/nginx/access.log;
    error_log /var/log/nginx/error.log;

    # --- nchan Redis settings ---
    # Upstream referenced by every nchan_redis_pass directive.
    # REDISURL is a redacted placeholder for the real server URL.
    upstream primary_redis_cluster {
        nchan_redis_server REDISURL;
    }

    # --- Gzip settings ---
    gzip on;

    # --- Virtual host configs ---
    include /etc/nginx/conf.d/*.conf;
    include /etc/nginx/sites-enabled/*;
}

Each website that allows subscribers:

# Per-site virtual host: only the name differs between sites; everything else
# comes from the shared server include file.
server {
    server_name SERVERNAME;
    include /etc/nginx/server_include.conf;
}
@fschaulepp
Copy link

Hello,
we are experiencing a similar issue. We initially used nchan on a single EC2 machine with in-memory storage. For high availability, we are testing nchan on multiple pods in EKS with Redis (AWS ElastiCache v5.0.6) as the backend and an NLB load balancer. Most of the time everything runs smoothly. Occasionally, however, the SSE messages are not received by the subscriber, and we get a 202 response for the publisher's POST request. The SSE connection is created in the checkout process. The problem goes away after the subscriber connects again. I'm willing to help troubleshoot. We are currently using nchan version 1.2.8 and nginx version 1.18.0.

config:

# Redis upstream referenced by the nchan_redis_pass directives in the
# /sub/ and /pub/ locations.
# NOTE(review): ${REDIS_URL} is presumably substituted by a templating step
# before nginx loads this file — confirm.
upstream redis_cluster {
    nchan_redis_server ${REDIS_URL};
}

# Pub/sub virtual host: static files at /, nchan subscriber and publisher
# endpoints backed by the redis_cluster upstream, and an nchan status page.
server {
    listen 80;

    # Listen on the server_name as specified via the ENV
    server_name ${SERVER_NAME};

    root /var/www/phalanx/html;
    index index.html;

    location / {
        try_files $uri $uri/ =404;
    }

    #pubsub
    # The channel id is the last path segment (word characters and '-').
    # NOTE(review): neither regex is anchored with '^', so any URI that ends
    # in /sub/<id> or /pub/<id> matches — confirm this is intended.
    location ~ /sub/((\w+|\-)+)$ {
        nchan_subscriber;
        nchan_channel_id $1;
        nchan_redis_pass redis_cluster;
    }

    location ~ /pub/((\w+|\-)+)$ {
        nchan_publisher;
        nchan_channel_id $1;
        nchan_redis_pass redis_cluster;

        # Message retention: up to 20 buffered messages, kept for 10s.
        nchan_message_buffer_length 20;
        nchan_message_timeout 10s;
    }

    location /nchan_stub_status {
        nchan_stub_status;
    }
}

@slact
Copy link
Owner

slact commented Dec 15, 2022

Please try version 1.3.5, many Redis-related issues were fixed since 1.2.8

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants