-
Notifications
You must be signed in to change notification settings - Fork 538
/
Copy pathclient.go
1241 lines (1147 loc) · 46.8 KB
/
client.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright (c) 2021 IBM Corp and others.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v2.0
* and Eclipse Distribution License v1.0 which accompany this distribution.
*
* The Eclipse Public License is available at
* https://www.eclipse.org/legal/epl-2.0/
* and the Eclipse Distribution License is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* Contributors:
* Seth Hoenig
* Allan Stockdill-Mander
* Mike Robertson
* Matt Brittan
*/
// Portions copyright © 2018 TIBCO Software Inc.
// Package mqtt provides an MQTT v3.1.1 client library.
package mqtt
import (
"bytes"
"context"
"errors"
"fmt"
"net"
"strings"
"sync"
"sync/atomic"
"time"
"golang.org/x/sync/semaphore"
"github.com/eclipse/paho.mqtt.golang/packets"
)
// Client is the interface definition for a Client as used by this
// library, the interface is primarily to allow mocking tests.
//
// It is an MQTT v3.1.1 client for communicating
// with an MQTT server using non-blocking methods that allow work
// to be done in the background.
// An application may connect to an MQTT server using:
//
// A plain TCP socket (e.g. mqtt://test.mosquitto.org:1833)
// A secure SSL/TLS socket (e.g. tls://test.mosquitto.org:8883)
// A websocket (e.g ws://test.mosquitto.org:8080 or wss://test.mosquitto.org:8081)
// Something else (using `options.CustomOpenConnectionFn`)
//
// To enable ensured message delivery at Quality of Service (QoS) levels
// described in the MQTT spec, a message persistence mechanism must be
// used. This is done by providing a type which implements the Store
// interface. For convenience, FileStore and MemoryStore are provided
// implementations that should be sufficient for most use cases. More
// information can be found in their respective documentation.
// Numerous connection options may be specified by configuring a
// and then supplying a ClientOptions type.
// Implementations of Client must be safe for concurrent use by multiple
// goroutines
type Client interface {
// IsConnected returns a bool signifying whether
// the client is connected or not.
IsConnected() bool
// IsConnectionOpen return a bool signifying whether the client has an active
// connection to mqtt broker, i.e not in disconnected or reconnect mode
IsConnectionOpen() bool
// Connect will create a connection to the message broker, by default
// it will attempt to connect at v3.1.1 and auto retry at v3.1 if that
// fails
Connect() Token
// Disconnect will end the connection with the server, but not before waiting
// the specified number of milliseconds to wait for existing work to be
// completed.
Disconnect(quiesce uint)
// Publish will publish a message with the specified QoS and content
// to the specified topic.
// Returns a token to track delivery of the message to the broker
Publish(topic string, qos byte, retained bool, payload interface{}) Token
// Subscribe starts a new subscription. Provide a MessageHandler to be executed when
// a message is published on the topic provided, or nil for the default handler.
//
// If options.OrderMatters is true (the default) then callback must not block or
// call functions within this package that may block (e.g. Publish) other than in
// a new go routine.
// callback must be safe for concurrent use by multiple goroutines.
Subscribe(topic string, qos byte, callback MessageHandler) Token
// SubscribeMultiple starts a new subscription for multiple topics. Provide a MessageHandler to
// be executed when a message is published on one of the topics provided, or nil for the
// default handler.
//
// If options.OrderMatters is true (the default) then callback must not block or
// call functions within this package that may block (e.g. Publish) other than in
// a new go routine.
// callback must be safe for concurrent use by multiple goroutines.
SubscribeMultiple(filters map[string]byte, callback MessageHandler) Token
// Unsubscribe will end the subscription from each of the topics provided.
// Messages published to those topics from other clients will no longer be
// received.
Unsubscribe(topics ...string) Token
// AddRoute allows you to add a handler for messages on a specific topic
// without making a subscription. For example having a different handler
// for parts of a wildcard subscription or for receiving retained messages
// upon connection (before Sub scribe can be processed).
//
// If options.OrderMatters is true (the default) then callback must not block or
// call functions within this package that may block (e.g. Publish) other than in
// a new go routine.
// callback must be safe for concurrent use by multiple goroutines.
AddRoute(topic string, callback MessageHandler)
// OptionsReader returns a ClientOptionsReader which is a copy of the clientoptions
// in use by the client.
OptionsReader() ClientOptionsReader
}
// client implements the Client interface
// clients are safe for concurrent use by multiple
// goroutines
type client struct {
lastSent atomic.Value // time.Time - the last time a packet was successfully sent to network
lastReceived atomic.Value // time.Time - the last time a packet was successfully received from network
pingOutstanding int32 // set to 1 if a ping has been sent but response not ret received
status connectionStatus // see constants in status.go for values
messageIds // effectively a map from message id to token completor
obound chan *PacketAndToken // outgoing publish packet
oboundP chan *PacketAndToken // outgoing 'priority' packet (anything other than publish)
msgRouter *router // routes topics to handlers
persist Store
options ClientOptions
optionsMu sync.Mutex // Protects the options in a few limited cases where needed for testing
conn net.Conn // the network connection, must only be set with connMu locked (only used when starting/stopping workers)
connMu sync.Mutex // mutex for the connection (again only used in two functions)
stop chan struct{} // Closed to request that workers stop
workers sync.WaitGroup // used to wait for workers to complete (ping, keepalive, errwatch, resume)
commsStopped chan struct{} // closed when the comms routines have stopped (kept running until after workers have closed to avoid deadlocks)
backoff *backoffController
}
// NewClient will create an MQTT v3.1.1 client with all of the options specified
// in the provided ClientOptions. The client must have the Connect method called
// on it before it may be used. This is to make sure resources (such as a net
// connection) are created before the application is actually ready.
func NewClient(o *ClientOptions) Client {
c := &client{}
c.options = *o
if c.options.Store == nil {
c.options.Store = NewMemoryStore()
}
switch c.options.ProtocolVersion {
case 3, 4:
c.options.protocolVersionExplicit = true
case 0x83, 0x84:
c.options.protocolVersionExplicit = true
default:
c.options.ProtocolVersion = 4
c.options.protocolVersionExplicit = false
}
c.persist = c.options.Store
c.messageIds = messageIds{index: make(map[uint16]tokenCompletor)}
c.msgRouter = newRouter()
c.msgRouter.setDefaultHandler(c.options.DefaultPublishHandler)
c.obound = make(chan *PacketAndToken)
c.oboundP = make(chan *PacketAndToken)
c.backoff = newBackoffController()
return c
}
// AddRoute allows you to add a handler for messages on a specific topic
// without making a subscription. For example having a different handler
// for parts of a wildcard subscription
//
// If options.OrderMatters is true (the default) then callback must not block or
// call functions within this package that may block (e.g. Publish) other than in
// a new go routine.
// callback must be safe for concurrent use by multiple goroutines.
func (c *client) AddRoute(topic string, callback MessageHandler) {
if callback != nil {
c.msgRouter.addRoute(topic, callback)
}
}
// IsConnected returns a bool signifying whether
// the client is connected or not.
// connected means that the connection is up now OR it will
// be established/reestablished automatically when possible
// Warning: The connection status may change at any time so use this with care!
func (c *client) IsConnected() bool {
// This will need to change if additional statuses are added
s, r := c.status.ConnectionStatusRetry()
switch {
case s == connected:
return true
case c.options.ConnectRetry && s == connecting:
return true
case c.options.AutoReconnect:
return s == reconnecting || (s == disconnecting && r) // r indicates we will reconnect
default:
return false
}
}
// IsConnectionOpen return a bool signifying whether the client has an active
// connection to mqtt broker, i.e. not in disconnected or reconnect mode
// Warning: The connection status may change at any time so use this with care!
func (c *client) IsConnectionOpen() bool {
return c.status.ConnectionStatus() == connected
}
// ErrNotConnected is the error returned from function calls that are
// made when the client is not connected to a broker
var ErrNotConnected = errors.New("not Connected")
// Connect will create a connection to the message broker, by default
// it will attempt to connect at v3.1.1 and auto retry at v3.1 if that
// fails
// Note: If using QOS1+ and CleanSession=false it is advisable to add
// routes (or a DefaultPublishHandler) prior to calling Connect()
// because queued messages may be delivered immediately post connection
func (c *client) Connect() Token {
t := newToken(packets.Connect).(*ConnectToken)
DEBUG.Println(CLI, "Connect()")
connectionUp, err := c.status.Connecting()
if err != nil {
if err == errAlreadyConnectedOrReconnecting && c.options.AutoReconnect {
// When reconnection is active we don't consider calls tro Connect to ba an error (mainly for compatability)
WARN.Println(CLI, "Connect() called but not disconnected")
t.returnCode = packets.Accepted
t.flowComplete()
return t
}
ERROR.Println(CLI, err) // CONNECT should never be called unless we are disconnected
t.setError(err)
return t
}
c.persist.Open()
if c.options.ConnectRetry {
c.reserveStoredPublishIDs() // Reserve IDs to allow publishing before connect complete
}
go func() {
if len(c.options.Servers) == 0 {
t.setError(fmt.Errorf("no servers defined to connect to"))
if err := connectionUp(false); err != nil {
ERROR.Println(CLI, err.Error())
}
return
}
RETRYCONN:
var conn net.Conn
var rc byte
var err error
conn, rc, t.sessionPresent, err = c.attemptConnection()
if err != nil {
if c.options.ConnectRetry {
DEBUG.Println(CLI, "Connect failed, sleeping for", int(c.options.ConnectRetryInterval.Seconds()), "seconds and will then retry, error:", err.Error())
time.Sleep(c.options.ConnectRetryInterval)
if c.status.ConnectionStatus() == connecting { // Possible connection aborted elsewhere
goto RETRYCONN
}
}
ERROR.Println(CLI, "Failed to connect to a broker")
c.persist.Close()
t.returnCode = rc
t.setError(err)
if err := connectionUp(false); err != nil {
ERROR.Println(CLI, err.Error())
}
return
}
inboundFromStore := make(chan packets.ControlPacket) // there may be some inbound comms packets in the store that are awaiting processing
if c.startCommsWorkers(conn, connectionUp, inboundFromStore) { // note that this takes care of updating the status (to connected or disconnected)
// Take care of any messages in the store
if !c.options.CleanSession {
c.resume(c.options.ResumeSubs, inboundFromStore)
} else {
c.persist.Reset()
}
} else { // Note: With the new status subsystem this should only happen if Disconnect called simultaneously with the above
WARN.Println(CLI, "Connect() called but connection established in another goroutine")
}
close(inboundFromStore)
t.flowComplete()
DEBUG.Println(CLI, "exit startClient")
}()
return t
}
// internal function used to reconnect the client when it loses its connection
// The connection status MUST be reconnecting prior to calling this function (via call to status.connectionLost)
func (c *client) reconnect(connectionUp connCompletedFn) {
DEBUG.Println(CLI, "enter reconnect")
var (
initSleep = 1 * time.Second
conn net.Conn
)
// If the reason of connection lost is same as the before one, sleep timer is set before attempting connection is started.
// Sleep time is exponentially increased as the same situation continues
if slp, isContinual := c.backoff.sleepWithBackoff("connectionLost", initSleep, c.options.MaxReconnectInterval, 3*time.Second, true); isContinual {
DEBUG.Println(CLI, "Detect continual connection lost after reconnect, slept for", int(slp.Seconds()), "seconds")
}
for {
if nil != c.options.OnReconnecting {
c.options.OnReconnecting(c, &c.options)
}
var err error
conn, _, _, err = c.attemptConnection()
if err == nil {
break
}
sleep, _ := c.backoff.sleepWithBackoff("attemptReconnection", initSleep, c.options.MaxReconnectInterval, c.options.ConnectTimeout, false)
DEBUG.Println(CLI, "Reconnect failed, slept for", int(sleep.Seconds()), "seconds:", err)
if c.status.ConnectionStatus() != reconnecting { // Disconnect may have been called
if err := connectionUp(false); err != nil { // Should always return an error
ERROR.Println(CLI, err.Error())
}
DEBUG.Println(CLI, "Client moved to disconnected state while reconnecting, abandoning reconnect")
return
}
}
inboundFromStore := make(chan packets.ControlPacket) // there may be some inbound comms packets in the store that are awaiting processing
if c.startCommsWorkers(conn, connectionUp, inboundFromStore) { // note that this takes care of updating the status (to connected or disconnected)
c.resume(c.options.ResumeSubs, inboundFromStore)
}
close(inboundFromStore)
}
// attemptConnection makes a single attempt to connect to each of the brokers
// the protocol version to use is passed in (as c.options.ProtocolVersion)
// Note: Does not set c.conn in order to minimise race conditions
// Returns:
// net.Conn - Connected network connection
// byte - Return code (packets.Accepted indicates a successful connection).
// bool - SessionPresent flag from the connect ack (only valid if packets.Accepted)
// err - Error (err != nil guarantees that conn has been set to active connection).
func (c *client) attemptConnection() (net.Conn, byte, bool, error) {
protocolVersion := c.options.ProtocolVersion
var (
sessionPresent bool
conn net.Conn
err error
rc byte
)
c.optionsMu.Lock() // Protect c.options.Servers so that servers can be added in test cases
brokers := c.options.Servers
c.optionsMu.Unlock()
for _, broker := range brokers {
cm := newConnectMsgFromOptions(&c.options, broker)
DEBUG.Println(CLI, "about to write new connect msg")
CONN:
tlsCfg := c.options.TLSConfig
if c.options.OnConnectAttempt != nil {
DEBUG.Println(CLI, "using custom onConnectAttempt handler...")
tlsCfg = c.options.OnConnectAttempt(broker, c.options.TLSConfig)
}
connDeadline := time.Now().Add(c.options.ConnectTimeout) // Time by which connection must be established
dialer := c.options.Dialer
if dialer == nil { //
WARN.Println(CLI, "dialer was nil, using default")
dialer = &net.Dialer{Timeout: 30 * time.Second}
}
// Start by opening the network connection (tcp, tls, ws) etc
if c.options.CustomOpenConnectionFn != nil {
conn, err = c.options.CustomOpenConnectionFn(broker, c.options)
} else {
conn, err = openConnection(broker, tlsCfg, c.options.ConnectTimeout, c.options.HTTPHeaders, c.options.WebsocketOptions, dialer)
}
if err != nil {
ERROR.Println(CLI, err.Error())
WARN.Println(CLI, "failed to connect to broker, trying next")
rc = packets.ErrNetworkError
continue
}
DEBUG.Println(CLI, "socket connected to broker")
// Now we perform the MQTT connection handshake ensuring that it does not exceed the timeout
if err := conn.SetDeadline(connDeadline); err != nil {
ERROR.Println(CLI, "set deadline for handshake ", err)
}
// Now we perform the MQTT connection handshake
rc, sessionPresent, err = connectMQTT(conn, cm, protocolVersion)
if rc == packets.Accepted {
if err := conn.SetDeadline(time.Time{}); err != nil {
ERROR.Println(CLI, "reset deadline following handshake ", err)
}
break // successfully connected
}
// We may have to attempt the connection with MQTT 3.1
_ = conn.Close()
if !c.options.protocolVersionExplicit && protocolVersion == 4 { // try falling back to 3.1?
DEBUG.Println(CLI, "Trying reconnect using MQTT 3.1 protocol")
protocolVersion = 3
goto CONN
}
if c.options.protocolVersionExplicit { // to maintain logging from previous version
ERROR.Println(CLI, "Connecting to", broker, "CONNACK was not CONN_ACCEPTED, but rather", packets.ConnackReturnCodes[rc])
}
}
// If the connection was successful we set member variable and lock in the protocol version for future connection attempts (and users)
if rc == packets.Accepted {
c.options.ProtocolVersion = protocolVersion
c.options.protocolVersionExplicit = true
} else {
// Maintain same error format as used previously
if rc != packets.ErrNetworkError { // mqtt error
err = packets.ConnErrors[rc]
} else { // network error (if this occurred in ConnectMQTT then err will be nil)
err = fmt.Errorf("%w : %w", packets.ConnErrors[rc], err)
}
}
return conn, rc, sessionPresent, err
}
// Disconnect will end the connection with the server, but not before waiting
// the specified number of milliseconds to wait for existing work to be
// completed.
// WARNING: `Disconnect` may return before all activities (goroutines) have completed. This means that
// reusing the `client` may lead to panics. If you want to reconnect when the connection drops then use
// `SetAutoReconnect` and/or `SetConnectRetry`options instead of implementing this yourself.
func (c *client) Disconnect(quiesce uint) {
done := make(chan struct{}) // Simplest way to ensure quiesce is always honoured
go func() {
defer close(done)
disDone, err := c.status.Disconnecting()
if err != nil {
// Status has been set to disconnecting, but we had to wait for something else to complete
WARN.Println(CLI, err.Error())
return
}
defer func() {
c.disconnect() // Force disconnection
disDone() // Update status
}()
DEBUG.Println(CLI, "disconnecting")
dm := packets.NewControlPacket(packets.Disconnect).(*packets.DisconnectPacket)
dt := newToken(packets.Disconnect)
select {
case c.oboundP <- &PacketAndToken{p: dm, t: dt}:
// wait for work to finish, or quiesce time consumed
DEBUG.Println(CLI, "calling WaitTimeout")
dt.WaitTimeout(time.Duration(quiesce) * time.Millisecond)
DEBUG.Println(CLI, "WaitTimeout done")
// Below code causes a potential data race. Following status refactor it should no longer be required
// but leaving in as need to check code further.
// case <-c.commsStopped:
// WARN.Println("Disconnect packet could not be sent because comms stopped")
case <-time.After(time.Duration(quiesce) * time.Millisecond):
WARN.Println("Disconnect packet not sent due to timeout")
}
}()
// Return when done or after timeout expires (would like to change but this maintains compatibility)
delay := time.NewTimer(time.Duration(quiesce) * time.Millisecond)
select {
case <-done:
if !delay.Stop() {
<-delay.C
}
case <-delay.C:
}
}
// forceDisconnect will end the connection with the mqtt broker immediately (used for tests only)
func (c *client) forceDisconnect() {
disDone, err := c.status.Disconnecting()
if err != nil {
// Possible that we are not actually connected
WARN.Println(CLI, err.Error())
return
}
DEBUG.Println(CLI, "forcefully disconnecting")
c.disconnect()
disDone()
}
// disconnect cleans up after a final disconnection (user requested so no auto reconnection)
func (c *client) disconnect() {
done := c.stopCommsWorkers()
if done != nil {
<-done // Wait until the disconnect is complete (to limit chance that another connection will be started)
DEBUG.Println(CLI, "forcefully disconnecting")
c.messageIds.cleanUp()
DEBUG.Println(CLI, "disconnected")
c.persist.Close()
}
}
// internalConnLost cleanup when connection is lost or an error occurs
// Note: This function will not block
func (c *client) internalConnLost(whyConnLost error) {
// It is possible that internalConnLost will be called multiple times simultaneously
// (including after sending a DisconnectPacket) as such we only do cleanup etc if the
// routines were actually running and are not being disconnected at users request
DEBUG.Println(CLI, "internalConnLost called")
disDone, err := c.status.ConnectionLost(c.options.AutoReconnect && c.status.ConnectionStatus() > connecting)
if err != nil {
if err == errConnLossWhileDisconnecting || err == errAlreadyHandlingConnectionLoss {
return // Loss of connection is expected or already being handled
}
ERROR.Println(CLI, fmt.Sprintf("internalConnLost unexpected status: %s", err.Error()))
return
}
// c.stopCommsWorker returns a channel that is closed when the operation completes. This was required prior
// to the implementation of proper status management but has been left in place, for now, to minimise change
stopDone := c.stopCommsWorkers()
// stopDone was required in previous versions because there was no connectionLost status (and there were
// issues with status handling). This code has been left in place for the time being just in case the new
// status handling contains bugs (refactoring required at some point).
if stopDone == nil { // stopDone will be nil if workers already in the process of stopping or stopped
ERROR.Println(CLI, "internalConnLost stopDone unexpectedly nil - BUG BUG")
// Cannot really do anything other than leave things disconnected
if _, err = disDone(false); err != nil { // Safest option - cannot leave status as connectionLost
ERROR.Println(CLI, fmt.Sprintf("internalConnLost failed to set status to disconnected (stopDone): %s", err.Error()))
}
return
}
// It may take a while for the disconnection to complete whatever called us needs to exit cleanly so finnish in goRoutine
go func() {
DEBUG.Println(CLI, "internalConnLost waiting on workers")
<-stopDone
DEBUG.Println(CLI, "internalConnLost workers stopped")
reConnDone, err := disDone(true)
if err != nil {
ERROR.Println(CLI, "failure whilst reporting completion of disconnect", err)
} else if reConnDone == nil { // Should never happen
ERROR.Println(CLI, "BUG BUG BUG reconnection function is nil", err)
}
reconnect := err == nil && reConnDone != nil
if c.options.CleanSession && !reconnect {
c.messageIds.cleanUp() // completes PUB/SUB/UNSUB tokens
} else if !c.options.ResumeSubs {
c.messageIds.cleanUpSubscribe() // completes SUB/UNSUB tokens
}
if reconnect {
go c.reconnect(reConnDone) // Will set connection status to reconnecting
}
if c.options.OnConnectionLost != nil {
go c.options.OnConnectionLost(c, whyConnLost)
}
DEBUG.Println(CLI, "internalConnLost complete")
}()
}
// startCommsWorkers is called when the connection is up.
// It starts off the routines needed to process incoming and outgoing messages.
// Returns true if the comms workers were started (i.e. successful connection)
// connectionUp(true) will be called once everything is up; connectionUp(false) will be called on failure
func (c *client) startCommsWorkers(conn net.Conn, connectionUp connCompletedFn, inboundFromStore <-chan packets.ControlPacket) bool {
DEBUG.Println(CLI, "startCommsWorkers called")
c.connMu.Lock()
defer c.connMu.Unlock()
if c.conn != nil { // Should never happen due to new status handling; leaving in for safety for the time being
WARN.Println(CLI, "startCommsWorkers called when commsworkers already running BUG BUG")
_ = conn.Close() // No use for the new network connection
if err := connectionUp(false); err != nil {
ERROR.Println(CLI, err.Error())
}
return false
}
c.conn = conn // Store the connection
c.stop = make(chan struct{})
if c.options.KeepAlive != 0 {
atomic.StoreInt32(&c.pingOutstanding, 0)
c.lastReceived.Store(time.Now())
c.lastSent.Store(time.Now())
c.workers.Add(1)
go keepalive(c, conn)
}
// matchAndDispatch will process messages received from the network. It may generate acknowledgements
// It will complete when incomingPubChan is closed and will close ackOut prior to exiting
incomingPubChan := make(chan *packets.PublishPacket)
c.workers.Add(1) // Done will be called when ackOut is closed
ackOut := c.msgRouter.matchAndDispatch(incomingPubChan, c.options.Order, c)
// The connection is now ready for use (we spin up a few go routines below).
// It is possible that Disconnect has been called in the interim...
// issue 675:we will allow the connection to complete before the Disconnect is allowed to proceed
// as if a Disconnect event occurred immediately after connectionUp(true) completed.
if err := connectionUp(true); err != nil {
ERROR.Println(CLI, err)
}
DEBUG.Println(CLI, "client is connected/reconnected")
if c.options.OnConnect != nil {
go c.options.OnConnect(c)
}
// c.oboundP and c.obound need to stay active for the life of the client because, depending upon the options,
// messages may be published while the client is disconnected (they will block unless in a goroutine). However
// to keep the comms routines clean we want to shutdown the input messages it uses so create out own channels
// and copy data across.
commsobound := make(chan *PacketAndToken) // outgoing publish packets
commsoboundP := make(chan *PacketAndToken) // outgoing 'priority' packet
c.workers.Add(1)
go func() {
defer c.workers.Done()
for {
select {
case msg := <-c.oboundP:
commsoboundP <- msg
case msg := <-c.obound:
commsobound <- msg
case msg, ok := <-ackOut:
if !ok {
ackOut = nil // ignore channel going forward
c.workers.Done() // matchAndDispatch has completed
continue // await next message
}
commsoboundP <- msg
case <-c.stop:
// Attempt to transmit any outstanding acknowledgements (this may well fail but should work if this is a clean disconnect)
if ackOut != nil {
for msg := range ackOut {
commsoboundP <- msg
}
c.workers.Done() // matchAndDispatch has completed
}
close(commsoboundP) // Nothing sending to these channels anymore so close them and allow comms routines to exit
close(commsobound)
DEBUG.Println(CLI, "startCommsWorkers output redirector finished")
return
}
}
}()
commsIncomingPub, commsErrors := startComms(c.conn, c, inboundFromStore, commsoboundP, commsobound)
c.commsStopped = make(chan struct{})
go func() {
for {
if commsIncomingPub == nil && commsErrors == nil {
break
}
select {
case pub, ok := <-commsIncomingPub:
if !ok {
// Incoming comms has shutdown
close(incomingPubChan) // stop the router
commsIncomingPub = nil
continue
}
// Care is needed here because an error elsewhere could trigger a deadlock
sendPubLoop:
for {
select {
case incomingPubChan <- pub:
break sendPubLoop
case err, ok := <-commsErrors:
if !ok { // commsErrors has been closed so we can ignore it
commsErrors = nil
continue
}
ERROR.Println(CLI, "Connect comms goroutine - error triggered during send Pub", err)
c.internalConnLost(err) // no harm in calling this if the connection is already down (or shutdown is in progress)
continue
}
}
case err, ok := <-commsErrors:
if !ok {
commsErrors = nil
continue
}
ERROR.Println(CLI, "Connect comms goroutine - error triggered", err)
c.internalConnLost(err) // no harm in calling this if the connection is already down (or shutdown is in progress)
continue
}
}
DEBUG.Println(CLI, "incoming comms goroutine done")
close(c.commsStopped)
}()
DEBUG.Println(CLI, "startCommsWorkers done")
return true
}
// stopWorkersAndComms - Cleanly shuts down worker go routines (including the comms routines) and waits until everything has stopped
// Returns nil if workers did not need to be stopped; otherwise returns a channel which will be closed when the stop is complete
// Note: This may block so run as a go routine if calling from any of the comms routines
// Note2: It should be possible to simplify this now that the new status management code is in place.
func (c *client) stopCommsWorkers() chan struct{} {
DEBUG.Println(CLI, "stopCommsWorkers called")
// It is possible that this function will be called multiple times simultaneously due to the way things get shutdown
c.connMu.Lock()
if c.conn == nil {
DEBUG.Println(CLI, "stopCommsWorkers done (not running)")
c.connMu.Unlock()
return nil
}
// It is important that everything is stopped in the correct order to avoid deadlocks. The main issue here is
// the router because it both receives incoming publish messages and also sends outgoing acknowledgements. To
// avoid issues we signal the workers to stop and close the connection (it is probably already closed but
// there is no harm in being sure). We can then wait for the workers to finnish before closing outbound comms
// channels which will allow the comms routines to exit.
// We stop all non-comms related workers first (ping, keepalive, errwatch, resume etc) so they don't get blocked waiting on comms
close(c.stop) // Signal for workers to stop
c.conn.Close() // Possible that this is already closed but no harm in closing again
c.conn = nil // Important that this is the only place that this is set to nil
c.connMu.Unlock() // As the connection is now nil we can unlock the mu (allowing subsequent calls to exit immediately)
doneChan := make(chan struct{})
go func() {
DEBUG.Println(CLI, "stopCommsWorkers waiting for workers")
c.workers.Wait()
// Stopping the workers will allow the comms routines to exit; we wait for these to complete
DEBUG.Println(CLI, "stopCommsWorkers waiting for comms")
<-c.commsStopped // wait for comms routine to stop
DEBUG.Println(CLI, "stopCommsWorkers done")
close(doneChan)
}()
return doneChan
}
// Publish will publish a message with the specified QoS and content
// to the specified topic.
// Returns a token to track delivery of the message to the broker
func (c *client) Publish(topic string, qos byte, retained bool, payload interface{}) Token {
token := newToken(packets.Publish).(*PublishToken)
DEBUG.Println(CLI, "enter Publish")
switch {
case !c.IsConnected():
token.setError(ErrNotConnected)
return token
case c.status.ConnectionStatus() == reconnecting && qos == 0:
// message written to store and will be sent when connection comes up
token.flowComplete()
return token
}
pub := packets.NewControlPacket(packets.Publish).(*packets.PublishPacket)
pub.Qos = qos
pub.TopicName = topic
pub.Retain = retained
switch p := payload.(type) {
case string:
pub.Payload = []byte(p)
case []byte:
pub.Payload = p
case bytes.Buffer:
pub.Payload = p.Bytes()
default:
token.setError(fmt.Errorf("unknown payload type"))
return token
}
if pub.Qos != 0 && pub.MessageID == 0 {
mID := c.getID(token)
if mID == 0 {
token.setError(fmt.Errorf("no message IDs available"))
return token
}
pub.MessageID = mID
token.messageID = mID
}
persistOutbound(c.persist, pub)
switch c.status.ConnectionStatus() {
case connecting:
DEBUG.Println(CLI, "storing publish message (connecting), topic:", topic)
case reconnecting:
DEBUG.Println(CLI, "storing publish message (reconnecting), topic:", topic)
case disconnecting:
DEBUG.Println(CLI, "storing publish message (disconnecting), topic:", topic)
default:
DEBUG.Println(CLI, "sending publish message, topic:", topic)
publishWaitTimeout := c.options.WriteTimeout
if publishWaitTimeout == 0 {
publishWaitTimeout = time.Second * 30
}
t := time.NewTimer(publishWaitTimeout)
defer t.Stop()
select {
case c.obound <- &PacketAndToken{p: pub, t: token}:
case <-t.C:
token.setError(errors.New("publish was broken by timeout"))
}
}
return token
}
// Subscribe starts a new subscription. Provide a MessageHandler to be executed when
// a message is published on the topic provided.
//
// If options.OrderMatters is true (the default) then callback must not block or
// call functions within this package that may block (e.g. Publish) other than in
// a new go routine.
// callback must be safe for concurrent use by multiple goroutines.
func (c *client) Subscribe(topic string, qos byte, callback MessageHandler) Token {
token := newToken(packets.Subscribe).(*SubscribeToken)
DEBUG.Println(CLI, "enter Subscribe")
if !c.IsConnected() {
token.setError(ErrNotConnected)
return token
}
if !c.IsConnectionOpen() {
switch {
case !c.options.ResumeSubs:
// if not connected and resumeSubs not set this sub will be thrown away
token.setError(fmt.Errorf("not currently connected and ResumeSubs not set"))
return token
case c.options.CleanSession && c.status.ConnectionStatus() == reconnecting:
// if reconnecting and cleanSession is true this sub will be thrown away
token.setError(fmt.Errorf("reconnecting state and cleansession is true"))
return token
}
}
sub := packets.NewControlPacket(packets.Subscribe).(*packets.SubscribePacket)
if err := validateTopicAndQos(topic, qos); err != nil {
token.setError(err)
return token
}
sub.Topics = append(sub.Topics, topic)
sub.Qoss = append(sub.Qoss, qos)
if strings.HasPrefix(topic, "$share/") {
topic = strings.Join(strings.Split(topic, "/")[2:], "/")
}
if strings.HasPrefix(topic, "$queue/") {
topic = strings.TrimPrefix(topic, "$queue/")
}
if callback != nil {
c.msgRouter.addRoute(topic, callback)
}
token.subs = append(token.subs, topic)
if sub.MessageID == 0 {
mID := c.getID(token)
if mID == 0 {
token.setError(fmt.Errorf("no message IDs available"))
return token
}
sub.MessageID = mID
token.messageID = mID
}
DEBUG.Println(CLI, sub.String())
if c.options.ResumeSubs { // Only persist if we need this to resume subs after a disconnection
persistOutbound(c.persist, sub)
}
switch c.status.ConnectionStatus() {
case connecting:
DEBUG.Println(CLI, "storing subscribe message (connecting), topic:", topic)
case reconnecting:
DEBUG.Println(CLI, "storing subscribe message (reconnecting), topic:", topic)
case disconnecting:
DEBUG.Println(CLI, "storing subscribe message (disconnecting), topic:", topic)
default:
DEBUG.Println(CLI, "sending subscribe message, topic:", topic)
subscribeWaitTimeout := c.options.WriteTimeout
if subscribeWaitTimeout == 0 {
subscribeWaitTimeout = time.Second * 30
}
select {
case c.oboundP <- &PacketAndToken{p: sub, t: token}:
case <-time.After(subscribeWaitTimeout):
token.setError(errors.New("subscribe was broken by timeout"))
}
}
DEBUG.Println(CLI, "exit Subscribe")
return token
}
// SubscribeMultiple starts a new subscription for multiple topics. Provide a MessageHandler to
// be executed when a message is published on one of the topics provided.
//
// If options.OrderMatters is true (the default) then callback must not block or
// call functions within this package that may block (e.g. Publish) other than in
// a new go routine.
// callback must be safe for concurrent use by multiple goroutines.
func (c *client) SubscribeMultiple(filters map[string]byte, callback MessageHandler) Token {
var err error
token := newToken(packets.Subscribe).(*SubscribeToken)
DEBUG.Println(CLI, "enter SubscribeMultiple")
if !c.IsConnected() {
token.setError(ErrNotConnected)
return token
}
if !c.IsConnectionOpen() {
switch {
case !c.options.ResumeSubs:
// if not connected and resumesubs not set this sub will be thrown away
token.setError(fmt.Errorf("not currently connected and ResumeSubs not set"))
return token
case c.options.CleanSession && c.status.ConnectionStatus() == reconnecting:
// if reconnecting and cleanSession is true this sub will be thrown away
token.setError(fmt.Errorf("reconnecting state and cleansession is true"))
return token
}
}
sub := packets.NewControlPacket(packets.Subscribe).(*packets.SubscribePacket)
if sub.Topics, sub.Qoss, err = validateSubscribeMap(filters); err != nil {
token.setError(err)
return token
}
if callback != nil {
for topic := range filters {
c.msgRouter.addRoute(topic, callback)
}
}
token.subs = make([]string, len(sub.Topics))
copy(token.subs, sub.Topics)
if sub.MessageID == 0 {
mID := c.getID(token)
if mID == 0 {
token.setError(fmt.Errorf("no message IDs available"))
return token
}
sub.MessageID = mID
token.messageID = mID
}
if c.options.ResumeSubs { // Only persist if we need this to resume subs after a disconnection
persistOutbound(c.persist, sub)
}
switch c.status.ConnectionStatus() {
case connecting:
DEBUG.Println(CLI, "storing subscribe message (connecting), topics:", sub.Topics)
case reconnecting:
DEBUG.Println(CLI, "storing subscribe message (reconnecting), topics:", sub.Topics)
case disconnecting:
DEBUG.Println(CLI, "storing subscribe message (disconnecting), topics:", sub.Topics)
default:
DEBUG.Println(CLI, "sending subscribe message, topics:", sub.Topics)
subscribeWaitTimeout := c.options.WriteTimeout
if subscribeWaitTimeout == 0 {
subscribeWaitTimeout = time.Second * 30
}
select {
case c.oboundP <- &PacketAndToken{p: sub, t: token}:
case <-time.After(subscribeWaitTimeout):
token.setError(errors.New("subscribe was broken by timeout"))
}
}
DEBUG.Println(CLI, "exit SubscribeMultiple")
return token
}
// reserveStoredPublishIDs reserves the ids for publish packets in the persistent store to ensure these are not duplicated
func (c *client) reserveStoredPublishIDs() {
// The resume function sets the stored id for publish packets only (some other packets
// will get new ids in net code). This means that the only keys we need to ensure are
// unique are the publish ones (and these will completed/replaced in resume() )
if !c.options.CleanSession {
storedKeys := c.persist.All()
for _, key := range storedKeys {
packet := c.persist.Get(key)
if packet == nil {
continue
}
switch packet.(type) {
case *packets.PublishPacket:
details := packet.Details()
token := &PlaceHolderToken{id: details.MessageID}
c.claimID(token, details.MessageID)
}
}
}
}
// Load all stored messages and resend them
// Call this to ensure QOS > 1,2 even after an application crash
// Note: This function will exit if c.stop is closed (this allows the shutdown to proceed avoiding a potential deadlock)
// other than that it does not return until all messages in the store have been sent (connect() does not complete its
// token before this completes)
func (c *client) resume(subscription bool, ibound chan packets.ControlPacket) {