% pubs2018.bib
@article{Ali2018,
abstract = {Learning representation from audio data has shown advantages over the handcrafted features such as mel-frequency cepstral coefficients (MFCCs) in many audio applications. In most of the representation learning approaches, the connectionist systems have been used to learn and extract latent features from the fixed length data. In this paper, we propose an approach to combine the learned features and the MFCC features for speaker recognition task, which can be applied to audio scripts of different lengths. In particular, we study the use of features from different levels of deep belief network for quantizing the audio data into vectors of audio word counts. These vectors represent the audio scripts of different lengths that make them easier to train a classifier. We show in the experiment that the audio word count vectors generated from mixture of DBN features at different layers give better performance than the MFCC features. We also can achieve further improvement by combining the audio word count vector and the MFCC features.},
author = {H Ali and S N Tran and E Benetos and A S d'Avila Garcez},
doi = {10.1007/s00521-016-2501-7},
issn = {0941-0643},
issue = {6},
journal = {Neural Computing and Applications},
month = {3},
pages = {13-19},
publisher = {Springer Verlag (Germany)},
title = {Speaker recognition with hybrid features from a deep belief network},
volume = {29},
url = {http://link.springer.com/article/10.1007/s00521-016-2501-7},
year = {2018},
}
@inproceedings{Allik2018,
abstract = {© 2018 IW3C2 (International World Wide Web Conference Committee), published under Creative Commons CC BY 4.0 License. MusicLynx is a web application for music discovery that enables users to explore an artist similarity graph constructed by linking together various open public data sources. It provides a multifaceted browsing platform that strives for an alternative, graph-based representation of artist connections to the grid-like conventions of traditional recommendation systems. Bipartite graph filtering of the Linked Data cloud, content-based music information retrieval, machine learning on crowd-sourced information and Semantic Web technologies are combined to analyze existing and create new categories of music artists through which they are connected. The categories can uncover similarities between artists who otherwise may not be immediately associated: for example, they may share ethnic background or nationality, common musical style or be signed to the same record label, come from the same geographic origin, share a fate or an affliction, or have made similar lifestyle choices. They may also prefer similar musical keys, instrumentation, rhythmic attributes, or even moods their music evokes. This demonstration is primarily meant to showcase the graph-based artist discovery interface of MusicLynx: how artists are connected through various categories, how the different graph filtering methods affect the topology and geometry of linked artists graphs, and ways in which users can connect to external services for additional content and information about objects of their interest.},
author = {A Allik and F Thalmann and M Sandler},
doi = {10.1145/3184558.3186970},
isbn = {9781450356404},
journal = {The Web Conference 2018 - Companion of the World Wide Web Conference, WWW 2018},
month = {4},
pages = {167-170},
title = {MusicLynx: Exploring Music Through Artist Similarity Graphs},
year = {2018},
}
@inproceedings{ARMITAGE2018,
author = {J D K ARMITAGE and A MCPHERSON},
month = {6},
journal = {Proc. of the New Interfaces for Musical Expression (NIME)},
title = {Crafting Digital Musical Instruments: An Exploratory Workshop Study},
year = {2018},
}
@article{Baker2018,
abstract = {© 2018 Intellect Ltd Project Review. Wearable technologies are a nascent market, growing exponentially and moving into our everyday lives more and more. They are exciting in their capacity to break down barriers between artists and designers and digital technology companies. Technology is becoming more efficient, accurate, and personalized. Hardware is becoming smaller, less visible, more connected and the collected data more seamless and ubiquitous. At the core of the wearable technology concerns is the amount of data that electronics companies are allowed to collect, in particular of their users’ personal data. Numerous technology companies and start-ups are working to make the next wearable device or application for body data tracking. This article provides a first critical analysis of the selection and monitoring processes used in these Open Calls and reports on initial work of the WEAR Sustain network, which for eighteen months has operated as a Pan-European catalyst for 46 projects in wearable technology design and development to the point of market and investment readiness, and discussing next steps for its Sustainability Toolkit for lasting impact.},
author = {C Baker and H Ranaivoson and B Greinke and N Bryan-Kinns},
doi = {10.1386/vcr.8.1.91_1},
issue = {1},
journal = {Virtual Creativity},
month = {6},
pages = {91-105},
title = {WEAR: Wearable technologists engage with artists for responsible innovation: Processes and progress},
volume = {8},
year = {2018},
}
@inproceedings{BEAR2018,
abstract = {We present a new extensible and divisible taxonomy for open set sound scene analysis. This new model allows complex scene analysis with tangible descriptors and perception labels. Its novel structure is a cluster graph such that each cluster (or subset) can stand alone for targeted analyses such as office sound event detection, whilst maintaining integrity over the whole graph (superset) of labels. The key design benefit is its extensibility as new labels are needed during new data capture. Furthermore, datasets which use the same taxonomy are easily augmented, saving future data collection effort. We balance the details needed for complex scene analysis with avoiding 'the taxonomy of everything' with our framework to ensure no duplicity in the superset of labels and demonstrate this with DCASE challenge classifications.},
author = {H BEAR and E BENETOS},
journal = {http://dcase.community/workshop2018/},
month = {11},
title = {An extensible cluster-graph taxonomy for open set sound scene analysis},
url = {http://dcase.community/workshop2018/},
year = {2018},
}
@inproceedings{Bechhofer2018,
author = {S Bechhofer and G Fazekas and K Page},
isbn = {9781450364959},
journal = {ACM International Conference Proceeding Series},
month = {10},
title = {Preface: ACM International Conference Proceeding Series},
year = {2018},
}
@book_section{BENETOS2018,
author = {E BENETOS and D STOWELL and M PLUMBLEY},
doi = {10.1007/978-3-319-63450-0},
edition = {1},
editor = {T Virtanen and M PLUMBLEY and D Ellis},
isbn = {978-3-319-63449-4},
issue = {8},
journal = {Computational Analysis of Sound Scenes and Events},
month = {1},
pages = {215-242},
publisher = {Springer International Publishing},
title = {Approaches to complex sound scene analysis},
url = {http://www.springer.com/gb/book/9783319634494},
year = {2018},
}
@article{Bengler2018,
author = {B Bengler and F Martin and N Bryan-Kinns},
doi = {10.1145/3183349},
issn = {1072-5520},
issue = {2},
journal = {Interactions},
month = {2},
pages = {12-13},
title = {Collidoscope},
volume = {25},
year = {2018},
}
@inproceedings{BIN2018,
abstract = {This paper presents a study examining the effects of disfluent design on audience perception of digital musical instrument (DMI) performance. Disfluency, defined as a barrier to effortless cognitive processing, has been shown to generate better results in some contexts as it engages higher levels of cognition. We were motivated to determine if disfluent design in a DMI would result in a risk state that audiences would be able to perceive, and if this would have any effect on their evaluation of the performance. A DMI was produced that incorporated a disfluent characteristic: It would turn itself off if not constantly moved. Six physically identical instruments were produced, each in one of three versions: Control (no disfluent characteristics), mild disfluency (turned itself off slowly), and heightened disfluency (turned itself off more quickly). 6 percussionists each performed on one instrument for a live audience (N=31), and data was collected in the form of real-time feedback (via a mobile phone app), and post-hoc surveys. Though there was little difference in ratings of enjoyment between the versions of the instrument, the real-time and qualitative data suggest that disfluent behaviour in a DMI may be a way for audiences to perceive and appreciate performer skill.},
author = {S M A BIN and N BRYAN-KINNS and A P MCPHERSON},
month = {6},
journal = {Proc. of the New Interfaces for Musical Expression (NIME)},
title = {Risky business: Disfluency as a design strategy},
year = {2018},
}
@inproceedings{Bromham2018,
abstract = {© 2018 KASHYAP. Dynamic range compressors (DRC) are one of the most commonly used audio effect in music production. The timing settings are particularly important for controlling the manner in which they will shape an audio signal. We present a subjective user study of DRC, where a series of different compressor attack and release setting are varied and applied to a set of 30 sec audio tracks. Participants are then asked to rate which ballistic settings are most appropriate for the style of music in their judgement and asked to select one of a series of tag words, to describe the style or setting of the song. Results show that the attack parameter influences perceived style, more than the release parameter. From the study this is seen more evidently in the case of Jazz and Rock styles than in EDM or Hip-Hop. The area of intelligent Music production systems might benefit from this study in the future as it may help to inform appropriateness for certain DRC settings in varying styles.},
author = {G Bromham and D Moffat and M Barthet and G Fazekas},
journal = {145th Audio Engineering Society International Convention, AES 2018},
month = {1},
title = {The impact of compressor ballistics on the perceived style of music},
year = {2018},
}
@inproceedings{bryan-kinns2018,
author = {N Bryan-Kinns},
doi = {10.14236/ewic/hci2018.98},
issn = {1477-9358},
journal = {Electronic Workshops in Computing (eWiC)},
month = {7},
title = {Case Study of Data Mining Mutual Engagement},
year = {2018},
}
@article{bryan-kinns2018b,
abstract = {© The Author(s) 2018. Co-creation across cultures is a fertile area for the study of design and human computer interaction. Many studies have examined what can be learnt from cultures across the world and how cultures respond to interactive technology, and yet open questions remain on how to engage people in cocreation across cultures. In this article, we reflect on a study of cross-cultural co-creation with the Kam ethnic minority group of China. We report on the kinds of collaboration and value that emerged through the co-creation of an interactive drama, and how a traditional Chinese literature composition method was used to structure the design process. We present a notation for describing cross-cultural co-creation and reflect on the careful balance that we found needed to be struck between the depth of co-creation, immersion in local culture, cultural exchange and interactivity. We report on the use of our approach to elicit serendipitous design opportunities in-situ and how our non-utilitarian approach allowed us to explore different meanings of 'interactivity' across cultures.},
author = {N Bryan-Kinns and W Wang and T Ji},
doi = {10.1093/iwc/iwy010},
issn = {0953-5438},
issue = {4},
journal = {Interacting with Computers},
month = {7},
pages = {273-292},
title = {Exploring interactivity and co-creation in rural China},
volume = {30},
year = {2018},
}
@inproceedings{bryan-kinns2018c,
author = {N Bryan-Kinns and W Wang and Y Wu},
doi = {10.14236/ewic/hci2018.214},
issn = {1477-9358},
journal = {Electronic Workshops in Computing (eWiC)},
month = {7},
title = {Thematic Analysis for Sonic Interaction Design},
year = {2018},
}
@inproceedings{BUYS2018,
abstract = {Although the physics of the bowed violin string are well understood, most audio feature extraction algorithms for violin still rely on general-purpose signal processing methods with latencies and accuracy rates that are unsuitable for real-time professional-calibre performance. Starting from a pickup which cleanly captures the motion of the bowed string with minimal colouration from the bridge and body, we present a lightweight time-domain method for modelling string motion using segmented linear regression. The algorithm leverages knowledge of the patterns of Helmholtz motion to produce a set of features which can be used for control of real-time synthesis processes. The goal of the paper is not a back-extraction of physical ground truth, but a responsive, low-latency feature space suitable for performance applications.},
author = {K BUYS and A MCPHERSON},
doi = {10.5281/zenodo.1422597},
journal = {https://zenodo.org/record/1422597},
month = {7},
title = {Real-time bowed string feature extraction for performance applications},
year = {2018},
}
@inproceedings{CHETTRI2018b,
abstract = {Playing recorded speech samples of an enrolled speaker - "replay attack" - is a simple approach to bypass an automatic speaker verification (ASV) system. The vulnerability of ASV systems to such attacks has been acknowledged and studied, but there has been no research into what spoofing detection systems are actually learning to discriminate. In this paper, we analyse the local behaviour of a replay spoofing detection system based on convolutional neural networks (CNNs) adapted from a state-of-the-art CNN (LCNN-FFT) submitted at the ASVspoof 2017 challenge. We generate temporal and spectral explanations for predictions of the model using the SLIME algorithm. Our findings suggest that in most instances of spoofing the model is using information in the first 400 milliseconds of each audio instance to make the class prediction. Knowledge of the characteristics that spoofing detection systems are exploiting can help build less vulnerable ASV systems, other spoofing detection systems, as well as better evaluation databases.},
author = {B CHETTRI and S MISHRA and B STURM and E BENETOS},
journal = {http://www.slt2018.org/},
month = {12},
pages = {92-97},
publisher = {IEEE},
title = {Analysing the predictions of a CNN-based replay spoofing detection system},
year = {2018},
}
@inproceedings{Choi2018,
abstract = {In this paper, we empirically investigate the effect of audio preprocessing on music tagging with deep neural networks. While it is important to choose the best preprocessing strategy from an engineering perspective, it usually has been out of the focus in many academic research. We perform comprehensive experiments involving audio preprocessing using different time-frequency representations, logarithmic magnitude compression, frequency weighting, and scaling. We show that many commonly used input audio preprocessing techniques are redundant except logarithmic magnitude compression.},
author = {K Choi and G Fazekas and M Sandler and K Cho},
journal = {Proc. of the 26th European Signal Processing Conference (EUSIPCO 2018), 3-7 Sept, Rome, Italy},
note = {keywords: Signal Processing, Deep Learning, MIR, Auto-tagging date-added: 2018-05-06 23:32:25 +0000 date-modified: 2018-05-29 23:32:25 +0000},
title = {A Comparison of Audio Signal Preprocessing Methods for Deep Neural Networks on Music Tagging},
year = {2018},
}
@article{Choi2018b,
abstract = {Deep neural networks (DNN) have been successfully applied to music classification including music tagging. However, there are several open questions regarding the training, evaluation, and analysis of DNNs. In this article, we investigate specific aspects of neural networks, the effects of noisy labels, to deepen our understanding of their properties. We analyse and (re-)validate a large music tagging dataset to investigate the reliability of training and evaluation. Using a trained network, we compute label vector similarities which is compared to groundtruth similarity. The results highlight several important aspects of music tagging and neural networks. We show that networks can be effective despite relatively large error rates in groundtruth datasets, while conjecturing that label noise can be the cause of varying tag-wise performance differences. Lastly, the analysis of our trained network provides valuable insight into the relationships between music tags. These results highlight the benefit of using data-driven methods to address automatic music tagging.},
author = {K Choi and G Fazekas and M Sandler and K Cho},
doi = {10.1109/TETCI.2017.2771298},
issue = {2},
journal = {IEEE Transactions on Emerging Topics in Computational Intelligence},
month = {3},
note = {date-added: 2018-06-06 23:32:25 +0000 date-modified: 2018-05-06 23:32:25 +0000 keywords: evaluation, music tagging, deep learning, CNN bdsk-url-1: https://arxiv.org/pdf/1706.02361.pdf bdsk-url-2: https://dx.doi.org/10.1109/TETCI.2017.2771298},
pages = {139-149},
publisher = {IEEE},
title = {The Effects of Noisy Labels on Deep Convolutional Neural Networks for Music Tagging},
volume = {2},
url = {http://semanticaudio.net/},
year = {2018},
}
@inproceedings{CHOURDAKIS2018,
abstract = {A radio play is a form of drama which exists in the acoustic domain and is usually consumed over broadcast radio. In this paper a method is proposed that, given a story in the form of unstructured text, produces a radio play that tells this story. First, information about characters, acting lines, and environments is retrieved from the text. The information extracted serves to generate a production script which can be used either by producers of radio drama, or subsequently used to automatically generate the radio play as an audio file. The system is evaluated in two parts: precision, recall, and F1 scores are computed for the information retrieval part while multistimulus listening tests are used for subjective evaluation of the generated audio.},
author = {E T CHOURDAKIS and JOSHUA D REISS},
month = {7},
title = {From my pen to your ears: automatic production of radio plays from unstructured story text},
url = {https://scholar.google.co.uk/citations?hl=en&user=Hf0rcRcAAAAJ},
year = {2018},
}
@article{Dixon2018,
author = {S Dixon and E Gómez and A Volk},
doi = {10.5334/tismir.22},
issn = {2514-3298},
issue = {1},
journal = {Transactions of the International Society for Music Information Retrieval},
month = {1},
pages = {1-3},
publisher = {Ubiquity Press},
title = {Editorial: Introducing the Transactions of the International Society for Music Information Retrieval},
volume = {1},
year = {2018},
}
@inproceedings{Droog2018,
abstract = {© Proceedings of AISB Annual Convention 2018. All rights reserved. Automatic summarization is dominated by approaches which focus on the selection and concatenation of material in a text. What can be achieved by such approaches is intrinsically limited and far below what can be achieved by human summarizers. There is evidence that successfully creating a rich representation of text, including details of its narrative structure, would help to create more human-like summaries. This paper describes a part of our ongoing work on a cognitively inspired, creative approach to summarization. Here we detail our work on the detection of narrative structure in order to help build rich interpretations of a text and help give rise to a creative approach to summarization. In particular we consider the domain of Russian folktales. Using Vladimir Propp’s thorough description of the interrelations between the narrative elements of such tales, we pose this task as a constraint satisfaction problem. While we only consider this small domain, our approach can be applied to any domain of text on which enough constraints can be placed.},
author = {M Droog-Hayes and G Wiggins and M Purver},
journal = {Proceedings of AISB Annual Convention 2018},
month = {1},
title = {Automatic detection of narrative structure for high-level story representation},
year = {2018},
}
@article{Duffy2018,
abstract = {© 2018, Hacettepe University. All rights reserved. Whilst the focus of attention in an instrumental music lesson is refinement of the student’s musical performance, conversation plays an essential role; not just as a way to analyse the student’s musical contributions, but to organise them within the lesson flow. Participants may respond to talk through performance and vice versa, or even spend periods of time exchanging purely musical contributions. The short musical fragments exchanged by the participants are managed within lesson dialogue in ways analogous to conversational turn-taking. Problems in the student’s performance are refined through both student self-initiated and tutor other-initiated repair, initiated by embodied action and play. A fundamental part of turn-taking is managing the transition to a new speaker. The presence of musical contributions allows for additional types of transition, for example from a turn at talk, to a musical contribution. In conversation, there is generally a preference for a short pause at the transition to a new speaker, and overlap tends to be minimised when it occurs. Through detailed qualitative video analysis of a one-to-one clarinet lesson, we find differences in the preferences regarding overlap when purely musical contributions are being exchanged, and that the duration of overlap during these exchanges of fragments of music are significant.},
author = {S Duffy and P G T Healey},
doi = {10.16986/HUJE.2018038809},
issn = {1300-5340},
issue = {Special Issue},
journal = {Hacettepe Egitim Dergisi},
month = {1},
pages = {316-333},
title = {Refining musical performance through overlap},
volume = {33},
year = {2018},
}
@article{Duffy2018b,
abstract = {Clapping Music is a minimalist work by Steve Reich based on twelve phased variations of a rhythmic pattern. It has been reimagined as a game-based mobile application, designed with a dual purpose. First, to introduce new audiences to the Minimalist genre through interaction with the piece presented as an engaging game. Second, to use large-scale data collection within the app to address research questions about the factors determining rhythm production performance. The twelve patterns can be differentiated using existing theories of rhythmic complexity. Using performance indicators from the game such as tap accuracy we can determine which patterns players found most challenging and so assess hypotheses from theoretical models with empirical evidence. The app has been downloaded over 140,000 times since the launch in July 2015, and over 46 million rows of gameplay data have been collected, requiring a big data approach to analysis. The results shed light on the rhythmic factors contributing to performance difficulty and show that the effect of making a transition from one pattern to the next is as significant, in terms of pattern difficulty, as the inherent complexity of the pattern itself. Challenges that arose in applying this novel approach are discussed.},
author = {S Duffy and M Pearce},
doi = {10.1371/journal.pone.0205847},
issue = {10},
journal = {PLoS One},
month = {10},
pages = {e0205847–e0205847},
title = {What makes rhythms hard to perform? An investigation using Steve Reich's Clapping Music.},
volume = {13},
url = {https://www.ncbi.nlm.nih.gov/pubmed/30335798},
year = {2018},
}
@inproceedings{Fano2018,
abstract = {Kernel Additive Modelling (KAM) is a framework for source separation aiming to explicitly model inherent properties of sound sources to help with their identification and separation. KAM separates a given source by applying robust statistics on the selection of time-frequency bins obtained through a source-specific kernel, typically the k-NN function. Even though the parameter k appears to be key for a successful separation, little discussion on its influence or optimisation can be found in the literature. Here we propose a novel method, based on graph theory statistics, to automatically optimise k in a vocal separation task. We introduce the k-NN hubness as an indicator to find a tailored k at a low computational cost. Subsequently, we evaluate our method in comparison to the common approach to choose k. We further discuss the influence and importance of this parameter with illuminating results.},
author = {D Fano Yela and D Stowell and M Sandler},
doi = {10.1007/978-3-319-93764-9_27},
isbn = {9783319937632},
issn = {0302-9743},
journal = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
month = {6},
pages = {280-289},
title = {Does K matter? k-NN hubness analysis for kernel additive modelling vocal separation},
volume = {10891 LNCS},
year = {2018},
}
@inproceedings{Flynn2018,
abstract = {© 2018 Audio Engineering Society. All Rights Reserved. Current closed-form IIR methods for approximating an analogue prototype filter in the discrete-domain do not match frequency response phase. The frequency sampling method can match phase, but requires extremely long filter lengths (and corresponding latency) to perform well at low frequencies. We propose a method for discretising an analogue prototype that does not succumb to these issues. Contrary to the IIR methods, it accurately approximates the phase, as well as the magnitude response. The proposed method exhibits good low frequency resolution using much smaller filter lengths than design by frequency sampling.},
author = {J Flynn and J D Reiss},
journal = {144th Audio Engineering Society Convention 2018},
month = {1},
title = {Improving the frequency response magnitude and phase of analogue-matched digital filters},
year = {2018},
}
@inproceedings{Freeman2018,
abstract = {© 2018 ISAST. How can we describe data when used as an art material? As the number of artists using data in their work increases, so too must our ability to describe the material in a way that is understood by both specialist and general audiences alike. Based on a review of existing vocabularies, glossaries and taxonomies of data, we propose our own concise taxonomy. To conclude, we propose the adoption of this concise taxonomy by artists, critics and curators, and suggest that ongoing refinement of the taxonomy takes place through crowdsourced knowledge sharing on the Web.},
author = {J Freeman and G Wiggins and G Starks and M Sandler},
doi = {10.1162/LEON_a_01414},
issn = {0024-094X},
issue = {1},
journal = {Leonardo},
month = {2},
pages = {75-79},
title = {A concise taxonomy for describing data as an art material},
volume = {51},
year = {2018},
}
@inproceedings{Frieler2018,
abstract = {© Klaus Frieler, Frank Höger, Martin Pfleiderer, Simon Dixon. This paper presents two novel user interfaces for investigating the pattern content in monophonic jazz solos and exemplifies how these interfaces could be used for research on jazz improvisation. In jazz improvisation, patterns are of particular interest for the analysis of improvisation styles, the oral transmission of musical language, the practice of improvisation, and the psychology of creative processes. The ongoing project “Dig That Lick” is devoted to addressing these questions with the help of a large database of jazz solo transcriptions generated by automated melody extraction algorithms. To expose these transcriptions to jazz researchers, two prototypes of user interfaces were designed that work currently with the 456 manually transcribed jazz solos of the Weimar Jazz Database. The first one is a Shiny application that allows exploring a set of 653 of the most common patterns by eminent players. The second one is a web interface for a general two-staged pattern search in the Weimar Jazz Database featuring regular expressions. These applications aim on the one hand at an expert audience of jazz researchers to facilitate generating and testing hypotheses about patterns in jazz improvisation, and on the other hand at a wider audience of jazz teachers, students, and fans.},
author = {K Frieler and F Höger and M Pfleiderer and S Dixon},
isbn = {9782954035123},
journal = {Proceedings of the 19th International Society for Music Information Retrieval Conference, ISMIR 2018},
month = {1},
pages = {777-783},
title = {Two web applications for exploring melodic patterns in jazz solos},
year = {2018},
}
@inproceedings{Galindo2018,
abstract = {© 2018 Association for Computing Machinery. This paper describes an interactive scenography designed to enhance the use of embodied imagination in a stroke survivors' performance workshop called The Green Screening workshop, conceived by the company Split Britches.We explore performance techniques combined with live motion capture to provide participants with an abstract visual world that helps them to enact fantasy scenes they create in front of an audience. A simple interface provides real-time visualisations of participant's body movements in three different scenarios and promotes engagement with the co-present audience. The system was evaluated in two workshops with stroke survivors. The results indicate that the system is effective in encouraging participants' creative use of embodied improvisation.},
author = {R P Galindo Esparza and P G T Healey and L Weaver and M Delbridge},
doi = {10.1145/3212721.3212845},
isbn = {9781450365048},
journal = {ACM International Conference Proceeding Series},
month = {6},
title = {Augmented embodiment: Developing interactive technology for stroke survivors short paper},
year = {2018},
}
@article{GODDARD2018,
abstract = {Computationally creative systems require semantic information when reflecting or self reasoning on their output. In this paper, we outline the design of a computationally creative musical performance system aimed at producing virtuosic interpretations of musical pieces and provide an overview of its implementation. The case-based reasoning part of the system relies on a measure of musical similarity based on the FANTASTIC and SynPy toolkits which provide melodic and syncopated rhythmic features, respectively. We conducted a listening test based on pair-wise comparison to assess to what extent the machine-based similarity models match human perception. We found the machine-based models to differ significantly to human responses due to differences in participants' responses. The best performing model relied on features from the FANTASTIC toolkit obtaining a rank match rate with human response of 63%, whilst features from the SynPy toolkit only obtained a ranking match rate of 46%. Whilst more work is needed on a stronger model of similarity, we do not believe these results prevent FANTASTIC features being used as a measure of similarity within creative systems.},
author = {C GODDARD and M BARTHET and G WIGGINS},
doi = {10.17743/jaes.2018.0012},
issn = {1549-4950},
journal = {Journal of the Audio Engineering Society},
month = {4},
publisher = {Audio Engineering Society},
title = {Assessing Musical Similarity for Computational Music Creativity},
year = {2018},
}
@inproceedings{Goodman2018,
abstract = {© 2018 IEEE. As wearable technologies and etextiles mature they are being increasingly used in couture and high street fashion. However, much of the innovation in this space has been driven by technological and commercial imperatives. As the wearables and etextile markets mature it is time to consider this technological landscape in the bigger picture of a sustainable human-centred world. This paper reports on initial findings from 48 projects supported through the EU funded WEAR Sustain network to examine sustainable and ethical approaches to wearable technology design. Case studies of collaborations between artists and technologists in designing and realising sustainable and ethical wearable technologies are presented, and an initial set of themes emerging from detailed analysis of all the project updates are outlined highlighting the importance of cross-disciplinary hubs, mentors, and networks.},
author = {L Goodman and N Bryan-Kinns and Y Wu and S Liu and C Baker},
doi = {10.1109/GEM.2018.8516276},
isbn = {9781538663042},
journal = {2018 IEEE Games, Entertainment, Media Conference, GEM 2018},
month = {10},
pages = {1-3},
title = {WEAR Sustain Network: Ethical and Sustainable Technology Innovation in Wearables and Etextiles},
year = {2018},
}
@article{Healey2018,
abstract = {Miscommunication is a neglected issue in the cognitive sciences, where it has often been discounted as noise in the system. This special issue argues for the opposite view: Miscommunication is a highly structured and ubiquitous feature of human interaction that systematically underpins people's ability to create and maintain shared languages. Contributions from conversation analysis, computational linguistics, experimental psychology, and formal semantics provide evidence for these claims. They highlight the multi-modal, multi-person character of miscommunication. They demonstrate the incremental, contingent, and locally adaptive nature of the processes people use to detect and deal with miscommunication. They show how these processes can drive language change. In doing so, these contributions introduce an alternative perspective on what successful communication is, new methods for studying it, and application areas where these ideas have a particular impact. We conclude that miscommunication is not noise but essential to the productive flexibility of human communication, especially our ability to respond constructively to new people and new situations.},
author = {P G T Healey and J P de Ruiter and G J Mills},
doi = {10.1111/tops.12340},
issue = {2},
journal = {Topics in Cognitive Science},
month = {5},
pages = {264-278},
title = {Editors' Introduction: Miscommunication.},
volume = {10},
url = {https://www.ncbi.nlm.nih.gov/pubmed/29749040},
year = {2018},
}
@generic{HEALEY2018b,
author = {P G T HEALEY and M R J PURVER},
month = {11},
title = {Self-Repetition in Dialogue and Monologue},
url = {http://www.eecs.qmul.ac.uk/~mpurver/papers/healey-purver18semdial.pdf},
year = {2018},
}
@article{Healey2018c,
abstract = {People give feedback in conversation: both positive signals of understanding, such as nods, and negative signals of misunderstanding, such as frowns. How do signals of understanding and misunderstanding affect the coordination of language use in conversation? Using a chat tool and a maze-based reference task, we test two experimental manipulations that selectively interfere with feedback in live conversation: (a) "Attenuation" that replaces positive signals of understanding such as "right" or "okay" with weaker, more provisional signals such as "errr" or "umm" and (2) "Amplification" that replaces relatively specific signals of misunderstanding from clarification requests such as "on the left?" with generic signals of trouble such as "huh?" or "eh?". The results show that Amplification promotes rapid convergence on more systematic, abstract ways of describing maze locations while Attenuation has no significant effect. We interpret this as evidence that "running repairs"-the processes of dealing with misunderstandings on the fly-are key drivers of semantic coordination in dialogue. This suggests a new direction for experimental work on conversation and a productive way to connect the empirical accounts of Conversation Analysis with the representational and processing concerns of Formal Semantics and Psycholinguistics.},
author = {P G T Healey and G J Mills and A Eshghi and C Howes},
doi = {10.1111/tops.12336},
issue = {2},
journal = {Topics in Cognitive Science},
month = {4},
pages = {367-388},
title = {Running Repairs: Coordinating Meaning in Dialogue.},
volume = {10},
url = {https://www.ncbi.nlm.nih.gov/pubmed/29687611},
year = {2018},
}
@inproceedings{Heitlinger2018,
abstract = {© 2018 ACM. We present a case study of a participatory design project in the space of sustainable smart cities and Internet of Things. We describe our design process that led to the development of an interactive seed library that tells the stories of culturally diverse urban food growers, and networked environmental sensors from their gardens, as a way to support more sustainable food practices in the city. This paper contributes to an emerging body of empirical work within participatory design that seeks to involve citizens in the design of smart cities and Internet of Things, particularly in the context of marginalised and culturally diverse urban communities. It also contributes empirical work towards non-utilitarian approaches to sustainable smart cities through a discussion of designing for urban diversity and slowness.},
author = {S Heitlinger and N Bryan-Kinns and R Comber},
doi = {10.1145/3210604.3210620},
isbn = {9781450364645},
journal = {ACM International Conference Proceeding Series},
month = {9},
title = {Connected seeds and sensors: Co-designing internet of things for sustainable smart cities with urban food-growing communities},
volume = {2},
year = {2018},
}
@article{Hu2018,
abstract = {© 2018, Springer Nature B.V. Design thinking holds the key to innovation processes, but is often difficult to detect because of its implicit nature. We undertook a study of novice designers engaged in team-based design exercises in order to explore the correlation between design thinking and designers’ physical (observable) behavior and to identify new, objective, design thinking identification methods. Our study addresses the topic by using data collection method of “think aloud” and data analysis method of “protocol analysis” along with the unconstrained concept generation environment. Collected data from the participants without service design experience were analyzed by open and selective coding. Through the research, we found correlations between physical activity and divergent thinking, and also identified physical behaviors that predict a designer’s transition to divergent thinking. We conclude that there are significant relations between designers’ design thinking and the behavioral features of their body and face. This approach opens possible new ways to undertake design process research and also design capability evaluation.},
author = {Y Hu and X Du and N Bryan-Kinns and Y Guo},
doi = {10.1007/s10798-018-9479-7},
issn = {0957-7572},
journal = {International Journal of Technology and Design Education},
month = {10},
title = {Identifying divergent design thinking through the observable behavior of service design novices},
year = {2018},
}
@article{JACK2018,
abstract = {Asynchrony between tactile and auditory feedback (action-sound latency) when playing a musical instrument is widely recognised as disruptive to musical performance. In this paper we present a study that assesses the effects of delayed auditory feedback on the timing accuracy and judgements of instrument quality for two groups of participants: professional percussionists and non-percussionist amateur musicians. The amounts of delay tested in this study are relatively small in comparison to similar studies of auditory delays in a musical context (0ms, 10ms, 10ms±3ms, 20ms). We found that both groups rated the zero latency condition as higher quality for a series of quality measures in comparison to 10ms±3ms and 20ms latency, but did not show a significant difference in rating between 10ms latency and zero latency. Professional percussionists were more aware of the latency conditions and showed less variation of timing under the latency conditions, although this ability decreased as the temporal demands of the task increased. We compare our findings from each group and discuss them in relation to latency in interactive digital systems more generally and experimentally similar work on sensorimotor control and rhythmic performance.},
author = {R H JACK and A MEHRABI and T Stockman and A MCPHERSON},
doi = {10.1525/mp.2018.36.1.109},
editor = {K Stevens and K Hutchings},
issn = {0730-7829},
journal = {Music Perception},
month = {8},
publisher = {University of California Press},
title = {Action-sound Latency and the Perceived Quality of Digital Musical Instruments: Comparing Professional Percussionists and Amateur Musicians},
year = {2018},
}
@inproceedings{Jillings2018,
abstract = {© 2018 KASHYAP. Subjective experiments are a cornerstone of modern research, with a variety of tasks being undertaken by subjects. In the field of audio, subjective listening tests provide validation for research and aid fair comparison between techniques or devices such as coding performance, speakers, mixes and source separation systems. Several interfaces have been designed to mitigate biases and to standardise procedures, enabling indirect comparisons. The number of different combinations of interface and test design make it extremely difficult to conduct a truly unbiased listening test. This paper resolves the largest of these variables by identifying the impact the interface itself has on a purely auditory test. This information is used to make recommendations for specific categories of listening tests.},
author = {N Jillings and B De Man and R Stables and J D Reiss},
journal = {145th Audio Engineering Society International Convention, AES 2018},
month = {1},
title = {Investigation into the effects of subjective test interface choice on the validity of results},
year = {2018},
}
@book_section{KUDUMAKIS2018b,
author = {P KUDUMAKIS and J Corral García and I Barbancho and L J. Tardón and M SANDLER},
doi = {10.1007/978-3-662-55004-5_45},
editor = {R Bader},
issue = {45},
journal = {Springer Handbook of Systematic Musicology},
month = {1},
pages = {911-921},
publisher = {Springer, Berlin, Heidelberg},
title = {Enabling Interactive and Interoperable Semantic Music Applications},
year = {2018},
}
@book_section{Lavia2018,
abstract = {© 2018 by IGI Global. All rights reserved. More accurate non-participatory parameters and psychoacoustics to assess human perceptual responses to the acoustic environment are critical to inform effective urban sound planning and applied soundscape practice. Non-participatory observation methods are widely used by experts to capture animal behavior. In 2012, Lavia and Witchel applied these principles and methodologies for the first time to capturing and assessing human behavior "in the wild" to changes to the acoustic environment using added sound and music interventions in a clubbing district. Subsequent work was conducted with Aletta and Kang and Healey, Howes, Steffens, and Fiebig to begin characterizing the acoustic environment and human responses to align the perceptual and physical findings. Here, the authors report on new work and analysis and propose a preliminary predictive agile applied soundscape framework using non-participatory observation methods and psychoacoustics to be used with environmental assessment practice and evolving urban soundscape planning methods by researchers, practitioners, and policy makers.},
author = {L Lavia and H J Witchel and F Aletta and J Steffens and A Fiebig and J Kang and C Howes and P G T Healey},
doi = {10.4018/978-1-5225-3637-6.ch004},
isbn = {152253637X},
journal = {Handbook of Research on Perception-Driven Approaches to Urban Assessment and Design},
month = {1},
pages = {73-98},
title = {Non-participant observation methods for soundscape design and urban planning},
year = {2018},
}
@inproceedings{Li2018,
abstract = {© 2018 Technical Committee on Control Theory, Chinese Association of Automation. Analysing expressive timing in performed music can help machine to perform various perceptual tasks such as identifying performers and understand music structures in classical music. A hierarchical structure is commonly used for expressive timing analysis. This paper provides a statistical demonstration to support the use of hierarchical structure in expressive timing analysis by presenting two groups of model selection tests. The first model selection test uses expressive timing to determine the location of music structure boundaries. The second model selection test is matching a piece of performance with the same performer playing another given piece. Comparing the results of model selection tests, the preferred hierarchical structures in these two model selection tests are not the same. While determining music structure boundaries demands a hierarchical structure with more levels in the expressive timing analysis, a hierarchical structure with less levels helps identifying the dedicated performer in most cases.},
author = {S Li and S Dixon and M D Plumbley},
doi = {10.23919/ChiCC.2018.8483169},
isbn = {9789881563941},
issn = {1934-1768},
journal = {Chinese Control Conference, CCC},
month = {10},
pages = {3190-3195},
title = {A Demonstration of Hierarchical Structure Usage in Expressive Timing Analysis by Model Selection Tests},
volume = {2018-July},
year = {2018},
}
@article{Liang2018,
abstract = {This paper presents a study of piano pedalling gestures and techniques on the sustain pedal from the perspective of measurement, recognition and visualisation. Pedalling gestures can be captured by a dedicated measurement system where the sensor data can be simultaneously recorded alongside the piano sound under normal playing conditions. Using the sensor data collected from the system, the recognition is comprised of two separate tasks: pedal onset/offset detection and classification by technique. The onset and offset times of each pedalling technique were computed using signal processing algorithms. Based on features extracted from every segment when the pedal is pressed, the task of classifying the segments by pedalling technique was undertaken using machine learning methods. We compared Support Vector Machines (SVM) and hidden Markov models (HMM) for this task. Our system achieves high accuracies, over 0.7 F1 score for all techniques and over 0.9 on average. The recognition results can be represented using novel pedalling notations and visualised in an audio-based score following application.},
author = {B Liang and G Fazekas and M Sandler},
doi = {10.17743/jaes.2018.0035},
issue = {47},
journal = {JAES Special Issue on Participatory Sound And Music Interaction Using Semantic Audio},
month = {6},
note = {date-added: 2018-06-06 23:32:25 +0000 date-modified: 2018-05-06 23:32:25 +0000 keywords: sensor system, piano pedalling, measurement, machine learning, gesture recognition, piano transcription},
pages = {xxxx–xxxx},
title = {Measurement, Recognition and Visualisation of Piano Pedalling Gestures and Techniques},
volume = {2},
year = {2018},
}
@inproceedings{Liang2018b,
abstract = {In this paper, the problem of legato pedalling technique detection in polyphonic piano music is addressed. We propose a novel detection method exploiting the effect of sympathetic resonance which can be enhanced by a legato-pedal onset. To measure the effect, specific piano transcription was performed using the templates of pre-recorded isolated notes, from which partial frequencies were estimated. This promotes the acquisition of residual components associated to the weak co-excitation of damped notes due to the legato pedalling technique. Features that represent the sympathetic resonance measure were extracted from residuals. We finally used a logistic regression classifier to distinguish the existence of legato-pedal onsets.},
author = {B Liang and G Fazekas and M Sandler},
doi = {10.23919/EUSIPCO.2018.8553341},
journal = {Proceedings of the 26th European Signal Processing Conference (EUSIPCO 2018)},
month = {9},
pages = {2484-2488},
publisher = {IEEE},
title = {Piano Legato-Pedal Onset Detection Based on a Sympathetic Resonance Measure},
year = {2018},
}
@inproceedings{Marengo2018,
abstract = {The digitization of art collections is a great opportunity to engage audiences beyond the context of the museum visit. Interfaces to access collections have been initially tailored for professional search tasks: the new challenge is how to design systems for open, casual, and leisure-based explorations. In a human-centered framework, the users’ perspective is a fundamental step to design and improve creative solutions. How can we listen to and understand the potential users, in order to design meaningful experiences? How can we collect insights, and what do these tell us about the users and the systems? We explore the use of inquiry techniques as a method to surface the curiosities people have for paintings. During two iterations, visitors of public events wrote questions they had about selected paintings. 138 Post-its were collected and thematically analyzed. Results highlight that curiosities are contextualized, and that artworks are interpreted mainly as scenes. People are interested in meanings and symbols; they also displayed the use of fantasy and empathy. Additionally, we evaluated the effect of age, previous knowledge of the painting, and frequency of visiting museums on the questions’ content through statistical analysis. While no strong finding emerged, we noticed that adults and kids likewise display an active role in the inquiry process, and that a previous knowledge of the painting is connected to more descriptive and atomic curiosities. In the discussion, we suggest design opportunities might lay in the interactive discovery of information, in storytelling-based descriptions, and in emotional connection. Our findings suggest that in leisure-based explorations atomic information might not be satisfying, and that descriptions should be contextualized to the painting. Our presentation will be an opportunity to discuss the value of the method, and to comment on how the insights could be embedded into the design of leisure-based experiences.},
author = {L Marengo and G Fazekas and A Tombros},
journal = {Proc. International Conference on Museums and the Web 2018, April 18-21, Vancouver, Canada.},
note = {date-added: 2018-05-01 00:11:04 +0000 date-modified: 2018-05-01 00:16:25 +0000 keywords: visual art, information design, inquiry techniques, user requirements, online collections, interaction design bdsk-url-1: http://mw18.mwconf.org/paper/i-wonder-inquiry-techniques-as-a-method-to-gain-insights-into-peoples-encounters-with-visual-art},
title = {I Wonder... Inquiry Techniques As A Method To Gain Insights Into People’s Encounters With Visual Art},
url = {http://mw18.mwconf.org/paper/i-wonder-inquiry-techniques-as-a-method-to-gain-insights-into-peoples-encounters-with-visual-art},
year = {2018},
}
@inproceedings{martinez2018,
abstract = {This work aims to implement a novel deep learning architecture to perform audio processing in the context of matched equalization. Most existing methods for automatic and matched equalization show effective performance and their goal is to find a respective transfer function given a frequency response. Nevertheless, these procedures require a prior knowledge of the type of filters to be modeled. In addition, fixed filter bank architectures are required in automatic mixing contexts. Based on end-to-end convolutional neural networks, we introduce a general purpose architecture for equalization matching. Thus, by using an end-to-end learning approach, the model approximates the equalization target as a content-based transformation without directly finding the transfer function. The network learns how to process the audio directly in order to match the equalized target audio. We train the network through unsupervised and supervised learning procedures. We analyze what the model is actually learning and how the given task is accomplished. We show the model performing matched equalization for shelving, peaking, lowpass and highpass IIR and FIR equalizers.},
author = {M Martinez Ramirez and J Reiss},
month = {9},
title = {End-to-end equalization with convolutional neural networks},
url = {http://www.m-marco.com/},
year = {2018},
}
@inproceedings{McArthur2018,
abstract = {© Audio Engineering Society. All rights reserved. This study examines auditory distance discrimination in cinematic virtual reality. It uses controlled stimuli with audio-visual distance variations, to determine if mismatch stimuli are detected. It asks if visual conditions - either equally or unequally distanced from the user, and environmental conditions - either a reverberant space as opposed to a freer field, impact accuracy in discrimination between congruent and incongruent aural and visual cues. A Repertory Grid Technique-derived design is used, whereby participant-specific constructs are translated into numerical ratings. Discrimination of auditory event mismatch was improved for stimuli with varied visual-event distances, though not for equidistant visual events. This may demonstrate that visual cues alert users to matches and mismatches.},
author = {A McArthur and M Sandler and R Stewart},
isbn = {9781510870390},
journal = {Proceedings of the AES International Conference},
month = {1},
pages = {24-33},
title = {Perception of mismatched auditory distance - Cinematic VR},
volume = {2018-August},
year = {2018},
}
@article{McCabe2018,
abstract = {Copyright © 2018 Cognitive Science Society, Inc. The effectiveness of medical treatment depends on the quality of the patient–clinician relationship. It has been proposed that this depends on the extent to which the patient and clinician build a shared understanding of illness and treatment. Here, we use the tools of conversation analysis (CA) to explore this idea in the context of psychiatric consultations. The CA “repair” framework provides an analysis of the processes people use to deal with problems in speaking, hearing, and understanding. These problems are especially critical in the treatment of psychosis where patients and health care professionals need to communicate about the disputed meaning of hallucinations and delusion. Patients do not feel understood, they are frequently non-adherent with treatment, and many have poor outcomes. We present an overview of two studies focusing on the role of repair as a mechanism for producing and clarifying meaning in psychiatrist–patient communication and its association with treatment outcomes. The first study shows patient clarification or repair of psychiatrists’ talk is associated with better patient adherence to treatment. The second study shows that training which emphasizes the importance of building an understanding of patients’ psychotic experiences increases psychiatrists’ self-repair. We propose that psychiatrists are working harder to make their talk understandable and acceptable to the patient by taking the patient's perspective into account. We conclude that these findings provide evidence that repair is an important mechanism for building shared understanding in doctor–patient communication and contributes to better therapeutic relationships and treatment adherence. The conversation analytic account of repair is currently the most sophisticated empirical model for analyzing how people construct shared meaning and understanding. Repair appears to reflect greater commitment to and engagement in communication and improve both the quality and outcomes of communication. Reducing potential miscommunication between psychiatrists and their patients with psychosis is a low-cost means of enhancing treatment from both the psychiatrist and patient perspective. Given that misunderstanding and miscommunication are particularly problematic in psychosis, this is critical for improving the longer term outcomes of treatment for these patients who often have poor relationships with psychiatrists and health care services more widely.},
author = {R McCabe and P G T Healey},
doi = {10.1111/tops.12337},
issn = {1756-8757},
issue = {2},
journal = {Topics in Cognitive Science},
month = {4},
pages = {409-424},
title = {Miscommunication in Doctor–Patient Communication},
volume = {10},
year = {2018},
}
@inproceedings{Mehrabi2018,
abstract = {© 2018 IEEE. The expressive nature of the voice provides a powerful medium for communicating sonic ideas, motivating recent research on methods for query by vocalisation. Meanwhile, deep learning methods have demonstrated state-of-the-art results for matching vocal imitations to imitated sounds, yet little is known about how well learned features represent the perceptual similarity between vocalisations and queried sounds. In this paper, we address this question using similarity ratings between vocal imitations and imitated drum sounds. We use a linear mixed effect regression model to show how features learned by convolutional auto-encoders (CAEs) perform as predictors for perceptual similarity between sounds. Our experiments show that CAEs outperform three baseline feature sets (spectrogram-based representations, MFCCs, and temporal features) at predicting the subjective similarity ratings. We also investigate how the size and shape of the encoded layer affects the predictive power of the learned features. The results show that preservation of temporal information is more important than spectral resolution for this application.},
author = {A Mehrabi and K Choi and S Dixon and M Sandler},
doi = {10.1109/ICASSP.2018.8461566},
isbn = {9781538646588},
issn = {1520-6149},
journal = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
month = {9},
pages = {356-360},
title = {Similarity Measures for Vocal-Based Drum Sample Retrieval Using Deep Convolutional Auto-Encoders},
volume = {2018-April},
year = {2018},
}
@inproceedings{Men2018,
author = {L Men and N Bryan-Kinns},
doi = {10.1109/SIVE.2018.8577094},
isbn = {9781538657133},
journal = {2018 IEEE 4th VR Workshop on Sonic Interactions for Virtual Environments, SIVE 2018},
month = {12},
title = {LeMo: Supporting Collaborative Music Making in Virtual Reality},
year = {2018},
}
@article{Mesaros2018,
abstract = {Public evaluation campaigns and datasets promote active development in target research areas, allowing direct comparison of algorithms. The second edition of the challenge on detection and classification of acoustic scenes and events (DCASE 2016) has offered such an opportunity for development of the state-of-the-art methods, and succeeded in drawing together a large number of participants from academic and industrial backgrounds. In this paper, we report on the tasks and outcomes of the DCASE 2016 challenge. The challenge comprised four tasks: acoustic scene classification, sound event detection in synthetic audio, sound event detection in real-life audio, and domestic audio tagging. We present each task in detail and analyze the submitted systems in terms of design and performance. We observe the emergence of deep learning as the most popular classification method, replacing the traditional approaches based on Gaussian mixture models and support vector machines. By contrast, feature representations have not changed substantially throughout the years, as mel frequency-based representations predominate in all tasks. The datasets created for and used in DCASE 2016 are publicly available and are a valuable resource for further research.},
author = {A Mesaros and T Heittola and E Benetos and P Foster and M Lagrange and T Virtanen and M Plumbley},
doi = {10.1109/TASLP.2017.2778423},
issn = {2329-9304},
issue = {2},
journal = {IEEE/ACM Transactions on Audio, Speech and Language Processing},
month = {2},
pages = {379-393},
publisher = {Institute of Electrical and Electronics Engineers},
title = {Detection and Classification of Acoustic Scenes and Events: Outcome of the DCASE 2016 Challenge},
volume = {26},
url = {http://ieeexplore.ieee.org/document/8123864/},
year = {2018},
}
@book_section{Milo2018,
abstract = {© 2018 by IGI Global. All rights reserved. This chapter presents an overview of 3 graphical tools supporting soundscape assessment in different settings, indoors and outdoors. These research prototypes support the spatial organization of the perceptual information available to the participants and are designed based on surveying techniques used in architectural training to create a foundation for acoustic design education in architecture schools. This chapter reports the contexts of the focus group investigations, presenting advantages and drawbacks related to their use. It has been found that participants often added explanatory verbal data and arrows to the provided diagrams. The diagrams and their use have been interpreted with the support of the qualitative data captured during the studies through thematic analysis. Finally, paper prototypes are useful for educational approaches, but future, more comprehensive studies will require integrating these tools into existing or yet-to-be-designed systematic frameworks for soundscape analysis and design.},
author = {A Milo and N Bryan-Kinns and J D Reiss},
doi = {10.4018/978-1-5225-3637-6.ch017},
isbn = {152253637X},
journal = {Handbook of Research on Perception-Driven Approaches to Urban Assessment and Design},
month = {1},
pages = {397-433},
title = {Graphical research tools for acoustic design training: Capturing perception in architectural settings},
year = {2018},
}
@inproceedings{Mishra2018,
abstract = {© Saumitra Mishra, Bob L. Sturm, Simon Dixon. Methods for interpreting machine learning models can help one understand their global and/or local behaviours, and thereby improve them. In this work, we apply a global analysis method to a machine listening model, which essentially inverts the features generated in a model back into an interpretable form like a sonogram. We demonstrate this method for a state-of-the-art singing voice detection model. We train up-convolutional neural networks to invert the feature generated at each layer of the model. The results suggest that the deepest fully connected layer of the model does not preserve temporal and harmonic structures, but that the inverted features from the deepest convolutional layer do. Moreover, a qualitative analysis of a large number of inputs suggests that the deepest layer in the model learns a decision function as the information it preserves depends on the class label associated with an input.},
author = {S Mishra and B L Sturm and S Dixon},
isbn = {9782954035123},
journal = {Proceedings of the 19th International Society for Music Information Retrieval Conference, ISMIR 2018},
month = {1},
pages = {755-762},
title = {Understanding a deep machine listening model through feature inversion},
year = {2018},
}
@inproceedings{Mishra2018b,
abstract = {© EURASIP 2018. Researchers have proposed methods to explain neural network predictions by building explanations either in terms of input components (e.g., pixels in an image) or in terms of input regions (e.g., the area containing the face of a Labrador). Such methods aim to determine the trustworthiness of a model, as well as to guide its improvement. In this paper, we argue that explanations in terms of input regions are useful for analysing machine listening systems. We introduce a novel method based on feature inversion to identify a region in an input time-frequency representation that is most influential to a prediction. We demonstrate it for a state-of-the-art singing voice detection model. We evaluate the quality of the generated explanations on two public benchmark datasets. The results demonstrate that the presented method often identifies a region of an input instance that has a decisive effect on the classification.},
author = {S Mishra and B L Sturm and S Dixon},
doi = {10.23919/EUSIPCO.2018.8553178},
isbn = {9789082797015},
issn = {2219-5491},
journal = {European Signal Processing Conference},
month = {11},
pages = {2260-2264},
title = {“What are you listening to?” Explaining predictions of deep machine listening systems},
volume = {2018-September},
year = {2018},
}
@inproceedings{Moffat2018,
abstract = {© 2018 KASHYAP. Dynamic range compression (DRC) is a very commonly used audio effect. One use of DRC is to emphasise transients in an audio signal. This paper presents an approach for automatically and adaptively setting dynamic range compression timing parameters, allowing them to adapt to the incoming audio signal with the aim of emphasising transients within percussive audio tracks. An implementation approach is presented.},
author = {D Moffat and M B Sandler},
journal = {145th Audio Engineering Society International Convention, AES 2018},
month = {1},
title = {Adaptive ballistics control of dynamic range compression for percussive tracks},
year = {2018},
}
@article{MOFFAT2018b,
abstract = {Sound synthesis is the process of generating artificial sounds through some form of simulation or modelling. This article aims to identify which sound synthesis methods achieve the goal of producing a believable audio sample that may replace a recorded sound sample. A perceptual evaluation experiment of five different sound synthesis techniques was undertaken. Additive synthesis, statistical modelling synthesis with two different feature sets, physically inspired synthesis, concatenative synthesis, and sinusoidal modelling synthesis were all compared. Evaluation using eight different sound class stimuli and 66 different samples was undertaken. The additive synthesizer is the only synthesis method not considered significantly different from the reference sample across all sounds classes. The results demonstrate that sound synthesis can be considered as realistic as a recorded sample and makes recommendations for use of synthesis methods, given different sound class contexts.},
author = {D J MOFFAT and J D REISS},
doi = {10.1145/3165287},
issue = {2},
journal = {ACM Transactions on Applied Perception (TAP)},
month = {4},
publisher = {ACM},
title = {Perceptual Evaluation of Synthesized Sound Effects},
volume = {15},
url = {https://dl.acm.org/citation.cfm?id=3165287},
year = {2018},
}
@article{Morfi2018,
author = {V Morfi and D Stowell},
doi = {10.3390/app8081397},
issn = {1454-5101},
issue = {8},
journal = {Applied Sciences},
month = {8},
publisher = {MDPI},
title = {Deep Learning for Audio Event Detection and Tagging on Low-Resource Datasets},
volume = {8},
url = {http://arxiv.org/abs/1807.03697v2},
year = {2018},
}
@article{MORREALE2018,
author = {F MORREALE and J ARMITAGE and A MCPHERSON},
doi = {10.3389/fpsyg.2018.02436},
issn = {1664-1078},
journal = {Frontiers in Psychology},
month = {12},
publisher = {Frontiers Media},
title = {Effect of Instrument Structure Alterations on Violin Performance},
year = {2018},
}
@inproceedings{Mycroft2018,
author = {J Mycroft and T Stockman and J D Reiss},
doi = {10.1145/3243274.3243290},
isbn = {9781450366090},
journal = {ACM International Conference Proceeding Series},
month = {9},
title = {A prototype mixer to improve cross-modal attention during audio mixing},
year = {2018},
}
@inproceedings{Nakamura2018,
abstract = {Most work on automatic transcription produces "piano roll" data with no musical interpretation of the rhythm or pitches. We present a polyphonic transcription method that converts a music audio signal into a human-readable musical score, by integrating multi-pitch detection and rhythm quantization methods. This integration is made difficult by the fact that the multi-pitch detection produces erroneous notes such as extra notes and introduces timing errors that are added to temporal deviations due to musical expression. Thus, we propose a rhythm quantization method that can remove extra notes by extending the metrical hidden Markov model and optimize the model parameters. We also improve the note-tracking process of multi-pitch detection by refining the treatment of repeated notes and adjustment of onset times. Finally, we propose evaluation measures for transcribed scores. Systematic evaluations on commonly used classical piano data show that these treatments improve the performance of transcription, which can be used as benchmarks for further studies.},
author = {E Nakamura and E Benetos and K Yoshii and S Dixon},
month = {4},
pages = {101-105},
publisher = {IEEE},
title = {Towards Complete Polyphonic Music Transcription: Integrating Multi-Pitch Detection and Rhythm Quantization},
url = {https://2018.ieeeicassp.org/},
year = {2018},
}
@inproceedings{Nolasco2018,
abstract = {In this work, we aim to explore the potential of machine learning methods to the problem of beehive sound recognition. A major contribution of this work is the creation and release of annotations for a selection of beehive recordings. By experimenting with both support vector machines and convolutional neural networks, we explore important aspects to be considered in the development of beehive sound recognition systems using machine learning approaches.},
author = {I Nolasco and E Benetos},
journal = {Proceedings of the Detection and Classification of Acoustic Scenes and Events 2018 Workshop (DCASE2018)},
month = {11},
title = {To bee or not to bee: Investigating machine learning approaches for beehive sound recognition},
url = {http://dcase.community/documents/workshop2018/proceedings/DCASE2018Workshop_Nolasco_131.pdf},
year = {2018},
}
@inproceedings{hanlon2018,
abstract = {© 2018 IEEE. Onset detection is a fundamental task in musical signal processing, providing information for higher level applications. Different classes of onsets can be found in musical signals, determined as being hard, or soft, by the initial energy transfer. Most onset detectors are general purpose and attempt to detect both classes of onsets, although some specifically attempt to detect soft onsets. Temporal reassignment operators related to group delay have previously been employed in onset detectors for the purposes of soft onset detection and pruning of time-frequency elements deemed to consist of vibrato. We consider the use of temporal reassignment for the detection of hard onsets and also employ the second mixed derivative of phase as a means to prune the spectral energy. Experimental validation of the proposed approach is given, showing improvements relative to state-of-the-art general purpose onset detectors for the specific tasks.},
author = {K O'Hanlon and M B Sandler},
doi = {10.1109/ICASSP.2018.8461381},
isbn = {9781538646588},
issn = {1520-6149},
journal = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
month = {9},
pages = {611-615},
title = {Improved Detection of Semi-Percussive Onsets in Audio Using Temporal Reassignment},
volume = {2018-April},
year = {2018},
}
@article{PANTELI2018,
abstract = {The comparison of world music cultures has been a recurring topic in the field of musicology since the end of the nineteenth century. Recent advances in technology in the field of Music Information Retrieval allow for a large-scale analysis of music corpora. We review manual and computational approaches in the literature that fall within the scope of music corpus research and world music analysis. With a large-scale computational music corpus analysis in mind, we compare the tools and research questions addressed by each study and discuss strengths and weaknesses. Taking into account critical remarks from experts in the field and challenges involved in a large-scale computational analysis, we discuss how this line of research can be improved in future work.},
author = {M PANTELI and E BENETOS and S DIXON},
doi = {10.1080/09298215.2017.1418896},
issn = {0929-8215},
issue = {2},
journal = {Journal of New Music Research},
month = {1},
pages = {176-189},
publisher = {Taylor & Francis (Routledge)},
title = {A review of manual and computational approaches for the study of world music corpora},
volume = {47},
year = {2018},
}
@inproceedings{Pardue2018,
abstract = {© Proceedings of the 15th Sound and Music Computing Conference: Sonic Crossings, SMC 2018. All rights reserved. This position paper introduces the concept of complexity management in instrument design as a means to optimize the learning rewards cycle in an effort to maintain player motivation. Successful fluency and expertise on an instrument requires sustained practice. In the quest to enable exceptional levels of expression, instruments designed for virtuosic performance often have a high level of complexity, which can be overwhelming for a beginner, decreasing practice motivation. Here we explain complexity management, the idea of intentionally limiting instrument complexity on a temporary basis so that instrument difficulty is optimally matched to user skill and users always remain capable of focused learning and enjoy sufficient musical success to motivate continued practice. We discuss the relevance of Csikszentmihalyi's ideas about flow, along with concepts from traditional music learning, such as chunking and internalization, along with the importance of practice and enjoyment. We then propose our own concept of learning efficiency and the importance of controlling challenge. Finally, we introduce our own experiments into complexity management using the violin, an existing example of an instrument with high input complexity. We discuss the effects of simplifying intonation in order to make early musical success easier along with plans for further investigations.},
author = {L S Pardue and A McPherson and D Overholt},
isbn = {9789963697304},
journal = {Proceedings of the 15th Sound and Music Computing Conference: Sonic Crossings, SMC 2018},
month = {1},
pages = {150-157},
title = {Improving the instrumental learning experience through complexity management},
year = {2018},
}
@inproceedings{Pauwels2018,
abstract = {A common problem in music education is finding varied and engaging material that is suitable for practising a specific musical concept or technique. At the same time, a number of large music collections are available under a Creative Commons (CC) licence (e.g. Jamendo, ccMixter), but their potential is largely untapped because of the relative obscurity of their content. In this paper, we present *Jam with Jamendo*, a web application that allows novice and expert learners of musical instruments to query songs by chord content from a large music collection, and practise the chords present in the retrieved songs by playing along. Its goal is twofold: the learners get a larger variety of practice material, while the artists receive increased exposure. We experimented with two visualisation modes. The first is a linear visualisation based on a moving time axis, the second is a circular visualisation inspired by the chromatic circle. We conducted a small-scale thinking-aloud user study with seven participants based on a hands-on practice with the web app. Through this pilot study, we obtained a qualitative understanding of the potentials and challenges of each visualisation, which will be used to inform the next design iteration of the web app.},
author = {J Pauwels and A Xambó and G Roma and M Barthet and G Fazekas},
journal = {Proceedings of the 4th Web Audio Conference (WAC)},
month = {9},
title = {Exploring Real-time Visualisations to Support Chord Learning with a Large Music Collection},
year = {2018},
}
@inproceedings{Pauwels2018b,
abstract = {Lately, a number of audio players based on web technology have made it possible for researchers to present their audio-related work in an attractive manner. Tools such as "wavesurfer.js", "waveform-playlist" and "trackswitch.js" provide highly-configurable players, allowing a more interactive exploration of scientific results that goes beyond simple linear playback.
However, the audio output to be presented is in many cases not generated by the same web technologies. The process of preparing audio data for display therefore requires manual intervention, in order to bridge the resulting gap between programming languages. While this is acceptable for one-time events, such as the preparation of final results, it prevents the usage of such players during the iterative development cycle. Having access to rich audio players already during development would allow researchers to get more instantaneous feedback. The current workflow consists of repeatedly importing audio into a digital audio workstation in order to achieve similar capabilities, a repetitive and time-consuming process.
In order to address these needs, we present "pywebaudioplayer", a Python package that automates the generation of code snippets for each of the three aforementioned web audio players. It is aimed at use-cases where audio development in Python is combined with web visualisation. Notable examples are "Jupyter Notebook" and WSGI-compatible web frameworks such as "Flask" or "Django".},
author = {J Pauwels and M Sandler},
journal = {Proceedings of the 4th Web Audio Conference (WAC)},
month = {9},
title = {pywebaudioplayer: Bridging the gap between audio processing code and attractive visualisations based on web technology},
year = {2018},
}
@inproceedings{Pauwels2018c,
abstract = {Music learners looking for practice material to play along with are not served well by the current search interfaces for large music collections. While it is easy to find specific songs using meta-data or audio fingerprinting, discovering new music based on musical content is hard. In this paper, we'll look at the challenges that arise when creating a search interface that allows querying for songs based on chord content. Specifically, we'll discuss different ways of fulfilling queries and how imperfect chord transcriptions resulting from the automatic estimation process are handled.},
author = {J Pauwels and G Fazekas and M Sandler},
journal = {Proceedings of the 2018 Joint Workshop on Machine Learning for Music},
month = {7},
title = {Recommending songs to music learners based on chord content},
year = {2018},
}
@book_section{Pearce2018,
abstract = {© 2018, Springer-Verlag Berlin Heidelberg. Efforts to develop a formal characterization of musical structure are often framed in syntactic terms, sometimes but not always with direct inspiration from research on language. In Chap. 25, we present syntactic approaches to characterizing musical structure and survey a range of theoretical issues involved in developing formal syntactic theories of sequential structure in music. Such theories are often computational in nature, lending themselves to implementation and our first goal here is to review empirical research on computational modeling of musical structure from a syntactic point of view. We ask about the motivations for implementing a model and assess the range of approaches that have been taken to date. It is important to note that while a computational model may be capable of deriving an optimal structural description of a piece of music, human cognitive processing may not achieve this optimal performance, or may even process syntax in a different way. Therefore we emphasize the difference between developing an optimal model of syntactic processing and developing a model that simulates human syntactic processing. Furthermore, we argue that, while optimal models (e. g., optimal compression or prediction) can be useful as a benchmark or yardstick for assessing human performance, if we wish to understand human cognition then simulating human performance (including aspects that are nonoptimal or even erroneous) should be the priority. Following this principle, we survey research on processing of musical syntax from the perspective of computational modeling, experimental psychology and cognitive neuroscience. There exists a large number of computational models of musical syntax, but we limit ourselves to those that are explicitly cognitively motivated, assessing them in the context of theoretical, psychological and neuroscientific research.},
author = {M Pearce and M Rohrmeier},
doi = {10.1007/978-3-662-55004-5_26},
issn = {2522-8692},
journal = {Springer Handbooks},
month = {1},
pages = {487-505},
title = {Musical Syntax II: Empirical Perspectives},
year = {2018},
}
@article{PEARCE2018b,
author = {M T PEARCE},
doi = {10.1111/nyas.13654},
issn = {0077-8923},
journal = {Annals of the New York Academy of Sciences},
month = {5},
publisher = {Wiley},
title = {Statistical Learning and Probabilistic Prediction in Music Cognition: Mechanisms of Stylistic Enculturation},
year = {2018},
}
@inproceedings{Peng2018,
abstract = {© 2018 KASHYAP. Studies have shown that listeners can distinguish between hot and cold water being poured based solely on sonic properties, yet the cause of this is unknown. This acoustic perception of temperature is an interesting aspect of multisensory perception and integration. In this paper, a series of experiments were performed to investigate the characteristics of auditory information when water is poured at different temperatures into various containers. Based on the results, it attempts to find physical and psychoacoustic explanations for the phenomenon.},
author = {H Peng and J D Reiss},
journal = {145th Audio Engineering Society International Convention, AES 2018},
month = {1},
title = {Why can you hear a difference between pouring hot and cold water? An investigation of temperature dependence in psychoacoustics},
year = {2018},
}
@inproceedings{Pras2018,
abstract = {© 2018 Audio Engineering Society. All Rights Reserved. While sound mixers of popular music may share common principles across cultures, different engineers produce different mixes, and different listeners judge a mix differently. We designed a mixed-methods approach to examine this highly multidimensional problem in both style and perceived quality. Five student sound engineers from the Paris Conservatoire mixed the multitrack source of two pop songs and fully documented their mixing process. The resulting mixes were then used as stimuli for a blind, multi-stimulus listening test in a high-quality listening room, that 13 students and one faculty member commented on and rated in terms of preference. Our outcomes highlight cultural and generational mixing specificities and offer a better understanding of the artistic side of the practice.},
author = {A Pras and B De Man and J D Reiss},
journal = {144th Audio Engineering Society Convention 2018},
month = {1},
title = {A case study of cultural influences on mixing practices},
year = {2018},
}
@article{PURVER2018,
author = {M R J PURVER and J HOUGH and C HOWES},
doi = {10.1111/tops.12324},
issn = {1756-8765},
journal = {Topics in Cognitive Science},
month = {3},
publisher = {Wiley},
title = {Computational Models of Miscommunication Phenomena},
url = {http://www.eecs.qmul.ac.uk/~mpurver/papers/purver-et-al18topics.pdf},
year = {2018},
}
@article{quiroga2018,
abstract = {Theories of predictive processing propose that prediction error responses are modulated by the certainty of the predictive model, or precision. While there is some evidence for this phenomenon in the visual and, to a lesser extent, the auditory modality, little is known about whether it operates in the complex auditory contexts of daily life. Here, we examined how prediction error responses behave in a more complex and ecologically valid auditory context than those typically studied. We created musical tone sequences with different degrees of pitch uncertainty to manipulate the precision of participants’ auditory expectations. Magnetoencephalography was used to measure the magnetic counterpart of the mismatch negativity (MMNm) as a neural marker of prediction error in a multi-feature paradigm. Pitch, slide, intensity and timbre deviants were included. We compared high-entropy stimuli, consisting of a set of non-repetitive melodies, with low-entropy stimuli consisting of a simple, repetitive pitch pattern. Pitch entropy was quantitatively assessed with an information-theoretic model of auditory expectation. We found a reduction in pitch and slide MMNm amplitudes in the high-entropy as compared to the low-entropy context. No significant differences were found for intensity and timbre MMNm amplitudes. Furthermore, in a separate behavioral experiment investigating the detection of pitch deviants, similar decreases were found for accuracy measures in response to more fine-grained increases in pitch entropy. Our results are consistent with a precision modulation of auditory prediction error in a musical context, and suggest that this effect is specific to features that depend on the manipulated dimension (pitch information, in this case). Highlights: the mismatch negativity (MMNm) is reduced in musical contexts with high pitch uncertainty; the MMNm reduction is restricted to pitch-related features; accuracy during deviance detection is reduced in contexts with higher uncertainty; the results suggest a feature-selective precision modulation of prediction error. Materials, data and scripts can be found in the Open Science Framework repository: http://bit.ly/music_entropy_MMN (DOI: 10.17605/OSF.IO/MY6TE).},
author = {D R Quiroga-Martinez and N C Hansen and A Højlund and M Pearce and E Brattico and P Vuust},
doi = {10.1101/422949},
journal = {bioRxiv},
month = {9},
publisher = {bioRxiv},
title = {Reduced prediction error responses in high- as compared to low-uncertainty musical contexts},
year = {2018},
}
@book_section{Rohrmeier2018,
abstract = {© 2018, Springer-Verlag Berlin Heidelberg. The understanding of musical syntax is a topic of fundamental importance for systematic musicology and lies at the core intersection of music theory and analysis, music psychology, and computational modeling. This chapter discusses the notion of musical syntax and its potential foundations based on notions such as sequence grammaticality, expressive unboundedness, generative capacity, sequence compression and stability. Subsequently, it discusses problems concerning the choice of musical building blocks to be modeled as well as the underlying principles of sequential structure building. The remainder of the chapter reviews the main theoretical proposals that can be characterized under different mechanisms of structure building, in particular approaches using finite-context or finite-state models as well as tree-based models of context-free complexity (including the Generative Theory of Tonal Music) and beyond. The chapter concludes with a discussion of the main issues and questions driving current research and a preparation for the subsequent empirical chapter Musical Syntax II.},
author = {M Rohrmeier and M Pearce},
doi = {10.1007/978-3-662-55004-5_25},
issn = {2522-8692},
journal = {Springer Handbooks},
month = {1},
pages = {473-486},
title = {Musical Syntax I: Theoretical Perspectives},
year = {2018},
}
@article{Sears2018,
author = {D R W Sears and M T Pearce and J Spitzer and W E Caplin and S McAdams},
doi = {10.1177/1747021818814472},
journal = {Quarterly Journal of Experimental Psychology},
month = {11},
pages = {1747021818814472–1747021818814472},
title = {Expectations for tonal cadences: Sensory and cognitive priming effects.},
url = {https://www.ncbi.nlm.nih.gov/pubmed/30404574},
year = {2018},
}
@article{SELFRIDGE2018,
abstract = {Aeroacoustics is a branch of engineering within fluid dynamics. It encompasses sounds generated by disturbances in air, either by an airflow being disturbed by an object or an object moving through air. A number of fundamental sound sources exist depending on the geometry of the interacting objects and the characteristics of the flow. An example of a fundamental aeroacoustic sound source is the Aeolian tone, generated by vortex shedding as air flows around an object. A compact source model of this sound is derived from fluid dynamics principles, operating in real-time and presenting highly relevant parameters to the user. A swinging sword, Aeolian harp and propeller are behaviour models presented to illustrate how a taxonomy of real-time aeroacoustic sound synthesis can be achieved through physical modelling. Evaluation indicates that the resulting sounds are perceptually as believable as sounds produced by other synthesis methods, while objective evaluations reveal similarities and differences between our models, pre-recorded samples and those generated by computationally complex offline methods.},
author = {R SELFRIDGE and D MOFFAT and E AVITAL and J REISS},
doi = {10.17743/jaes.2018.0033},
issn = {1549-4950},
issue = {7/8},
journal = {Journal of the Audio Engineering Society},
month = {8},
pages = {594-607},
publisher = {Audio Engineering Society},
title = {Creating Real-Time Aeroacoustic Sound Effects Using Physically Informed Models},
volume = {66},
url = {http://www.aes.org/e-lib/browse.cfm?elib=19708},
year = {2018},
}
@inproceedings{Selfridge2018b,
abstract = {© 2018 Audio Engineering Society. All Rights Reserved. The edge tone is the sound generated when a planar jet of air from a nozzle comes into contact with a wedge and a number of physical conditions are met. Fluid dynamics equations were used to synthesise authentic edge tones without the need for complex computation. A real-time physically derived synthesis model was designed using the jet airspeed and nozzle exit-to-wedge geometry. We compare different theoretical equations used to predict the tone frequency. A decision tree derived from machine learning based on previously published experimental results was used to predict the correct mode of operation. Results showed an accurate implementation for mode selection, and highlighted areas where operation follows or deviates from previously published data.},
author = {R Selfridge and J D Reiss and E J Avital},
journal = {144th Audio Engineering Society Convention 2018},
month = {1},
title = {Physically derived synthesis model of an edge tone},
year = {2018},
}
@inproceedings{Sheng2018,
abstract = {This paper proposes a method of controlling the dynamic range compressor using sound examples. Our earlier work showed the effectiveness of random forest regression to map acoustic features to effect control parameters. We extend this work to address the challenging task of extracting relevant features when audio events overlap. We assess different audio decomposition approaches such as onset event detection, NMF, and transient/stationary audio separation using ISTA and compare feature extraction strategies for each case. Numerical and perceptual similarity tests show the utility of audio decomposition as well as specific features in the prediction of dynamic range compressor parameters.},
author = {D Sheng and G Fazekas},
journal = {Proc. of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), April 15-20, Calgary, Canada.},
keywords = {intelligent music production, ICASSP, intelligent audio effects},
title = {Feature Design Using Audio Decomposition for Intelligent Control of the Dynamic Range Compressor},
url = {https://2018.ieeeicassp.org/Papers/ViewPapers.asp?PaperNum=3048},
year = {2018},
}
@inproceedings{Sheng2018b,
abstract = {Casual users of audio effects may lack practical experience or knowledge of their low-level signal processing parameters. An intelligent control tool that allows using sound examples to control effects would strongly benefit these users. In a previous work we proposed a control method for the dynamic range compressor (DRC) using a random forest regression model. It maps audio features extracted from a reference sound to DRC parameter values, such that the processed signal resembles the reference. The key to good performance in this system is the relevance and effectiveness of audio features. This paper focusses on a thorough exposition and assessment of the features, as well as the comparison of different strategies to find the optimal feature set for DRC parameter estimation, using automatic feature selection methods. This enables us to draw conclusions about which features are relevant to core DRC parameters. Our results show that conventional time and frequency domain features well known from the literature are sufficient to estimate the DRC’s threshold and ratio parameters, while more specialized features are needed for attack and release time, which induce more subtle changes to the signal.},
author = {D Sheng and G Fazekas},
journal = {Proc. of the 144th Convention of the Audio Engineering Society, 23-26 May, Milan, Italy},
keywords = {feature selection, intelligent music production, AES, intelligent audio effects},
title = {Feature Selection for Dynamic Range Compressor Parameter Estimation},
url = {http://www.aes.org/events/144/papers/?ID=5993},
year = {2018},
}
@inproceedings{SHUKLA2018,
abstract = {If well-matched to a given listener, head-related transfer functions (HRTFs) that have not been individually measured can still present relatively effective auditory scenes compared to renderings from individualised HRTF sets. We present and assess a system for HRTF selection that relies on holistic judgements of users to identify their optimal match through a series of pairwise adversarial comparisons. The mechanism resulted in clear preference for a single HRTF set in a majority of cases. Where this did not occur, randomised selection between equally judged HRTFs did not significantly impact user performance in a subsequent listening task. This approach is shown to be equally effective for both novice and expert listeners in selecting their preferred HRTF set.},
author = {R C Shukla and R L Stewart and A Roginska and M B Sandler},
city = {New York, NY, USA},
journal = {Proceedings of the AES International Conference},
month = {8},
pages = {1-10},
publisher = {Audio Engineering Society},
title = {User Selection of Optimal HRTF Sets via Holistic Comparative Evaluation},
url = {http://www.aes.org/e-lib/inst/browse.cfm?elib=19677},
year = {2018},
}
@inproceedings{SKACH2018,
abstract = {This paper presents initial steps towards the design of an embedded system for body-centric sonic performance. The proposed prototyping system allows performers to manipulate sounds through gestural interactions captured by textile wearable sensors. The e-textile sensor data control, in real-time, audio synthesis algorithms working with content from Audio Commons, a novel web-based ecosystem for re-purposing crowd-sourced audio. The system enables creative embodied music interactions by combining seamless physical e-textiles with web-based digital audio technologies.},
author = {S Skach and A Xambó and L Turchet and A Stolfi and R L Stewart and M H E Barthet},
doi = {10.1145/3173225.3173272},
month = {3},
title = {Embodied Interactions with E-Textiles and the Internet of Sounds for Performing Arts},
year = {2018},
}
@inproceedings{Skach2018b,
abstract = {© 2018 Copyright held by the owner/author(s). Body posture is a good indicator of, amongst other things, people's state of arousal, focus of attention and level of interest in a conversation. Posture is conventionally measured by observation and hand coding of videos or, more recently, through automated computer vision and motion capture techniques. Here we introduce a novel alternative approach exploiting a new modality: posture classification using bespoke'smart' trousers with integrated textile pressure sensors. Changes in posture translate to changes in pressure patterns across the surface of our clothing. We describe the construction of the textile pressure sensor that can detect these changes. Using simple machine learning techniques on data gathered from 6 participants we demonstrate its ability to discriminate between 19 different basic posture types with high accuracy. This technology has the potential to support anonymous, unintrusive sensing of interest, attention and engagement in a wide variety of settings.},
author = {S Skach and R Stewart and P G T Healey},
doi = {10.1145/3242969.3242977},
isbn = {9781450356923},
journal = {ICMI 2018 - Proceedings of the 2018 International Conference on Multimodal Interaction},
month = {10},
pages = {116-124},
title = {Smart ARSE: Posture classification with textile sensors in trousers},
year = {2018},
}
@article{STOCKMAN2018,
author = {A G STOCKMAN and D AL-THANI},
doi = {10.1093/iwc/iwy017},
journal = {Interacting With Computers},
month = {9},
title = {Evaluating an Interface for Cross-modal Information Seeking},
year = {2018},
}
@inproceedings{STOCKMAN2018b,
author = {A G STOCKMAN and O METATLA},
doi = {10.1145/3173574.3174120},
month = {4},
title = {“I Hear You”: Understanding Awareness Information Exchange in an Audio-only Workspace},
year = {2018},
}
@article{STOCKMAN2018c,
author = {T STOCKMAN and S Wilkie},
doi = {10.1016/j.apacoust.2017.12.032},
issn = {1872-910X},
journal = {Applied Acoustics},
month = {1},
publisher = {Elsevier},
title = {Perception of objects that move in depth, using ecologically valid audio cues},
year = {2018},
}
@article{Stolfi2018,
abstract = {© 2018 Audio Engineering Society. All Rights Reserved. This paper analyzes communication patterns occurring in the online chat of the "Open Band" system for participatory live music performance. In addition to acting as a multi-user messaging tool, the chat system also serves as a control interface for the sonification of textual messages from the audience. Open Band performances have been presented at various festivals and conferences since 2016. Its web-based platform enables collective "sound dialogues" that are open to everyone regardless of musical skills. Drawing on interactive participatory art and networked music performance, the system aims to provide engaging social experiences in colocated music-making situations. We collected data from four public performances including over 3,000 anonymous messages sent by audiences. We present the design of the system and then analyze the semantic content of messages using thematic and statistical analyses. Findings show how different sonification mechanisms alter the nature of the communication between participants who articulate between linguistic and musical self-expression.},
author = {A Stolfi and J Sokolovskis and F Gorodscy and F Iazzetta and M Barthet},
doi = {10.17743/jaes.2018.0048},
issn = {1549-4950},
issue = {11},
journal = {AES: Journal of the Audio Engineering Society},
month = {11},
pages = {910-921},
title = {Audio semantics: Online chat communication in open band participatory music performances},
volume = {66},
year = {2018},
}
@inproceedings{Stoller2018,
abstract = {© 2018 IEEE. The state of the art in music source separation employs neural networks trained in a supervised fashion on multi-track databases to estimate the sources from a given mixture. With only few datasets available, often extensive data augmentation is used to combat overfitting. Mixing random tracks, however, can even reduce separation performance as instruments in real music are strongly correlated. The key concept in our approach is that source estimates of an optimal separator should be indistinguishable from real source signals. Based on this idea, we drive the separator towards outputs deemed as realistic by discriminator networks that are trained to tell apart real from separator samples. This way, we can also use unpaired source and mixture recordings without the drawbacks of creating unrealistic music mixtures. Our framework is widely applicable as it does not assume a specific network architecture or number of sources. To our knowledge, this is the first adoption of adversarial training for music source separation. In a prototype experiment for singing voice separation, separation performance increases with our approach compared to purely supervised training.},
author = {D Stoller and S Ewert and S Dixon},
doi = {10.1109/ICASSP.2018.8461722},
isbn = {9781538646588},
issn = {1520-6149},
journal = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
month = {9},
pages = {2391-2395},
title = {Adversarial Semi-Supervised Audio Source Separation Applied to Singing Voice Extraction},
volume = {2018-April},
year = {2018},
}
@inproceedings{Stoller2018b,
abstract = {© 2018 IEEE. Existing music recordings are often rearranged, for example to fit their duration and structure to video content. Often an expert is needed to find suitable cut points allowing for imperceptible transitions between different sections. In previous work, the search for these cuts is restricted to the beginnings of beats or measures and only timbre and loudness are taken into account, while melodic expectations and instrument continuity are neglected. We instead aim to learn these features by training neural networks on a dataset of over 300 popular Western songs to classify which note onsets are suitable entry or exit points for a cut. We investigate existing and novel architectures and different feature representations, and find that best performance is achieved using neural networks with two-dimensional convolutions applied to spectrogram input covering several seconds of audio with a high temporal resolution of 23 or 46 ms. Finally, we analyse our best model using saliency maps and find it attends to rhythmical structures and the presence of sounds at the onset position, suggesting instrument activity to be important for predicting cut quality.},
author = {D Stoller and V Akkermans and S Dixon},
doi = {10.1109/MLSP.2018.8516706},
isbn = {9781538654774},
issn = {2161-0363},
journal = {IEEE International Workshop on Machine Learning for Signal Processing, MLSP},
month = {10},
title = {Detection of cut-points for automatic music rearrangement},
volume = {2018-September},
year = {2018},
}
@inproceedings{STOLLER2018c,
author = {D Stoller and S Ewert and S Dixon},
month = {6},
title = {Jointly detecting and separating singing voice: a multi-task approach},
year = {2018},
}
@article{Stowell2018,
abstract = {Assessing the presence and abundance of birds is important for monitoring specific species as well as overall ecosystem health. Many birds are most readily detected by their sounds, and thus, passive acoustic monitoring is highly appropriate. Yet acoustic monitoring is often held back by practical limitations such as the need for manual configuration, reliance on example sound libraries, low accuracy, low robustness, and limited ability to generalise to novel acoustic conditions. Here, we report outcomes from a collaborative data challenge. We present new acoustic monitoring datasets, summarise the machine learning techniques proposed by challenge teams, conduct detailed performance evaluation, and discuss how such approaches to detection can be integrated into remote monitoring projects. Multiple methods were able to attain performance of around 88% area under the receiver operating characteristic (ROC) curve (AUC), much higher performance than previous general-purpose methods. With modern machine learning, including deep learning, general-purpose acoustic bird detection can achieve very high retrieval rates in remote monitoring data, with no manual recalibration, and no pretraining of the detector for the target species or the acoustic conditions in the target environment.},
author = {D Stowell and M D Wood and H Pamuła and Y Stylianou and H Glotin},
doi = {10.1111/2041-210X.13103},
journal = {Methods in Ecology and Evolution},
month = {11},
title = {Automatic acoustic detection of birds through deep learning: The first Bird Audio Detection challenge},
year = {2018},
}
@inproceedings{THALMANN2018,
abstract = {We describe the concepts behind a web-based minimal-UI DJ system that adapts to the user’s preference via simple interactive decisions and feedback on taste. Starting from a preset decision tree modeled on common DJ practice, the system can gradually learn a more customised and user-specific tree. At the core of the system are structural representations of the musical content based on semantic audio technologies and inferred from features extracted from the audio directly in the browser. These representations are gradually combined into a representation of the mix which could then be saved and shared with other users. We show how different types of transitions can be modeled using simple musical constraints. Potential applications of the system include crowd-sourced data collection, both on temporally aligned playlisting and musical preference.},
author = {F Thalmann and L Thompson and M Sandler},
month = {9},
title = {A User-Adaptive Automated DJ Web App with Object-Based Audio and Crowd-Sourced Decision Trees},
year = {2018},
}
@inproceedings{Thalmann2018b,
abstract = {© 2018 Copyright held by the owner/author(s). We refine and unify our previous data model for describing and linking live music artefacts. In our model, physical and digital artefacts and recordings are treated as forms of cultural heritage which can all be aligned and distributed along the same event timeline. We show how our ontology maps to existing conceptual models and we evaluate it with a number of example queries as well as in practice, embedded in an online platform dedicated to the exploration of aggregated information documenting the live music events of a specific band.},
author = {F Thalmann and T Wilmering and M B Sandler},
doi = {10.1145/3243907.3243910},
isbn = {9781450364959},
journal = {ACM International Conference Proceeding Series},
month = {10},
pages = {1-5},
title = {Cultural heritage documentation and exploration of live music events with linked data},
year = {2018},
}
@article{Turchet2018,
abstract = {© 2018 Audio Engineering Society. All rights reserved. Smart Instruments are a novel family of musical instruments that embed sensors, actuators, wireless connectivity, and semantic audio technologies. This paper reports the findings of a participatory design approach to develop a Smart Cajón, a box-shaped percussion instrument with Internet of Musical Things components. Five initial co-design sessions were conducted with different professional cajón player participants. The players were invited to devise tangible mock-ups by placing provided sensors on an acoustic cajón and to express desirable use cases and interactions. We then designed and implemented a prototype satisfying performers' common requirements. The prototype was assessed using the concurrent think-aloud protocol and semi-structured interviews. Overall, the smart qualities of the prototype and their potential received positive feedback, and areas of improvements related to expressive control and personalization were highlighted.},
author = {L Turchet and A McPherson and M Barthet},
doi = {10.17743/jaes.2018.0007},
issn = {1549-4950},
issue = {4},
journal = {AES: Journal of the Audio Engineering Society},
month = {4},
pages = {220-230},
title = {Co-design of a smart Cajón},
volume = {66},
year = {2018},
}
@article{TURCHET2018b,
author = {L TURCHET and M BARTHET},
doi = {10.1109/THMS.2018.2885408},
issn = {2168-2291},
journal = {IEEE Transactions on Human-Machine Systems},
month = {12},
publisher = {Institute of Electrical and Electronics Engineers},
title = {Co-design of Musical Haptic Wearables for Electronic Music Performer's Communication},
year = {2018},
}
@article{Turchet2018c,
abstract = {© 2013 IEEE. The Internet of Musical Things (IoMusT) is an emerging research field positioned at the intersection of Internet of Things, new interfaces for musical expression, ubiquitous music, human-computer interaction, artificial intelligence, and participatory art. From a computer science perspective, IoMusT refers to the networks of computing devices embedded in physical objects (musical things) dedicated to the production and/or reception of musical content. Musical things, such as smart musical instruments or wearables, are connected by an infrastructure that enables multidirectional communication, both locally and remotely. We present a vision in which the IoMusT enables the connection of digital and physical domains by means of appropriate information and communication technologies, fostering novel musical applications and services. The ecosystems associated with the IoMusT include interoperable devices and services that connect musicians and audiences to support musician-musician, audience-musicians, and audience-audience interactions. In this paper, we first propose a vision for the IoMusT and its motivations. We then discuss five scenarios illustrating how the IoMusT could support: 1) augmented and immersive concert experiences; 2) audience participation; 3) remote rehearsals; 4) music e-learning; and 5) smart studio production. We identify key capabilities missing from today's systems and discuss the research needed to develop these capabilities across a set of interdisciplinary challenges. These encompass network communication (e.g., ultra-low latency and security), music information research (e.g., artificial intelligence for real-time audio content description and multimodal sensing), music interaction (e.g., distributed performance and music e-learning), as well as legal and responsible innovation aspects to ensure that future IoMusT services are socially desirable and undertaken in the public interest.},
author = {L Turchet and C Fischione and G Essl and D Keller and M Barthet},
doi = {10.1109/ACCESS.2018.2872625},
journal = {IEEE Access},
month = {9},
pages = {61994-62017},
title = {Internet of Musical Things: Vision and Challenges},
volume = {6},
year = {2018},
}
@inproceedings{Turchet2018d,
abstract = {© 2018 FRUCT Oy. This paper presents an Internet of Musical Things ecosystem involving musicians and audiences interacting with a smart mandolin, smartphones, and the Audio Commons online repository Freesound. The ecosystem has been devised to support performer-instrument and performer-audience interactions through the generation of musical accompaniments exploiting crowd-sourced sounds. We present two use cases investigating how audio content retrieved from Freesound can be leveraged by performers or audiences to produce accompanying soundtracks for music performance with a smart mandolin. In the performer-instrument interaction use case, the performer can select content to be retrieved prior to performing through a set of keywords and structure it in order to create the desired accompaniment. In the performer-audience interaction use case, a group of audience members participates in the music creation by selecting and arranging Freesound audio content to create an accompaniment collaboratively. We discuss the advantages and limitations of the system with regard to music making and audience participation, along with its implications and challenges.},
author = {L Turchet and M Barthet},
doi = {10.23919/FRUCT.2018.8588110},
isbn = {9789526865362},
issn = {2305-7254},
journal = {Conference of Open Innovation Association, FRUCT},
month = {12},
pages = {375-381},
title = {Jamming with a Smart Mandolin and Freesound-based Accompaniment},
volume = {2018-November},
year = {2018},