-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path5) ProPer scores (aggregated data).Rmd
88 lines (72 loc) · 4.58 KB
/
5) ProPer scores (aggregated data).Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# ProPer scores (V): allocate ProPer values to manually segmented intervals, for data aggregation and stats
```{r clean_start}
rm(list = ls())
## Load required libraries
require(dplyr)
require(zoo)
comp_df <- read.csv("data_tables/comp_df.csv") %>% distinct(file, t, .keep_all = TRUE)
## interpolate observations from the TextGrid manual annotations
if(length(comp_df$syll_mid) > 0) comp_df <- mutate(
group_by(comp_df, file),
syll_mid = zoo::na.locf(syll_mid, na.rm=F)
)
#
if(length(comp_df$word_label) > 0) comp_df <- mutate(
group_by(comp_df, file),
word_mid = zoo::na.locf(word_mid, na.rm=F)
)
```
# Prepare the scores_df table
```{r prepare_scores_df, warning=FALSE}
## check up on correct allocations: check how many CoMs within manually segmented intervals and choose the one associated with highest mass if there are multiple CoMs
scores_df <- mutate(
## calcualte the following for each syllabic interval (manual)
group_by(comp_df, file, syll_mid),
## find the CoMs associated with each syllabic interval
syllCoMMap = ifelse( (all(is.na(CoMatCoM)) | is.na(syll_mid)) , NA,
ifelse( length(which(!is.na(CoMatCoM))) == 1, mean(CoMatCoM, na.rm = T),
# use '0' when there are multiple CoMs
ifelse( length(which(!is.na(CoMatCoM))) > 1, 0 ))),
# choose the stonger mass_rel if there are multiple CoMs
syllCoMMap2 = ifelse(syllCoMMap==0 & CoMatCoM==t, mass_rel, NA),
syllCoMMap2 = max(syllCoMMap2, na.rm = T),
syllCoMMap = ifelse(syllCoMMap==0 & CoMatCoM==t, syllCoMMap2, syllCoMMap),
syllCoMMap = ifelse(syllCoMMap==0, NA, syllCoMMap),
## convert mass_rel to corresponding CoM
syllCoMflicts = ifelse( (length(which(!is.na(CoMatCoM))) > 1 & syllCoMMap != mass_rel) , NA, CoM ),
## strectch the relevant CoM values within syllabic intervals
syllCoMMap = ifelse( length(which(!is.na(CoMatCoM))) > 1, mean(syllCoMflicts, na.rm = T), syllCoMMap ),
### the following ProPer parameters: use the values associated with the relevant CoM
intervalDuration = ifelse( syllCoMMap == t, intervalDuration, NA),
intervalDuration = ifelse( all(is.na(intervalDuration)), NA, max(intervalDuration, na.rm = T) ),
CoM = ifelse( syllCoMMap == t, CoM, NA),
CoM = ifelse( all(is.na(CoM)), NA, max(CoM, na.rm = T) ),
CoG = ifelse( syllCoMMap == t, CoG, NA),
CoG = ifelse( all(is.na(CoG)), NA, max(CoG, na.rm = T) ),
f0atCoM = ifelse( syllCoMMap == t, f0atCoM, NA),
f0atCoM = ifelse( all(is.na(f0atCoM)), NA, max(f0atCoM, na.rm = T) ),
DeltaF0 = ifelse( syllCoMMap == t, DeltaF0, NA),
DeltaF0 = max(DeltaF0, na.rm = T),
# DeltaF0 = ifelse( all(is.na(DeltaF0)), NA, max(DeltaF0, na.rm = T) ),
DeltaF0_rel = ifelse( syllCoMMap == t, DeltaF0_rel, NA),
DeltaF0_rel = ifelse( all(is.na(DeltaF0_rel)), NA, max(DeltaF0_rel, na.rm = T) ),
synchrony = ifelse( syllCoMMap == t, synchrony, NA),
synchrony = ifelse( all(is.na(synchrony)), NA, max(synchrony, na.rm = T) ),
sync_rel = ifelse( syllCoMMap == t, sync_rel, NA),
sync_rel = ifelse( all(is.na(sync_rel)), NA, max(sync_rel, na.rm = T) ),
mass_rel = ifelse( syllCoMMap == t, mass_rel, NA),
mass_rel = ifelse( all(is.na(mass_rel)), NA, max(mass_rel, na.rm = T) )
)
## reduce rows
# scores_df <- dplyr::filter(scores_df, syll_mid==t)
scores_df <- dplyr::filter(scores_df, syllCoMMap == t)
## reduce columns
mini_scores_df <- droplevels(subset(scores_df, select = -c(t, syll_start, syll_mid, syll_end, syll_bounds, word_start, word_mid, word_end, word_bounds, perFloorStatus, relToStatus, f0_smooth, f0_smerp, f0_smoorp, f0_data_min, f0_data_max, f0_data_range, strengThresh_indeed, smogPP_20Hz, smogPP_12Hz, smogPP_8Hz, smogPP_5Hz, syll_boundsSeq, useManualStatus, autoManStatus, averageSyllStatus, auto_bounds, auto_boundsSeq, f0atCoG, f0Floor4real, PERatCoM, PERatCoG, CoM_corr, CoG_corr, CoMatCoM, syllCoMMap, syllCoMMap2, syllCoMflicts, totalPower_scaled, auto_boundsSeqBounded, smogPP_5HzBounded, smogPP_8HzBounded, smogPP_12HzBounded, smogPP_20HzBounded)))
nano_scores_df <- droplevels(subset(mini_scores_df, select = -c(f0_speaker_min, f0_speaker_max, f0_speaker_median, f0_speaker_mean, f0_speaker_range, f0_speaker_min_median, f0_speaker_max_median, f0_speaker_range_median, f0_token_min, f0_token_max, f0_token_median, f0_token_mean, f0_token_range, CoM, CoG, intervalDuration_rel, DeltaF0Label, syncLabel, dynaSpeechRate)))
```
# Write scores_df table
```{r write_scores_df}
## Write the scores data file
write.csv(mini_scores_df, "data_tables/mini_scores_df.csv", row.names=FALSE)
write.csv(nano_scores_df, "data_tables/nano_scores_df.csv", row.names=FALSE)
```