@article{abelson_value_2003,
title = {The {{Value}} of {{Life}} and {{Health}} for {{Public Policy}}},
author = {Abelson, Peter},
year = {2003},
month = jun,
journal = {Economic Record},
volume = {79},
pages = {S2--S13},
issn = {00130249, 14754932},
doi = {10.1111/1475-4932.00087},
langid = {english}
}
@book{aberson_applied_2019,
title = {Applied {{Power Analysis}} for the {{Behavioral Sciences}}},
shorttitle = {Applied {{Power Analysis}} for the {{Behavioral Sciences}}},
author = {Aberson, Christopher L.},
year = {2019},
month = feb,
edition = {Second},
publisher = {{Routledge}},
address = {{New York}},
abstract = {Applied Power Analysis for the Behavioral Sciences is a practical "how-to" guide to conducting statistical power analyses for psychology and related fields. The book provides a guide to conducting analyses that is appropriate for researchers and students, including those with limited quantitative backgrounds. With practical use in mind, the text provides detailed coverage of topics such as how to estimate expected effect sizes and power analyses for complex designs. The topical coverage of the text, an applied approach, in-depth coverage of popular statistical procedures, and a focus on conducting analyses using R make the text a unique contribution to the power literature. To facilitate application and usability, the text includes ready-to-use R code developed for the text. An accompanying R package called pwr2ppl (available at https://github.com/chrisaberson/pwr2ppl) provides tools for conducting power analyses across each topic covered in the text.},
isbn = {978-1-138-04456-2},
langid = {english}
}
@misc{aert_correcting_2018,
title = {Correcting for {{Publication Bias}} in a {{Meta-Analysis}} with the {{P-uniform}}* {{Method}}},
author = {van Aert, Robbie C. M. and van Assen, Marcel A. L. M.},
year = {2018},
month = oct,
institution = {{MetaArXiv}},
doi = {10.31222/osf.io/zqjr9},
abstract = {Publication bias is a major threat to the validity of a meta-analysis resulting in overestimated effect sizes. We propose an extension and improvement of the publication bias method p-uniform called p-uniform*. P-uniform* improves upon p-uniform in three ways, as it (i) entails a more efficient estimator, (ii) eliminates the overestimation of effect size caused by between-study variance in true effect size, and (iii) enables estimating and testing for the presence of the between-study variance. We compared the statistical properties of p-uniform* with p-uniform, the selection model approach of Hedges (1992), and the random-effects model. Statistical properties of p-uniform* and the selection model approach were comparable and generally outperformed p-uniform and the random-effects model if publication bias was present. We demonstrate that p-uniform* and the selection model approach estimate average effect size and between-study variance rather well with ten or more studies in the meta-analysis when publication bias is not extreme. P-uniform* generally provides more accurate estimates of the between-study variance in meta-analyses containing many studies (e.g., 60 or more) and if publication bias is present. We offer recommendations for applied researchers, provide an R package as well as an easy-to-use web application for applying p-uniform*.},
langid = {american},
keywords = {meta-analysis,Other Statistics and Probability,p-uniform,Physical Sciences and Mathematics,Psychology,publication bias,Quantitative Psychology,selection model approach,Social and Behavioral Sciences,Statistics and Probability}
}
@article{albers_credible_2018,
title = {Credible {{Confidence}}: {{A Pragmatic View}} on the {{Frequentist}} vs {{Bayesian Debate}}},
shorttitle = {Credible {{Confidence}}},
author = {Albers, Casper J. and Kiers, Henk A. L. and van Ravenzwaaij, Don},
year = {2018},
month = aug,
journal = {Collabra: Psychology},
volume = {4},
number = {1},
pages = {31},
publisher = {{The Regents of the University of California}},
issn = {2474-7394},
doi = {10.1525/collabra.149},
langid = {english}
}
@article{albers_when_2018,
title = {When Power Analyses Based on Pilot Data Are Biased: {{Inaccurate}} Effect Size Estimators and Follow-up Bias},
shorttitle = {When Power Analyses Based on Pilot Data Are Biased},
author = {Albers, Casper J. and Lakens, Dani{\"e}l},
year = {2018},
journal = {Journal of Experimental Social Psychology},
volume = {74},
pages = {187--195},
issn = {0022-1031},
doi = {10.1016/j.jesp.2017.09.004},
abstract = {When designing a study, the planned sample size is often based on power analyses. One way to choose an effect size for power analyses is by relying on pilot data. A-priori power analyses are only accurate when the effect size estimate is accurate. In this paper we highlight two sources of bias when performing a-priori power analyses for between-subject designs based on pilot data. First, we examine how the choice of the effect size index ({$\eta$}2, {$\omega$}2 and {$\epsilon$}2) affects the sample size and power of the main study. Based on our observations, we recommend against the use of {$\eta$}2 in a-priori power analyses. Second, we examine how the maximum sample size researchers are willing to collect in a main study (e.g. due to time or financial constraints) leads to overestimated effect size estimates in the studies that are performed. Determining the required sample size exclusively based on the effect size estimates from pilot data, and following up on pilot studies only when the sample size estimate for the main study is considered feasible, creates what we term follow-up bias. We explain how follow-up bias leads to underpowered main studies. Our simulations show that designing main studies based on effect sizes estimated from small pilot studies does not yield desired levels of power due to accuracy bias and follow-up bias, even when publication bias is not an issue. We urge researchers to consider alternative approaches to determining the sample size of their studies, and discuss several options.},
keywords = {Effect size,Epsilon-squared,Eta-squared,Follow-up bias,Omega-squared,Power analysis}
}
@article{aldrich_r_1997,
title = {R. {{A}}. {{Fisher}} and the Making of Maximum Likelihood 1912--1922},
author = {Aldrich, John},
year = {1997},
month = sep,
journal = {Statistical Science},
volume = {12},
number = {3},
pages = {162--176},
publisher = {{Institute of Mathematical Statistics}},
issn = {0883-4237, 2168-8745},
doi = {10.1214/ss/1030037906},
abstract = {In 1922 R. A. Fisher introduced the method of maximum likelihood. He first presented the numerical procedure in 1912. This paper considers Fisher's changing justifications for the method, the concepts he developed around it (including likelihood, sufficiency, efficiency and information) and the approaches he discarded (including inverse probability).},
keywords = {Bayes's postulate,efficiency,Fisher,Information,inverse probability,maximum likelihood,Pearson,student,sufficiency}
}
@article{allison_power_1997,
title = {Power and Money: {{Designing}} Statistically Powerful Studies While Minimizing Financial Costs},
shorttitle = {Power and Money},
author = {Allison, David B. and Allison, Ronald L. and Faith, Myles S. and Paultre, Furcy and {Pi-Sunyer}, F. Xavier},
year = {1997},
journal = {Psychological Methods},
volume = {2},
number = {1},
pages = {20--33},
issn = {1939-1463(Electronic),1082-989X(Print)},
doi = {10.1037/1082-989X.2.1.20},
abstract = {Adequate statistical power is increasingly demanded in research designs. However, obtaining adequate research funding is increasingly difficult. This places researchers in a difficult position. In response, the authors advocate an approach to designing studies that considers statistical power and financial concerns simultaneously. Their purpose is twofold: (a) to introduce the general paradigm of cost optimization in the context of power analysis and (b) to present techniques for such optimization. Techniques are presented in the context of a randomized clinical trial. The authors consider (a) selecting optimal cutpoints for subject screening tests; (b) optimally allocating subjects to different treatment conditions; (c) choosing between obtaining more subjects or taking more replicate measurements; and (d) using prerandomization covariates. (PsycINFO Database Record (c) 2016 APA, all rights reserved)},
keywords = {Cost Containment,Experimentation,Experimenters,Statistical Power}
}
@article{altman_statistics_1995,
title = {Statistics Notes: {{Absence}} of Evidence Is Not Evidence of Absence},
shorttitle = {Statistics Notes},
author = {Altman, Douglas G. and Bland, J. Martin},
year = {1995},
month = aug,
journal = {BMJ},
volume = {311},
number = {7003},
pages = {485},
issn = {0959-8138, 1468-5833},
doi = {10.1136/bmj.311.7003.485},
abstract = {The non-equivalence of statistical significance and clinical importance has long been recognised, but this error of interpretation remains common. Although a significant result in a large study may sometimes not be clinically important, a far greater problem arises from misinterpretation of non-significant findings. By convention a P value greater than 5\% (P{$>$}0.05) is called ``not significant.'' Randomised controlled clinical trials that do not show a significant difference between the treatments being compared are often called ``negative.'' This term wrongly implies that the study has shown that there is no difference, whereas usually all that has been shown is an absence of evidence of a difference. These are quite different statements. The sample size of controlled trials is generally inadequate, with a consequent lack of power to detect real, and clinically worthwhile, differences in treatment. Freiman et al1 found that only \ldots},
copyright = {\textcopyright{} 1995 BMJ Publishing Group Ltd.},
langid = {english},
pmid = {7647644}
}
@article{anderson_addressing_2017,
title = {Addressing the ``{{Replication Crisis}}'': {{Using Original Studies}} to {{Design Replication Studies}} with {{Appropriate Statistical Power}}},
shorttitle = {Addressing the ``{{Replication Crisis}}''},
author = {Anderson, Samantha F. and Maxwell, Scott E.},
year = {2017},
month = mar,
journal = {Multivariate Behavioral Research},
pages = {1--20},
issn = {0027-3171, 1532-7906},
doi = {10.1080/00273171.2017.1289361},
langid = {english}
}
@incollection{anderson_group_2014,
title = {Group {{Sequential Design}} in {{R}}},
booktitle = {Clinical {{Trial Biostatistics}} and {{Biopharmaceutical Applications}}},
author = {Anderson, Keaven M.},
year = {2014},
pages = {179--209},
publisher = {{CRC Press}},
address = {{New York}},
isbn = {978-1-4822-1218-1}
}
@article{anderson_sample-size_2017,
title = {Sample-Size Planning for More Accurate Statistical Power: {{A}} Method Adjusting Sample Effect Sizes for Publication Bias and Uncertainty},
shorttitle = {Sample-Size Planning for More Accurate Statistical Power},
author = {Anderson, Samantha F. and Kelley, Ken and Maxwell, Scott E.},
year = {2017},
journal = {Psychological Science},
volume = {28},
number = {11},
pages = {1547--1562},
publisher = {{SAGE Publications}},
address = {{Los Angeles, CA}},
doi = {10.1177/0956797617723724}
}
@article{anderson_theres_2016,
title = {There's More than One Way to Conduct a Replication Study: {{Beyond}} Statistical Significance.},
shorttitle = {There's More than One Way to Conduct a Replication Study},
author = {Anderson, Samantha F. and Maxwell, Scott E.},
year = {2016},
journal = {Psychological Methods},
volume = {21},
number = {1},
pages = {1--12},
issn = {1939-1463, 1082-989X},
doi = {10.1037/met0000051},
langid = {english}
}
@article{anvari_not_2021,
title = {Not All Effects Are Indispensable: {{Psychological}} Science Requires Verifiable Lines of Reasoning for Whether an Effect Matters.},
shorttitle = {Not All Effects Are Indispensable},
author = {Anvari, Farid and Kievit, Rogier and Lakens, Daniel and Pennington, Charlotte Rebecca and Przybylski, Andrew K. and Tiokhin, Leo and Wiernik, Brenton M. and Orben, Amy},
year = {2021},
month = jun,
journal = {Perspectives on Psychological Science},
doi = {10.31234/osf.io/g3vtr},
abstract = {Psychological researchers currently lack guidance for how to make claims about and evaluate the practical relevance and significance of observed effect sizes, i.e. whether a finding will have impact when translated to a different context of application. Although psychologists have recently highlighted theoretical justifications for why small effect sizes might be practically relevant, such justifications fail to provide the information necessary for evaluation and falsification. Claims about whether an observed effect size is practically relevant need to consider both the mechanisms amplifying and counteracting practical relevance, as well as the assumptions underlying each mechanism at play. To provide guidance for making claims about whether an observed effect size is practically relevant in such a way that the claims can be systematically evaluated, we present examples of widely applicable mechanisms and the key assumptions needed for justifying whether an observed effect size can be expected to generalize to different contexts. Routine use of these mechanisms to justify claims about practical relevance has the potential to make researchers' claims about generalizability substantially more transparent. This transparency can help move psychological science towards a more rigorous assessment of when psychological findings can be applied in the world.},
langid = {american},
keywords = {benchmarks,effect size,evaluation,Meta-science,practical significance,Social and Behavioral Sciences}
}
@article{anvari_using_2021,
title = {Using Anchor-Based Methods to Determine the Smallest Effect Size of Interest},
author = {Anvari, Farid and Lakens, Dani{\"e}l},
year = {2021},
month = sep,
journal = {Journal of Experimental Social Psychology},
volume = {96},
pages = {104159},
issn = {0022-1031},
doi = {10.1016/j.jesp.2021.104159},
abstract = {Effect sizes are an important outcome of quantitative research, but few guidelines exist that explain how researchers can determine which effect sizes are meaningful. Psychologists often want to study effects that are large enough to make a difference to people's subjective experience. Thus, subjective experience is one way to gauge the meaningfulness of an effect. We propose and illustrate one method for how to quantify the smallest subjectively experienced difference\textemdash the smallest change in an outcome measure that individuals consider to be meaningful enough in their subjective experience such that they are willing to rate themselves as feeling different\textemdash using an anchor-based method with a global rating of change question applied to the positive and negative affect scale. We provide a step-by-step guide for the questions that researchers need to consider in deciding whether and how to use the anchor-based method, and we make explicit the assumptions of the method that future research can examine. For researchers interested in people's subjective experiences, this anchor-based method provides one way to specify a smallest effect size of interest, which allows researchers to interpret observed results in terms of their theoretical and practical significance.},
langid = {english},
keywords = {Minimum important difference,Negative affect,Positive affect,Practical significance,Smallest effect size of interest,Smallest subjectively experienced difference,Subjectively experienced difference}
}
@article{appelbaum_journal_2018,
title = {Journal Article Reporting Standards for Quantitative Research in Psychology: {{The APA Publications}} and {{Communications Board}} Task Force Report.},
shorttitle = {Journal Article Reporting Standards for Quantitative Research in Psychology},
author = {Appelbaum, Mark and Cooper, Harris and Kline, Rex B. and {Mayo-Wilson}, Evan and Nezu, Arthur M. and Rao, Stephen M.},
year = {2018},
month = jan,
journal = {American Psychologist},
volume = {73},
number = {1},
pages = {3},
publisher = {{US: American Psychological Association}},
issn = {1935-990X},
doi = {10.1037/amp0000191}
}
@article{armitage_repeated_1969,
title = {Repeated Significance Tests on Accumulating Data},
author = {Armitage, Peter and McPherson, C. K. and Rowe, B. C.},
year = {1969},
journal = {Journal of the Royal Statistical Society: Series A (General)},
volume = {132},
number = {2},
pages = {235--244},
publisher = {{Wiley Online Library}}
}
@article{arslan_how_2019,
title = {How to {{Automatically Document Data With}} the Codebook {{Package}} to {{Facilitate Data Reuse}}},
author = {Arslan, Ruben C.},
year = {2019},
month = may,
journal = {Advances in Methods and Practices in Psychological Science},
pages = {2515245919838783},
issn = {2515-2459},
doi = {10.1177/2515245919838783},
abstract = {Data documentation in psychology lags behind not only many other disciplines, but also basic standards of usefulness. Psychological scientists often prefer to invest the time and effort that would be necessary to document existing data well in other duties, such as writing and collecting more data. Codebooks therefore tend to be unstandardized and stored in proprietary formats, and they are rarely properly indexed in search engines. This means that rich data sets are sometimes used only once\textemdash by their creators\textemdash and left to disappear into oblivion. Even if they can find an existing data set, researchers are unlikely to publish analyses based on it if they cannot be confident that they understand it well enough. My codebook package makes it easier to generate rich metadata in human- and machine-readable codebooks. It uses metadata from existing sources and automates some tedious tasks, such as documenting psychological scales and reliabilities, summarizing descriptive statistics, and identifying patterns of missingness. The codebook R package and Web app make it possible to generate a rich codebook in a few minutes and just three clicks. Over time, its use could lead to psychological data becoming findable, accessible, interoperable, and reusable, thereby reducing research waste and benefiting both its users and the scientific community as a whole.},
langid = {english}
}
@book{babbage_reflections_1830,
title = {Reflections on the {{Decline}} of {{Science}} in {{England}}: {{And}} on {{Some}} of {{Its Causes}}},
shorttitle = {Reflections on the {{Decline}} of {{Science}} in {{England}}},
author = {Babbage, Charles},
year = {1830},
publisher = {{B. Fellowes}},
abstract = {Book digitized by Google and uploaded to the Internet Archive by user tpb.},
collaborator = {{unknown library}},
langid = {english}
}
@article{bacchetti_current_2010,
title = {Current Sample Size Conventions: {{Flaws}}, Harms, and Alternatives},
shorttitle = {Current Sample Size Conventions},
author = {Bacchetti, Peter},
year = {2010},
month = mar,
journal = {BMC Medicine},
volume = {8},
number = {1},
pages = {17},
issn = {1741-7015},
doi = {10.1186/1741-7015-8-17},
abstract = {The belief remains widespread that medical research studies must have statistical power of at least 80\% in order to be scientifically sound, and peer reviewers often question whether power is high enough.},
keywords = {Current Convention,Inadequate Sample Size,Information Method,Marginal Return,Sample Size Planning}
}
@article{bacchetti_simple_2008,
title = {Simple, {{Defensible Sample Sizes Based}} on {{Cost Efficiency}}},
author = {Bacchetti, Peter and McCulloch, Charles E. and Segal, Mark R.},
year = {2008},
journal = {Biometrics},
volume = {64},
number = {2},
pages = {577--585},
issn = {1541-0420},
doi = {10.1111/j.1541-0420.2008.01004_1.x},
abstract = {The conventional approach of choosing sample size to provide 80\% or greater power ignores the cost implications of different sample size choices. Costs, however, are often impossible for investigators and funders to ignore in actual practice. Here, we propose and justify a new approach for choosing sample size based on cost efficiency, the ratio of a study's projected scientific and/or practical value to its total cost. By showing that a study's projected value exhibits diminishing marginal returns as a function of increasing sample size for a wide variety of definitions of study value, we are able to develop two simple choices that can be defended as more cost efficient than any larger sample size. The first is to choose the sample size that minimizes the average cost per subject. The second is to choose sample size to minimize total cost divided by the square root of sample size. This latter method is theoretically more justifiable for innovative studies, but also performs reasonably well and has some justification in other cases. For example, if projected study value is assumed to be proportional to power at a specific alternative and total cost is a linear function of sample size, then this approach is guaranteed either to produce more than 90\% power or to be more cost efficient than any sample size that does. These methods are easy to implement, based on reliable inputs, and well justified, so they should be regarded as acceptable alternatives to current conventional approaches.},
langid = {english},
keywords = {Innovation,Peer review,Power,Research funding,Study design},
annotation = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1111/j.1541-0420.2008.01004\_1.x}
}
@book{baguley_serious_2012,
title = {Serious Stats: A Guide to Advanced Statistics for the Behavioral Sciences},
shorttitle = {Serious Stats},
author = {Baguley, Thom},
year = {2012},
publisher = {{Palgrave Macmillan}},
address = {{Houndmills, Basingstoke, Hampshire [England] ; New York}},
isbn = {978-0-230-57717-6 978-0-230-57718-3},
lccn = {BF39 .B3175 2012},
keywords = {Psychology,Psychometrics,Social sciences,Statistical methods}
}
@article{baguley_standardized_2009,
title = {Standardized or Simple Effect Size: {{What}} Should Be Reported?},
shorttitle = {Standardized or Simple Effect Size},
author = {Baguley, Thom},
year = {2009},
month = aug,
journal = {British Journal of Psychology},
volume = {100},
number = {3},
pages = {603--617},
issn = {2044-8295},
doi = {10.1348/000712608X377117},
abstract = {It is regarded as best practice for psychologists to report effect size when disseminating quantitative research findings. Reporting of effect size in the psychological literature is patchy \textendash{} though this may be changing \textendash{} and when reported it is far from clear that appropriate effect size statistics are employed. This paper considers the practice of reporting point estimates of standardized effect size and explores factors such as reliability, range restriction and differences in design that distort standardized effect size unless suitable corrections are employed. For most purposes simple (unstandardized) effect size is more robust and versatile than standardized effect size. Guidelines for deciding what effect size metric to use and how to report it are outlined. Foremost among these are: (i) a preference for simple effect size over standardized effect size, and (ii) the use of confidence intervals to indicate a plausible range of values the effect might take. Deciding on the appropriate effect size statistic to report always requires careful thought and should be influenced by the goals of the researcher, the context of the research and the potential needs of readers.},
langid = {english}
}
@article{baguley_understanding_2004,
title = {Understanding Statistical Power in the Context of Applied Research},
author = {Baguley, Thom},
year = {2004},
month = mar,
journal = {Applied Ergonomics},
volume = {35},
number = {2},
pages = {73--80},
issn = {0003-6870},
doi = {10.1016/j.apergo.2004.01.002},
abstract = {Estimates of statistical power are widely used in applied research for purposes such as sample size calculations. This paper reviews the benefits of power and sample size estimation and considers several problems with the use of power calculations in applied research that result from misunderstandings or misapplications of statistical power. These problems include the use of retrospective power calculations and standardized measures of effect size. Methods of increasing the power of proposed research that do not involve merely increasing sample size (such as reduction in measurement error, increasing `dose' of the independent variable and optimizing the design) are noted. It is concluded that applied researchers should consider a broader range of factors (other than sample size) that influence statistical power, and that the use of standardized measures of effect size should be avoided (except as intermediate stages in prospective power or sample size calculations).},
langid = {english},
keywords = {Applied research,Experimental design,Statistical power}
}
@article{bakan_test_1966,
title = {The Test of Significance in Psychological Research.},
author = {Bakan, David},
year = {1966},
journal = {Psychological Bulletin},
volume = {66},
number = {6},
pages = {423--437},
doi = {10.1037/h0020412}
}
@article{bakker_recommendations_2020,
title = {Recommendations in Pre-Registrations and Internal Review Board Proposals Promote Formal Power Analyses but Do Not Increase Sample Size},
author = {Bakker, Marjan and Veldkamp, Coosje L. S. and van den Akker, Olmo R. and van Assen, Marcel A. L. M. and Crompvoets, Elise and Ong, How Hwee and Wicherts, Jelte M.},
year = {2020},
month = jul,
journal = {PLOS ONE},
volume = {15},
number = {7},
pages = {e0236079},
publisher = {{Public Library of Science}},
issn = {1932-6203},
doi = {10.1371/journal.pone.0236079},
abstract = {In this preregistered study, we investigated whether the statistical power of a study is higher when researchers are asked to make a formal power analysis before collecting data. We compared the sample size descriptions from two sources: (i) a sample of pre-registrations created according to the guidelines for the Center for Open Science Preregistration Challenge (PCRs) and a sample of institutional review board (IRB) proposals from Tilburg School of Behavior and Social Sciences, which both include a recommendation to do a formal power analysis, and (ii) a sample of pre-registrations created according to the guidelines for Open Science Framework Standard Pre-Data Collection Registrations (SPRs) in which no guidance on sample size planning is given. We found that PCRs and IRBs (72\%) more often included sample size decisions based on power analyses than the SPRs (45\%). However, this did not result in larger planned sample sizes. The determined sample size of the PCRs and IRB proposals (Md = 90.50) was not higher than the determined sample size of the SPRs (Md = 126.00; W = 3389.5, p = 0.936). Typically, power analyses in the registrations were conducted with G*power, assuming a medium effect size, {$\alpha$} = .05 and a power of .80. Only 20\% of the power analyses contained enough information to fully reproduce the results and only 62\% of these power analyses pertained to the main hypothesis test in the pre-registration. Therefore, we see ample room for improvements in the quality of the registrations and we offer several recommendations to do so.},
langid = {english},
keywords = {Analysis of variance,Computer software,Linear regression analysis,Metaanalysis,Open science,Psychology,Research ethics,Social sciences}
}
@article{ball_effects_2002,
title = {Effects of Cognitive Training Interventions with Older Adults: A Randomized Controlled Trial},
shorttitle = {Effects of Cognitive Training Interventions with Older Adults},
author = {Ball, Karlene and Berch, Daniel B. and Helmers, Karin F. and Jobe, Jared B. and Leveck, Mary D. and Marsiske, Michael and Morris, John N. and Rebok, George W. and Smith, David M. and Tennstedt, Sharon L.},
year = {2002},
journal = {JAMA},
volume = {288},
number = {18},
pages = {2271--2281},
publisher = {{American Medical Association}}
}
@book{barber_pitfalls_1976,
title = {Pitfalls in {{Human Research}}: {{Ten Pivotal Points}}},
shorttitle = {Pitfalls in {{Human Research}}},
author = {Barber, Theodore Xenophon},
year = {1976},
publisher = {{Pergamon Press}},
googlebooks = {UBN9AAAAMAAJ},
isbn = {978-0-08-020935-7},
langid = {english},
keywords = {Psychology / General}
}
@article{bartos_z-curve20_2020,
title = {Z-{{Curve}} 2.0: {{Estimating Replication Rates}} and {{Discovery Rates}}},
shorttitle = {Z-{{Curve}} 2.0},
author = {Barto{\v s}, Franti{\v s}ek and Schimmack, Ulrich},
year = {2020},
month = jan,
publisher = {{PsyArXiv}},
doi = {10.31234/osf.io/urgtn},
abstract = {This article introduces z-curve.2.0 as a method that estimates the expected replication rate (ERR) and the expected discovery rate (EDR) based on the test-statistics of studies selected for significance. Z-curve.2.0 extends the work by Brunner and Schimmack (2019) in several ways. First, we show that a new estimation method using expectation-maximization outperforms the kernel-density approach of z-curve.1.0. Second, we examine the coverage of bootstrapped confidence intervals to provide information about the uncertainty in z-curve estimates. Third, we extended z-curve to estimate the number of all studies that were conducted, including studies with non-significant results that may not have been reported, solely on the basis of significant results. This allows us to estimate the EDR; that is, the percentage of significant results that were obtained in all studies. EDR can be used to assess the size of the file-drawer, estimate the maximum number of false positive results, and may provide a better estimate of the success rate in actual replication studies than the ERR because exact replications are impossible.}
}
@article{bauer_unifying_1996,
title = {A Unifying Approach for Confidence Intervals and Testing of Equivalence and Difference},
author = {Bauer, Peter and Kieser, Meinhard},
year = {1996},
journal = {Biometrika},
volume = {83},
number = {4},
pages = {934--937}
}
@book{bausell_power_2002,
title = {Power {{Analysis}} for {{Experimental Research}}: {{A Practical Guide}} for the {{Biological}}, {{Medical}} and {{Social Sciences}}},
shorttitle = {Power {{Analysis}} for {{Experimental Research}}},
author = {Bausell, R. Barker and Li, Yu-Fang},
year = {2002},
month = sep,
edition = {1st edition},
publisher = {{Cambridge University Press}},
abstract = {Power analysis is an essential tool for determining whether a statistically significant result can be expected in a scientific experiment prior to the experiment being performed. Many funding agencies and institutional review boards now require power analyses to be carried out before they will approve experiments, particularly where they involve the use of human subjects. This comprehensive, yet accessible, book provides practising researchers with step-by-step instructions for conducting power/sample size analyses, assuming only basic prior knowledge of summary statistics and the normal distribution. It contains a unified approach to statistical power analysis, with numerous easy-to-use tables to guide the reader without the need for further calculations or statistical expertise. This will be an indispensable text for researchers and graduates in the medical and biological sciences needing to apply power analysis in the design of their experiments.},
langid = {english}
}
@book{bausell_power_2002-1,
title = {Power Analysis for Experimental Research: A Practical Guide for the Biological, Medical and Social Sciences},
shorttitle = {Power Analysis for Experimental Research},
author = {Bausell, R. Barker and Li, Yu-Fang},
year = {2002},
publisher = {{Cambridge University Press}}
}
@article{bayarri_rejection_2016,
title = {Rejection Odds and Rejection Ratios: {{A}} Proposal for Statistical Practice in Testing Hypotheses},
shorttitle = {Rejection Odds and Rejection Ratios},
author = {Bayarri, M.J. and Benjamin, Daniel J. and Berger, James O. and Sellke, Thomas M.},
year = {2016},
month = jun,
journal = {Journal of Mathematical Psychology},
volume = {72},
pages = {90--103},
issn = {00222496},
doi = {10.1016/j.jmp.2015.12.007},
langid = {english},
keywords = {Bayes factors,Bayesian,Frequentist,Odds}
}
@incollection{becker_failsafe_2005,
title = {Failsafe {{N}} or {{File-Drawer Number}}},
booktitle = {Publication {{Bias}} in {{Meta-Analysis}}},
author = {Becker, Betsy Jane},
year = {2005},
pages = {111--125},
publisher = {{John Wiley \& Sons, Ltd}},
doi = {10.1002/0470870168.ch7},
abstract = {This chapter contains sections titled: Introduction Definition of the Failsafe N Examples Assumptions of the Failsafe N Variations on the Failsafe N Summary of the Examples Applications of the Failsafe N Conclusions Acknowledgement References},
chapter = {7},
isbn = {978-0-470-87016-7},
langid = {english},
keywords = {'file-drawer' analysis,‘offset publication bias’,augmented Fisher test statistic,failsafe N or file-drawer number,file-drawer effect,Raudenbush's teacher expectancy data set,Stouffer tests},
annotation = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/0470870168.ch7}
}
@article{bem_feeling_2011,
title = {Feeling the Future: Experimental Evidence for Anomalous Retroactive Influences on Cognition and Affect},
shorttitle = {Feeling the Future},
author = {Bem, Daryl J.},
year = {2011},
month = mar,
journal = {Journal of Personality and Social Psychology},
volume = {100},
number = {3},
pages = {407--425},
issn = {1939-1315},
doi = {10.1037/a0021524},
abstract = {The term psi denotes anomalous processes of information or energy transfer that are currently unexplained in terms of known physical or biological mechanisms. Two variants of psi are precognition (conscious cognitive awareness) and premonition (affective apprehension) of a future event that could not otherwise be anticipated through any known inferential process. Precognition and premonition are themselves special cases of a more general phenomenon: the anomalous retroactive influence of some future event on an individual's current responses, whether those responses are conscious or nonconscious, cognitive or affective. This article reports 9 experiments, involving more than 1,000 participants, that test for retroactive influence by "time-reversing" well-established psychological effects so that the individual's responses are obtained before the putatively causal stimulus events occur. Data are presented for 4 time-reversed effects: precognitive approach to erotic stimuli and precognitive avoidance of negative stimuli; retroactive priming; retroactive habituation; and retroactive facilitation of recall. The mean effect size (d) in psi performance across all 9 experiments was 0.22, and all but one of the experiments yielded statistically significant results. The individual-difference variable of stimulus seeking, a component of extraversion, was significantly correlated with psi performance in 5 of the experiments, with participants who scored above the midpoint on a scale of stimulus seeking achieving a mean effect size of 0.43. Skepticism about psi, issues of replication, and theories of psi are also discussed.},
langid = {english},
pmid = {21280961},
keywords = {Affect,Awareness,Boredom,Cognition,Erotica,Escape Reaction,Female,Habituation; Psychophysiologic,Humans,Male,Mental Recall,Parapsychology,Subliminal Stimulation,Time Factors}
}
@article{bem_feeling_2015,
title = {Feeling the Future: {{A}} Meta-Analysis of 90 Experiments on the Anomalous Anticipation of Random Future Events},
shorttitle = {Feeling the Future},
author = {Bem, Daryl and Tressoldi, Patrizio and Rabeyron, Thomas and Duggan, Michael},
year = {2015},
month = oct,
journal = {F1000Research},
issn = {2046-1402},
doi = {10.12688/f1000research.7177.1},
langid = {english}
}
@article{bem_must_2011,
title = {Must Psychologists Change the Way They Analyze Their Data?},
author = {Bem, Daryl J. and Utts, Jessica and Johnson, Wesley O.},
year = {2011},
month = oct,
journal = {Journal of Personality and Social Psychology},
volume = {101},
number = {4},
pages = {716--719},
issn = {1939-1315},
doi = {10.1037/a0024777},
abstract = {Wagenmakers, Wetzels, Borsboom, and van der Maas (2011) argued that psychologists should replace the familiar "frequentist" statistical analyses of their data with bayesian analyses. To illustrate their argument, they reanalyzed a set of psi experiments published recently in this journal by Bem (2011), maintaining that, contrary to his conclusion, his data do not yield evidence in favor of the psi hypothesis. We argue that they have incorrectly selected an unrealistic prior distribution for their analysis and that a bayesian analysis using a more reasonable distribution yields strong evidence in favor of the psi hypothesis. More generally, we argue that there are advantages to bayesian analyses that merit their increased use in the future. However, as Wagenmakers et al.'s analysis inadvertently revealed, they contain hidden traps that must be better understood before being more widely substituted for the familiar frequentist analyses currently employed by most research psychologists.},
langid = {english},
pmid = {21928916},
keywords = {Data Interpretation; Statistical,Humans,Psychology}
}
@article{ben-shachar_effectsize_2020,
title = {Effectsize: {{Estimation}} of {{Effect Size Indices}} and {{Standardized Parameters}}},
shorttitle = {Effectsize},
author = {{Ben-Shachar}, Mattan S. and L{\"u}decke, Daniel and Makowski, Dominique},
year = {2020},
month = dec,
journal = {Journal of Open Source Software},
volume = {5},
number = {56},
pages = {2815},
issn = {2475-9066},
doi = {10.21105/joss.02815},
abstract = {Ben-Shachar et al., (2020). effectsize: Estimation of Effect Size Indices and Standardized Parameters. Journal of Open Source Software, 5(56), 2815, https://doi.org/10.21105/joss.02815},
langid = {english}
}
@article{benjamini_controlling_1995,
title = {Controlling the False Discovery Rate: A Practical and Powerful Approach to Multiple Testing},
shorttitle = {Controlling the False Discovery Rate},
author = {Benjamini, Yoav and Hochberg, Yosef},
year = {1995},
journal = {Journal of the Royal Statistical Society: Series B (Methodological)},
volume = {57},
number = {1},
pages = {289--300}
}
@article{benjamini_its_2016,
title = {It's {{Not}} the p-Values' {{Fault}}},
author = {Benjamini, Yoav},
year = {2016},
journal = {The American Statistician: Supplemental Material to the ASA Statement on P-Values and Statistical Significance},
volume = {70},
pages = {1--2}
}
@article{berger_interplay_2004,
title = {The {{Interplay}} of {{Bayesian}} and {{Frequentist Analysis}}},
author = {Berger, J. O. and Bayarri, M. J.},
year = {2004},
month = feb,
journal = {Statistical Science},
volume = {19},
number = {1},
pages = {58--80},
issn = {0883-4237},
doi = {10.1214/088342304000000116},
langid = {english}
}
@book{berkeley_defence_1735,
title = {A Defence of Free-Thinking in Mathematics, in Answer to a Pamphlet of {{Philalethes Cantabrigiensis}} Entitled {{Geometry No Friend}} to {{Infidelity}}. {{Also}} an Appendix Concerning Mr. {{Walton}}'s {{Vindication}} of the Principles of Fluxions against the Objections Contained in {{The}} Analyst. {{By}} the Author of {{The}} Minute Philosopher},
author = {Berkeley, George},
year = {1735},
volume = {3}
}
@article{bigby_understanding_2014,
title = {Understanding and Evaluating Systematic Reviews and Meta-Analyses},
author = {Bigby, Michael},
year = {2014},
month = mar,
journal = {Indian Journal of Dermatology},
volume = {59},
number = {2},
pages = {134},
issn = {0019-5154},
doi = {10.4103/0019-5154.127671},
abstract = {A systematic review is a summary of existing evidence that answers a specific clinical question, contains a thorough, unbiased search of the relevant literature, explicit criteria for assessing studies and structured presentation of the results. A systematic review that incorporates quantitative pooling of similar studies to produce an overall summary of treatment effects is a meta-analysis. A systematic review should have clear, focused clinical objectives containing four elements expressed through the acronym PICO ( \textbf{P}atient, group of \textbf{p}atients, or \textbf{p}roblem, an \textbf{I}ntervention, a \textbf{C}omparison intervention and specific \textbf{O} utcomes). Explicit and thorough search of the literature is a pre-requisite of any good systematic review. Reviews should have pre-defined explicit criteria for what studies would be included and the analysis should include only those studies that fit the inclusion criteria. The quality (risk of bias) of the primary studies should be critically appraised. Particularly the role of publication and language bias should be acknowledged and addressed by the review, whenever possible. Structured reporting of the results with quantitative pooling of the data must be attempted, whenever appropriate. The review should include interpretation of the data, including implications for clinical practice and further research. Overall, the current quality of reporting of systematic reviews remains highly variable.},
langid = {english},
pmid = {24700930}
}
@article{bishop_fallibility_2018,
title = {Fallibility in {{Science}}: {{Responding}} to {{Errors}} in the {{Work}} of {{Oneself}} and {{Others}}},
shorttitle = {Fallibility in {{Science}}},
author = {Bishop, D. V. M.},
year = {2018},
month = jul,
journal = {Advances in Methods and Practices in Psychological Science},
pages = {2515245918776632},
issn = {2515-2459},
doi = {10.1177/2515245918776632},
langid = {english}
}
@book{bland_introduction_2015,
title = {An Introduction to Medical Statistics},
author = {Bland, Martin},
year = {2015},
series = {Oxford Medical Publications},
edition = {Fourth edition},
publisher = {{Oxford University Press}},
address = {{Oxford}},
isbn = {978-0-19-958992-0},
langid = {english},
lccn = {RA409 .B55 2015},
keywords = {Medical statistics}
}
@incollection{blume_likelihood_2011,
title = {Likelihood and Its {{Evidential Framework}}},
booktitle = {Philosophy of {{Statistics}}},
author = {Blume, Jeffrey D.},
year = {2011},
pages = {493--511},
publisher = {{Elsevier}},
doi = {10.1016/B978-0-444-51862-0.50014-9},
isbn = {978-0-444-51862-0},
langid = {english}
}
@book{borenstein_introduction_2009,
title = {Introduction to Meta-Analysis},
editor = {Borenstein, Michael},
year = {2009},
publisher = {{John Wiley \& Sons}},
address = {{Chichester, U.K}},
abstract = {This text provides a concise and clearly presented discussion of all the elements in a meta-analysis. It is illustrated with worked examples throughout, with visual explanations, using screenshots from Excel spreadsheets and computer programs such as Comprehensive Meta-Analysis (CMA) or Strata},
isbn = {978-0-470-05724-7},
lccn = {R853.M48 I58 2009},
keywords = {Meta-analysis,Meta-Analysis as Topic}
}
@article{bosco_correlational_2015,
title = {Correlational Effect Size Benchmarks},
author = {Bosco, Frank A. and Aguinis, Herman and Singh, Kulraj and Field, James G. and Pierce, Charles A.},
year = {2015},
month = mar,
journal = {The Journal of Applied Psychology},
volume = {100},
number = {2},
pages = {431--449},
issn = {1939-1854},
doi = {10.1037/a0038047},
abstract = {Effect size information is essential for the scientific enterprise and plays an increasingly central role in the scientific process. We extracted 147,328 correlations and developed a hierarchical taxonomy of variables reported in Journal of Applied Psychology and Personnel Psychology from 1980 to 2010 to produce empirical effect size benchmarks at the omnibus level, for 20 common research domains, and for an even finer grained level of generality. Results indicate that the usual interpretation and classification of effect sizes as small, medium, and large bear almost no resemblance to findings in the field, because distributions of effect sizes exhibit tertile partitions at values approximately one-half to one-third those intuited by Cohen (1988). Our results offer information that can be used for research planning and design purposes, such as producing better informed non-nil hypotheses and estimating statistical power and planning sample size accordingly. We also offer information useful for understanding the relative importance of the effect sizes found in a particular study in relationship to others and which research domains have advanced more or less, given that larger effect sizes indicate a better understanding of a phenomenon. Also, our study offers information about research domains for which the investigation of moderating effects may be more fruitful and provide information that is likely to facilitate the implementation of Bayesian analysis. Finally, our study offers information that practitioners can use to evaluate the relative effectiveness of various types of interventions.},
langid = {english},
pmid = {25314367},
keywords = {Behavioral Research,Benchmarking,Data Interpretation; Statistical,Humans}
}
@article{brown_errors_1983,
title = {Errors, {{Types I}} and {{II}}},
author = {Brown, George W.},
year = {1983},
month = jun,
journal = {American Journal of Diseases of Children},
volume = {137},
number = {6},
pages = {586--591},
issn = {0002-922X},
doi = {10.1001/archpedi.1983.02140320062014},
abstract = {\textbullet{} The practicing physician and the clinical investigator regularly confront therapeutic trials, diagnostic tests, and other hypothesis-testing situations. The clinical literature increasingly displays statistical notations and concepts related to decision making in medicine. For these reasons, the physician is obligated to have some familiarity with the principles behind the null hypothesis, Type I and II errors, statistical power, and related elements of hypothesis testing.(Am J Dis Child 1983;137:586-591)}
}
@article{brown_grim_2017,
title = {The {{GRIM Test}}: {{A Simple Technique Detects Numerous Anomalies}} in the {{Reporting}} of {{Results}} in {{Psychology}}},
shorttitle = {The {{GRIM Test}}},
author = {Brown, Nicholas J. L. and Heathers, James A. J.},
year = {2017},
month = may,
journal = {Social Psychological and Personality Science},
volume = {8},
number = {4},
pages = {363--369},
issn = {1948-5506},
doi = {10.1177/1948550616673876},
abstract = {We present a simple mathematical technique that we call granularity-related inconsistency of means (GRIM) for verifying the summary statistics of research reports in psychology. This technique evaluates whether the reported means of integer data such as Likert-type scales are consistent with the given sample size and number of items. We tested this technique with a sample of 260 recent empirical articles in leading journals. Of the articles that we could test with the GRIM technique (N = 71), around half (N = 36) appeared to contain at least one inconsistent mean, and more than 20\% (N = 16) contained multiple such inconsistencies. We requested the data sets corresponding to 21 of these articles, receiving positive responses in 9 cases. We confirmed the presence of at least one reporting error in all cases, with three articles requiring extensive corrections. The implications for the reliability and replicability of empirical psychology are discussed.},
langid = {english}
}
@article{brunner_estimating_2020,
title = {Estimating {{Population Mean Power Under Conditions}} of {{Heterogeneity}} and {{Selection}} for {{Significance}}},
author = {Brunner, Jerry and Schimmack, Ulrich},
year = {2020},
month = may,
journal = {Meta-Psychology},
volume = {4},
issn = {2003-2714},
doi = {10.15626/MP.2018.874},
abstract = {In scientific fields that use significance tests, statistical power is important for successful replications of significant results because it is the long-run success rate in a series of exact replication studies. For any population of significant results, there is a population of power values of the statistical tests on which conclusions are based. We give exact theoretical results showing how selection for significance affects the distribution of statistical power in a heterogeneous population of significance tests. In a set of large-scale simulation studies, we compare four methods for estimating population mean power of a set of studies selected for significance (a maximum likelihood model, extensions of p-curve and p-uniform, \& z-curve). The p-uniform and p-curve methods performed well with a fixed effects size and varying sample sizes. However, when there was substantial variability in effect sizes as well as sample sizes, both methods systematically overestimate mean power. With heterogeneity in effect sizes, the maximum likelihood model produced the most accurate estimates when the distribution of effect sizes matched the assumptions of the model, but z-curve produced more accurate estimates when the assumptions of the maximum likelihood model were not met. We recommend the use of z-curve to estimate the typical power of significant results, which has implications for the replicability of significant results in psychology journals.},
copyright = {Copyright (c) 2020 Jerry Brunner, Ulrich Schimmack},
langid = {english},
keywords = {Effect size,Maximum likelihood,Meta-analysis,P-curve,P-uniform,Post-hoc power analysis,Power estimation,Publication bias,Replicability,Z-curve}
}
@article{bryan_behavioural_2021,
title = {Behavioural Science Is Unlikely to Change the World without a Heterogeneity Revolution},
author = {Bryan, Christopher J. and Tipton, Elizabeth and Yeager, David S.},
year = {2021},
month = jul,
journal = {Nature Human Behaviour},
pages = {1--10},
publisher = {{Nature Publishing Group}},
issn = {2397-3374},
doi = {10.1038/s41562-021-01143-3},
abstract = {In the past decade, behavioural science has gained influence in policymaking but suffered a crisis of confidence in the replicability of its findings. Here, we describe a nascent heterogeneity revolution that we believe these twin historical trends have triggered. This revolution will be defined by the recognition that most treatment effects are heterogeneous, so the variation in effect estimates across studies that defines the replication crisis is to be expected as long as heterogeneous effects are studied without a systematic approach to sampling and moderation. When studied systematically, heterogeneity can be leveraged to build more complete theories of causal mechanism that could inform nuanced and dependable guidance to policymakers. We recommend investment in shared research infrastructure to make it feasible to study behavioural interventions in heterogeneous and generalizable samples, and suggest low-cost steps researchers can take immediately to avoid being misled by heterogeneity and begin to learn from it instead.},
copyright = {2021 Springer Nature Limited},
langid = {english}
}
@article{brysbaert_how_2019,
title = {How Many Participants Do We Have to Include in Properly Powered Experiments? {{A}} Tutorial of Power Analysis with Reference Tables},
shorttitle = {How Many Participants Do We Have to Include in Properly Powered Experiments?},
author = {Brysbaert, Marc},
year = {2019},
month = jul,
journal = {Journal of Cognition},
volume = {2},
number = {1},
pages = {16},
issn = {2514-4820},
doi = {10.5334/joc.72},
langid = {english}
}
@article{brysbaert_how_2019-1,
title = {How Many Words Do We Read per Minute? {{A}} Review and Meta-Analysis of Reading Rate},
shorttitle = {How Many Words Do We Read per Minute?},
author = {Brysbaert, Marc},
year = {2019},
journal = {Journal of Memory and Language},
volume = {109},
pages = {104047},
publisher = {{Elsevier}}
}
@article{brysbaert_power_2018,
title = {Power {{Analysis}} and {{Effect Size}} in {{Mixed Effects Models}}: {{A Tutorial}}},
shorttitle = {Power {{Analysis}} and {{Effect Size}} in {{Mixed Effects Models}}},
author = {Brysbaert, Marc and Stevens, Micha{\"e}l},
year = {2018},
month = jan,
journal = {Journal of Cognition},
volume = {1},
number = {1},
issn = {2514-4820},
doi = {10.5334/joc.10},
langid = {english}
}
@misc{buchanan_mote_2017,
title = {{{MOTE}}: {{Effect Size}} and {{Confidence Interval Calculator}}.},
author = {Buchanan, Erin M. and Scofield, J. and Valentine, K. D.},
year = {2017}
}
@article{bulus_bound_2021,
title = {Bound {{Constrained Optimization}} of {{Sample Sizes Subject}} to {{Monetary Restrictions}} in {{Planning Multilevel Randomized Trials}} and {{Regression Discontinuity Studies}}},
author = {Bulus, Metin and Dong, Nianbo},
year = {2021},
month = apr,
journal = {The Journal of Experimental Education},
volume = {89},
number = {2},
pages = {379--401},
publisher = {{Routledge}},
issn = {0022-0973},
doi = {10.1080/00220973.2019.1636197},
abstract = {Sample size determination in multilevel randomized trials (MRTs) and multilevel regression discontinuity designs (MRDDs) can be complicated due to multilevel structure, monetary restrictions, differing marginal costs per treatment and control units, and range restrictions in sample size at one or more levels. These issues have sparked a set of studies under optimal design literature where scholars consider sample size determination as an allocation problem. The literature on optimal design of MRTs and MRDDs and their implementation in software packages has been scarce, scattered, and incomplete. This study unifies optimal design literature and extends currently available software under bound constrained optimal sample allocation (BCOSA) framework via bound constrained optimization technique. The BCOSA framework, introduction to the cosa R library, and an illustration that replicates and extends minimum required sample size determination for an evaluation report is provided.},
keywords = {bound constrained optimal sample allocation,conditional optimal design,multilevel randomized trials,multilevel regression discontinuity designs}
}
@article{burriss_changes_2015,
title = {Changes in Women's Facial Skin Color over the Ovulatory Cycle Are Not Detectable by the Human Visual System},
author = {Burriss, Robert P. and Troscianko, Jolyon and Lovell, P. George and Fulford, Anthony J. C. and Stevens, Martin and Quigley, Rachael and Payne, Jenny and Saxton, Tamsin K. and Rowland, Hannah M.},
year = {2015},
month = jul,
journal = {PLOS ONE},
volume = {10},
number = {7},
pages = {e0130093},
issn = {1932-6203},
doi = {10.1371/journal.pone.0130093},
abstract = {Human ovulation is not advertised, as it is in several primate species, by conspicuous sexual swellings. However, there is increasing evidence that the attractiveness of women's body odor, voice, and facial appearance peak during the fertile phase of their ovulatory cycle. Cycle effects on facial attractiveness may be underpinned by changes in facial skin color, but it is not clear if skin color varies cyclically in humans or if any changes are detectable. To test these questions we photographed women daily for at least one cycle. Changes in facial skin redness and luminance were then quantified by mapping the digital images to human long, medium, and shortwave visual receptors. We find cyclic variation in skin redness, but not luminance. Redness decreases rapidly after menstrual onset, increases in the days before ovulation, and remains high through the luteal phase. However, we also show that this variation is unlikely to be detectable by the human visual system. We conclude that changes in skin color are not responsible for the effects of the ovulatory cycle on women's attractiveness.},
keywords = {Cameras,Color vision,Estradiol,Estrogens,Face,Luminance,Ovulation,Visual system}
}
@article{button_minimal_2015,
title = {Minimal Clinically Important Difference on the {{Beck Depression Inventory}} - {{II}} According to the Patient's Perspective},
author = {Button, K. S. and Kounali, D. and Thomas, L. and Wiles, N. J. and Peters, T. J. and Welton, N. J. and Ades, A. E. and Lewis, G.},
year = {2015},
month = nov,
journal = {Psychological Medicine},
volume = {45},
number = {15},
pages = {3269--3279},
issn = {0033-2917, 1469-8978},
doi = {10.1017/S0033291715001270},
abstract = {Background The Beck Depression Inventory, 2nd edition (BDI-II) is widely used in research on depression. However, the minimal clinically important difference (MCID) is unknown. MCID can be estimated in several ways. Here we take a patient-centred approach, anchoring the change on the BDI-II to the patient's global report of improvement. Method We used data collected (n = 1039) from three randomized controlled trials for the management of depression. Improvement on a `global rating of change' question was compared with changes in BDI-II scores using general linear modelling to explore baseline dependency, assessing whether MCID is best measured in absolute terms (i.e. difference) or as percent reduction in scores from baseline (i.e. ratio), and receiver operator characteristics (ROC) to estimate MCID according to the optimal threshold above which individuals report feeling `better'. Results Improvement in BDI-II scores associated with reporting feeling `better' depended on initial depression severity, and statistical modelling indicated that MCID is best measured on a ratio scale as a percentage reduction of score. We estimated a MCID of a 17.5\% reduction in scores from baseline from ROC analyses. The corresponding estimate for individuals with longer duration depression who had not responded to antidepressants was higher at 32\%. Conclusions MCID on the BDI-II is dependent on baseline severity, is best measured on a ratio scale, and the MCID for treatment-resistant depression is larger than that for more typical depression. This has important implications for clinical trials and practice.},
keywords = {2nd edition (BDI-II),Beck Depression Inventory,depression,minimal clinically important difference,outcome assessment,primary care}
}
@article{button_power_2013,
title = {Power Failure: Why Small Sample Size Undermines the Reliability of Neuroscience},
shorttitle = {Power Failure},
author = {Button, K. S. and Ioannidis, John P. A. and Mokrysz, Claire and Nosek, Brian A. and Flint, Jonathan and Robinson, Emma S. J. and Munaf{\`o}, Marcus R.},
year = {2013},
month = apr,
journal = {Nature Reviews Neuroscience},
volume = {14},
number = {5},
pages = {365--376},
issn = {1471-003X, 1471-0048},
doi = {10.1038/nrn3475}
}
@article{carter_correcting_2019,
title = {Correcting for {{Bias}} in {{Psychology}}: {{A Comparison}} of {{Meta-Analytic Methods}}},
shorttitle = {Correcting for {{Bias}} in {{Psychology}}},
author = {Carter, Evan C. and Sch{\"o}nbrodt, Felix D. and Gervais, Will M. and Hilgard, Joseph},
year = {2019},
month = jun,
journal = {Advances in Methods and Practices in Psychological Science},
volume = {2},
number = {2},
pages = {115--144},
issn = {2515-2459},
doi = {10.1177/2515245919847196},
abstract = {Publication bias and questionable research practices in primary research can lead to badly overestimated effects in meta-analysis. Methodologists have proposed a variety of statistical approaches to correct for such overestimation. However, it is not clear which methods work best for data typically seen in psychology. Here, we present a comprehensive simulation study in which we examined how some of the most promising meta-analytic methods perform on data that might realistically be produced by research in psychology. We simulated several levels of questionable research practices, publication bias, and heterogeneity, and used study sample sizes empirically derived from the literature. Our results clearly indicated that no single meta-analytic method consistently outperformed all the others. Therefore, we recommend that meta-analysts in psychology focus on sensitivity analyses\textemdash that is, report on a variety of methods, consider the conditions under which these methods fail (as indicated by simulation studies such as ours), and then report how conclusions might change depending on which conditions are most plausible. Moreover, given the dependence of meta-analytic methods on untestable assumptions, we strongly recommend that researchers in psychology continue their efforts to improve the primary literature and conduct large-scale, preregistered replications. We provide detailed results and simulation code at https://osf.io/rf3ys and interactive figures at http://www.shinyapps.org/apps/metaExplorer/.},
langid = {english}
}
@article{carter_publication_2014,
title = {Publication Bias and the Limited Strength Model of Self-Control: Has the Evidence for Ego Depletion Been Overestimated?},
shorttitle = {Publication Bias and the Limited Strength Model of Self-Control},
author = {Carter, Evan C. and McCullough, Michael E.},
year = {2014},
month = jul,
journal = {Frontiers in Psychology},
volume = {5},
issn = {1664-1078},
doi = {10.3389/fpsyg.2014.00823}
}
@article{cascio_open_1983,
title = {Open a {{New Window}} in {{Rational Research Planning}}: {{Adjust Alpha}} to {{Maximize Statistical Power}}},
shorttitle = {Open a {{New Window}} in {{Rational Research Planning}}},
author = {Cascio, Wayne F. and Zedeck, Sheldon},
year = {1983},
journal = {Personnel Psychology},
volume = {36},
number = {3},
pages = {517--526},
issn = {1744-6570},
doi = {10.1111/j.1744-6570.1983.tb02233.x},
abstract = {Alternative strategies for optimizing statistical power in applied psychological research are considered. Increasing sample size and combining predictors in order to yield a useful effect size are well-known tactics for increasing power. A third approach, increasing alpha, is rarely used because of zealous adherence to convention. There are two related aspects in setting the alpha level. First, the relative seriousness of Type I and Type II errors must be considered. This assessment must then be qualified and redetermined after taking into account the prior probability that an effect exists. Procedures that make these processes objective are demonstrated. When sample size and effect size are both fixed, increasing alpha may be the only feasible strategy for maximizing power. It is concluded that a priori power analysis should be a major consideration in any test of an hypothesis, and that alpha level adjustment should be viewed as a useful strategy for increasing power.},
langid = {english}
}
@article{chambers_past_2022,
title = {The Past, Present and Future of {{Registered Reports}}},
author = {Chambers, Christopher D. and Tzavella, Loukia},
year = {2022},
month = jan,
journal = {Nature Human Behaviour},
volume = {6},
number = {1},
pages = {29--42},
publisher = {{Nature Publishing Group}},
issn = {2397-3374},
doi = {10.1038/s41562-021-01193-7},
abstract = {Registered Reports are a form of empirical publication in which study proposals are peer reviewed and pre-accepted before research is undertaken. By deciding which articles are published based on the question, theory and methods, Registered Reports offer a remedy for a range of reporting and publication biases. Here, we reflect on the history, progress and future prospects of the Registered Reports initiative and offer practical guidance for authors, reviewers and editors. We review early evidence that Registered Reports are working as intended, while at the same time acknowledging that they are not a universal solution for irreproducibility. We also consider how the policies and practices surrounding Registered Reports are changing, or must change in the future, to address limitations and adapt to new challenges. We conclude that Registered Reports are promoting reproducibility, transparency and self-correction across disciplines and may help reshape how society evaluates research and researchers.},
copyright = {2021 Springer Nature Limited},
langid = {english},
keywords = {Culture,Publishing}
}
@book{chang_adaptive_2016,
title = {Adaptive {{Design Theory}} and {{Implementation Using SAS}} and {{R}}},
author = {Chang, Mark},
year = {2016},
month = oct,
edition = {2nd edition},
publisher = {{Chapman and Hall/CRC}},
abstract = {Get Up to Speed on Many Types of Adaptive Designs Since the publication of the first edition, there have been remarkable advances in the methodology and application of adaptive trials. Incorporating many of these new developments, Adaptive Design Theory and Implementation Using SAS and R, Second Edition offers a detailed framework to understand the use of various adaptive design methods in clinical trials. New to the Second Edition Twelve new chapters covering blinded and semi-blinded sample size reestimation design, pick-the-winners design, biomarker-informed adaptive design, Bayesian designs, adaptive multiregional trial design, SAS and R for group sequential design, and much more More analytical methods for K-stage adaptive designs, multiple-endpoint adaptive design, survival modeling, and adaptive treatment switching New material on sequential parallel designs with rerandomization and the skeleton approach in adaptive dose-escalation trials Twenty new SAS macros and R functions Enhanced end-of-chapter problems that give readers hands-on practice addressing issues encountered in designing real-life adaptive trials Covering even more adaptive designs, this book provides biostatisticians, clinical scientists, and regulatory reviewers with up-to-date details on this innovative area in pharmaceutical research and development. Practitioners will be able to improve the efficiency of their trial design, thereby reducing the time and cost of drug development.},
isbn = {978-1-138-03423-5},
langid = {english}
}
@techreport{chatziathanasiou_beware_2022,
type = {{{SSRN Scholarly Paper}}},
title = {Beware the {{Lure}} of {{Narratives}}: `{{Hungry Judges}}' {{Should}} Not {{Motivate}} the {{Use}} of `{{Artificial Intelligence}}' in {{Law}}},
shorttitle = {Beware the {{Lure}} of {{Narratives}}},
author = {Chatziathanasiou, Konstantin},
year = {2022},
month = jan,
number = {ID 4011603},
address = {{Rochester, NY}},
institution = {{Social Science Research Network}},
doi = {10.2139/ssrn.4011603},
abstract = {The `hungry judge' effect, as presented by a famous study, is a common point of reference to underline human bias in judicial decision-making. This is particularly pronounced in the literature on `artificial intelligence' (AI) in law. Here, the effect is invoked to counter concerns about bias in automated decision-aids and to motivate their use. However, the validity of the `hungry judge' effect is doubtful. In our context, this is problematic for, at least, two reasons. First, shaky evidence leads to a misconstruction of the problem that may warrant an AI intervention. Second, painting the justice system worse than it actually is, is a dangerous argumentative strategy as it undermines institutional trust. Against this background, this article revisits the original `hungry judge' study and argues that it cannot be relied on as an argument in the AI discourse or beyond. The case of `hungry judges' demonstrates the lure of narratives, the dangers of `problem gerrymandering', and ultimately the need for a careful reception of social science.},
langid = {english},
keywords = {extra-legal influences,hungry judge,judicial decision-making,social science in law}
}
@article{chin_questionable_2021,
title = {Questionable {{Research Practices}} and {{Open Science}} in {{Quantitative Criminology}}},
author = {Chin, Jason M. and Pickett, Justin T. and Vazire, Simine and Holcombe, Alex O.},
year = {2021},
month = aug,
journal = {Journal of Quantitative Criminology},
issn = {1573-7799},
doi = {10.1007/s10940-021-09525-6},
abstract = {Questionable research practices (QRPs) lead to incorrect research results and contribute to irreproducibility in science. Researchers and institutions have proposed open science practices (OSPs) to improve the detectability of QRPs and the credibility of science. We examine the prevalence of QRPs and OSPs in criminology, and researchers' opinions of those practices.},
langid = {english}
}
@article{cho_is_2013,
title = {Is Two-Tailed Testing for Directional Research Hypotheses Tests Legitimate?},
author = {Cho, Hyun-Chul and Abe, Shuzo},
year = {2013},
month = sep,
journal = {Journal of Business Research},
series = {Advancing {{Research Methods}} in {{Marketing}}},
volume = {66},
number = {9},
pages = {1261--1266},
issn = {0148-2963},
doi = {10.1016/j.jbusres.2012.02.023},
abstract = {This paper demonstrates that there is currently a widespread misuse of two-tailed testing for directional research hypotheses tests. One probable reason for this overuse of two-tailed testing is the seemingly valid beliefs that two-tailed testing is more conservative and safer than one-tailed testing. However, the authors examine the legitimacy of this notion and find it to be flawed. A second and more fundamental cause of the current problem is the pervasive oversight in making a clear distinction between the research hypothesis and the statistical hypothesis. Based upon the explicated, sound relationship between the research and statistical hypotheses, the authors propose a new scheme of hypothesis classification to facilitate and clarify the proper use of statistical hypothesis testing in empirical research.},
keywords = {hypothesis testing,one-tailed testing,Research hypothesis in existential form,Research hypothesis in non-existential form,Statistical hypothesis,two-tailed testing}
}
@article{cohen_earth_1994,
title = {The Earth Is Round (p {$<$} .05).},
author = {Cohen, Jacob},
year = {1994},
journal = {American Psychologist},
volume = {49},
number = {12},
pages = {997--1003},
issn = {1935-990X, 0003-066X},
doi = {10.1037/0003-066X.49.12.997},
langid = {english}
}
@article{cohen_earth_1995,
title = {The Earth Is Round (p {$<$} .05): {{Rejoinder}}},
shorttitle = {The Earth Is Round (p {$<$} .05)},
author = {Cohen, Jacob},
year = {1995},
month = dec,
journal = {American Psychologist},
volume = {50},
number = {12},
pages = {1103},
issn = {0003-066X},
doi = {10.1037/0003-066X.50.12.1103},
copyright = {\textcopyright{} 1995, American Psychological Association},
langid = {english},
keywords = {Null Hypothesis Testing (major)}
}
@article{cohen_statistical_1965,
title = {Some Statistical Issues in Psychological Research},
author = {Cohen, Jacob},
year = {1965},
journal = {Handbook of clinical psychology},
pages = {95--121}
}
@book{cohen_statistical_1988,
title = {Statistical Power Analysis for the Behavioral Sciences},
author = {Cohen, Jacob},
year = {1988},
edition = {2nd ed},
publisher = {{L. Erlbaum Associates}},
address = {{Hillsdale, N.J}},
isbn = {978-0-8058-0283-2},
lccn = {HA29 .C66 1988},
keywords = {Probabilities,Social sciences,Statistical methods,Statistical power analysis}
}