Skip to content

Commit

Permalink
Add shuffle data frame support
Browse files Browse the repository at this point in the history
Add split train test method
  • Loading branch information
Hernán Morales Durand committed Dec 11, 2023
1 parent 182681f commit d49e02c
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 28 deletions.
11 changes: 6 additions & 5 deletions src/AI-DataPartitioners/AIHashPartitioner.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,19 @@ subsets := AIHashPartitioner split: dataFrame withProportions: #(0.8 0.2).
```
"
Class {
#name : #AIHashPartitioner,
#superclass : #Object,
#category : #'AI-DataPartitioners'
#name : 'AIHashPartitioner',
#superclass : 'Object',
#category : 'AI-DataPartitioners',
#package : 'AI-DataPartitioners'
}

{ #category : #api }
{ #category : 'api' }
AIHashPartitioner class >> split: aCollection withProportions: aCollectionOfProportions [
	"Convenience entry point: create a new partitioner and forward aCollection and aCollectionOfProportions to the instance-side #split:withProportions:, answering whatever that method answers."

	| partitioner |
	partitioner := self new.
	^ partitioner
		  split: aCollection
		  withProportions: aCollectionOfProportions
]

{ #category : #api }
{ #category : 'api' }
AIHashPartitioner >> split: aCollection withProportions: aCollectionOfProportions [

| cumulativeProportionsMap indexesMap |
Expand Down
55 changes: 39 additions & 16 deletions src/AI-DataPartitioners/AIRandomPartitioner.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -15,50 +15,61 @@ subsets := AIRandomPartitioner split: letters withProportions: #(0.5 0.3 0.2).
```
"
Class {
#name : #AIRandomPartitioner,
#superclass : #Object,
#category : #'AI-DataPartitioners'
#name : 'AIRandomPartitioner',
#superclass : 'Object',
#category : 'AI-DataPartitioners',
#package : 'AI-DataPartitioners'
}

{ #category : #api }
{ #category : 'api' }
AIRandomPartitioner class >> split: aCollection withBoundaries: aCollectionOfBoundaries [
	"Convenience entry point: create a new partitioner and forward aCollection and aCollectionOfBoundaries to the instance-side #split:withBoundaries:, answering its result."

	| partitioner |
	partitioner := self new.
	^ partitioner
		  split: aCollection
		  withBoundaries: aCollectionOfBoundaries
]

{ #category : #api }
{ #category : 'api' }
AIRandomPartitioner class >> split: aCollection withBoundaries: aCollectionOfBoundaries seed: aNumber [
	"Convenience entry point: create a new partitioner and forward aCollection, aCollectionOfBoundaries and the seed aNumber to the instance-side #split:withBoundaries:seed:, answering its result."

	| partitioner |
	partitioner := self new.
	^ partitioner
		  split: aCollection
		  withBoundaries: aCollectionOfBoundaries
		  seed: aNumber
]

{ #category : #api }
{ #category : 'api' }
AIRandomPartitioner class >> split: aCollection withProportions: aCollectionOfProportions [
	"Convenience entry point: create a new partitioner and forward aCollection and aCollectionOfProportions to the instance-side #split:withProportions:, answering its result."

	| partitioner |
	partitioner := self new.
	^ partitioner
		  split: aCollection
		  withProportions: aCollectionOfProportions
]

{ #category : #api }
{ #category : 'api' }
AIRandomPartitioner class >> split: aCollection withProportions: aCollectionOfProportions seed: aNumber [
	"Convenience entry point: create a new partitioner and forward aCollection, aCollectionOfProportions and the seed aNumber to the instance-side #split:withProportions:seed:, answering its result."

	| partitioner |
	partitioner := self new.
	^ partitioner
		  split: aCollection
		  withProportions: aCollectionOfProportions
		  seed: aNumber
]

{ #category : #api }
{ #category : 'api' }
AIRandomPartitioner class >> split: aCollection withProportions: aCollectionOfProportions shuffle: aBoolean [
	"Split aCollection into several sets, one per element of aCollectionOfProportions. A typical argument is #(0.7 0.3), which commonly stands for a training set and a test set. aBoolean is passed through unchanged to the instance-side #split:withProportions:shuffle:, whose result is answered."

	| partitioner |
	partitioner := self new.
	^ partitioner split: aCollection withProportions: aCollectionOfProportions shuffle: aBoolean
]

{ #category : 'api' }
AIRandomPartitioner class >> split: aCollection withSizes: aCollectionOfSizes [
	"Convenience entry point: create a new partitioner and forward aCollection and aCollectionOfSizes to the instance-side #split:withSizes:, answering its result."

	| partitioner |
	partitioner := self new.
	^ partitioner
		  split: aCollection
		  withSizes: aCollectionOfSizes
]

{ #category : #accessing }
{ #category : 'accessing' }
AIRandomPartitioner >> shuffle: aCollection [

"Default shuffling hook: the shuffling is delegated to aCollection itself and its answer is returned. Override this method in a subclass if you need a different shuffling algorithm."

^ aCollection shuffle
^ aCollection shuffled
]

{ #category : #accessing }
{ #category : 'accessing' }
AIRandomPartitioner >> shuffle: aCollection seed: aNumber [
"Default method of shuffling the elements of collection. You can override it if you need to use a different shuffling algorithm"

Expand All @@ -67,13 +78,13 @@ AIRandomPartitioner >> shuffle: aCollection seed: aNumber [
^ aCollection shuffleWithSeed: aNumber
]

{ #category : #private }
{ #category : 'private' }
AIRandomPartitioner >> split: aCollection withBoundaries: aCollectionOfBoundaries [
	"Seedless variant: forwards to #split:withBoundaries:seed: passing nil as the seed and answers its result."

	^ self
		  split: aCollection
		  withBoundaries: aCollectionOfBoundaries
		  seed: nil
]

{ #category : #private }
{ #category : 'private' }
AIRandomPartitioner >> split: aCollection withBoundaries: aCollectionOfBoundaries seed: aNumber [

| shuffledIndices previousBoundary |
Expand All @@ -88,13 +99,13 @@ AIRandomPartitioner >> split: aCollection withBoundaries: aCollectionOfBoundarie
res ]
]

{ #category : #api }
{ #category : 'api' }
AIRandomPartitioner >> split: aCollection withProportions: aCollectionOfProportions [
	"Seedless variant: forwards to #split:withProportions:seed: passing nil as the seed and answers its result."

	^ self
		  split: aCollection
		  withProportions: aCollectionOfProportions
		  seed: nil
]

{ #category : #api }
{ #category : 'api' }
AIRandomPartitioner >> split: aCollection withProportions: aCollectionOfProportions seed: aNumber [
"Use this variant when you want a fixed seed for the random number generator, so that the split is reproducible."

Expand All @@ -107,7 +118,19 @@ AIRandomPartitioner >> split: aCollection withProportions: aCollectionOfProporti
^ self split: aCollection withBoundaries: boundaries seed: aNumber
]

{ #category : #api }
{ #category : 'api' }
AIRandomPartitioner >> split: aCollection withProportions: aCollectionOfProportions shuffle: aBoolean [
	"Split aCollection according to aCollectionOfProportions (for example #(0.7 0.3)). When aBoolean is true, the collection is first passed through #shuffle:; otherwise it is used as given. The chosen collection is then handed to #split:withProportions:seed: with a nil seed, and that result is answered.
	NOTE(review): #split:withBoundaries:seed: declares a shuffledIndices temporary, so the elements may end up shuffled even when aBoolean is false - confirm against that method."

	| source |
	source := aCollection.
	aBoolean ifTrue: [ source := self shuffle: aCollection ].
	^ self
		  split: source
		  withProportions: aCollectionOfProportions
		  seed: nil
]

{ #category : 'api' }
AIRandomPartitioner >> split: aCollection withSizes: aCollectionOfSizes [

| boundaries |
Expand Down
4 changes: 2 additions & 2 deletions src/AI-DataPartitioners/Collection.extension.st
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Extension { #name : #Collection }
Extension { #name : 'Collection' }

{ #category : #'*AI-DataPartitioners' }
{ #category : '*AI-DataPartitioners' }
Collection >> cumulativeSum [
| sum |
sum := 0.
Expand Down
15 changes: 15 additions & 0 deletions src/AI-DataPartitioners/DataFrame.extension.st
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Extension { #name : 'DataFrame' }

{ #category : '*AI-DataPartitioners' }
DataFrame >> splitTrainTest: aCollectionOfProportions shuffle: aBoolean [
	"Split the receiver's data into subsets, typically one used for training/fitting a model and one used for testing it.
	The receiver, aCollectionOfProportions and aBoolean are forwarded unchanged to AIRandomPartitioner class >> #split:withProportions:shuffle:, and the partitioner's result is answered as is."

	| partitionerClass |
	partitionerClass := AIRandomPartitioner.
	^ partitionerClass split: self withProportions: aCollectionOfProportions shuffle: aBoolean
]
8 changes: 4 additions & 4 deletions src/AI-DataPartitioners/SequenceableCollection.extension.st
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
Extension { #name : #SequenceableCollection }
Extension { #name : 'SequenceableCollection' }

{ #category : #'*AI-DataPartitioners' }
{ #category : '*AI-DataPartitioners' }
SequenceableCollection >> indexOfRowNamed: aName [
"Answer the position of aName in the receiver, as computed by #indexOf:. NOTE(review): presumably provided so that plain sequenceable collections answer the same row-oriented protocol as a DataFrame - confirm against the callers."

^ self indexOf: aName
]

{ #category : #'*AI-DataPartitioners' }
{ #category : '*AI-DataPartitioners' }
SequenceableCollection >> rowNames [
"Answer the receiver itself as its collection of row names. NOTE(review): presumably this lets a plain sequenceable collection stand in where a DataFrame's row names are expected - confirm against the callers."

^ self
]

{ #category : #'*AI-DataPartitioners' }
{ #category : '*AI-DataPartitioners' }
SequenceableCollection >> shuffleWithSeed: aNumber [
" Fix the random seed with aNumber to ensure reproducibility "

Expand Down
2 changes: 1 addition & 1 deletion src/AI-DataPartitioners/package.st
Original file line number Diff line number Diff line change
@@ -1 +1 @@
Package { #name : #'AI-DataPartitioners' }
Package { #name : 'AI-DataPartitioners' }

0 comments on commit d49e02c

Please sign in to comment.