-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Zeynep
committed
Aug 5, 2024
1 parent
9a979df
commit 6f39e2f
Showing
7 changed files
with
1,059 additions
and
54 deletions.
There are no files selected for viewing
251 changes: 251 additions & 0 deletions
251
src/main/java/de/unijena/cheminf/clustering/art2a/Art2aEuclideanClusteringTask.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,251 @@ | ||
/* | ||
* ART2a Clustering for Java | ||
* Copyright (C) 2023 Betuel Sevindik, Felix Baensch, Jonas Schaub, Christoph Steinbeck, and Achim Zielesny | ||
* | ||
* Source code is available at <https://github.com/JonasSchaub/ART2a-Clustering-for-Java> | ||
* | ||
* Permission is hereby granted, free of charge, to any person obtaining a copy | ||
* of this software and associated documentation files (the "Software"), to deal | ||
* in the Software without restriction, including without limitation the rights | ||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
* copies of the Software, and to permit persons to whom the Software is | ||
* furnished to do so, subject to the following conditions: | ||
* | ||
* The above copyright notice and this permission notice shall be included in all | ||
* copies or substantial portions of the Software. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
* SOFTWARE. | ||
*/ | ||
|
||
package de.unijena.cheminf.clustering.art2a; | ||
|
||
import de.unijena.cheminf.clustering.art2a.clustering.euclideanClustering.Art2aEuclideanDoubleClustering; | ||
import de.unijena.cheminf.clustering.art2a.exceptions.ConvergenceFailedException; | ||
import de.unijena.cheminf.clustering.art2a.interfaces.euclideanClusteringInterfaces.IArt2aEuclideanClustering; | ||
import de.unijena.cheminf.clustering.art2a.interfaces.euclideanClusteringInterfaces.IArt2aEuclideanClusteringResult; | ||
|
||
import java.util.concurrent.Callable; | ||
import java.util.logging.Level; | ||
import java.util.logging.Logger; | ||
|
||
/** | ||
* Callable class for clustering input vectors (fingerprints). | ||
* | ||
* @author Betuel Sevindik | ||
* @version 1.0.0.0 | ||
*/ | ||
public class Art2aEuclideanClusteringTask implements Callable<IArt2aEuclideanClusteringResult> { | ||
//<editor-fold desc="private class variables" defaultstate="collapsed> | ||
/** | ||
* Clustering instance | ||
*/ | ||
private IArt2aEuclideanClustering art2aClustering; | ||
/** | ||
* If isClusteringResultExported = true the cluster results are exported to text files. | ||
* If isClusteringResultExported = false the clustering results are not exported to text files. | ||
*/ | ||
private boolean isClusteringResultExported; | ||
/** | ||
* Seed value to randomize input vectors. | ||
*/ | ||
private int seed; | ||
/** | ||
* If the seed is setting by the user isSeedSet == true, otherwise false. | ||
* Is needed to determine whether the default value or the seed specified by the user should be used. | ||
*/ | ||
private boolean isSeedSet; | ||
//</editor-fold> | ||
// | ||
//<editor-fold desc="private final class constants" defaultstate="collapsed> | ||
/** | ||
* Seed value for randomising the input vectors before starting clustering. | ||
*/ | ||
private final int DEFAULT_SEED_VALUE_TO_RANDOMIZE_INPUT_VECTORS = 1; | ||
//</editor-fold> | ||
// | ||
//<editor-fold desc="private static final class constants" defaultstate="collapsed> | ||
/** | ||
* Default value of the learning parameter in float | ||
*/ | ||
public static final float DEFAULT_LEARNING_PARAMETER_FLOAT = 0.01f; | ||
/** | ||
* Default value of the required similarity parameter in float | ||
*/ | ||
public static final float REQUIRED_SIMILARITY_FLOAT = 0.99f; | ||
/** | ||
* Default value of the learning parameter in double | ||
*/ | ||
public static final double DEFAULT_LEARNING_PARAMETER_DOUBLE = 0.01; | ||
/** | ||
* Default value of the required similarity parameter in double | ||
*/ | ||
public static final double REQUIRED_SIMILARITY_DOUBLE = 0.99; | ||
//</editor-fold> | ||
// | ||
//<editor-fold desc="private static final class variables" defaultstate="collapsed> | ||
/** | ||
* Logger of this class | ||
*/ | ||
private static final Logger LOGGER = Logger.getLogger(Art2aEuclideanClusteringTask.class.getName()); | ||
//</editor-fold> | ||
// | ||
// <editor-fold defaultstate="collapsed" desc="Constructors"> | ||
/** | ||
* Float clustering task constructor. | ||
* Creates a new Art2aClusteringTask instance with the specified parameters. | ||
* | ||
* @param aVigilanceParameter parameter to influence the number of clusters. | ||
* @param aDataMatrix matrix contains all inputs for clustering. Each row of the matrix contains one input. | ||
* In addition, all inputs must have the same length. Each column of the matrix contains one component of the input. | ||
* @param aMaximumEpochsNumber maximum number of epochs that the system may use for convergence. | ||
* @param anIsClusteringResultExported if the parameter is set to true, the cluster results | ||
* are exported to text files. | ||
* @param aRequiredSimilarity parameter indicating the minimum similarity between the current | ||
* cluster vectors and the previous cluster vectors. The parameter is crucial | ||
* for the convergence of the system. If the parameter is set too high, a much | ||
* more accurate similarity is expected and the convergence may take longer, | ||
* while a small parameter expects a lower similarity between the cluster | ||
* vectors and thus the system may converge faster. | ||
* @param aLearningParameter parameter to define the intensity of keeping the old cluster vector in mind | ||
* before the system adapts it to the new sample vector. | ||
* @throws IllegalArgumentException is thrown, if the given arguments are invalid. The checking of the arguments | ||
* is done in the constructor of Art2aFloatClustering. | ||
* @throws NullPointerException is thrown, if the given aDataMatrix is null. The checking of the data matrix is | ||
* done in the constructor of the ArtaFloatClustering. | ||
* | ||
*/ | ||
/** public Art2aEuclideanClusteringTask(float aVigilanceParameter, float[][] aDataMatrix, int aMaximumEpochsNumber, | ||
boolean anIsClusteringResultExported, float aRequiredSimilarity, float aLearningParameter) | ||
throws IllegalArgumentException, NullPointerException { | ||
this.isClusteringResultExported = anIsClusteringResultExported; | ||
this.isSeedSet = false; | ||
this.art2aEuclideanClustering = new Art2aFloatClustering(aDataMatrix, aMaximumEpochsNumber, aVigilanceParameter, | ||
aRequiredSimilarity, aLearningParameter); | ||
}**/ | ||
// | ||
/** | ||
* Float clustering task constructor. | ||
* Creates a new Art2aClusteringTask instance with the specified parameters. | ||
* For the required similarity and learning parameter default values are used. | ||
* | ||
* @param aVigilanceParameter parameter to influence the number of clusters. | ||
* @param aDataMatrix matrix contains all inputs for clustering. Each row of the matrix contains one input. | ||
* In addition, all inputs must have the same length. | ||
* Each column of the matrix contains one component of the input. | ||
* @param aMaximumEpochsNumber maximum number of epochs that the system may use for convergence. | ||
* @param anIsClusteringResultExported if the parameter is set to true, the cluster results | ||
* are exported to text files. | ||
* @throws IllegalArgumentException is thrown, if the given arguments are invalid. The checking of the arguments | ||
* is done in the constructor of Art2aFloatClustering. | ||
* @throws NullPointerException is thrown, if the given aDataMatrix is null. The checking of the data matrix is | ||
* done in the constructor of the ArtaFloatClustering. | ||
* | ||
* @see de.unijena.cheminf.clustering.art2a.Art2aClusteringTask#Art2aClusteringTask(float, float[][], int, | ||
* boolean, float, float) | ||
*/ | ||
/**public Art2aClusteringTask(float aVigilanceParameter, float[][] aDataMatrix, int aMaximumEpochsNumber, | ||
boolean anIsClusteringResultExported) | ||
throws IllegalArgumentException, NullPointerException { | ||
this(aVigilanceParameter, aDataMatrix, aMaximumEpochsNumber, anIsClusteringResultExported, | ||
Art2aClusteringTask.REQUIRED_SIMILARITY_FLOAT, Art2aClusteringTask.DEFAULT_LEARNING_PARAMETER_FLOAT); | ||
}**/ | ||
// | ||
/** | ||
* Double clustering task constructor. | ||
* Creates a new Art2aDoubleClustering instance with the specified parameters. | ||
* | ||
* @param aVigilanceParameter parameter to influence the number of clusters. | ||
* @param aDataMatrix matrix contains all inputs for clustering. Each row of the matrix contains one input. | ||
* In addition, all inputs must have the same length. | ||
* Each column of the matrix contains one component of the input. | ||
* @param aMaximumEpochsNumber maximum number of epochs that the system may use for convergence. | ||
* @param anIsClusteringResultExported if the parameter is set to true, the cluster results are | ||
* exported to text files. | ||
* @param aRequiredSimilarity parameter indicating the minimum similarity between the current | ||
* cluster vectors and the previous cluster vectors. | ||
* @param aLearningParameter parameter to define the intensity of keeping the old cluster vector in mind | ||
* before the system adapts it to the new sample vector. | ||
* @throws IllegalArgumentException is thrown, if the given arguments are invalid. The checking of the arguments | ||
* is done in the constructor of Art2aFloatClustering. | ||
* @throws NullPointerException is thrown, if the given aDataMatrix is null. The checking of the data matrix is | ||
* done in the constructor of the ArtaFloatClustering. | ||
*/ | ||
public Art2aEuclideanClusteringTask(double aVigilanceParameter, double[][] aDataMatrix, int aMaximumEpochsNumber, | ||
boolean anIsClusteringResultExported, double aRequiredSimilarity, double aLearningParameter) | ||
throws IllegalArgumentException, NullPointerException { | ||
this.isClusteringResultExported = anIsClusteringResultExported; | ||
this.art2aClustering = new Art2aEuclideanDoubleClustering(aDataMatrix, aMaximumEpochsNumber, aVigilanceParameter, | ||
aRequiredSimilarity, aLearningParameter); | ||
} | ||
// | ||
/** | ||
* Double clustering task constructor. | ||
* Creates a new Art2aDoubleClustering instance with the specified parameters. | ||
* For the required similarity and learning parameter default values are used. | ||
* | ||
* @param aVigilanceParameter parameter to influence the number of clusters. | ||
* @param aDataMatrix matrix contains all inputs for clustering. Each row of the matrix contains one input. | ||
* In addition, all inputs must have the same length. Each column of the matrix contains one component of the input. | ||
* @param aMaximumEpochsNumber maximum number of epochs that the system may use for convergence. | ||
* @param anIsClusteringResultExported if the parameter is set to true, the cluster results are | ||
* exported to text files. | ||
* @throws IllegalArgumentException is thrown, if the given arguments are invalid. The checking of the arguments | ||
* is done in the constructor of Art2aFloatClustering. | ||
* @throws NullPointerException is thrown, if the given aDataMatrix is null. The checking of the data matrix is | ||
* done in the constructor of the ArtaFloatClustering. | ||
* | ||
* @see de.unijena.cheminf.clustering.art2a.Art2aClusteringTask#Art2aClusteringTask(double, double[][], int, | ||
* boolean, double, double) | ||
* | ||
*/ | ||
public Art2aEuclideanClusteringTask(double aVigilanceParameter, double[][] aDataMatrix, int aMaximumEpochsNumber, | ||
boolean anIsClusteringResultExported) throws IllegalArgumentException, NullPointerException { | ||
this(aVigilanceParameter, aDataMatrix, aMaximumEpochsNumber, anIsClusteringResultExported, | ||
Art2aClusteringTask.REQUIRED_SIMILARITY_DOUBLE, Art2aClusteringTask.DEFAULT_LEARNING_PARAMETER_DOUBLE); | ||
} | ||
//</editor-fold> | ||
// | ||
// <editor-fold defaultstate="collapsed" desc="Overriden call() method"> | ||
/** | ||
* Executes the clustering. | ||
* | ||
* @return clustering result. | ||
*/ | ||
@Override | ||
public IArt2aEuclideanClusteringResult call() { | ||
try { | ||
if(this.isSeedSet) { | ||
return this.art2aClustering.getClusterResult(this.isClusteringResultExported, this.seed); | ||
} else { | ||
return this.art2aClustering.getClusterResult(this.isClusteringResultExported, | ||
this.DEFAULT_SEED_VALUE_TO_RANDOMIZE_INPUT_VECTORS); | ||
} | ||
} catch (ConvergenceFailedException anException) { | ||
Art2aEuclideanClusteringTask.LOGGER.log(Level.SEVERE, anException.toString(), anException); | ||
return null; | ||
} | ||
} | ||
//</editor-fold> | ||
// | ||
// <editor-fold defaultstate="collapsed" desc="Public method"> | ||
/** | ||
* | ||
* User-defined seed value to randomize input vectors. | ||
* Different seed values can lead to different clustering results. | ||
* | ||
* @param aSeed seed value | ||
* @return user-defined seed value. | ||
*/ | ||
public int setSeed(int aSeed) { | ||
this.seed = aSeed; | ||
this.isSeedSet = true; | ||
return this.seed; | ||
} | ||
//</editor-fold> | ||
} |
Oops, something went wrong.