-
Notifications
You must be signed in to change notification settings - Fork 4
/
FeaturesExtractor.ts
335 lines (304 loc) · 13 KB
/
FeaturesExtractor.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
import { CommandLineValues } from './Common/CommandLineValues';
import { ProgramFeatures } from './FeaturesEntities/ProgramFeatures';
import { Common } from './Common/Common';
import { Node, SyntaxKind, getDefaultCompilerOptions, createProgram, TypeFlags, Program, TypeChecker, Type } from 'typescript';
import { ts, SourceFile, Project } from 'ts-morph'
export class FeatureExtractor {
private m_CommandLineValues: CommandLineValues;
private filePath: string;
private project: Project;
private sourceFile: SourceFile;
private root: Node;
private program: Program;
public checker: TypeChecker;
private functionAndMethodEntries: Array<ts.Symbol>;
private functions: Array<Node>;
private static s_ParentTypeToAddChildId: Set<String>= new Set<String>();
private static lparen: string = "(";
private static rparen: string = ")";
private static upSymbol: string = "^";
private static downSymbol: string = "_";
public static init(){
FeatureExtractor.s_ParentTypeToAddChildId.add("AssignExpr");
FeatureExtractor.s_ParentTypeToAddChildId.add("ArrayAccessExpr");
FeatureExtractor.s_ParentTypeToAddChildId.add("FieldAccessExpr");
FeatureExtractor.s_ParentTypeToAddChildId.add("MethodCallExpr");
}
constructor(commandLineValues: CommandLineValues, filePath: string) {
this.m_CommandLineValues = commandLineValues;
this.filePath = filePath;
this.program = createProgram([this.filePath], getDefaultCompilerOptions());
this.checker = this.program.getTypeChecker();
this.project = new Project();
this.sourceFile = this.project.addExistingSourceFile(filePath);
this.sourceFile.getSymbol();
this.findFunctionAndMethodEntries(this.sourceFile);
this.root = this.sourceFile.compilerNode;
this.functions = new Array<Node>();
this.findFunctions(this.root);
}
/**
* Extracts program features. Each feature contains all the contexts of a specific identifier in the AST.
*/
public extractFeatures(): Array<ProgramFeatures> {
var programFeatures: Array<ProgramFeatures> = new Array<ProgramFeatures>();
for (let func of this.functions) {
let identifiersSymbols: Array<ts.Symbol> = this.getFuncIdsSymbols(func);
if (identifiersSymbols.length == 0) continue;
let idsLeavesLists: Array<Array<Node>> = this.createLeavesListForEachIdentifier(func);
if (idsLeavesLists.length == 0) continue;
let funcLeaves: Array<Node> = this.getFunctionLeaves(func);
let funcProgramFeatures: Array<ProgramFeatures> = this.generatePathFeaturesForFunction(idsLeavesLists, funcLeaves, identifiersSymbols);
programFeatures = programFeatures.concat(funcProgramFeatures);
}
return programFeatures;
}
/**
* Gets the symbols of the identifiers in function /func.
* @param func - the function whose identifiers' symbols should be returned.
*/
private getFuncIdsSymbols(func: Node) : Array<ts.Symbol> {
let identifiersSymbols: Array<ts.Symbol> = new Array<ts.Symbol>();
let funcName: string;
func.forEachChild(node => {
if (node.kind == SyntaxKind.Identifier) funcName = node.getText();
});
let funcEntry = this.functionAndMethodEntries.filter(entry => entry.getName() == funcName)[0];
if (funcEntry == null) return identifiersSymbols;
let variableEntries = funcEntry.valueDeclaration['locals'];
variableEntries.forEach((id:ts.Symbol) => {
identifiersSymbols.push(id);
});
return identifiersSymbols;
}
/**
* Creates for each identifier in the program an array of all its leaves. Then it returns
* all these arrays (in an array).
*/
private createLeavesListForEachIdentifier(func: Node): Array<Array<Node>> {
var lists: Array<Array<Node>> = new Array<Array<Node>>();
let idLeaves: Array<Node> = this.getFunctionIdLeaves(func);
if (idLeaves.length == 0) return null;
idLeaves.sort((n1,n2) => {
let s1 = n1.getText(), s2 = n2.getText();
if (s1 > s2) return 1;
else if (s1 < s2) return -1;
return 0;
});
lists.push(new Array<Node>());
var prev_id: Node = idLeaves[0];
for (let i: number = 0, j: number = 0; i < idLeaves.length; i++) {
if (idLeaves[i].getText() == prev_id.getText()) {
lists[j].push(idLeaves[i]);
} else {
lists.push(new Array<Node>());
lists[++j].push(idLeaves[i]);
prev_id = idLeaves[i];
}
}
return lists;
}
/**
* Gets the leaves of the identifiers in function /func.
* @param func - the function.
*/
private getFunctionIdLeaves(func: Node): Array<Node> {
let functionIdLeaves: Array<Node> = new Array<Node>();
function findFunctionIdLeaves(node: Node) {
if (node.kind === SyntaxKind.Identifier) {
functionIdLeaves.push(node);
}
node.forEachChild(findFunctionIdLeaves);
}
findFunctionIdLeaves(func);
return functionIdLeaves;
}
/**
* Gets the leaves of function /func.
* @param func - the function.
*/
private getFunctionLeaves(func: Node): Array<Node> {
let functionLeaves: Array<Node> = new Array<Node>();
function findFunctionIdLeaves(node: Node) {
if (node.getChildCount() == 0) {
functionLeaves.push(node);
}
node.forEachChild(findFunctionIdLeaves);
}
findFunctionIdLeaves(func);
return functionLeaves;
}
/**
* Generates features (contexts; paths) for each identifier in the AST.
* @param idLeavesLists - an array of arrays. Each array in idLeavesLists contains all the
* leaves of a specific identifier.
*/
private generatePathFeaturesForFunction(idLeavesLists: Array<Array<Node>>, functionLeaves: Array<Node>, identifiersSymbols: Array<ts.Symbol>): Array<ProgramFeatures> {
let identifiersFeatures: Array<ProgramFeatures> = new Array<ProgramFeatures>();
for (let idLeaves of idLeavesLists) {
let singleIdFeatures: ProgramFeatures = this.generatePathFeaturesForIdentifier(idLeaves, functionLeaves, identifiersSymbols);
if (!singleIdFeatures.isEmpty()) {
identifiersFeatures.push(singleIdFeatures);
}
}
return identifiersFeatures;
}
/**
* Generates features (contexts; paths) for an identifier in the AST.
* @param idLeaves - the identifier's leaves in the AST.
*/
private generatePathFeaturesForIdentifier(idLeaves: Array<Node>, functionLeaves: Array<Node>, identifiersSymbols: Array<ts.Symbol>): ProgramFeatures {
let programFeatures: ProgramFeatures = new ProgramFeatures(this.m_CommandLineValues, idLeaves.length);
var identifier: Node = idLeaves[0];
programFeatures.setVariableName(identifier.getText());
// Find and set the identifier's type:
let idSymbol: ts.Symbol = identifiersSymbols.filter(id => id.getName()===identifier.getText())[0];
if (idSymbol == null) return programFeatures;
let currType: Type = this.checker.getTypeOfSymbolAtLocation(idSymbol, identifier);
if (currType.flags === TypeFlags.Object) {
programFeatures.setVariableType(currType.symbol.name);
}
else if (TypeFlags[currType.flags]) {
programFeatures.setVariableType(TypeFlags[currType.flags].toLowerCase());
}
else {
return programFeatures;
}
// The following loop will create paths between the identifier's leaves themselves:
for (let i: number = 0; i < idLeaves.length; i++) {
for (let j: number = i+1; j < idLeaves.length; j++) {
let source: Node = idLeaves[i];
let target: Node = idLeaves[j];
let path: string = this.generatePath(source, target);
if (path != Common.EmptyString) {
programFeatures.addFeature(source, path, target);
}
}
}
/* The following loop will create paths between the identifier's leaves and
all other leaves: */
for (let i: number = 0; i < idLeaves.length; i++) {
for (let j: number = 0; j < functionLeaves.length; j++) {
let source: Node = idLeaves[i];
let target: Node = functionLeaves[j];
// This if statement makes sure we don't create a path between the identifier's leaves again:
if (source.getText() == target.getText()) continue;
let path: string = this.generatePath(source, target);
if (path != Common.EmptyString) {
programFeatures.addFeature(source, path, target);
}
}
}
return programFeatures;
}
/**
* Finds the nodes of functions and methods in the AST whose root is /node.
* @param node
*/
private findFunctions(root: Node): void {
let functions: Array<Node> = new Array<Node>();
function findFunctionNodes(node: Node) {
if (node.kind == SyntaxKind.FunctionDeclaration || node.kind == SyntaxKind.MethodDeclaration) {
functions.push(node);
}
node.forEachChild(findFunctionNodes);
}
findFunctionNodes(root);
this.functions = functions;
}
/**
* Finds the symbols of the identifiers defined in functions and methods.
* @param sourceFile - the source file.
*/
private findFunctionAndMethodEntries(sourceFile: SourceFile): void {
// Get the identifiers table
const localEntries = (sourceFile.compilerNode as any)['locals'] as ts.SymbolTable | undefined;
var functionAndMethodEntries = new Array<ts.Symbol>();
// The following forEach loop gets all the functions' entries and methods' entries:
localEntries.forEach((entry) => {
if(entry.valueDeclaration && entry.valueDeclaration.kind == SyntaxKind.FunctionDeclaration) {
// entry is a function
functionAndMethodEntries.push(entry);
} else if (entry.declarations && entry.declarations[0].kind == SyntaxKind.ClassDeclaration) {
const members = this.checker.getExportSymbolOfSymbol(entry).members;
// entry is a class; The following forEach loop gets all the methods' entries of this class
members.forEach((entry:ts.Symbol) => {
if (entry.valueDeclaration && entry.valueDeclaration.kind == SyntaxKind.MethodDeclaration) {
// entry is a method
functionAndMethodEntries.push(entry);
}
});
} else return;
});
this.functionAndMethodEntries = functionAndMethodEntries;
}
/**
* Generates a string which represents a path between two leaves in the AST.
* @param source - a source node (leaf).
* @param target - a target node (leaf).
*/
private generatePath(source: Node , target: Node): string {
let down: string = FeatureExtractor.downSymbol;
let up: string = FeatureExtractor.upSymbol;
let startSymbol: string = FeatureExtractor.lparen;
let endSymbol: string = FeatureExtractor.rparen;
let stringBuilder: string = Common.EmptyString;
let sourceStack: Array<Node> = FeatureExtractor.getTreeStack(source);
let targetStack: Array<Node> = FeatureExtractor.getTreeStack(target);
let commonPrefix: number = 0;
let currentSourceAncestorIndex: number = sourceStack.length - 1;
let currentTargetAncestorIndex: number = targetStack.length - 1;
while (currentSourceAncestorIndex >= 0 && currentTargetAncestorIndex >= 0
&& sourceStack[currentSourceAncestorIndex] == targetStack[currentTargetAncestorIndex]) {
commonPrefix++;
currentSourceAncestorIndex--;
currentTargetAncestorIndex--;
}
let pathLength: number = sourceStack.length + targetStack.length - 2 * commonPrefix;
if (pathLength > this.m_CommandLineValues.MaxPathLength) {
return Common.EmptyString;
}
/* Don't create a path between leaves that belong to different functions, classes and interfaces.
This loop also makes sure that the path is created only between two leaves of the same line of code, or the same
block (for example an if statement or a loop). This happens because we stop when we see that a FunctionDeclaration
is in the path (the same happens with ClassDeclaration and InterfaceDeclaration). */
for (let i = 0; i <= sourceStack.length - commonPrefix; i++) {
var currentNode: Node = sourceStack[i];
var kind: SyntaxKind = currentNode.kind;
var pathUpperBoundTypes: Array<SyntaxKind> = [SyntaxKind.SourceFile,SyntaxKind.FunctionDeclaration, SyntaxKind.MethodDeclaration, SyntaxKind.ClassDeclaration, SyntaxKind.InterfaceDeclaration];
if (pathUpperBoundTypes.some(x => x === kind)) {
return Common.EmptyString;
}
}
// Build the string up the path
for (let i = 0; i < sourceStack.length - commonPrefix; i++) {
var currentNode: Node = sourceStack[i];
stringBuilder = stringBuilder + startSymbol +
SyntaxKind[currentNode.kind] + endSymbol + up;
}
// Add the common ancestor
var commonNode: Node = sourceStack[sourceStack.length - commonPrefix];
stringBuilder = stringBuilder + startSymbol +
SyntaxKind[commonNode.kind] + endSymbol;
// Continue building the string down the path
for (let i = targetStack.length - commonPrefix - 1; i >= 0; i--) {
var currentNode: Node = targetStack[i];
stringBuilder = stringBuilder + down + startSymbol +
SyntaxKind[currentNode.kind] + endSymbol;
}
return stringBuilder;
}
/**
* Gets the actual path in the AST between the given node and the root.
* @param node - The returned path contains all the nodes between the node and the root
*/
private static getTreeStack(node: Node): Array<Node> {
let upStack: Array<Node> = new Array<Node>();
let currentNode: Node = node;
while (currentNode != null) {
upStack.push(currentNode);
currentNode = currentNode.parent;
}
return upStack;
}
}