-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor of reconstruct.ts #484
base: main
Are you sure you want to change the base?
Changes from all commits
a08e74a
e367620
de9289d
19041f3
e5d9a30
5c90575
f546c5f
9971485
bda524a
3006674
86a2cd1
5dce5a0
7c64c31
5995277
030edc9
03cbf9e
329a0af
428d7bf
f33ad56
82552b7
f1adaa5
36a1df4
0446887
bc156d9
508d152
998a6d3
2e2472e
0cd9ab2
7b9cb5b
b70c91d
698febf
b9cc3dd
624acf2
7c0c3c3
2897b9b
19ae981
089eb6a
2701d40
a172d2b
ef2d55b
6c7c02e
037d4ef
d482745
d2d3d87
858a726
744e160
08141bd
22911ad
1030310
47a7d93
659574c
2bdd813
010db50
5345f19
20ec3a8
6ffcf9e
a8229ba
e245679
5425223
227348b
23b0ab0
5ae5b74
df6e39f
5c68caf
2ef8892
677d8b8
f2ae684
fd8e413
6d59c2e
0ff9142
c9f01b1
9c4f04d
97ce67d
73e3d46
94d0e4b
4c4abcf
4795997
08fcb2e
fe57bd2
600abc9
870a9f2
576d17e
9f1bcc9
8a403e0
812b291
0231dd6
bbfa26d
3c0e2c2
7bf2750
63be95e
0e5bf7c
d8004b6
eef7854
2ca0554
183dac0
71a79c7
04f9b02
2ea0759
b26c780
7f5afec
ec43a73
57349a6
c4cac54
3132580
36565d0
d25e464
eb5b11e
66888f5
9261280
5b3ede2
d036b65
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"cSpell.words": [ | ||
"unnesseccary" | ||
] | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,284 @@ | ||
import type { NodeId } from '../r-bridge/lang-4.x/ast/model/processing/node-id' | ||
import type { ParentInformation } from '../r-bridge/lang-4.x/ast/model/processing/decorate' | ||
import type { RNode } from '../r-bridge/lang-4.x/ast/model/model' | ||
import { RType } from '../r-bridge/lang-4.x/ast/model/type' | ||
import type { SourcePosition } from '../util/range' | ||
import type { ReconstructionConfiguration } from './reconstruct' | ||
|
||
/** A fragment of reconstructed text together with the source position it is placed at. */
export interface PrettyPrintLinePart {
    part: string
    loc:  SourcePosition
}

/** A group of {@link PrettyPrintLinePart|line parts} that share a single indentation level. */
export interface PrettyPrintLine {
    linePart: PrettyPrintLinePart[]
    indent:   number
}

/** Reconstructed code, represented as a list of {@link PrettyPrintLine|pretty-print lines}. */
export type Code = PrettyPrintLine[]

/** A read-only set of node ids. */
export type Selection = ReadonlySet<NodeId>
|
||
/**
 * Breaks `text` apart at every linebreak and wraps all pieces in a single {@link PrettyPrintLine} (indent 0).
 * The n-th piece (starting at 0) is located n lines below `location`, always in the column of `location`.
 */
export function plain(text: string, location: SourcePosition): Code {
    const startLine = location[0]
    const column = location[1]
    const linePart = text
        .split('\n')
        .map((part, offset): PrettyPrintLinePart => ({ part, loc: [startLine + offset, column] }))
    return [{ linePart, indent: 0 }]
}
/**
 * Tokenizes `text` at spaces and linebreaks and wraps all tokens in a single {@link PrettyPrintLine} (indent 0).
 *
 * - A space ends the current token; empty tokens (e.g., between consecutive spaces) are dropped, but still advance the column.
 * - A linebreak ends the current token (which is kept even if it is empty) and continues in the next line, in the column of `location`.
 * - Whatever remains after the last character is always added as a final token, even if it is empty.
 */
export function plainSplit(text: string, location: SourcePosition): Code {
    const tokens: PrettyPrintLinePart[] = []
    const startColumn = location[1]
    let lineNo = location[0]
    let column = startColumn
    let word = ''

    for(const ch of text) {
        switch(ch) {
            case ' ':
                if(word !== '') {
                    tokens.push({ part: word, loc: [lineNo, column] })
                }
                // skip the token and the space that terminated it
                column += word.length + 1
                word = ''
                break
            case '\n':
                tokens.push({ part: word, loc: [lineNo, column] })
                lineNo += 1
                column = startColumn
                word = ''
                break
            default:
                word += ch
        }
    }

    tokens.push({ part: word, loc: [lineNo, column] })
    return [{ linePart: tokens, indent: 0 }]
}
|
||
/**
 * Merges any number of code pieces into a single code piece.
 * All line parts are grouped by the line they are located in, so the result contains every line at most once,
 * ordered by ascending line number, with the parts of each line ordered by column.
 * The indent of a merged line is taken from the first input line that contributes a part to it.
 */
export function merge(...snipbits: Code[]): Code {
    // index = line number; lines that receive no part stay empty, so this array may be sparse
    const perLine: PrettyPrintLine[] = []

    // group the line parts by the line they are located in
    // NOTE(review): it was suggested during review to extract this grouping loop into a helper of its own
    for(const snippet of snipbits) {
        for(const line of snippet) {
            if(line !== undefined) {
                for(const part of line.linePart) {
                    const lineNumber = part.loc[0]
                    let bucket = perLine[lineNumber]
                    if(bucket === undefined) {
                        // only create a bucket once so already collected parts are never discarded
                        bucket = { linePart: [], indent: line.indent }
                        perLine[lineNumber] = bucket
                    }
                    bucket.linePart.push(part)
                }
            }
        }
    }

    // order the parts of every line by column and stitch the lines together (filter skips the empty slots)
    return perLine
        .filter(bucket => bucket !== undefined)
        .map(bucket => {
            bucket.linePart.sort((left, right) => left.loc[1] - right.loc[1])
            return bucket
        })
}
|
||
/**
 * Renders the parts of a single line into one string.
 * The parts are placed in ascending column order and padded with spaces so that each part starts in its column,
 * where `columnOffset` is the column that corresponds to the very first character of the produced string.
 * If a part would have to start left of what has been produced so far, it is appended directly without any padding.
 *
 * @param line         - The parts to render, they are neither reordered nor modified
 * @param columnOffset - The column that maps to index 0 of the resulting string
 *
 * @returns The rendered line: the empty string if there are no parts, and the plain text of the part (without padding) if there is exactly one.
 */
export function prettyPrintPartToString(line: PrettyPrintLinePart[], columnOffset: number): string {
    if(line.length === 0) {
        return ''
    }
    if(line.length === 1) {
        return line[0].part
    }
    // sort a copy, `Array.prototype.sort` works in place and would reorder the array of the caller
    const ordered = [...line].sort((a, b) => a.loc[1] - b.loc[1])
    let result = ''
    for(const part of ordered) {
        const currLength = result.length + columnOffset
        // the distance may be negative (overlapping parts or parts left of the offset), so clamp the padding to 0
        result += ' '.repeat(Math.max(part.loc[1] - currLength, 0))
        result += part.part
    }
    return result
}
|
||
/**
 * Creates a copy of `lines` in which the indent of every line is increased by `indent` (which may be negative).
 * The line parts are shared with the input, the input lines themselves are not modified.
 */
export function indentBy(lines: Code, indent: number): Code {
    const shift = (line: PrettyPrintLine): PrettyPrintLine => {
        return { linePart: line.linePart, indent: line.indent + indent }
    }
    return lines.map(line => shift(line))
}
|
||
/**
 * Checks whether the node `n` is to be reconstructed: either because its id is part of the selection of the
 * `configuration`, or because the `autoSelectIf` predicate of the `configuration` accepts it.
 * The predicate is only consulted for nodes that are not part of the selection.
 */
export function isSelected(configuration: ReconstructionConfiguration, n: RNode<ParentInformation>) {
    if(configuration.selection.has(n.info.id)) {
        return true
    }
    return configuration.autoSelectIf(n)
}
|
||
/**
 * Removes the braces of an expression list from `code`, if the very first part of the first line is `{`
 * and the very last part of the last line is `}`.
 * In that case, the first and the last line are dropped and all remaining lines are indented one level less.
 * Otherwise (this includes empty code and lines without any parts), `code` is returned as is.
 *
 * NOTE(review): if the opening and the closing brace are on the same, single line, that line is dropped as a
 * whole and the result is empty - confirm that callers only pass multi-line expression lists.
 */
export function removeExpressionListWrap(code: Code) {
    // optional chaining guards against empty code as well as first/last lines that do not contain any part
    const firstLine = code[0]?.linePart
    const lastLine = code[code.length - 1]?.linePart
    const opens = firstLine?.[0]?.part === '{'
    const closes = lastLine?.[lastLine.length - 1]?.part === '}'
    if(opens && closes) {
        return indentBy(code.slice(1, code.length - 1), -1)
    } else {
        return code
    }
}
|
||
/**
 * The shape of a predicate which decides whether a given normalized node should be part of the reconstructed code,
 * regardless of whether the slice selected it or not.
 */
export type AutoSelectPredicate = (node: RNode<ParentInformation>) => boolean

/** An {@link AutoSelectPredicate} that never triggers, i.e., it rejects every node. */
export function doNotAutoSelect(_node: RNode<ParentInformation>): boolean {
    const neverSelect = false
    return neverSelect
}
|
||
/** Matches exactly the function names `library`, `require`, `requireNamespace`, `loadNamespace`, and `attachNamespace`. */
export const libraryFunctionCall = /^(library|require|((require|load|attach)Namespace))$/

/**
 * A predicate that triggers for every named function call whose function name is matched by {@link libraryFunctionCall}.
 * It rejects all other nodes.
 */
export function autoSelectLibrary(node: RNode<ParentInformation>): boolean {
    if(node.type === RType.FunctionCall && node.flavor === 'named') {
        return libraryFunctionCall.test(node.functionName.content)
    }
    return false
}
|
||
/**
 * Produces the whitespace for the given indentation level, using four spaces per level.
 *
 * @param indent - The indentation level; negative levels (which may for example result from `indentBy` with a negative amount) are treated as level 0
 *
 * @returns A string which consists of `4 * indent` spaces.
 */
export function getIndentString(indent: number): string {
    // `String.prototype.repeat` throws a RangeError for negative counts, so clamp the level first
    return ' '.repeat(Math.max(indent, 0) * 4)
}
|
||
/* | ||
function dist(pos1: number, pos2: number) { | ||
Math.abs(pos1 - pos2) | ||
} | ||
*/ | ||
|
||
/**
 * Heuristically inserts `;` parts between statements which ended up in the same line of `code`.
 *
 * All line parts are grouped by their line, and each line is scanned part by part (in the order the parts appear in `code`).
 * While scanning, the position directly behind the previous part is remembered as a candidate whenever the current
 * part is separated from it by at least one column, or the current part ends with `}` or `)`.
 * A `;` is placed at the remembered candidate if the current part
 * - is a second assignment operator without a semicolon in between,
 * - starts with `(` while the previous part ends with `)`,
 * - starts with `{` while the previous part ends with `}`, or
 * - is no special token and directly follows another part which is no special token either.
 *
 * @param code - The code to add semicolons to, it is not modified
 *
 * @returns The {@link merge} of `code` and all inserted semicolons.
 */
function addSemis(code: Code): Code {
    /** The bookkeeping for the scan of a single line */
    interface SemiHeuristic {
        /** an assignment operator has been seen and was not yet terminated */
        assignment:    boolean
        /** the previous part ends with `)` */
        brackets:      boolean
        /** the previous part */
        lastChar:      PrettyPrintLinePart
        /** the previous part is no special token */
        statement:     boolean
        /** a semicolon has already been placed at the current candidate position */
        addedSemi:     boolean
        /** the previous part ends with `}` */
        curlyBrackets: boolean
    }

    const specialChar = ['+', '-', '*', '/', ':', '<-', '->', '<<-', '->>', '$', '$$', '&', '&&', '||', '?', '<', '>', '=', '<=', '>=', '==', '(', ')', '((', '))', '{', '}', '[', '[[', ']', ']]', 'for', ' in ']
    // the semicolons are collected separately so that the array of the caller is left untouched
    const semis: Code = []

    /** Checks whether `elem` is one of the tokens that are always special (assignment arrows, `in`, ` {} `) or is contained in `array` */
    function contains(array: string[], elem: string): boolean {
        if(elem === '<-' || elem === '->' || elem === '<<-' || elem === '->>') {
            return true
        }
        if(elem === 'in' || elem === ' {} ') {
            return true
        }
        return array.includes(elem)
    }

    /** Places a semicolon at `possibleSemi`, unless one has already been placed for the current candidate position */
    function pushSemi(heuristic: SemiHeuristic, possibleSemi: SourcePosition): void {
        if(!heuristic.addedSemi) {
            semis.push({ linePart: [{ part: ';', loc: possibleSemi }], indent: 0 })
            heuristic.addedSemi = true
        }
    }

    /** Moves the candidate position directly behind the previous part and records whether the position changed at all */
    function updateSemi(possibleSemi: SourcePosition, heuristic: SemiHeuristic): SourcePosition {
        const candidate: SourcePosition = [heuristic.lastChar.loc[0], heuristic.lastChar.loc[1] + heuristic.lastChar.part.length]
        // compare line with line and column with column (the column used to be compared with the line of the candidate)
        heuristic.addedSemi = possibleSemi[0] === candidate[0] && possibleSemi[1] === candidate[1]
        return candidate
    }

    // group all line parts by the line they are located in (index = line number, may be sparse)
    // TODO: find a way to share this with merge, as it is a very similar piece of code
    const line: PrettyPrintLinePart[][] = []
    for(const elem of code) {
        for(const linePart of elem.linePart) {
            const currLine = linePart.loc[0]
            if(line[currLine] === undefined) {
                line[currLine] = []
            }
            line[currLine].push(linePart)
        }
    }

    // iterate through all parts of every line to search for places for semicolons
    for(const lineElements of line) {
        if(lineElements === undefined) {
            continue
        }
        // the first part of a line only serves as the initial "previous part"
        const heuristic: SemiHeuristic = { assignment: false, brackets: false, lastChar: lineElements[0], statement: false, addedSemi: false, curlyBrackets: false }
        let possibleSemi = heuristic.lastChar.loc
        for(const elem of lineElements.slice(1)) {
            const lastChar = heuristic.lastChar.part
            heuristic.brackets = lastChar.endsWith(')')
            heuristic.curlyBrackets = lastChar.endsWith('}')
            heuristic.statement = !contains(specialChar, lastChar)

            // a semicolon terminates any pending assignment
            if(heuristic.addedSemi) {
                heuristic.assignment = false
            }

            // check if the previous part may be followed by a semicolon
            const gapToLast = elem.loc[1] - (heuristic.lastChar.loc[1] + lastChar.length) >= 1
            const closesCurlyBrackets = elem.part.endsWith('}')
            const closesBrackets = elem.part.endsWith(')')
            if(gapToLast || closesCurlyBrackets || closesBrackets) {
                possibleSemi = updateSemi(possibleSemi, heuristic)
            }

            // check the conditions for adding semicolons
            if(elem.part === '<-' || elem.part === '->' || elem.part === '<<-' || elem.part === '->>') {
                // a second assignment without a semicolon in between
                if(heuristic.assignment) {
                    pushSemi(heuristic, possibleSemi)
                }
                heuristic.assignment = !heuristic.assignment
            } else if(elem.part.startsWith('(')) {
                // an opening bracket directly after a closing one
                heuristic.assignment = false
                if(heuristic.brackets) {
                    pushSemi(heuristic, possibleSemi)
                    heuristic.brackets = false
                }
            } else if(elem.part.startsWith('{')) {
                // an opening curly bracket directly after a closing one
                heuristic.assignment = false
                if(heuristic.curlyBrackets) {
                    pushSemi(heuristic, possibleSemi)
                    heuristic.curlyBrackets = false
                }
            } else if(!contains(specialChar, elem.part)) {
                // two consecutive statements
                if(heuristic.statement) {
                    pushSemi(heuristic, possibleSemi)
                }
            }

            // update the last part seen
            heuristic.lastChar = elem
        }
    }

    return merge(code, semis)
}
|
||
/**
 * Converts `code` into its final string representation.
 * The code is merged line by line, semicolons are added where necessary, and every resulting line is rendered
 * with its indentation (negative indents count as 0). The column of the very first part of the first line serves
 * as the column offset for all lines.
 *
 * @param code - The code to convert
 * @param lf   - The separator to join the rendered lines with
 */
export function prettyPrintCodeToString(code: Code, lf = '\n'): string {
    const lines = addSemis(merge(code))
    const rendered: string[] = []
    for(const { linePart, indent } of lines) {
        const indentation = getIndentString(Math.max(indent, 0))
        const columnOffset = lines[0].linePart[0].loc[1]
        rendered.push(indentation + prettyPrintPartToString(linePart, columnOffset))
    }
    return rendered.join(lf)
}
|
||
/**
 * Removes the outermost pair of curly brackets from `result`, if the very first part of the first line is `{`
 * and the very last part of the last line is `}`.
 *
 * - If `result` consists of a single line, only the two brackets are removed from that line.
 * - If `result` spans multiple lines, the first and the last line are dropped and all remaining lines are indented one level less.
 * - Otherwise (this includes empty code and lines without any parts), `result` is returned as is.
 */
export function removeOuterExpressionListIfApplicable(result: PrettyPrintLine[]): Code {
    // optional chaining guards against empty code as well as first/last lines that do not contain any part
    const first = result[0]?.linePart
    const last = result[result.length - 1]?.linePart
    const opens = first?.[0]?.part === '{'
    const closes = last?.[last.length - 1]?.part === '}'
    if(first === undefined || !opens || !closes) {
        return result
    }
    if(result.length === 1) {
        // we are in a single line
        return [{ linePart: first.slice(1, first.length - 1), indent: result[0].indent }]
    }
    // remove outer block
    return indentBy(result.slice(1, result.length - 1), -1)
}
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import { reconstructLogger, reconstructAstFolds } from './reconstruct' | ||
import type { ReconstructionResult } from './reconstruct' | ||
import { prettyPrintCodeToString, removeOuterExpressionListIfApplicable , autoSelectLibrary } from './helper' | ||
|
||
import type { AutoSelectPredicate , Selection } from './helper' | ||
|
||
|
||
import { LogLevel } from '../util/log' | ||
import type { RNode } from '../r-bridge/lang-4.x/ast/model/model' | ||
import type { NormalizedAst, ParentInformation } from '../r-bridge/lang-4.x/ast/model/processing/decorate' | ||
import { foldAstStateful } from '../r-bridge/lang-4.x/ast/model/processing/stateful-fold' | ||
|
||
|
||
|
||
/**
 * Turns (parts of) a normalized R ast back into R code, working on an expression basis.
 *
 * @param ast          - The {@link NormalizedAst|normalized ast} that serves as the basis of the reconstruction
 * @param selection    - The nodes to reconstruct (probably the {@link NodeId|NodeIds} identified by the slicer)
 * @param autoSelectIf - A predicate to force the reconstruction of a node (for example to reconstruct library call statements, see {@link autoSelectLibrary}, {@link doNotAutoSelect})
 *
 * @returns The reconstructed code, together with the number of times `autoSelectIf` triggered.
 */
export function reconstructToCode<Info>(ast: NormalizedAst<Info>, selection: Selection, autoSelectIf: AutoSelectPredicate = autoSelectLibrary): ReconstructionResult {
    // evaluated anew for every message, so the serialization is only paid for if the guard holds
    const shouldTrace = () => reconstructLogger.settings.minLevel >= LogLevel.Trace

    if(shouldTrace()) {
        reconstructLogger.trace(`reconstruct ast with ids: ${JSON.stringify([...selection])}`)
    }

    // wrap the predicate so that we can count how often it triggered
    let autoSelected = 0
    function countingAutoSelect(node: RNode<ParentInformation>) {
        const triggered = autoSelectIf(node)
        autoSelected += triggered ? 1 : 0
        return triggered
    }

    // fold of the normalized ast
    const folded = foldAstStateful(ast.ast, { selection, autoSelectIf: countingAutoSelect }, reconstructAstFolds)

    if(shouldTrace()) {
        reconstructLogger.trace('reconstructed ast before string conversion: ', JSON.stringify(folded))
    }

    const code = prettyPrintCodeToString(removeOuterExpressionListIfApplicable(folded))
    return { code, linesWithAutoSelected: autoSelected }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Das sollte nicht gemutet werden :D
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ich wusste gar nicht, dass in den Settings etwas geändert wurde 😅