mirror of
https://github.com/carlrobertoh/ProxyAI.git
synced 2026-05-12 22:31:24 +00:00
You API integration (#203)
* Ability to configure custom service * Add example preset templates, rename module * Custom service client impl * Add YOU API integration * Remove/ignore generated antlr classes * Remove text completion models (deprecated) * Remove unused code, fix settings state sync * Display model name/icon in the tool window * Update chat history UI * Fix model/service sync * Clear plugin state * Fix minor bugs, add settings sync tests * UI changes * Separate model configuration * Add support for overriding the completion path * Update Find Bugs prompt
This commit is contained in:
parent
a860054360
commit
37af74ebdf
125 changed files with 1673 additions and 1537 deletions
|
|
@ -0,0 +1,47 @@
|
|||
package ee.carlrobert.embedding;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
 * Immutable snapshot of a file selected for embedding: its name, path, and eagerly-read content.
 */
public class CheckedFile {

  /**
   * Matches the characters after the final dot of the file name, e.g. {@code "java"} in
   * {@code "Main.java"}. The lookbehind requires an actual dot before the match, so names
   * without an extension (e.g. {@code "README"}) produce no match instead of matching the
   * whole name. Compiled once — compiling a Pattern per call is wasteful.
   */
  private static final Pattern EXTENSION_PATTERN = Pattern.compile("(?<=\\.)[^.]+$");

  private final String fileName;
  private final String filePath;
  private final String fileContent;

  /**
   * Reads the given file eagerly into memory.
   *
   * @param file the file to snapshot; must exist and be readable
   * @throws RuntimeException if the file cannot be read
   */
  public CheckedFile(File file) {
    this.fileName = file.getName();
    this.filePath = file.getPath();
    try {
      // NOTE(review): decodes with the platform default charset — consider pinning UTF-8.
      this.fileContent = new String(Files.readAllBytes(Paths.get(filePath)));
    } catch (IOException e) {
      throw new RuntimeException("Unable to read file: " + filePath, e);
    }
  }

  public String getFileName() {
    return fileName;
  }

  public String getFilePath() {
    return filePath;
  }

  public String getFileContent() {
    return fileContent;
  }

  /**
   * Returns the file extension without the leading dot, or an empty string when the file name
   * contains no dot (previously such names were returned whole, which mis-reported e.g.
   * "README" as having extension "README").
   */
  public String getFileExtension() {
    Matcher matcher = EXTENSION_PATTERN.matcher(fileName);
    return matcher.find() ? matcher.group() : "";
  }
}
|
||||
|
|
@ -0,0 +1,134 @@
|
|||
package ee.carlrobert.embedding;
|
||||
|
||||
import static com.github.jelmerk.knn.util.VectorUtils.normalize;
|
||||
import static java.util.stream.Collectors.toList;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.github.jelmerk.knn.Item;
|
||||
import com.github.jelmerk.knn.SearchResult;
|
||||
import com.intellij.openapi.diagnostic.Logger;
|
||||
import com.intellij.openapi.progress.ProgressIndicator;
|
||||
import ee.carlrobert.llm.client.openai.OpenAIClient;
|
||||
import ee.carlrobert.llm.client.openai.completion.chat.OpenAIChatCompletionModel;
|
||||
import ee.carlrobert.llm.client.openai.completion.chat.request.OpenAIChatCompletionMessage;
|
||||
import ee.carlrobert.llm.client.openai.completion.chat.request.OpenAIChatCompletionRequest;
|
||||
import ee.carlrobert.splitter.SplitterFactory;
|
||||
import ee.carlrobert.vector.VectorStore;
|
||||
import ee.carlrobert.vector.Word;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
public class EmbeddingsService {
|
||||
|
||||
private static final Logger LOG = Logger.getInstance(EmbeddingsService.class);
|
||||
|
||||
private final VectorStore vectorStore;
|
||||
private final OpenAIClient openAIClient;
|
||||
|
||||
public EmbeddingsService(OpenAIClient openAIClient, Path pluginBasePath) {
|
||||
this.openAIClient = openAIClient;
|
||||
this.vectorStore = VectorStore.getInstance(pluginBasePath);
|
||||
}
|
||||
|
||||
public List<double[]> getEmbeddings(List<String> chunks) {
|
||||
return openAIClient.getEmbeddings(chunks);
|
||||
}
|
||||
|
||||
public String buildPromptWithContext(String prompt) {
|
||||
try {
|
||||
var inputEmbedding = openAIClient.getEmbedding(getSearchQuery(prompt));
|
||||
var sortedResult = vectorStore.loadIndex()
|
||||
.findNearest(normalize(inputEmbedding), 10)
|
||||
.stream()
|
||||
.map(SearchResult::item)
|
||||
.sorted(Comparator.comparing(Word::getMeta))
|
||||
.collect(toList());
|
||||
|
||||
var context = sortedResult.stream().map(Word::id).collect(Collectors.joining());
|
||||
var fileNames = sortedResult.stream().map(Word::getMeta).collect(Collectors.toSet());
|
||||
|
||||
return getResourceContent("/prompts/prompt-with-context.txt")
|
||||
.replace("{prompt}", prompt)
|
||||
.replace("{context}", new GeneratedContextDetails(context, fileNames).getContext());
|
||||
} catch (IOException e) {
|
||||
LOG.error("Unable to load vector index", e);
|
||||
return prompt;
|
||||
}
|
||||
}
|
||||
|
||||
public List<Item<Object, double[]>> createEmbeddings(List<CheckedFile> checkedFiles, @Nullable ProgressIndicator indicator) {
|
||||
var words = new ArrayList<Item<Object, double[]>>();
|
||||
for (int i = 0; i < checkedFiles.size(); i++) {
|
||||
try {
|
||||
var checkedFile = checkedFiles.get(i);
|
||||
addEmbeddings(checkedFile, words);
|
||||
|
||||
if (indicator != null) {
|
||||
indicator.setFraction((double) i / checkedFiles.size());
|
||||
}
|
||||
} catch (Throwable t) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
return words;
|
||||
}
|
||||
|
||||
private String getSearchQuery(String userPrompt) throws JsonProcessingException {
|
||||
var message = new OpenAIChatCompletionMessage("user", getResourceContent("/prompts/text-generator.txt").replace("{prompt}", userPrompt));
|
||||
var request = new OpenAIChatCompletionRequest.Builder(List.of(message))
|
||||
.setModel(OpenAIChatCompletionModel.GPT_4)
|
||||
.setMaxTokens(400)
|
||||
.setTemperature(0.1)
|
||||
.setStream(false)
|
||||
.build();
|
||||
|
||||
return openAIClient.getChatCompletion(request)
|
||||
.getChoices()
|
||||
.get(0)
|
||||
.getMessage()
|
||||
.getContent();
|
||||
}
|
||||
|
||||
private void addEmbeddings(CheckedFile checkedFile, List<Item<Object, double[]>> prevEmbeddings) {
|
||||
var fileExtension = checkedFile.getFileExtension();
|
||||
var codeSplitter = SplitterFactory.getCodeSplitter(fileExtension);
|
||||
if (codeSplitter != null) {
|
||||
var chunks = codeSplitter.split(checkedFile.getFileName(), checkedFile.getFileContent());
|
||||
var embeddings = openAIClient.getEmbeddings(chunks);
|
||||
for (int i = 0; i < chunks.size(); i++) {
|
||||
prevEmbeddings.add(new Word(chunks.get(i), checkedFile.getFileName(), normalize(embeddings.get(i))));
|
||||
}
|
||||
} else {
|
||||
var chunks = splitText(checkedFile.getFileContent(), 400);
|
||||
var embeddings = getEmbeddings(chunks);
|
||||
for (int i = 0; i < chunks.size(); i++) {
|
||||
prevEmbeddings.add(new Word(chunks.get(i), checkedFile.getFileName(), normalize(embeddings.get(i))));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static List<String> splitText(String str, int chunkSize) {
|
||||
int len = str.length();
|
||||
var chunks = new ArrayList<String>();
|
||||
for (int i = 0; i < len; i += chunkSize) {
|
||||
chunks.add(str.substring(i, Math.min(len, i + chunkSize)));
|
||||
}
|
||||
return chunks;
|
||||
}
|
||||
|
||||
// TODO: Move to shared module
|
||||
private static String getResourceContent(String name) {
|
||||
try (var stream = Objects.requireNonNull(EmbeddingsService.class.getResourceAsStream(name))) {
|
||||
return new String(stream.readAllBytes(), StandardCharsets.UTF_8);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Unable to read resource", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
package ee.carlrobert.embedding;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
 * Immutable value object pairing generated prompt context with the names of the files it was
 * derived from.
 */
public class GeneratedContextDetails {

  private final String context;
  private final Set<String> fileNames;

  /**
   * @param context the generated context text
   * @param fileNames the names of the files the context was built from; defensively copied
   *     into an unmodifiable set so later mutation of the caller's set (or of the returned
   *     set) cannot corrupt this object
   */
  public GeneratedContextDetails(String context, Set<String> fileNames) {
    this.context = context;
    this.fileNames = Set.copyOf(fileNames);
  }

  public String getContext() {
    return context;
  }

  /** Returns an unmodifiable view of the originating file names. */
  public Set<String> getFileNames() {
    return fileNames;
  }
}
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
package ee.carlrobert.splitter;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.antlr.v4.runtime.CharStreams;
|
||||
import org.antlr.v4.runtime.CodePointCharStream;
|
||||
import org.antlr.v4.runtime.ParserRuleContext;
|
||||
import org.antlr.v4.runtime.misc.Interval;
|
||||
import org.antlr.v4.runtime.tree.ParseTree;
|
||||
import org.antlr.v4.runtime.tree.ParseTreeListener;
|
||||
import org.antlr.v4.runtime.tree.ParseTreeWalker;
|
||||
|
||||
abstract class CodeSplitter implements Splitter {
|
||||
|
||||
protected List<String> chunks = new ArrayList<>();
|
||||
|
||||
protected abstract ParseTree getParseTree(CodePointCharStream charStream);
|
||||
|
||||
protected abstract ParseTreeListener getParseTreeListener();
|
||||
|
||||
protected String parseContext(ParserRuleContext ctx) {
|
||||
return ctx.start.getInputStream().getText(
|
||||
new Interval(ctx.start.getStartIndex(), ctx.stop.getStopIndex()));
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> split(String fileName, String content) {
|
||||
chunks = new ArrayList<>();
|
||||
ParseTreeWalker.DEFAULT.walk(getParseTreeListener(), getParseTree(CharStreams.fromString(content)));
|
||||
return chunks;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
package ee.carlrobert.splitter;
|
||||
|
||||
import grammar.JavaLexer;
|
||||
import grammar.JavaParser;
|
||||
import grammar.JavaParserBaseListener;
|
||||
import org.antlr.v4.runtime.CodePointCharStream;
|
||||
import org.antlr.v4.runtime.CommonTokenStream;
|
||||
import org.antlr.v4.runtime.tree.ParseTree;
|
||||
import org.antlr.v4.runtime.tree.ParseTreeListener;
|
||||
|
||||
public class JavaCodeSplitter extends CodeSplitter {
|
||||
|
||||
@Override
|
||||
protected ParseTree getParseTree(CodePointCharStream charStream) {
|
||||
return new JavaParser(new CommonTokenStream(new JavaLexer(charStream))).compilationUnit();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ParseTreeListener getParseTreeListener() {
|
||||
return new JavaParserBaseListener() {
|
||||
@Override
|
||||
public void enterConstructorDeclaration(JavaParser.ConstructorDeclarationContext ctx) {
|
||||
chunks.add(parseContext(ctx));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void enterMethodDeclaration(JavaParser.MethodDeclarationContext ctx) {
|
||||
chunks.add(parseContext(ctx));
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
package ee.carlrobert.splitter;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.intellij.openapi.diagnostic.Logger;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.json.JSONObject;
|
||||
|
||||
public class JsonSplitter implements Splitter {
|
||||
|
||||
private static final Logger LOG = Logger.getInstance(JsonSplitter.class);
|
||||
|
||||
@Override
|
||||
public List<String> split(String fileName, String content) {
|
||||
var chunks = new ArrayList<String>();
|
||||
|
||||
try {
|
||||
// TODO: Switch to ObjectMapper
|
||||
for (var entry : new JSONObject(content).toMap().entrySet()) {
|
||||
chunks.add(new ObjectMapper().writeValueAsString(entry));
|
||||
}
|
||||
} catch (JsonProcessingException e) {
|
||||
LOG.error("Something went wrong while chunking the json", e);
|
||||
}
|
||||
return chunks;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
package ee.carlrobert.splitter;
|
||||
|
||||
import grammar.PythonLexer;
|
||||
import grammar.PythonParser;
|
||||
import grammar.PythonParserBaseListener;
|
||||
import org.antlr.v4.runtime.CodePointCharStream;
|
||||
import org.antlr.v4.runtime.CommonTokenStream;
|
||||
import org.antlr.v4.runtime.tree.ParseTree;
|
||||
import org.antlr.v4.runtime.tree.ParseTreeListener;
|
||||
|
||||
public class PythonCodeSplitter extends CodeSplitter {
|
||||
|
||||
@Override
|
||||
protected ParseTree getParseTree(CodePointCharStream charStream) {
|
||||
return new PythonParser(new CommonTokenStream(new PythonLexer(charStream))).file_input();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ParseTreeListener getParseTreeListener() {
|
||||
return new PythonParserBaseListener() {
|
||||
@Override
|
||||
public void enterClass_or_func_def_stmt(PythonParser.Class_or_func_def_stmtContext ctx) {
|
||||
chunks.add(parseContext(ctx));
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
package ee.carlrobert.splitter;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Splits file content into smaller chunks suitable for embedding.
 */
public interface Splitter {

  /**
   * Splits the given file content into chunks.
   *
   * @param fileName the name of the file being split; some implementations ignore it
   * @param content the full text content of the file
   * @return the list of chunks, possibly empty
   */
  List<String> split(String fileName, String content);
}
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
package ee.carlrobert.splitter;
|
||||
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
public class SplitterFactory {
|
||||
|
||||
public static @Nullable Splitter getCodeSplitter(String fileExtension) {
|
||||
switch (fileExtension) {
|
||||
case "java":
|
||||
return new JavaCodeSplitter();
|
||||
case "py":
|
||||
return new PythonCodeSplitter();
|
||||
case "json":
|
||||
return new JsonSplitter();
|
||||
case "ts":
|
||||
case "tsx":
|
||||
return new TypeScriptCodeSplitter();
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
package ee.carlrobert.splitter;
|
||||
|
||||
import grammar.TypeScriptLexer;
|
||||
import grammar.TypeScriptParser;
|
||||
import grammar.TypeScriptParserBaseListener;
|
||||
import org.antlr.v4.runtime.CodePointCharStream;
|
||||
import org.antlr.v4.runtime.CommonTokenStream;
|
||||
import org.antlr.v4.runtime.tree.ParseTree;
|
||||
import org.antlr.v4.runtime.tree.ParseTreeListener;
|
||||
|
||||
/** Splitter for TypeScript (.ts/.tsx) source backed by the ANTLR TypeScript grammar. */
public class TypeScriptCodeSplitter extends CodeSplitter {

  @Override
  protected ParseTree getParseTree(CodePointCharStream charStream) {
    return new TypeScriptParser(new CommonTokenStream(new TypeScriptLexer(charStream))).program();
  }

  @Override
  protected ParseTreeListener getParseTreeListener() {
    // NOTE(review): this listener overrides no callbacks, so nothing is ever added to
    // `chunks` and split() returns an empty list for every TypeScript file. Confirm whether
    // enter* overrides (e.g. for function/class declarations) were intended here.
    return new TypeScriptParserBaseListener() {};
  }
}
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
package ee.carlrobert.vector;
|
||||
|
||||
import com.github.jelmerk.knn.DistanceFunctions;
|
||||
import com.github.jelmerk.knn.Item;
|
||||
import com.github.jelmerk.knn.hnsw.HnswIndex;
|
||||
import com.intellij.openapi.application.ApplicationManager;
|
||||
import com.intellij.openapi.util.io.FileUtil;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
public class VectorStore {
|
||||
|
||||
private static VectorStore instance;
|
||||
|
||||
private final String storePath;
|
||||
|
||||
private VectorStore(Path pluginPath) {
|
||||
this.storePath = getIndexStorePath(pluginPath.toString());
|
||||
}
|
||||
|
||||
public static VectorStore getInstance(Path pluginPath) {
|
||||
if (instance == null) {
|
||||
instance = new VectorStore(pluginPath);
|
||||
}
|
||||
return instance;
|
||||
}
|
||||
|
||||
public HnswIndex<Object, double[], Word, Object> loadIndex() throws IOException {
|
||||
return loadIndex(storePath);
|
||||
}
|
||||
|
||||
public HnswIndex<Object, double[], Word, Object> loadIndex(String path) throws IOException {
|
||||
return HnswIndex.load(new File(path), this.getClass().getClassLoader());
|
||||
}
|
||||
|
||||
public void save(List<Item<Object, double[]>> words) {
|
||||
var hnswIndex = HnswIndex
|
||||
.newBuilder(words.get(0).vector().length, DistanceFunctions.DOUBLE_COSINE_DISTANCE, words.size())
|
||||
.build();
|
||||
try {
|
||||
hnswIndex.addAll(words);
|
||||
hnswIndex.save(new File(storePath));
|
||||
} catch (IOException | InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isIndexExists() {
|
||||
return FileUtil.exists(storePath);
|
||||
}
|
||||
|
||||
private String getIndexStorePath(String pluginBasePath) {
|
||||
if (ApplicationManager.getApplication().isUnitTestMode()) {
|
||||
pluginBasePath = new File("src/test/resources/indexes").getAbsolutePath();
|
||||
}
|
||||
return pluginBasePath + File.separator + "hnsw.index";
|
||||
}
|
||||
}
|
||||
46
codegpt-core/src/main/java/ee/carlrobert/vector/Word.java
Normal file
46
codegpt-core/src/main/java/ee/carlrobert/vector/Word.java
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
package ee.carlrobert.vector;
|
||||
|
||||
import com.github.jelmerk.knn.Item;
|
||||
import java.util.Arrays;
|
||||
|
||||
public class Word implements Item<Object, double[]> {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
private final String id;
|
||||
private final String meta;
|
||||
private final double[] vector;
|
||||
|
||||
public Word(String id, String meta, double[] vector) {
|
||||
this.id = id;
|
||||
this.meta = meta;
|
||||
this.vector = vector;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String id() {
|
||||
return id;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double[] vector() {
|
||||
return vector;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int dimensions() {
|
||||
return vector.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Word{" +
|
||||
"id='" + id + '\'' +
|
||||
", vector=" + Arrays.toString(vector) +
|
||||
'}';
|
||||
}
|
||||
|
||||
public String getMeta() {
|
||||
return meta;
|
||||
}
|
||||
}
|
||||
184
codegpt-core/src/main/java/grammar/PythonLexerBase.java
Normal file
184
codegpt-core/src/main/java/grammar/PythonLexerBase.java
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
package grammar;
|
||||
|
||||
import java.util.ArrayDeque;
|
||||
import java.util.Deque;
|
||||
import org.antlr.v4.runtime.CharStream;
|
||||
import org.antlr.v4.runtime.CommonToken;
|
||||
import org.antlr.v4.runtime.Lexer;
|
||||
import org.antlr.v4.runtime.Token;
|
||||
|
||||
/**
 * Hand-written base class for the generated Python lexer. It synthesizes the INDENT/DEDENT/
 * LINE_BREAK tokens Python's grammar needs by buffering extra tokens in a circular buffer and
 * draining them from {@link #nextToken()}.
 *
 * <p>NOTE(review): method names use UpperCamelCase (HandleNewLine, IncIndentLevel, ...) because
 * the generated grammar actions call them by these exact names — do not rename.
 */
public abstract class PythonLexerBase extends Lexer {
  public static int TabSize = 8;

  // The amount of opened braces, brackets and parenthesis.
  private int _opened;

  // The stack that keeps track of the indentation level.
  private final Deque<Integer> _indents = new ArrayDeque<>();

  // A circular buffer where extra tokens are pushed on (see the NEWLINE and WS lexer rules).
  private int _firstTokensInd;
  private int _lastTokenInd;
  private Token[] _buffer = new Token[32];
  private Token _lastToken;

  protected PythonLexerBase(CharStream input) {
    super(input);
  }

  @Override
  public void emit(Token token) {
    super.setToken(token);

    // If the buffer already holds a pending token, advance the write index; grow the buffer
    // when the write index catches up with the read index (buffer full).
    if (_buffer[_firstTokensInd] != null)
    {
      _lastTokenInd = IncTokenInd(_lastTokenInd);

      if (_lastTokenInd == _firstTokensInd)
      {
        // Enlarge buffer
        Token[] newArray = new Token[_buffer.length * 2];
        int destInd = newArray.length - (_buffer.length - _firstTokensInd);

        System.arraycopy(_buffer, 0, newArray, 0, _firstTokensInd);
        System.arraycopy(_buffer, _firstTokensInd, newArray, destInd, _buffer.length - _firstTokensInd);

        _firstTokensInd = destInd;
        _buffer = newArray;
      }
    }

    _buffer[_lastTokenInd] = token;
    _lastToken = token;
  }

  @Override
  public Token nextToken() {
    // Check if the end-of-file is ahead and there are still some DEDENTS expected.
    if (_input.LA(1) == EOF && _indents.size() > 0)
    {
      if (_buffer[_lastTokenInd] == null || _buffer[_lastTokenInd].getType() != PythonLexer.LINE_BREAK)
      {
        // First emit an extra line break that serves as the end of the statement.
        emit(PythonLexer.LINE_BREAK);
      }

      // Now emit as much DEDENT tokens as needed.
      while (_indents.size() != 0)
      {
        emit(PythonLexer.DEDENT);
        _indents.pop();
      }
    }

    Token next = super.nextToken();

    // No buffered tokens pending: pass the freshly lexed token straight through.
    if (_buffer[_firstTokensInd] == null)
    {
      return next;
    }

    // Drain one buffered token; `next` stays in the buffer via emit() and is returned later.
    Token result = _buffer[_firstTokensInd];
    _buffer[_firstTokensInd] = null;

    if (_firstTokensInd != _lastTokenInd)
    {
      _firstTokensInd = IncTokenInd(_firstTokensInd);
    }

    return result;
  }

  protected void HandleNewLine() {
    emit(PythonLexer.NEWLINE, HIDDEN, getText());

    char next = (char) _input.LA(1);

    // Process whitespaces in HandleSpaces
    if (next != ' ' && next != '\t' && IsNotNewLineOrComment(next))
    {
      ProcessNewLine(0);
    }
  }

  protected void HandleSpaces() {
    char next = (char) _input.LA(1);

    if ((_lastToken == null || _lastToken.getType() == PythonLexer.NEWLINE) && IsNotNewLineOrComment(next))
    {
      // Calculates the indentation of the provided spaces, taking the
      // following rules into account:
      //
      // "Tabs are replaced (from left to right) by one to eight spaces
      // such that the total number of characters up to and including
      // the replacement is a multiple of eight [...]"
      //
      // -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation

      int indent = 0;
      String text = getText();

      for (int i = 0; i < text.length(); i++) {
        indent += text.charAt(i) == '\t' ? TabSize - indent % TabSize : 1;
      }

      ProcessNewLine(indent);
    }

    emit(PythonLexer.WS, HIDDEN, getText());
  }

  // Called by the grammar when an opening brace/bracket/paren is seen; while _opened > 0,
  // newlines do not affect indentation (implicit line joining).
  protected void IncIndentLevel() {
    _opened++;
  }

  protected void DecIndentLevel() {
    if (_opened > 0) {
      --_opened;
    }
  }

  private boolean IsNotNewLineOrComment(char next) {
    return _opened == 0 && next != '\r' && next != '\n' && next != '\f' && next != '#';
  }

  // Emits LINE_BREAK, then INDENT or as many DEDENTs as the indentation change requires.
  private void ProcessNewLine(int indent) {
    emit(PythonLexer.LINE_BREAK);

    int previous = _indents.size() == 0 ? 0 : _indents.peek();

    if (indent > previous)
    {
      _indents.push(indent);
      emit(PythonLexer.INDENT);
    }
    else
    {
      // Possibly emit more than 1 DEDENT token.
      while (_indents.size() != 0 && _indents.peek() > indent)
      {
        emit(PythonLexer.DEDENT);
        _indents.pop();
      }
    }
  }

  private int IncTokenInd(int ind) {
    return (ind + 1) % _buffer.length;
  }

  private void emit(int tokenType) {
    emit(tokenType, DEFAULT_TOKEN_CHANNEL, "");
  }

  // Builds a synthetic token positioned just before the current char index and pushes it
  // through emit(Token) so it lands in the circular buffer.
  private void emit(int tokenType, int channel, String text) {
    int charIndex = getCharIndex();
    CommonToken token = new CommonToken(_tokenFactorySourcePair, tokenType, channel, charIndex - text.length(), charIndex - 1);
    token.setLine(getLine());
    token.setCharPositionInLine(getCharPositionInLine());
    token.setText(text);

    emit(token);
  }
}
|
||||
|
||||
26
codegpt-core/src/main/java/grammar/PythonParserBase.java
Normal file
26
codegpt-core/src/main/java/grammar/PythonParserBase.java
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
package grammar;
|
||||
|
||||
import org.antlr.v4.runtime.Parser;
|
||||
import org.antlr.v4.runtime.TokenStream;
|
||||
|
||||
public abstract class PythonParserBase extends Parser
|
||||
{
|
||||
public PythonVersion Version = PythonVersion.Autodetect;
|
||||
|
||||
protected PythonParserBase(TokenStream input) {
|
||||
super(input);
|
||||
}
|
||||
|
||||
protected boolean CheckVersion(int version) {
|
||||
return Version == PythonVersion.Autodetect || version == Version.getValue();
|
||||
}
|
||||
|
||||
protected void SetVersion(int requiredVersion) {
|
||||
if (requiredVersion == 2) {
|
||||
Version = PythonVersion.Python2;
|
||||
} else if (requiredVersion == 3) {
|
||||
Version = PythonVersion.Python3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
17
codegpt-core/src/main/java/grammar/PythonVersion.java
Normal file
17
codegpt-core/src/main/java/grammar/PythonVersion.java
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
package grammar;
|
||||
|
||||
/** Python language versions the parser distinguishes. */
public enum PythonVersion {
    /** Version not yet determined; detect it from the input. */
    Autodetect(0),
    /** Python 2.x. */
    Python2(2),
    /** Python 3.x. */
    Python3(3);

    // Numeric major version; 0 stands for "not yet detected".
    private final int value;

    PythonVersion(int value) {
        this.value = value;
    }

    /** Returns the numeric major version (0 for {@link #Autodetect}). */
    public int getValue() {
        return value;
    }
}
|
||||
166
codegpt-core/src/main/java/grammar/TypeScriptLexerBase.java
Normal file
166
codegpt-core/src/main/java/grammar/TypeScriptLexerBase.java
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
package grammar;
|
||||
|
||||
import java.util.ArrayDeque;
|
||||
import java.util.Deque;
|
||||
import org.antlr.v4.runtime.CharStream;
|
||||
import org.antlr.v4.runtime.Lexer;
|
||||
import org.antlr.v4.runtime.Token;
|
||||
|
||||
/**
|
||||
* All lexer methods that used in grammar (IsStrictMode)
|
||||
* should start with Upper Case Char similar to Lexer rules.
|
||||
*/
|
||||
public abstract class TypeScriptLexerBase extends Lexer
{
    /**
     * Stores values of nested modes. By default mode is strict or
     * defined externally (useStrictDefault)
     */
    private final Deque<Boolean> scopeStrictModes = new ArrayDeque<>();

    // Last token seen on the default channel; drives IsRegexPossible() and ProcessStringLiteral().
    private Token lastToken = null;
    /**
     * Default value of strict mode
     * Can be defined externally by setUseStrictDefault
     */
    private boolean useStrictDefault = false;
    /**
     * Current value of strict mode
     * Can be defined during parsing, see StringFunctions.js and StringGlobal.js samples
     */
    private boolean useStrictCurrent = false;
    /**
     * Keeps track of the current depth of nested template string backticks.
     * E.g. after the X in:
     *
     * `${a ? `${X
     *
     * templateDepth will be 2. This variable is needed to determine if a `}` is a
     * plain CloseBrace, or one that closes an expression inside a template string.
     */
    private int templateDepth = 0;

    /**
     * Keeps track of the depth of open- and close-braces. Used for expressions like:
     *
     * `${[1, 2, 3].map(x => { return x * 2;}).join("")}`
     *
     * where the '}' from `return x * 2;}` should not become a `TemplateCloseBrace`
     * token but rather a `CloseBrace` token.
     */
    private int bracesDepth = 0;

    public TypeScriptLexerBase(CharStream input) {
        super(input);
    }

    public boolean getStrictDefault() {
        return useStrictDefault;
    }

    public void setUseStrictDefault(boolean value) {
        useStrictDefault = value;
        useStrictCurrent = value;
    }

    public boolean IsStrictMode() {
        return useStrictCurrent;
    }

    public void StartTemplateString() {
        this.bracesDepth = 0;
    }

    // True when inside a template string but not inside a nested { ... } block within it.
    public boolean IsInTemplateString() {
        return this.templateDepth > 0 && this.bracesDepth == 0;
    }

    /**
     * Return the next token from the character stream and records this last
     * token in case it resides on the default channel. This recorded token
     * is used to determine when the lexer could possibly match a regex
     * literal. Also changes scopeStrictModes stack if tokenize special
     * string 'use strict';
     *
     * @return the next token from the character stream.
     */
    @Override
    public Token nextToken() {
        Token next = super.nextToken();

        if (next.getChannel() == Token.DEFAULT_CHANNEL) {
            // Keep track of the last token on the default channel.
            this.lastToken = next;
        }

        return next;
    }

    // Entering a block: inherit strictness from the enclosing scope (or the default) and
    // push it so ProcessCloseBrace can restore it.
    protected void ProcessOpenBrace()
    {
        bracesDepth++;
        useStrictCurrent = scopeStrictModes.size() > 0 && scopeStrictModes.peek() ? true : useStrictDefault;
        scopeStrictModes.push(useStrictCurrent);
    }

    protected void ProcessCloseBrace()
    {
        bracesDepth--;
        useStrictCurrent = scopeStrictModes.size() > 0 ? scopeStrictModes.pop() : useStrictDefault;
    }

    // A "use strict" directive right after an opening brace (or at file start) switches the
    // current scope to strict mode.
    protected void ProcessStringLiteral()
    {
        if (lastToken == null || lastToken.getType() == TypeScriptLexer.OpenBrace)
        {
            String text = getText();
            if (text.equals("\"use strict\"") || text.equals("'use strict'"))
            {
                if (scopeStrictModes.size() > 0)
                    scopeStrictModes.pop();
                useStrictCurrent = true;
                scopeStrictModes.push(useStrictCurrent);
            }
        }
    }

    protected void IncreaseTemplateDepth() {
        this.templateDepth++;
    }

    protected void DecreaseTemplateDepth() {
        this.templateDepth--;
    }

    /**
     * Returns {@code true} if the lexer can match a regex literal.
     */
    protected boolean IsRegexPossible() {

        if (this.lastToken == null) {
            // No token has been produced yet: at the start of the input,
            // no division is possible, so a regex literal _is_ possible.
            return true;
        }

        switch (this.lastToken.getType()) {
            case TypeScriptLexer.Identifier:
            case TypeScriptLexer.NullLiteral:
            case TypeScriptLexer.BooleanLiteral:
            case TypeScriptLexer.This:
            case TypeScriptLexer.CloseBracket:
            case TypeScriptLexer.CloseParen:
            case TypeScriptLexer.OctalIntegerLiteral:
            case TypeScriptLexer.DecimalLiteral:
            case TypeScriptLexer.HexIntegerLiteral:
            case TypeScriptLexer.StringLiteral:
            case TypeScriptLexer.PlusPlus:
            case TypeScriptLexer.MinusMinus:
                // After any of the tokens above, no regex literal can follow.
                return false;
            default:
                // In all other cases, a regex literal _is_ possible.
                return true;
        }
    }
}
|
||||
124
codegpt-core/src/main/java/grammar/TypeScriptParserBase.java
Normal file
124
codegpt-core/src/main/java/grammar/TypeScriptParserBase.java
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
package grammar;
|
||||
|
||||
import org.antlr.v4.runtime.Lexer;
|
||||
import org.antlr.v4.runtime.Parser;
|
||||
import org.antlr.v4.runtime.Token;
|
||||
import org.antlr.v4.runtime.TokenStream;
|
||||
|
||||
/**
|
||||
* All parser methods that used in grammar (p, prev, notLineTerminator, etc.)
|
||||
* should start with lower case char similar to parser rules.
|
||||
*/
|
||||
public abstract class TypeScriptParserBase extends Parser
{
    public TypeScriptParserBase(TokenStream input) {
        super(input);
    }

    /**
     * Short form for prev(String str)
     */
    protected boolean p(String str) {
        return prev(str);
    }

    /**
     * Whether the previous token value equals to @param str
     */
    protected boolean prev(String str) {
        return _input.LT(-1).getText().equals(str);
    }

    /**
     * Short form for next(String str)
     */
    protected boolean n(String str) {
        return next(str);
    }

    /**
     * Whether the next token value equals to @param str
     */
    protected boolean next(String str) {
        return _input.LT(1).getText().equals(str);
    }

    // True when no LineTerminator token sits on the HIDDEN channel directly before the
    // current token (used for automatic-semicolon-insertion style rules).
    protected boolean notLineTerminator() {
        return !here(TypeScriptParser.LineTerminator);
    }

    protected boolean notOpenBraceAndNotFunction() {
        int nextTokenType = _input.LT(1).getType();
        return nextTokenType != TypeScriptParser.OpenBrace && nextTokenType != TypeScriptParser.Function_;
    }

    protected boolean closeBrace() {
        return _input.LT(1).getType() == TypeScriptParser.CloseBrace;
    }

    /**
     * Returns {@code true} iff on the current index of the parser's
     * token stream a token of the given {@code type} exists on the
     * {@code HIDDEN} channel.
     *
     * @param type
     * the type of the token on the {@code HIDDEN} channel
     * to check.
     *
     * @return {@code true} iff on the current index of the parser's
     * token stream a token of the given {@code type} exists on the
     * {@code HIDDEN} channel.
     */
    private boolean here(final int type) {

        // Get the token ahead of the current index.
        int possibleIndexEosToken = this.getCurrentToken().getTokenIndex() - 1;
        Token ahead = _input.get(possibleIndexEosToken);

        // Check if the token resides on the HIDDEN channel and if it's of the
        // provided type.
        return (ahead.getChannel() == Lexer.HIDDEN) && (ahead.getType() == type);
    }

    /**
     * Returns {@code true} iff on the current index of the parser's
     * token stream a token exists on the {@code HIDDEN} channel which
     * either is a line terminator, or is a multi line comment that
     * contains a line terminator.
     *
     * @return {@code true} iff on the current index of the parser's
     * token stream a token exists on the {@code HIDDEN} channel which
     * either is a line terminator, or is a multi line comment that
     * contains a line terminator.
     */
    protected boolean lineTerminatorAhead() {

        // Get the token ahead of the current index.
        int possibleIndexEosToken = this.getCurrentToken().getTokenIndex() - 1;
        Token ahead = _input.get(possibleIndexEosToken);

        if (ahead.getChannel() != Lexer.HIDDEN) {
            // We're only interested in tokens on the HIDDEN channel.
            return false;
        }

        if (ahead.getType() == TypeScriptParser.LineTerminator) {
            // There is definitely a line terminator ahead.
            return true;
        }

        if (ahead.getType() == TypeScriptParser.WhiteSpaces) {
            // Get the token ahead of the current whitespaces.
            possibleIndexEosToken = this.getCurrentToken().getTokenIndex() - 2;
            ahead = _input.get(possibleIndexEosToken);
        }

        // Get the token's text and type.
        String text = ahead.getText();
        int type = ahead.getType();

        // Check if the token is, or contains a line terminator.
        return (type == TypeScriptParser.MultiLineComment && (text.contains("\r") || text.contains("\n"))) ||
            (type == TypeScriptParser.LineTerminator);
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue