/*!
 * Copyright 2016 The ANTLR Project. All rights reserved.
 * Licensed under the BSD-3-Clause license. See LICENSE file in the project root for license information.
 */
import { CodePointCharStream } from "./CodePointCharStream";
/** This namespace is the primary interface for creating {@link CharStream}s
 * from a variety of sources as of 4.7. The motivation was to support
 * Unicode code points > U+FFFF. {@link ANTLRInputStream} and
 * {@link ANTLRFileStream} are now deprecated in favor of the streams created
 * by this interface.
 *
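 * DEPRECATED: {@code new ANTLRFileStream("myinputfile")}
 * NEW: {@code CharStreams.fromFileName("myinputfile")}
 *
 * NOTE(review): this namespace declares only `fromString`; the `fromFileName`
 * example above appears to be carried over from the Java documentation - confirm.
 *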
 * WARNING: If you use both the deprecated and the new streams, you will see
 * a nontrivial performance degradation. This speed hit is because the
 * {@link Lexer}'s internal code goes from a monomorphic to a megamorphic
 * dynamic dispatch to get characters from the input stream. Java's
 * on-the-fly compiler (JIT) is unable to perform the same optimizations,
 * so stick with either the old or the new streams if performance is
 * a primary concern. See the extreme debugging and spelunking
 * needed to identify this issue in our timing rig:
 *
 * https://github.com/antlr/antlr4/pull/1781
 *
 * The ANTLR character streams still buffer all the input when you create
 * the stream, as they have done for ~20 years. If you need unbuffered
 * access, please note that it becomes challenging to create
 * parse trees. The parse tree has to point to tokens which will either
 * point into a stale location in an unbuffered stream or you have to copy
 * the characters out of the buffer into the token. That defeats the purpose
 * of unbuffered input. Per the ANTLR book, unbuffered streams are primarily
 * useful for processing infinite streams *during the parse.*
 *
 * The new streams also use 8-bit buffers when possible so this new
 * interface supports character streams that use half as much memory
 * as the old {@link ANTLRFileStream}, which assumed 16-bit characters.
 *
 * A big shout out to Ben Hamilton (github bhamiltoncx) for his superhuman
 * efforts across all targets to get true Unicode 3.1 support for U+10FFFF.
 *
 * @since 4.7
 */
export declare namespace CharStreams {
    /**
     * Creates a {@link CharStream} from a string, optionally recording the
     * name of the source the string came from.
     *
     * This single signature replaces two overloads that differed only in the
     * trailing `sourceName` parameter; both existing call shapes
     * (`fromString(s)` and `fromString(s, sourceName)`) remain valid.
     *
     * @param s - The text to expose as a character stream.
     * @param sourceName - The name of the source from which `s` came. What the
     * stream reports when this is omitted is decided by the implementation,
     * which is not part of this declaration.
     * @returns A {@link CodePointCharStream} over `s`.
     */
    function fromString(s: string, sourceName?: string): CodePointCharStream;
}
|