@Namespace(value="parquet::arrow") @Properties(inherit=parquet.class) public class FileReader extends Pointer
In its simplest form, a user who wants to read the whole Parquet file at once
can use the FileReader::ReadTable method.
More advanced users who also want to implement parallelism on top of a
single Parquet file should do this at the RowGroup level. For this, they can
call FileReader::RowGroup(i)->ReadTable to receive only the specified
RowGroup as a table.
In the most advanced situation, where a consumer wants to independently read
RowGroups in parallel and consume each column individually, they can call
FileReader::RowGroup(i)->Column(j)->Read
and receive an arrow::Column
instance.
The parquet format supports an optional integer field_id which can be assigned
to a field. Arrow will convert these field IDs to a metadata key named
PARQUET:field_id on the appropriate field.
Nested classes inherited from class Pointer: Pointer.CustomDeallocator, Pointer.Deallocator, Pointer.NativeDeallocator, Pointer.ReferenceCounter
Constructor and Description |
---|
FileReader(Pointer p)
Pointer cast constructor.
|
Modifier and Type | Method and Description |
---|---|
Status |
GetColumn(int i,
ColumnReader out) |
Status |
GetRecordBatchReader(int[] row_group_indices,
int[] column_indices,
RecordBatchReader out) |
Status |
GetRecordBatchReader(int[] row_group_indices,
RecordBatchReader out) |
Status |
GetRecordBatchReader(IntBuffer row_group_indices,
IntBuffer column_indices,
RecordBatchReader out) |
Status |
GetRecordBatchReader(IntBuffer row_group_indices,
RecordBatchReader out) |
Status |
GetRecordBatchReader(IntPointer row_group_indices,
IntPointer column_indices,
RecordBatchReader out)
Return a RecordBatchReader of row groups selected from
row_group_indices, whose columns are selected by column_indices.
|
Status |
GetRecordBatchReader(IntPointer row_group_indices,
RecordBatchReader out)
Return a RecordBatchReader of row groups selected from row_group_indices.
|
Status |
GetSchema(Schema out)
Return the Arrow schema for all the columns.
|
static Status |
Make(MemoryPool pool,
ParquetFileReader reader,
ArrowReaderProperties properties,
FileReader out)
Factory function to create a FileReader from a ParquetFileReader and properties
|
static Status |
Make(MemoryPool pool,
ParquetFileReader reader,
FileReader out)
Factory function to create a FileReader from a ParquetFileReader
|
SchemaManifest |
manifest() |
int |
num_row_groups()
The number of row groups in the file
|
ParquetFileReader |
parquet_reader() |
ArrowReaderProperties |
properties() |
Status |
ReadColumn(int i,
ChunkedArray out)
Read a column as a whole into a chunked array.
|
Status |
ReadRowGroup(int i,
int[] column_indices,
Table out) |
Status |
ReadRowGroup(int i,
IntBuffer column_indices,
Table out) |
Status |
ReadRowGroup(int i,
IntPointer column_indices,
Table out) |
Status |
ReadRowGroup(int i,
Table out) |
Status |
ReadRowGroups(int[] row_groups,
int[] column_indices,
Table out) |
Status |
ReadRowGroups(int[] row_groups,
Table out) |
Status |
ReadRowGroups(IntBuffer row_groups,
IntBuffer column_indices,
Table out) |
Status |
ReadRowGroups(IntBuffer row_groups,
Table out) |
Status |
ReadRowGroups(IntPointer row_groups,
IntPointer column_indices,
Table out) |
Status |
ReadRowGroups(IntPointer row_groups,
Table out) |
Status |
ReadSchemaField(int i,
ChunkedArray out) |
Status |
ReadTable(int[] column_indices,
Table out) |
Status |
ReadTable(IntBuffer column_indices,
Table out) |
Status |
ReadTable(IntPointer column_indices,
Table out)
Read the given columns into a Table.
The indicated column indices are relative to the schema.
|
Status |
ReadTable(Table out)
Read all columns into a Table
|
RowGroupReader |
RowGroup(int row_group_index)
Return a reader for the RowGroup; this object must not outlive the
FileReader.
|
Status |
ScanContents(int[] columns,
int column_batch_size,
long[] num_rows) |
Status |
ScanContents(IntBuffer columns,
int column_batch_size,
LongBuffer num_rows) |
Status |
ScanContents(IntPointer columns,
int column_batch_size,
LongPointer num_rows)
Scan file contents with one thread and return the number of rows
|
void |
set_batch_size(long batch_size)
Set number of records to read per batch for the RecordBatchReader.
|
void |
set_use_threads(boolean use_threads)
Set whether to use multiple threads during reads of multiple columns.
|
address, asBuffer, asByteBuffer, availablePhysicalBytes, calloc, capacity, capacity, close, deallocate, deallocate, deallocateReferences, deallocator, deallocator, equals, fill, formatBytes, free, getDirectBufferAddress, getPointer, getPointer, getPointer, getPointer, hashCode, interruptDeallocatorThread, isNull, isNull, limit, limit, malloc, maxBytes, maxPhysicalBytes, memchr, memcmp, memcpy, memmove, memset, offsetAddress, offsetof, offsetof, parseBytes, physicalBytes, physicalBytesInaccurate, position, position, put, realloc, referenceCount, releaseReference, retainReference, setNull, sizeof, sizeof, toString, totalBytes, totalCount, totalPhysicalBytes, withDeallocator, zero
public FileReader(Pointer p)
Pointer cast constructor. Invokes Pointer(Pointer).
@ByVal public static Status Make(MemoryPool pool, @UniquePtr @ByVal ParquetFileReader reader, @Const @ByRef ArrowReaderProperties properties, @UniquePtr FileReader out)
@ByVal public static Status Make(MemoryPool pool, @UniquePtr @ByVal ParquetFileReader reader, @UniquePtr FileReader out)
@ByVal public Status GetColumn(int i, @UniquePtr ColumnReader out)
@ByVal public Status GetSchema(@SharedPtr Schema out)
@ByVal public Status ReadColumn(int i, @SharedPtr ChunkedArray out)
@ByVal public Status ReadSchemaField(int i, @SharedPtr ChunkedArray out)
@ByVal public Status GetRecordBatchReader(@StdVector IntPointer row_group_indices, @UniquePtr RecordBatchReader out)
@ByVal public Status GetRecordBatchReader(@StdVector IntBuffer row_group_indices, @UniquePtr RecordBatchReader out)
@ByVal public Status GetRecordBatchReader(@StdVector int[] row_group_indices, @UniquePtr RecordBatchReader out)
@ByVal public Status GetRecordBatchReader(@StdVector IntPointer row_group_indices, @StdVector IntPointer column_indices, @UniquePtr RecordBatchReader out)
@ByVal public Status GetRecordBatchReader(@StdVector IntBuffer row_group_indices, @StdVector IntBuffer column_indices, @UniquePtr RecordBatchReader out)
@ByVal public Status GetRecordBatchReader(@StdVector int[] row_group_indices, @StdVector int[] column_indices, @UniquePtr RecordBatchReader out)
@ByVal public Status ReadTable(@SharedPtr Table out)
@ByVal public Status ReadTable(@StdVector IntPointer column_indices, @SharedPtr Table out)
@ByVal public Status ReadTable(@StdVector IntBuffer column_indices, @SharedPtr Table out)
@ByVal public Status ReadTable(@StdVector int[] column_indices, @SharedPtr Table out)
@ByVal public Status ReadRowGroup(int i, @StdVector IntPointer column_indices, @SharedPtr Table out)
@ByVal public Status ReadRowGroup(int i, @StdVector IntBuffer column_indices, @SharedPtr Table out)
@ByVal public Status ReadRowGroup(int i, @StdVector int[] column_indices, @SharedPtr Table out)
@ByVal public Status ReadRowGroup(int i, @SharedPtr Table out)
@ByVal public Status ReadRowGroups(@StdVector IntPointer row_groups, @StdVector IntPointer column_indices, @SharedPtr Table out)
@ByVal public Status ReadRowGroups(@StdVector IntBuffer row_groups, @StdVector IntBuffer column_indices, @SharedPtr Table out)
@ByVal public Status ReadRowGroups(@StdVector int[] row_groups, @StdVector int[] column_indices, @SharedPtr Table out)
@ByVal public Status ReadRowGroups(@StdVector IntPointer row_groups, @SharedPtr Table out)
@ByVal public Status ReadRowGroups(@StdVector IntBuffer row_groups, @SharedPtr Table out)
@ByVal public Status ReadRowGroups(@StdVector int[] row_groups, @SharedPtr Table out)
@ByVal public Status ScanContents(@StdVector IntPointer columns, int column_batch_size, @Cast(value="int64_t*") LongPointer num_rows)
@ByVal public Status ScanContents(@StdVector IntBuffer columns, int column_batch_size, @Cast(value="int64_t*") LongBuffer num_rows)
@ByVal public Status ScanContents(@StdVector int[] columns, int column_batch_size, @Cast(value="int64_t*") long[] num_rows)
@SharedPtr public RowGroupReader RowGroup(int row_group_index)
public int num_row_groups()
public ParquetFileReader parquet_reader()
public void set_use_threads(@Cast(value="bool") boolean use_threads)
public void set_batch_size(@Cast(value="int64_t") long batch_size)
@Const @ByRef public ArrowReaderProperties properties()
@Const @ByRef public SchemaManifest manifest()
Copyright © 2022. All rights reserved.