VTK
|
Extracts text from documents based on their MIME type. More...
#include <vtkTextExtraction.h>
Public Types | |
typedef vtkTableAlgorithm | Superclass |
![]() | |
typedef vtkAlgorithm | Superclass |
![]() | |
typedef vtkObject | Superclass |
![]() | |
typedef vtkObjectBase | Superclass |
Public Member Functions | |
virtual const char * | GetClassName () |
virtual int | IsA (const char *type) |
void | PrintSelf (ostream &os, vtkIndent indent) |
void | ClearStrategies () |
void | PrependStrategy (vtkTextExtractionStrategy *strategy) |
void | AppendStrategy (vtkTextExtractionStrategy *strategy) |
virtual void | SetOutputArray (const char *) |
virtual char * | GetOutputArray () |
![]() | |
virtual int | ProcessRequest (vtkInformation *, vtkInformationVector **, vtkInformationVector *) |
vtkTable * | GetOutput () |
vtkTable * | GetOutput (int index) |
void | SetInput (vtkDataObject *obj) |
void | SetInput (int index, vtkDataObject *obj) |
![]() | |
int | HasExecutive () |
vtkExecutive * | GetExecutive () |
virtual void | SetExecutive (vtkExecutive *executive) |
virtual int | ModifyRequest (vtkInformation *request, int when) |
vtkInformation * | GetInputPortInformation (int port) |
vtkInformation * | GetOutputPortInformation (int port) |
int | GetNumberOfInputPorts () |
int | GetNumberOfOutputPorts () |
void | UpdateProgress (double amount) |
vtkInformation * | GetInputArrayInformation (int idx) |
void | RemoveAllInputs () |
vtkDataObject * | GetOutputDataObject (int port) |
virtual void | RemoveInputConnection (int port, vtkAlgorithmOutput *input) |
int | GetNumberOfInputConnections (int port) |
int | GetTotalNumberOfInputConnections () |
vtkAlgorithmOutput * | GetInputConnection (int port, int index) |
virtual void | Update () |
virtual void | UpdateInformation () |
virtual void | UpdateWholeExtent () |
void | ConvertTotalInputToPortConnection (int ind, int &port, int &conn) |
virtual double | ComputePriority () |
int | ProcessRequest (vtkInformation *request, vtkCollection *inInfo, vtkInformationVector *outInfo) |
virtual int | ComputePipelineMTime (vtkInformation *request, vtkInformationVector **inInfoVec, vtkInformationVector *outInfoVec, int requestFromOutputPort, unsigned long *mtime) |
virtual vtkInformation * | GetInformation () |
virtual void | SetInformation (vtkInformation *) |
virtual void | Register (vtkObjectBase *o) |
virtual void | UnRegister (vtkObjectBase *o) |
virtual void | SetAbortExecute (int) |
virtual int | GetAbortExecute () |
virtual void | AbortExecuteOn () |
virtual void | AbortExecuteOff () |
virtual void | SetProgress (double) |
virtual double | GetProgress () |
void | SetProgressText (const char *ptext) |
virtual char * | GetProgressText () |
virtual unsigned long | GetErrorCode () |
virtual void | SetInputArrayToProcess (int idx, int port, int connection, int fieldAssociation, const char *name) |
virtual void | SetInputArrayToProcess (int idx, int port, int connection, int fieldAssociation, int fieldAttributeType) |
virtual void | SetInputArrayToProcess (int idx, vtkInformation *info) |
virtual void | SetInputArrayToProcess (int idx, int port, int connection, const char *fieldAssociation, const char *attributeTypeorName) |
vtkDataObject * | GetInputDataObject (int port, int connection) |
virtual void | SetInputConnection (int port, vtkAlgorithmOutput *input) |
virtual void | SetInputConnection (vtkAlgorithmOutput *input) |
virtual void | AddInputConnection (int port, vtkAlgorithmOutput *input) |
virtual void | AddInputConnection (vtkAlgorithmOutput *input) |
vtkAlgorithmOutput * | GetOutputPort (int index) |
vtkAlgorithmOutput * | GetOutputPort () |
virtual void | SetReleaseDataFlag (int) |
virtual int | GetReleaseDataFlag () |
void | ReleaseDataFlagOn () |
void | ReleaseDataFlagOff () |
int | UpdateExtentIsEmpty (vtkDataObject *output) |
int | UpdateExtentIsEmpty (vtkInformation *pinfo, int extentType) |
![]() | |
virtual void | DebugOn () |
virtual void | DebugOff () |
unsigned char | GetDebug () |
void | SetDebug (unsigned char debugFlag) |
virtual void | Modified () |
virtual unsigned long | GetMTime () |
unsigned long | AddObserver (unsigned long event, vtkCommand *, float priority=0.0f) |
unsigned long | AddObserver (const char *event, vtkCommand *, float priority=0.0f) |
vtkCommand * | GetCommand (unsigned long tag) |
void | RemoveObserver (vtkCommand *) |
void | RemoveObservers (unsigned long event, vtkCommand *) |
void | RemoveObservers (const char *event, vtkCommand *) |
int | HasObserver (unsigned long event, vtkCommand *) |
int | HasObserver (const char *event, vtkCommand *) |
void | RemoveObserver (unsigned long tag) |
void | RemoveObservers (unsigned long event) |
void | RemoveObservers (const char *event) |
void | RemoveAllObservers () |
int | HasObserver (unsigned long event) |
int | HasObserver (const char *event) |
template<class U , class T > | |
unsigned long | AddObserver (unsigned long event, U observer, void(T::*callback)(), float priority=0.0f) |
template<class U , class T > | |
unsigned long | AddObserver (unsigned long event, U observer, void(T::*callback)(vtkObject *, unsigned long, void *), float priority=0.0f) |
int | InvokeEvent (unsigned long event, void *callData) |
int | InvokeEvent (const char *event, void *callData) |
int | InvokeEvent (unsigned long event) |
int | InvokeEvent (const char *event) |
![]() | |
const char * | GetClassName () const |
virtual void | Delete () |
virtual void | FastDelete () |
void | Print (ostream &os) |
void | SetReferenceCount (int) |
void | PrintRevisions (ostream &os) |
virtual void | PrintHeader (ostream &os, vtkIndent indent) |
virtual void | PrintTrailer (ostream &os, vtkIndent indent) |
int | GetReferenceCount () |
Static Public Member Functions | |
static vtkTextExtraction * | New () |
static int | IsTypeOf (const char *type) |
static vtkTextExtraction * | SafeDownCast (vtkObject *o) |
Additional Inherited Members | |
![]() | |
int | AbortExecute |
![]() | |
static vtkInformationIntegerKey * | PORT_REQUIREMENTS_FILLED () |
![]() | |
vtkInformation * | Information |
double | Progress |
char * | ProgressText |
unsigned long | ErrorCode |
![]() | |
static vtkExecutive * | DefaultExecutivePrototype |
![]() |
Extracts text from documents based on their MIME type.
Given a table containing document ids, URIs, MIME types and document contents, extracts plain text from each document, and generates a list of 'tags' that delineate ranges of text. The actual work of extracting text and generating tags is performed by an ordered list of vtkTextExtractionStrategy objects.
By default, vtkTextExtraction has just a single strategy for extracting plain text documents. Callers will almost certainly want to supplement or replace the default with their own strategies.
Inputs: Input port 0: (required) A vtkTable containing document ids, URIs, MIME types and document contents (which could be binary).
Outputs: Output port 0: The same table with an additional "text" column that contains the text extracted from each document. Output port 1: A table of document tags that includes "document", "uri", "begin", "end", and "type" columns.
Use SetInputArrayToProcess(0, ...) to specify the input table column that contains document ids (must be a vtkIdTypeArray). Default: "document".
Use SetInputArrayToProcess(1, ...) to specify the input table column that contains URIs (must be a vtkStringArray). Default: "uri".
Use SetInputArrayToProcess(2, ...) to specify the input table column that contains MIME types (must be a vtkStringArray). Default: "mime_type".
Use SetInputArrayToProcess(3, ...) to specify the input table column that contains document contents (must be a vtkStringArray). Default: "content".
Definition at line 80 of file vtkTextExtraction.h.
Definition at line 85 of file vtkTextExtraction.h.
|
protected |
|
protected |
|
static |
Create an object with Debug turned off, modified time initialized to zero, and reference counting on.
Reimplemented from vtkTableAlgorithm.
|
virtual |
Reimplemented from vtkTableAlgorithm.
|
static |
Return 1 if this class type is the same type of (or a subclass of) the named class. Returns 0 otherwise. This method works in combination with vtkTypeMacro found in vtkSetGet.h.
Reimplemented from vtkTableAlgorithm.
|
virtual |
Return 1 if this class is the same type of (or a subclass of) the named class. Returns 0 otherwise. This method works in combination with vtkTypeMacro found in vtkSetGet.h.
Reimplemented from vtkTableAlgorithm.
|
static |
Reimplemented from vtkTableAlgorithm.
|
virtual |
Methods invoked by print to print information about the object including superclasses. Typically not called by the user (use Print() instead) but used in the hierarchical print process to combine the output of several classes.
Reimplemented from vtkTableAlgorithm.
void vtkTextExtraction::ClearStrategies | ( | ) |
Clear the list of strategies.
void vtkTextExtraction::PrependStrategy | ( | vtkTextExtractionStrategy * | strategy | ) |
Prepend a strategy to the list of strategies. vtkTextExtraction assumes ownership of the supplied object.
void vtkTextExtraction::AppendStrategy | ( | vtkTextExtractionStrategy * | strategy | ) |
Append a strategy to the list of strategies. vtkTextExtraction assumes ownership of the supplied object.
|
virtual |
Specifies the name of the output text array. Default: "text".
|
virtual |
Specifies the name of the output text array. Default: "text".
|
protectedvirtual |
This is called by the superclass. This is the method you should override.
Reimplemented from vtkTableAlgorithm.