import { Bullets } from 'components/Bullets/Bullets';
import { Italic } from 'components/Italic/Italic';
import { Chapter } from 'components/Chapter/Chapter';
import { ReactElement } from 'react';
import { Link, RouteComponentProps } from 'react-router-dom';
import { RouteBuilder } from 'utils/RouteBuilder';
import { DocFrame } from '../DocFrame/DocFrame';

/**
 * Props accepted by {@link DatasetsDoc}.
 *
 * This is a plain alias of react-router's `RouteComponentProps`; the component
 * declares no props of its own and spreads everything it receives onto `DocFrame`.
 */
export type DatasetsDocProps = RouteComponentProps;

/**
 * Static documentation page describing datasets: what they are, how variables
 * parameterize them, which processor types build them, and how the schema
 * shapes the final output.
 *
 * The received route props are spread onto `DocFrame` unchanged.
 *
 * Whitespace note: JSX drops whitespace that contains a line break next to an
 * element, so wherever a sentence ends at a line break immediately before an
 * inline element (`Link`, `Italic`) an explicit `{' '}` is used to keep exactly
 * one space in the rendered text.
 *
 * @param props - Route props forwarded to the surrounding `DocFrame`.
 * @returns The rendered "Datasets" documentation page.
 */
export const DatasetsDoc = (props: DatasetsDocProps): ReactElement => {
    return (
        <DocFrame title="Datasets" {...props}>
            <Chapter>
                <Chapter.Paragraph>
                    A dataset is comprised of a set of values formatted in rows and columns, similar to
                    a database table. A dataset is the result of a query of data from a source that is
                    optionally processed thereafter. For example, a dataset may be the result of fetching
                    data from a REST API and then processing the results to parse and transform
                    the data. The last step in creating a dataset is defining a schema, that includes the
                    column names, types, descriptions, and order. Datasets are made available to other parts of
                    the platform, such as <Link to={RouteBuilder.MAIN_DOCUMENTATION_MODELS}>models</Link> and{' '}
                    <Link to={RouteBuilder.MAIN_DOCUMENTATION_WORKFLOWS}>workflows</Link>,
                    through queries. Variables can be defined and used in any of the processing steps and those variables
                    can be exposed to allow users to configure aspects of the query to affect the final result.
                </Chapter.Paragraph>
                <Chapter.Section title="Variables">
                    <Chapter.Paragraph>
                        <Italic>Variables</Italic> are value placeholders that can be changed by users to affect
                        processing. Variables have a name, description, and type and can be used in several parts
                        of the system to link components and parameterize how a component works by allowing users
                        to change their values to get different results. For example, a variable can be used to change
                        a date in a query to change the range of data retrieved. Any number of variables may be
                        defined based on the complexity of a query. These variables are exposed to users via the
                        query user interface allowing for the query inputs to be altered to change the results.
                    </Chapter.Paragraph>
                    <Chapter.Paragraph>
                        A variable is referenced using curly brackets, prefixed by a dollar sign. For example, if
                        a variable called "date" has been defined, it can be referenced using the syntax $&#123;date&#125;
                        elsewhere, for example in a query to fetch data.
                    </Chapter.Paragraph>
                </Chapter.Section>
                <Chapter.Section title="Processors">
                    <Chapter.Paragraph>
                        In order to build a dataset, a sequence of <Italic>processors</Italic> is used to fetch and
                        manipulate data to prepare the final output. Processors are units of work that
                        perform a single operation on data and pass the results on to the next processor.
                        The output of a processor is internally normalized so that it can be automatically
                        used as input to the next processor. In some special cases, the user will need to
                        configure the output, but this is rarely required. There are four types of processors
                        that can be configured:
                    </Chapter.Paragraph>
                    <Bullets
                        type="ordered"
                        items={[
                            "Readers",
                            "Parsers",
                            "Transformers",
                            "Writers"
                        ]}
                    />
                    <Chapter.Paragraph>
                        <Italic>Readers</Italic> connect to external data sources and pull in raw data. For example,
                        a REST API reader will connect to a 3rd party API and get a response. <Italic>Parsers</Italic> are
                        typically used to dissect reader responses and extract some desired data. <Italic>Transformers</Italic> can
                        make modifications to the data in order to adjust values, add fields, or reorganize the data. For example,
                        a date field may be translated to a different format or a set of rows can be reversed.{' '}
                        <Italic>Writers</Italic> push data to a destination, such as a database or an email address. To construct
                        a dataset, processors are usually used in the above order, with some processor types as optional if they
                        are not needed.
                    </Chapter.Paragraph>
                    <Chapter.Paragraph>
                        Some of the supported Readers include fetching data from a REST API endpoint, scraping data from a web page,
                        reading data from an existing model (individual values or data ranges), fetching data from an existing dataset,
                        and internal data stores, including timeseries and key/value storage.
                    </Chapter.Paragraph>
                    <Chapter.Paragraph>
                        When building a dataset, the first processor is typically a Reader (gets some data to work with) and the
                        last processor is a Writer (sends the data to a destination). Between the two are usually one or more
                        Parsers or Transformers.
                    </Chapter.Paragraph>
                </Chapter.Section>
                <Chapter.Section title="Schema">
                    <Chapter.Paragraph>
                        The final step in building a dataset is the definition of the <Italic>schema</Italic>. A schema will
                        refine the final output by assigning field names, descriptions, and order. Fields can also be selectively
                        removed if they are not desired. A default schema will automatically be generated based on sample data
                        that is generated from the sequence of processors. From the default, you can then refine the schema
                        by changing the properties of any of the fields.
                    </Chapter.Paragraph>
                </Chapter.Section>
            </Chapter>
        </DocFrame>
    );
};
