commit 0afeca617b728093633bef2531fc6cacca819180 Author: perro Date: Wed Jan 25 17:59:39 2023 -0800 Init diff --git a/README.md b/README.md new file mode 100644 index 0000000..d70ce79 --- /dev/null +++ b/README.md @@ -0,0 +1,411 @@ +--- +warn: This is just a prototype. +--- + +# YASD, Yet Another Schema Definition + +YASD is a YAML format for human writable XSDs (XML Schema Definition), humans +declare what is indispensable, leaving the machines to do the rest of the +unreadable ``. + +## Structure + +General structure: + + schema: + SCHM + elements: + - ELMT + ... + attributes: + - ATTR + ... + groups: + - GRPS + +Schema (SCHM) structure: + + elementFormDefault: qualified|unqualified + targetNamespace: http://a.link + xmlns: http://a.link + schemaLocation: http://a-link-to.xsd + version: 0.1 + +Element (ELMT) structure: + + name: element_name + description: Element description + type: simple|empty|no_text|no_elements|mixed + datatype: string|integer|decimal|date|time|language|duration|token|boolean|byte|int|double|float|long|short|normalizedString|dateTime|gDay|gMonth|gMonthDay|gYear|gYearMonth|negativeInteger|nonNegativeInteger|nonPositiveInteger|positiveInteger|unsignedLong|unsignedInt|unsignedShort|unsignedByte|anyURI|base64Binary|hexBinary|Name|QName|NCName|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|NOTATION + default: a_value + fixed: a_value + restriction: + CONSTRAIN + ... + attribute: + - attribute_name + ... + children_order: all|choice|sequence + children: + - name: element_name + maxOccurs: INTEGER + minOccurs: INTEGER + - group: group_name + ... 
+ +Attribute (ATTR) structure: + + name: attribute_name + description: Attribute description + datatype: string|integer|decimal|date|time|language|duration|token|boolean|byte|int|double|float|long|short|normalizedString|dateTime|gDay|gMonth|gMonthDay|gYear|gYearMonth|negativeInteger|nonNegativeInteger|nonPositiveInteger|positiveInteger|unsignedLong|unsignedInt|unsignedShort|unsignedByte|anyURI|base64Binary|hexBinary|Name|QName|NCName|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|NOTATION + default: a_value + fixed: a_value + use: required + restriction: + CONSTRAIN + ... + +Group (GRPS) structure: + + name: group_name + attribute_group: true|false + children_order: all|choice|sequence + children: + - name: element_name + maxOccurs: INTEGER|unbounded + minOccurs: INTEGER + +## Reference + +### `elementFormDefault`[^1] + +Indicates that any elements used by the XML instance document which were +declared in this schema must be namespace qualified. + +Optional; if not present by default is `unqualified`. + +### `targetNamespace`[^1] + +Indicates that the elements defined by this schema come from the specified URL +namespace. + +Optional. + +### `xmlns`[^1] + +Indicates that the default namespace is the specified URL. + +Mandatory. + +### `schemaLocation`[^1] + +Indicates the location of the XML schema to use for that namespace. + +Optional. + +### `version` + +Indicates XML schema version. + +Mandatory. + +### `name` + +Indicates element or attribute name. + +Mandatory. + +For elements, its name is commonly known as tag name. + +Naming rules: + +- Element names are case-sensitive +- Element names must start with a letter or underscore +- Element names cannot start with the letters xml (or XML, or Xml, etc) +- Element names can contain letters, digits, hyphens, underscores, and periods +- Element names cannot contain spaces + +### `description` + +Indicates element or attribute description in human readable form. + +Optional. + +### `type` + +Indicates element type. 
+ +Mandatory. + +Allowed types: + +- `simple`. Only text node allowed. +- `empty`. Only attributes allowed. +- `no_text`. No text nodes allowed. +- `no_elements`. No children elements allowed. +- `mixed`. Children elements, text node and attributes allowed. + +Chart: + +|type | children elements | text node | attributes | +|------------|:-----------------:|:----------:|:----------:| +|simple | ✗ | ✓ | ✗ | +|empty | ✗ | ✗ | ✓ | +|no_text | ✓ | ✗ | ✓ | +|no_elements | ✗ | ✓ | ✓ | +|mixed | ✓ | ✓ | ✓ | + +> **Note**: attributes are never mandatory; they could be zero or more. + +### `datatype` + +Indicates element or attribute data types.[^2] + +Only mandatory for 'simple' and 'no_elements' elements, and attributes. + +Allowed String Data Types: + +- `ENTITIES`.   +- `ENTITY`.   +- `ID`. A string that represents the ID attribute in XML (only used with + schema attributes). +- `IDREF`. A string that represents the IDREF attribute in XML (only used with + schema attributes). +- `IDREFS`. +- `language`. A string that contains a valid language id. +- `Name`. A string that contains a valid XML name. +- `NCName`. +- `NMTOKEN`. A string that represents the NMTOKEN attribute in XML (only used + with schema attributes). +- `NMTOKENS`. +- `normalizedString`. A string that does not contain line feeds, carriage + returns, or tabs. +- `QName`. +- `string`. A string. +- `token`. A string that does not contain line feeds, carriage returns, tabs, + leading or trailing spaces, or multiple spaces. + +Allowed Date and Time Data Types: + +- `date`. Defines a date value. +- `dateTime`. Defines a date and time value. +- `duration`. Defines a time interval. +- `gDay`. Defines a part of a date - the day (DD). +- `gMonth`. Defines a part of a date - the month (MM). +- `gMonthDay`. Defines a part of a date - the month and day (MM-DD). +- `gYear`. Defines a part of a date - the year (YYYY). +- `gYearMonth`. Defines a part of a date - the year and month (YYYY-MM). +- `time`. 
Defines a time value. + +Allowed Numeric Data Types: + +- `byte`. A signed 8-bit integer. +- `decimal`. A decimal value. +- `int`. A signed 32-bit integer. +- `integer`. An integer value. +- `long`. A signed 64-bit integer. +- `negativeInteger`. An integer containing only negative values (..,-2,-1). +- `nonNegativeInteger`. An integer containing only non-negative values + (0,1,2,..). +- `nonPositiveInteger`. An integer containing only non-positive values + (..,-2,-1,0). +- `positiveInteger`. An integer containing only positive values (1,2,..). +- `short`. A signed 16-bit integer. +- `unsignedLong`. An unsigned 64-bit integer. +- `unsignedInt`. An unsigned 32-bit integer. +- `unsignedShort`. An unsigned 16-bit integer. +- `unsignedByte`. An unsigned 8-bit integer. + +Allowed Miscellaneous Data Types: + +- `anyURI`. +- `base64Binary`. +- `boolean`. +- `double`. +- `float`. +- `hexBinary`. +- `NOTATION`. +- `QName`. + +### `default` + +Indicates default value when element or attribute is empty. + +Optional. + +### `fixed` + +Indicates fixed value to element or attribute. + +Optional. + +### `use` + +Indicates that the attribute is required. + +Optional. + +Only `required` is valid as value. + +### `restriction`[^3] + +Indicates accepted constrained values for element or attribute. + +Optional; if present, must contain at least one constrain. + +Not allowed for 'empty' and 'no_text' elements. + +Allowed constrains: + +- `enumeration`. Specifies a list of acceptable values. +- `fractionDigits`. Specifies the maximum number of decimal places allowed; + must be equal to or greater than zero. +- `length`. Specifies the exact number of characters or list items allowed; + must be equal to or greater than zero. +- `maxExclusive`. Specifies the upper bounds for numeric values (the value + must be less than this value). +- `maxInclusive`. Specifies the upper bounds for numeric values (the value + must be less than or equal to this value). +- `maxLength`. 
Specifies the maximum number of characters or list items + allowed; must be equal to or greater than zero. +- `minExclusive`. Specifies the lower bounds for numeric values (the value + must be greater than this value). +- `minInclusive`. Specifies the lower bounds for numeric values (the value + must be greater than or equal to this value). +- `minLength`. Specifies the minimum number of characters or list items + allowed; must be equal to or greater than zero. +- `pattern`. Defines the exact sequence of characters that are acceptable. +- `totalDigits`. Specifies the exact number of digits allowed; must be greater + than zero. +- `whiteSpace`. Specifies how white space (line feeds, tabs, spaces, and + carriage returns) is handled; accepted values are + `preserve|replace|collapse`. + +### `attribute` + +Indicates a list of attributes for an element. + +### `children_order` + +Indicates order indicators for children elements. + +Mandatory in `no_text` or `mixed` elements, and group. + +Allowed values: + +- `all`. Children elements can occur in any order. +- `choice`. Only one child element can occur. +- `sequence`. Children elements must occur in specified order. + +### `children` + +Indicates a list of children elements. + +Mandatory in `no_text` or `mixed` elements, and group. + +### `maxOccurs` + +Indicates max number of times a child element can occur. + +Optional; if not present by default is `1`. + +Valid values are non negative integer or `unbounded` for unlimited number of +times. + +### `minOccurs` + +Indicates min number of times a child element can occur. + +Optional; if not present by default is `1`. + +Valid value is non negative integer. + +### `group` + +Indicates group name. + +Optional. + +### `groups` + +Indicates element or attribute groups for schema. + +Optional. + +### `attribute_group` + +Indicates if group is an attribute group. + +Optional; if not present by default is `false`. + +Allowed values: + +- `true`. +- `false`. 
+ +### `elements` + +Indicates elements for schema. + +Mandatory. + +### `attributes` + +Indicates attributes for schema. + +Optional. + +### `schema` + +Indicates schema general information. + +Mandatory. + +## Command-Line Interface + +``` +yasd [-q | -l] CMD + +CMD (mandatory and only one at once): + +convert [-f xsd | json] FILE +check FILE +sample [-n INT] FILE +man +help + +ARGS: + +-q | --quiet No prints; doesn't apply for man and help. +-l | --logs Writes logs. +-f | --format Format type; could be 'xsd' or 'json'; 'xsd' by default. +-n | --num Number of samples; 1 by default. +convert Converts YASD file to XSD or JSON format; XSD by default. +check Validates YASD file. +sample Writes XML samples according to schema. +man Prints README.md +help Prints this. +FILE YAML file; file extension doesn't matter. + +EXAMPLES: + +yasd convert schema.yasd +yasd --quiet --logs convert schema.yasd +yasd -ql -f json convert schema.yasd +yasd check schema.yaml +yasd sample schema.yasd +yasd sample -n 10 schema.yaml +yasd man +yasd help +``` + +[^1]: The following explanation is practically a copy-paste from "XSD - The + Element", W3Schools. Cfr. + . + +[^2]: The following explanation is practically a copy-paste from "XSD Data + Types", W3Schools. Cfr. . + +[^3]: The following explanation is practically a copy-paste from "XSD + Restrictions/Facets", W3Schools. Cfr. + . diff --git a/yasd.py b/yasd.py new file mode 100644 index 0000000..8c4755e --- /dev/null +++ b/yasd.py @@ -0,0 +1,348 @@ +#!/usr/bin/env python +# (c) 2023 Perro Tuerto . +# Founded by Mexican Academy of Language . +# Licensed under GPLv3 . + +import sys +import yaml +import argparse +from pathlib import Path +from bs4 import BeautifulSoup +from bs4.formatter import XMLFormatter + + +class YASD: + """ + Performs YASD actions. 
+ """ + + def do(quiet, log, samples, action, filepath): + yasd = YASD(quiet, log, samples, filepath) + if action == "convert": + yasd.convert() + elif action == "sample": + yasd.sample() + elif action == "document": + yasd.document() + + def __init__(self, quiet=False, log=False, samples=1, filepath=None): + """ + Inits YASD object. + + :param quiet: If messages are print or not; 'False' by default + :type quiet: False or True + :param log: If messages are write in a file or not; 'False' by default + :type log: False or True + :param int samples: Quantity of XML samples; '1' by default + :param filepath: YASD file path; 'None' by default + :type filepath: None or Path + """ + self.msgr = YASDMessenger(quiet=quiet, log=log) + valid_input = YASDCheck(self.msgr, filepath) + self.filepath = valid_input.filepath + self.yaml = valid_input.yaml + self.samples = samples + self.soups = { + "schema": "", + "elements": "", + "attributes": "", + "groups": "", + } + + def convert(self): + """ + Converts YASD to XSD. + """ + self.__build_schema() + self.__build_elements() + self.__build_attributes() + self.__write() + + def sample(self): + """ + Generates XML samples from YASD. 
+ """ + print(f"TODO: {self.samples} samples") + + def document(self): + """ + Generates MD documentation + """ + print("TODO: MD document from :", self.__dict__) + + def __build_schema(self): + unwanted = "version schemaLocation".split() + for key in unwanted: + self.yaml["schema"].pop(key) + soup = BeautifulSoup(parser="xml") + schema = soup.new_tag("schema", nsprefix="xs") + schema["xmlns:xs"] = "http://www.w3.org/2001/XMLSchema" + soup.append(schema) + for key, val in self.yaml["schema"].items(): + schema[key] = val + self.soups["schema"] = soup + + def __build_elements(self): + soup = BeautifulSoup(parser="xml") + for el in self.yaml["elements"]: + el = self.__sanitize(el) + { + "simple": self.__build_element_simple(soup, el), + "empty": self.__build_element_empty(soup, el), + "no_text": self.__build_element_no_text(soup, el), + "no_elements": self.__build_element_no_elements(soup, el), + "mixed": self.__build_element_mixed(soup, el), + }[el["type"]] + if len(soup.contents) > 0: + print(len(soup.contents)) + self.soups["elements"] = soup + + def __build_element_simple(self, main_soup, el): + # element = self.__build_simple(el) + # main_soup.append(element) + ... + + def __build_element_empty(self, main_soup, el): + ... + + def __build_element_no_text(self, main_soup, el): + ... + + def __build_element_no_elements(self, main_soup, el): + ... + + def __build_element_mixed(self, main_soup, el): + ... 
+ + def __build_attributes(self): + soup = BeautifulSoup(parser="xml") + for el in self.yaml["attributes"]: + element = self.__build_simple(self.__sanitize(el), tag="attribute") + soup.append(element) + self.soups["attributes"] = soup + + def __build_simple(self, el, tag="element"): + soup = BeautifulSoup(parser="xml") + element = soup.new_tag(tag, nsprefix="xs") + soup.append(element) + for key, val in el.items(): + if key == "datatype": + element["type"] = f"xs:{val}" + elif key == "restriction": + self.__build_restriction(element, val) + else: + element[key] = val + return soup + + def __build_restriction(self, root, elements): + soup = BeautifulSoup(parser="xml") + simple_type = soup.new_tag("simpleType", nsprefix="xs") + restriction = soup.new_tag("restriction", nsprefix="xs") + restriction["base"] = self.__get_base(list(elements.keys())[0]) + for key, val in elements.items(): + constrain = soup.new_tag(key, nsprefix="xs", value=val) + restriction.append(constrain) + simple_type.append(restriction) + root.append(simple_type) + + def __sanitize(self, el): + """ + Prepares element or attribute for conversion. + + It eliminates 'description' key. + + :param dict el: Element or attribute as a dictionary + :return: Sanitized element or attribute + :rtype: dict + """ + if "description" in el.keys(): + del el["description"] + return el + + def __get_base(self, key): + """ + Gets restriction data type. + + :param str key: Type of restriction + :return: 'xs:string' or 'xs:integer' + :rtype: str + """ + strings = "enumeration pattern whiteSpace length minLength maxLength" + if key in strings.split(): + return "xs:string" + else: + return "xs:integer" + + def __write(self): + """ + Writes XSD into a file. 
+ """ + filename = Path(self.filepath.parent / f"{self.filepath.stem}.xsd") + formatter = XMLFormatter(indent=2) + for key, val in self.soups.items(): + if key == "schema": + xsd = val.schema + else: + xsd.append(val) + filename.write_text(xsd.prettify(formatter=formatter)) + + +class YASDCheck: + """ + Verifies YASD file. + """ + + def __init__(self, messenger, filepath): + """ + Inits YASD validator. + + :param YASDMessenger messenger: Object for print or save messages + :param filepath: YASD file path + :type filepath: None or Path + """ + self.msgr = messenger + self.__check_file(filepath) + self.__parse_file() + # TODO: do extra checks + + def __check_file(self, filepath): + """ + Verifies YASD file. + + :param filepath: YASD file path + :type filepath: None or Path + """ + if filepath is None: + self.msgr.run("no_input", level="error") + elif not filepath.exists() or not filepath.is_file(): + self.msgr.run("invalid_input", level="error", file=filepath) + self.filepath = filepath.resolve() + + def __parse_file(self): + """ + Attempts YASD file parsing. + """ + raw = self.filepath.read_text(encoding="utf8") + try: + self.yaml = yaml.safe_load(raw) + except yaml.YAMLError: + # TODO: should be log class + self.msgr.run("invalid_yaml", level="error") + + +class YASDMessenger: + """ + Prints or saves YASD messages. + """ + + def keys(): + """ + Messages keys dictionary. + """ + return { + "invalid_level": "Invalid log level '@lvl'", + "no_input": "Input file needed.", + "invalid_input": "Invalid file '@file'", + "invalid_yaml": "Invalid YAML structure", + } + + def __init__(self, quiet=False, log=False): + """ + Inits YASD Messenger. + """ + self.quiet = quiet + self.log = log + + def run(self, key="", level="info", **kwargs): + """ + Prints or writes messages. + + '**kwargs' are the keys for message text replacements. 
+ + :param str key: Message key + :param str level: Log level; 'info' by default + """ + self.__check_level(level) + msg = self.__get_msg(key, **kwargs) + msg = f"[{level.upper()}] {msg}" + # TODO: print or save depending on self.quiet and self.log + print(msg) + if level in ["error", "fatal"]: + sys.exit(1) + + def __check_level(self, level): + """ + Verifies log level. + + Prints warning if log level doesn't exist. + + :param str level: Log level + """ + if not level in ["trace", "debug", "info", "warn", "error", "fatal"]: + YASDMessenger().run("invalid_level", level="warn", lvl=level) + + def __get_msg(self, key, **kwargs): + """ + Gets message based on key. + + '**kwargs' are the keys for message text replacements. + + :param str key: Message key + :return: Message or key if message key doesn't exist. + :rtype: str + """ + if key in YASDMessenger.keys().keys(): + msg = YASDMessenger.keys()[key] + for key, value in kwargs.items(): + msg = msg.replace(f"@{key}", str(value)) + return msg + else: + return key + + +def main(): + """ + Gets and parses argv, then calls YASD. + """ + parser = argparse.ArgumentParser( + prog="yasd", + description=""" + YASD, Yet Another Schema Definition. + YASD is a YAML format for human writable XSDs (XML Schema Definition), + humans declare what is indispensable, leaving the machines to do the + rest of the unreadable . + """, + epilog=""" + (c) 2023 Perro Tuerto . + Founded by Mexican Academy of Language . + Licensed under GPLv3 . 
+ """, + ) + parser.add_argument( + "action", + choices=["convert", "check", "sample", "document", "man"], + help="action to perform", + ) + parser.add_argument( + "file", + type=Path, + nargs="?", + default=None, + help="file in YAML format", + ) + parser.add_argument( + "-q", "--quiet", action="store_true", help="enable quiet mode" + ) + parser.add_argument("-l", "--log", action="store_true", help="write log") + parser.add_argument( + "-n", "--num", default=1, help="number of XML samples; 1 by default" + ) + args = parser.parse_args() + if args.action == "man": + print("MAN") + else: + YASD.do(args.quiet, args.log, args.num, args.action, args.file) + + +if __name__ == "__main__": + main()