From ce365d728ef7c032fd12884d7a20b42944298f8d Mon Sep 17 00:00:00 2001 From: Benjamin Mako Hill Date: Sat, 3 Apr 2010 12:50:14 -0400 Subject: [PATCH] added a simple validation script --- scripts/csl.rnc | 919 +++++++++++++++++++++++++++++++++++++++++++++++ scripts/validate | 3 + 2 files changed, 922 insertions(+) create mode 100644 scripts/csl.rnc create mode 100755 scripts/validate diff --git a/scripts/csl.rnc b/scripts/csl.rnc new file mode 100644 index 0000000..cd49ddd --- /dev/null +++ b/scripts/csl.rnc @@ -0,0 +1,919 @@ +namespace atom = "http://www.w3.org/2005/Atom" +namespace cs = "http://purl.org/net/xbiblio/csl" +namespace dc = "http://purl.org/dc/elements/1.1/" +namespace s = "http://www.ascc.net/xml/schematron" +namespace xhtml = "http://www.w3.org/1999/xhtml" + +dc:title [ "Citation Style Language" ] +dc:creator [ "Bruce D'Arcus" ] +dc:creator [ "Simon Kornblith" ] +dc:copyright [ "Bruce D'Arcus and Simon Kornblith, 2007" ] +dc:rights [ "Permission to freely use, copy and distribute." ] +dc:description [ + "Citation Style Language (CSL) schema for describing bibliographic and citation formatting." +] +start = style + +## The CSL data model consists of resources and variables. Primary resources are the +## citation object itself, as well as related agents and events. Variables are +## attributes of the resource: titles, dates, volume numbers, and so forth. From the +## perspective of CSL, resources templates involve a list of variables. By contrast, +## a variable can be accessed directly. +## In addition, CSL also contains structures for macros and parameter options. +div { + style = + element cs:style { + attribute xml:lang { xsd:language }?, + + ## Refers to the default locale for the style; should generally + ## be set for any academic journal, since it can be used to ensure + ## proper quote formatting and such. + attribute default-locale { xsd:language }?, + attribute class { "in-text" | "note" }, + info, + (terms? & macro* & citation & bibliography?)? + } +} + +## info element +div { + + ## The Content model for the metadata is borrowed from Atom. + info = + element cs:info { + info-author* + & info-category* + & info-contributor* + & info-id + & info-issn? + & info-link* + & info-published? + & info-rights? + & info-source? + & info-summary? + & info-title + & info-updated + } + info-text = + attribute xml:lang { xsd:language }?, + text + info-author = element cs:author { info-Contributor-pattern } + info-Contributor-pattern = + element cs:name { text } + & element cs:email { text }? + & element cs:uri { xsd:anyURI }? + div { + + ## The schema attribute should be understood to specify the controlled + ## CSL category list. The terms thus typically include further logic, + ## such as the broader categories to which they belong ("humanities", + ## "social sciences") as well as the class of style ("author-date", "note", etc.). + info-category = + element cs:category { + attribute term { info-fields | info-classes }, + attribute scheme { xsd:anyURI }?, + attribute label { text }? + } + + ## Add one or more field categories. The "generic-base" category is reserved + ## for the handful of truly generic styles (APA, Harvard, etc.) from which most descend. + info-fields = + "anthropology" + | "astronomy" + | "biology" + | "botany" + | "chemistry" + | "communications" + | "engineering" + | "generic-base" + | "geography" + | "geology" + | "history" + | "humanities" + | "law" + | "linguistics" + | "literature" + | "math" + | "medicine" + | "philosophy" + | "physics" + | "psychology" + | "sociology" + | "science" + | "political_science" + | "social_science" + | "theology" + | "zoology" + | info-categories.extension + info-classes = "author-date" | "numeric" | "label" | "note" | "in-text" + + ## You can override in a customization schema, though please report + ## obvious gaps for inclusion in the schema. + info-categories.extension = notAllowed + } + info-contributor = element cs:contributor { info-Contributor-pattern } + info-translator = element cs:translator { info-Contributor-pattern } + + ## As in Atom, the cs:id element is a required URI that is globally-unique + ## and persistant. Once assigned it should never change. Ideally, of course, + ## the URI resolves to the correct CSL file. + info-id = element cs:id { string } + info-link = + element cs:link { + attribute href { xsd:anyURI }, + + ## For transformation to Atom, the rel values should be expanded to include the + ## base URI . + attribute rel { + + ## The CSL file URI whose configuration content should be + ## used for processing. To be used where, for example, a + ## journal may use a generic publisher style that is shared + ## with other journals. + "source" + | + ## The CSL file URI on which the style is based. Since most + ## styles are based on a handful of core styles, this can be + ## useful metadata to see those relationships. + "template" + | + ## The homepage URI for the style. + "homepage" + | + ## A documentation URI for a style. + "documentation" + }?, + # type borrows from Atom schema pattern + + ## media-type + attribute type { + xsd:string { pattern = ".+/.+" } + }?, + attribute hreflang { xsd:language }?, + attribute title { text }?, + attribute length { text }?, + string + } + info-published = element cs:published { xsd:dateTime } + info-rights = element cs:rights { info-text } + info-source = element cs:source { info-title & info-link? & info-id } + info-summary = element cs:summary { info-text } + info-title = element cs:title { info-text } + ## the issn for the journal the style corresponds to + # might use a regular expression pattern to validate content? + info-issn = element cs:issn { text } + info-updated = element cs:updated { xsd:dateTime } +} + +## Terms and forms +div { + cs-terms = + + ## Miscellaneous Terms + "accessed" + | "anonymous" + | "and" + | "and others" + | "at" + | "et-al" + | "forthcoming" + | "from" + | "in press" + | "ibid" + | "in" + | "no date" + | "references" + | "retrieved" + | "letter" + | "interview" + | "online" + | "cited" + | "edition" + | "internet" + | "presented at" + | + ## Roles + "editor" + | "translator" + | "interviewer" + | "recipient" + | + ## Months + "month-01" + | "month-02" + | "month-03" + | "month-04" + | "month-05" + | "month-06" + | "month-07" + | "month-08" + | "month-09" + | "month-10" + | "month-11" + | "month-12" + | + ## Other + cs-terms.locator + | cs-terms.extension + | info-fields + + ## Locators + cs-terms.locator = + "book" + | "chapter" + | "column" + | "figure" + | "folio" + | "issue" + | "line" + | "note" + | "opus" + | "page" + | + ## a synonym for "page" (to be deprecated) + "page-range" + | "page-first" + | "paragraph" + | "part" + | "section" + | "sub verbo" + | "volume" + | "verse" + + ## "symbol" and reverts to "short" if no term exists + ## "verb-short" reverts to "verb" if no term exists + ## all others revert to "long" if no term exists + cs-term-forms = "long" | "verb" | "short" | "verb-short" | "symbol" | cs-term-forms.extension + + ## include-period adds a period after a term if and only if the + ## term used (not necessarily term specified; see above) is + ## of form "short" or "verb-short" + include-period = attribute include-period { xsd:boolean }? + + ## Extension structures. You may override these in a customization + ## schema. If you do, please contact the xbiblio project to add + ## the term or form to the formal controlled list in the schema. + div { + cs-terms.extension = notAllowed + cs-term-forms.extension = notAllowed + } +} + +## Locales +div { + + ## Terms are localized strings used as content for the cs:text field. + ## It is only needed where you need to add a term not supported by + ## default, or override an existing one. + terms = element cs:terms { locale+ } + + ## A language-specific wrapper. + locale = + element cs:locale { + + ## Where localization is other than that of the style, use the + ## xml:lang attribute. + attribute xml:lang { xsd:language }?, + (simple-term | compound-term)+ + } + term-attributes = + attribute form { cs-term-forms }?, + attribute name { cs-terms }, + include-period + + ## Simple terms are basic strings, used to represent genres, media, etc. + simple-term = element cs:term { term-attributes, text } + + ## Compound terms are those whose output can be either singular or plural. + ## Typically used for things like page number or editor labels. + compound-term = element cs:term { term-attributes, single-term, multiple-term } + multiple-term = element cs:multiple { text } + single-term = element cs:single { text } +} + +## Names is a data type holding a text of authors, editors, or translators. +div { + names-attributes = + formatting, + delimiter, + attribute variable { + list { cs-names+ } + } + names = element cs:names { names-attributes, (name & name-label*), substitute? } + + ## Short version of "names" element, without children, allowed in + names-short = element cs:names { names-attributes } + name = + element cs:name { + formatting, + + ## Indicates long (first name + last name, for Western names) or short + ## (last name only, for Western names) form of name. Default is long + ## form + attribute form { "long" | "short" }?, + + ## Controls appearance of "and"/"&". To disable, do not specify. + attribute and { "text" | "symbol" }?, + + ## Delimiter between names (delimiter between variables is on + ## tag, where it should be). This is ", " in "J. Doe, S. Smith." + delimiter, + + ## delimiter for the last name is a list + attribute delimiter-precedes-last { + + ## The "always" value means that result is "J. Doe, and T. Timmons" + "always" + | + ## Default behavior; would result in "J. Doe and T. Timmons," but "J. Doe, + ## S. Smith, and T. Timmons" (note comma preceding 'and'). + "never" + }?, + + ## Sets the first-author name order to correspond to the sort order of + ## the bibliography; e.g. Doe, John (name-as-sort-order) vs. John Doe (w/o + ## attribute). + attribute name-as-sort-order { "first" | "all" }?, + + ## The delimiter for personal name parts where sort order differs from + ## display order (for example, in standard Western names). This is the + ## ", " in "Doe, John." + attribute sort-separator { text }?, + + ## Indicates whether given name parts ought to be given as initials + ## (e.g., J. K. Rowling) and the text to follow each initial. + attribute initialize-with { text }? + } + + ## Similar to label as below, but inherits variable from tag + name-label = element cs:label { label-primitives } + + ## Substitutions, if the name does not exist + substitute = element cs:substitute { (names-short | cs-element)+ } + cs-names = + "author" + | "editor" + | "translator" + | "recipient" + | "interviewer" + | "publisher" + | "composer" + | "original-publisher" + | "original-author" + | + ## to be used when citing a section of a book, for example, to distinguish the author + ## proper from the author of the containing work + "container-author" + | + ## use for series editor + "collection-editor" +} + +## date +div { + date = + element cs:date { + attribute variable { + list { cs-date-tokens+ } + }, + formatting, + delimiter, + date-part+ + } + date-part = element cs:date-part { formatting, (month | day | year-other) } + + ## Month formats: + ## long (default): January + ## short: Jan + ## numeric: 1 + ## numeric-leading-zeros: 01 + month = + attribute name { "month" }, + (attribute form { "long" | "short" | "numeric" | "numeric-leading-zeros" }?, + include-period) + + ## Day formats: + ## numeric (default): 5 + ## numeric-leading-zeros: 05 + ## ordinal: 5th + day = + attribute name { "day" }, + attribute form { "numeric" | "numeric-leading-zeros" | "ordinal" }? + + ## Year formats: + ## long (default): 2005 + ## short: 05 + ## Other represents any non-month/day/year date part + year-other = + attribute name { "year" | "other" }, + attribute form { "short" | "long" }? + cs-date-tokens = "issued" | "event" | "accessed" | "container" | "original-date" | "submitted" +} + +## The cs:text element is the main formatting element used to layout down content. +div { + cs-text = + element cs:text { + formatting, + (((( + ## adds a variable belonging to this item + attribute variable { + list { variables+ } + } + & delimiter) + | + ## adds the results of a macro, as specified in the main body of the CSL + attribute macro { token } + | + ## A descriptor that locates sub-item content within a cited resource. Used + ## in some styles to indicate specific page numbers for excerpted + ## content, for example. + attribute point-locator { cs-terms.locator }), + attribute form { "short" | "long" }?) + | ( + ## adds a localized term + attribute term { cs-terms }, + attribute form { cs-term-forms }?, + include-period, + plural?) + | + ## adds the text in the value field. use sparingly. + ## when dealing with localizable text, should be + ## used, even if this means defining a new term. + attribute value { text }) + } + + ## The number markup directive matches the first number found in a field, + ## and returns only that component. If no number is detected, the result + ## is empty. A non-empty number may be subject to further formatting consisting + ## of a form attribute whose value may be numeric, ordinal or roman to format + ## it as a simple number (the default), an ordinal number (1st, 2nd, 3rd etc) + ## or roman (i, ii, iii, iv etc). The text-case can also apply to capitalize + ## the roman numbers for instance. The other normal formatting rules apply + ## too (font-style, ...). When used in a conditional, number tests if + ## there is a number present, allowing conditional formatting. + cs-number = + element cs:number { + formatting, + attribute variable { "edition" | "volume" | "issue" | "number" | "number-of-volumes" }, + attribute form { "numeric" | "ordinal" | "roman" }? + } + variables = + + ## the primary title for the cited item + "title" + | + ## the secondary title for the cited item; for a book chapter, this + ## would be a book title, for an article the journal title, etc. + "container-title" + | + ## the tertiary title for the cited item; for example, a series title + "collection-title" + | + ## collection number; for example, series number + "collection-number" + | + ## title of a related original version; often useful in cases of translation + "original-title" + | + ## the name of the publisher + "publisher" + | + ## the location of the publisher + "publisher-place" + | + ## the name of the archive + "archive" + | + ## the location of the archive + "archive-place" + | + ## the location within an archival collection (for example, box and folder) + "archive_location" + | + ## issuing authority (for patents) or judicial authority (such as court + ## for legal cases) + "authority" + | + ## the name or title of a related event such as a conference or hearing + "event" + | + ## the location or place for the related event + "event-place" + | + ## + "page" + | + ## a description to locate an item within some larger container or + ## collection; a volume or issue number is a kind of locator, for example. + "locator" + | + ## version description + "version" + | + ## volume number for the container periodical + "volume" + | + ## refers to the number of items in multi-volume books and such + "number-of-volumes" + | + ## refers to the number of pages in a book or other document + "number-of-pages" + | + ## the issue number for the container publication + "issue" + | + ## + "chapter-number" + | + ## medium description (DVD, CD, etc.) + "medium" + | + ## the (typically publication) status of an item; for example "forthcoming" + "status" + | + ## an edition description + "edition" + | + ## a section description (for newspapers, etc.) + "section" + | + ## + "genre" + | + ## a short inline note, often used to refer to additional details of the resource + "note" + | + ## notes made by a reader about the content of the resource + "annote" + | + ## + "abstract" + | + ## + "keyword" + | + ## a document number; useful for reports and such + "number" + | + ## for related referenced resources; this is here for legal case + ## histories, but may be relevant for other contexts. + "references" + | + ## + "URL" + | + ## + "DOI" + | + ## + "ISBN" + | + ## + "call-number" + | + ## the number used for the in-text citation mark in numeric styles + "citation-number" + | + ## the label used for the in-text citation mark in label styles + "citation-label" + | + ## The number of a preceding note containing the first reference to + ## this item. Relevant only for note-based styles, and null for first references. + "first-reference-note-number" + | + ## The year suffix for author-date styles; e.g. the 'a' in '1999a'. + "year-suffix" +} +## The cs:label element. +div { + + ## The label element is used to print text terms that depend on document content + ## for pluralization. For labeling pages, this is preferable, as pages may be + ## either singular or plural (p. or pp.) + label = + element cs:label { + label-primitives, + attribute variable { "page" | "locator" } + } + label-primitives = + formatting, + include-period?, + attribute form { cs-term-forms }, + + ## On cs:label, use to modify default pluralization behavior. + plural? + + ## modifies plualization behavior [ a:defaultValue = "contextual" ] + plural = attribute plural { xsd:boolean } +} + +## The cs:macro element +div { + + ## The macro element works something like in BibTeX, and provides a ready-made + ## bundle of formatting templates that can be easily reusesd elsewhere. + macro = + element cs:macro { + attribute name { token }, + cs-element+ + } +} + +## The cs:group element +div { + + ## Group is used to provide delimiters and a common prefix/suffix. It is syntactic + ## sugar for a conditional that tests for the presence of any non-null child variable + ## or macro call results. So if there are no such results, then any + ## content will not be printed. + group = + element cs:group { + formatting, + delimiter, + ## the class attribute can be used to pass on styling hooks for CSS and such + attribute class { token }?, + cs-element+ + } +} +div { + layout = element cs:layout { formatting, delimiter, cs-element } + cs-element = (names | date | label | cs-text | cs-number | choose | group)+ +} + +## The cs:citation element +div { + + ## The cs:citation handles printing of citations. A citation may consist of + ## one-or-more references to bibliographic sources. These references can either + ## be simple in-text keys [doe99] or numeric markers [1], or more complex short + ## descriptors generated at runtime common in author-date (Doe, 1999a) or note-based + ## styles. + ## note: one issue unique to note-based styles is that a citation reference + ## effectively may become a full sentence. Implementers should consider this in + ## their design and insert the final formatted citation in the correct title form. + ## For example, if a citation is footnoted without any additional text, the first + ## character of the output should be uppercased. By contrast, if the citation is + ## within a pre-existing footnote, and preceded by non-citation text, then it should + ## be printed as is. + citation = element cs:citation { (citation-options* & sort?), layout } +} + +## The cs:bibliography element +div { + bibliography = element cs:bibliography { (bibliography-options* & sort?), layout } +} + +## The cs:option element +div { + citation-options = et-al | et-al-subsequent | disambiguate | collapse + bibliography-options = + et-al | hanging-indent | second-field-align | subsequent-author-substitute | line-formatting | group-by + + ## Pattern for options that are either true or false + option-tf = attribute value { xsd:boolean } + + ## Sorting and grouping by author assumes by default that the string includes the entire list + ## of authors. This option allows you to instead use first author only. + group-by = + element cs:option { + attribute name { "group-by" }, + attribute value { "first-author" } + } + + ## sets the bibliographic entries to be rendered with hanging-indents + hanging-indent = + element cs:option { + attribute name { "hanging-indent" }, + option-tf + } + + ## "citation-number" collapses numeric citations from [1, 2, 3] to [1-3] + ## "year" collapses authors from (Doe 2000, Doe 2001) to (Doe 2000, + ## 2001) + ## "year-suffix" collapses as "year", but also collapses (Doe 2000a, + ## Doe 2000b) to (Doe 2000a, b) (ignored if + ## disambiguate-add-year-suffix is missing) + collapse = + element cs:option { + attribute name { "collapse" }, + attribute value { "citation-number" | "year" | "year-suffix" } + } + + ## second-field-align is useful for formatting a numbered list. It aligns + ## any subsequent lines of a bibliography entry with the beginning of the + ## second field. For example, if the first field is + ## + ## + ## 1. Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do + ## eiusmod + ## + ## If set to "margin," then the first field is put in the margin and all + ## subsequent lines of text are aligned with the margin. This is useful for + ## IEEE style. + second-field-align = + element cs:option { + attribute name { "second-field-align" }, + attribute value { "true" | "margin" } + } + + ## substitutes subsequent recurrences of an author for a given string + subsequent-author-substitute = + element cs:option { + attribute name { "subsequent-author-substitute" }, + attribute value { text } + } + + ## defines parameters relating to "et al" formatting: + ## et-al-min: the minimum number of contributors to use "et al" + ## et-al-use-first: the number of contributors to explicitly print under + ## "et al" conditions + ## et-al-subsequent-*: same as above, but for subsequent references + ## (citation only) + et-al = + element cs:option { + attribute name { "et-al-min" | "et-al-use-first" }, + attribute value { xsd:integer } + } + et-al-subsequent = + element cs:option { + attribute name { "et-al-subsequent-min" | "et-al-subsequent-use-first" }, + attribute value { xsd:integer } + } + + ## defines parameters relating to disambiguation, followed in the order given + ## below until a citation is disambiguated + ## disambiguate-add-names: add additional names, disregarding + ## the "et-al" setting, to disambiguate the citations + ## disambiguate-add-givenname: add a given name to a citation + ## to disambiguate it (e.g., John Doe, 2005 vs. Doe, 2005) + ## disambiguate-add-year-suffix: add a suffix to the year (e.g., + ## 2007a) when there are two works by the same author published in + ## the same year included in one bibliography + disambiguate = + element cs:option { + attribute name { + "disambiguate-add-givenname" | "disambiguate-add-names" | "disambiguate-add-year-suffix" + }, + option-tf + } + # defines spacing between entries and between lines + # line-spacing: in units of lines; default 1 + # entry-spacing: in units of line-spacing; default 1 + line-formatting = + element cs:option { + attribute name { "line-spacing" | "entry-spacing" }, + attribute value { xsd:integer } + } +} + +## The cs:sort element +div { + all-variables = variables | cs-date-tokens | cs-names + + ## The sort element specifies the sort order for a citation or for the + ## bibliography. If it is not added, citations are sorted according to user + ## preferences, and bibliography is sorted according to order cited. + sort = element cs:sort { key+ } + key = + element cs:key { + (attribute variable { all-variables } + | attribute macro { token }), + + ## Default sort order is ascending + attribute sort { "ascending" | "descending" }? + } +} + +## The cs:choose element +div { + + ## The choose elements provides a simple conditional structure. + choose = element cs:choose { if, else-if*, else? } + if = element cs:if { condition, cs-element* } + else-if = element cs:else-if { condition, cs-element* } + else = element cs:else { cs-element+ } + condition = + + ## If the entry is of a given type, this is true + attribute type { + list { cs-types+ } + }?, + + ## If a given variable exists, this is true + attribute variable { + list { all-variables+ } + }?, + + ## If a given variable contains numeric data, this is true + attribute is-numeric { + list { all-variables+ } + }?, + + ## If a given variable contains a date, this s true + attribute is-date { + list { all-variables+ } + }?, + + ## The position of a citation. Whenever position="ibid-with-locator" + ## is true, position="ibid" is also true, and whenever position="ibid" + ## is true, position="subsequent" is also true + attribute position { + list { ("first" | "subsequent" | "ibid" | "ibid-with-locator")+ } + }?, + + ## If text inside an block can be used to + ## differentiate two otherwise identical citations, it will be added. + ## If the citations remain identical after its addition, it will not + ## be added. + attribute disambiguate { xsd:boolean }?, + + ## A conditional on the locator for this specific entry + attribute locator { + list { cs-terms.locator+ } + }?, + attribute match { "all" | "any" | "none" }? +} + +## variables and types; needs some more thought +div { + cs-types = + "article" + | "article-magazine" + | "article-newspaper" + | "article-journal" + | "bill" + | "book" + | "broadcast" + | "chapter" + | "entry" + | "entry-dictionary" + | "entry-encyclopedia" + | "figure" + | "graphic" + | "interview" + | "legislation" + | "legal_case" + | "manuscript" + | "map" + | "motion_picture" + | "musical_score" + | "pamphlet" + | "paper-conference" + | "patent" + | "post" + | "post-weblog" + | "personal_communication" + | "report" + | "review" + | "review-book" + | "song" + | "speech" + | "thesis" + | "treaty" + | "webpage" +} + +## Formatting attributes. +div { + delimiter = attribute delimiter { text }? + + ## attributes are drawn directly from CSS and FO where possible + formatting = + attribute prefix { text }?, + attribute suffix { text }?, + attribute font-family { text }?, + attribute font-style { "italic" | "normal" | "oblique" }?, + attribute font-variant { "normal" | "small-caps" }?, + attribute font-weight { "normal" | "bold" | "light" }?, + attribute text-decoration { "none" | "underline" }?, + attribute vertical-align { "baseline" | "sup" | "sub" }?, + attribute text-case { + + ## display all text as lowercase + "lowercase" + | + ## display all text as uppercase + "uppercase" + | + ## capitalize first character; other characters + ## displayed as is + "capitalize-first" + | + ## capitalize first character of every word; + ## other characters displayed lowercase + "capitalize-all" + | + ## display as title case (the Chicago Manual + ## of Style calls this "headline style") + "title" + | + ## display as sentence case/sentence style + "sentence" + }?, + + ## For examples such as abstracts and notes in annotated bibliographies + ## use the "block" display value. Otherwise, content is displayed inline. + attribute display { "block" | "inline-block" }?, + attribute quotes { xsd:boolean }? +} diff --git a/scripts/validate b/scripts/validate new file mode 100755 index 0000000..72bc04f --- /dev/null +++ b/scripts/validate @@ -0,0 +1,3 @@ +#!/bin/sh + +rnv scripts/csl.rnc acawiki.csl -- 2.30.2