13,398
edits
m (1 revision imported) |
(Update) |
||
Line 1: | Line 1: | ||
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | --[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | ||
]] | ]] | ||
Line 87: | Line 86: | ||
if options.encode == true or options.encode == nil then | if options.encode == true or options.encode == nil then | ||
url_string = mw.uri.encode (url_string); | url_string = mw.uri.encode (url_string, 'PATH'); | ||
end | end | ||
Line 165: | Line 164: | ||
--[=[-------------------------< I S _ V A L I D _ | --[=[-------------------------< I S _ V A L I D _ R X I V _ D A T E >------------------------------------------ | ||
returns true if: | for biorxiv, returns true if: | ||
2019-12-11T00:00Z <= biorxiv_date < today + 2 days | 2019-12-11T00:00Z <= biorxiv_date < today + 2 days | ||
for medrxiv, returns true if: | |||
2020-01-01T00:00Z <= medrxiv_date < today + 2 days | |||
The dated form of biorxiv identifier has a start date of 2019-12-11. The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400 | The dated form of biorxiv identifier has a start date of 2019-12-11. The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400 | ||
The medrxiv identifier has a start date of 2020-01-01. The Unix timestamp for that date is {{#time:U|2020-01-01}} = 1577836800 | |||
<rxiv_date> is the date provided in those |biorxiv= parameter values that are dated and in |medrxiv= parameter values at time 00:00:00 UTC | |||
today is the current date at time 00:00:00 UTC plus 48 hours | <today> is the current date at time 00:00:00 UTC plus 48 hours | ||
if today is | if today's date is 2023-01-01T00:00:00 then | ||
adding 24 hours gives | adding 24 hours gives 2023-01-02T00:00:00 – one second more than today | ||
adding 24 hours gives | adding another 24 hours gives 2023-01-03T00:00:00 – one second more than tomorrow | ||
inputs: | |||
<y>, <m>, <d> – year, month, day parts of the date from the biorxiv or medrxiv identifier | |||
<select> 'b' for biorxiv, 'm' for medrxiv; defaults to 'b' | |||
]=] | ]=] | ||
local function | local function is_valid_rxiv_date (y, m, d, select) | ||
if 0 == tonumber (m) and 12 < tonumber (m) then -- <m> must be a number 1–12 | |||
return false; | |||
end | |||
if 0 == tonumber (d) and 31 < tonumber (d) then -- <d> must be a number 1–31; TODO: account for month length and leap year? | |||
return false; | |||
end | |||
local rxiv_date = table.concat ({y, m, d}, '-'); -- make ymd date string | |||
local good1, good2; | local good1, good2; | ||
local | local rxiv_ts, tomorrow_ts; -- to hold Unix timestamps representing the dates | ||
local lang_object = mw.getContentLanguage(); | local lang_object = mw.getContentLanguage(); | ||
good1, | good1, rxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', rxiv_date); -- convert rxiv_date value to Unix timestamp | ||
good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow | good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow | ||
if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand | if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand | ||
rxiv_ts = tonumber (rxiv_ts) or lang_object:parseFormattedNumber (rxiv_ts); -- convert to numbers for the comparison; | |||
tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts); | tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts); | ||
else | else | ||
Line 200: | Line 209: | ||
end | end | ||
return (( | local limit_ts = ((select and ('m' == select)) and 1577836800) or 1576022400; -- choose the appropriate limit timestamp | ||
return ((limit_ts <= rxiv_ts) and (rxiv_ts < tomorrow_ts)) -- limit_ts <= rxiv_date < tomorrow's date | |||
end | end | ||
Line 242: | Line 253: | ||
isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39 | isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39 | ||
for i, v in ipairs (isxn_str) do | for i, v in ipairs (isxn_str) do | ||
temp = temp + (3 - 2*(i % 2)) * tonumber (string.char (v) ); | temp = temp + (3 - 2*(i % 2)) * tonumber (string.char (v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit | ||
end | end | ||
return temp % 10 == 0; -- sum modulo 10 is zero when ISBN-13/ISMN is correct | return temp % 10 == 0; -- sum modulo 10 is zero when ISBN-13/ISMN is correct | ||
Line 250: | Line 261: | ||
--[[--------------------------< N O R M A L I Z E _ L C C N >-------------------------------------------------- | --[[--------------------------< N O R M A L I Z E _ L C C N >-------------------------------------------------- | ||
LCCN normalization ( | LCCN normalization (https://www.loc.gov/marc/lccn-namespace.html#normalization) | ||
1. Remove all blanks. | 1. Remove all blanks. | ||
2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash. | 2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash. | ||
Line 260: | Line 271: | ||
Returns a normalized LCCN for lccn() to validate. There is no error checking (step 3.b.1) performed in this function. | Returns a normalized LCCN for lccn() to validate. There is no error checking (step 3.b.1) performed in this function. | ||
]] | ]] | ||
Line 286: | Line 298: | ||
--[[--------------------------< A R X I V >-------------------------------------------------------------------- | --[[--------------------------< A R X I V >-------------------------------------------------------------------- | ||
See: | See: https://arxiv.org/help/arxiv_identifier | ||
format and error check arXiv identifier. There are three valid forms of the identifier: | format and error check arXiv identifier. There are three valid forms of the identifier: | ||
Line 311: | Line 323: | ||
<date code> and <version> are as defined for 0704-1412 | <date code> and <version> are as defined for 0704-1412 | ||
<number> is a five-digit number | <number> is a five-digit number | ||
]] | ]] | ||
Line 318: | Line 331: | ||
local handler = options.handler; | local handler = options.handler; | ||
local year, month, version; | local year, month, version; | ||
local | local err_msg = false; -- assume no error message | ||
local text; -- output text | local text; -- output text | ||
Line 327: | Line 340: | ||
if ((not (90 < year or 8 > year)) or (1 > month or 12 < month)) or -- if invalid year or invalid month | if ((not (90 < year or 8 > year)) or (1 > month or 12 < month)) or -- if invalid year or invalid month | ||
((91 == year and 7 > month) or (7 == year and 3 < month)) then -- if years ok, are starting and ending months ok? | ((91 == year and 7 > month) or (7 == year and 3 < month)) then -- if years ok, are starting and ending months ok? | ||
err_msg = true; -- flag for error message | |||
end | end | ||
Line 336: | Line 349: | ||
if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years) | if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years) | ||
((7 == year) and (4 > month)) then -- when year is 07, is month invalid (before April)? | ((7 == year) and (4 > month)) then -- when year is 07, is month invalid (before April)? | ||
err_msg = true; -- flag for error message | |||
end | end | ||
Line 344: | Line 357: | ||
month = tonumber (month); | month = tonumber (month); | ||
if ((15 > year) or (1 > month or 12 < month)) then -- is year invalid or is month invalid? (doesn't test for future years) | if ((15 > year) or (1 > month or 12 < month)) then -- is year invalid or is month invalid? (doesn't test for future years) | ||
err_msg = true; -- flag for error message | |||
end | end | ||
else | else | ||
err_msg = true; -- not a recognized format; flag for error message | |||
end | end | ||
if err_msg then | |||
options.coins_list_t['ARXIV'] = nil; -- when error, unset so not included in COinS | |||
end | |||
local err_msg_t = {}; | |||
if err_msg then | |||
set_message ('err_bad_arxiv'); | |||
end | |||
text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access}) | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access}); | ||
if is_set (class) then | if is_set (class) then | ||
if id:match ('^%d+') then | if id:match ('^%d+') then | ||
text = table.concat ({text, ' [[//arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink | text = table.concat ({text, ' [[https://arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink | ||
else | else | ||
set_message ('err_class_ignored'); | |||
end | end | ||
end | end | ||
return text; | return text; | ||
end | end | ||
Line 372: | Line 392: | ||
Validates (sort of) and formats a bibcode ID. | Validates (sort of) and formats a bibcode ID. | ||
Format for bibcodes is specified here: | Format for bibcodes is specified here: https://adsabs.harvard.edu/abs_doc/help_pages/data.html#bibcodes | ||
But, this: 2015arXiv151206696F is apparently valid so apparently, the only things that really matter are length, 19 characters | But, this: 2015arXiv151206696F is apparently valid so apparently, the only things that really matter are length, 19 characters | ||
Line 390: | Line 410: | ||
local access = options.access; | local access = options.access; | ||
local handler = options.handler; | local handler = options.handler; | ||
local ignore_invalid = options.accept; | |||
local err_type; | local err_type; | ||
local err_msg = ''; | |||
local year; | local year; | ||
Line 404: | Line 426: | ||
err_type = cfg.err_msg_supl.value; -- so value error | err_type = cfg.err_msg_supl.value; -- so value error | ||
else | else | ||
local next_year = tonumber (os.date ('%Y')) + 1; | local next_year = tonumber (os.date ('%Y')) + 1; -- get the current year as a number and add one for next year | ||
year = tonumber (year); -- convert year portion of bibcode to a number | year = tonumber (year); -- convert year portion of bibcode to a number | ||
if (1000 > year) or (year > next_year) then | if (1000 > year) or (year > next_year) then | ||
Line 411: | Line 433: | ||
if id:find('&%.') then | if id:find('&%.') then | ||
err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter) | err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter) | ||
end | |||
if id:match ('.........%.tmp%.') then -- temporary bibcodes when positions 10–14 are '.tmp.' | |||
set_message ('maint_bibcode'); | |||
end | end | ||
end | end | ||
end | end | ||
if is_set (err_type) then | if is_set (err_type) and not ignore_invalid then -- if there was an error detected and accept-as-written markup not used | ||
set_message ('err_bad_bibcode', {err_type}); | |||
options.coins_list_t['BIBCODE'] = nil; -- when error, unset so not included in COinS | |||
end | end | ||
return text; | return text; | ||
end | end | ||
Line 440: | Line 467: | ||
local id = options.id; | local id = options.id; | ||
local handler = options.handler; | local handler = options.handler; | ||
local | local err_msg = true; -- flag; assume that there will be an error | ||
local patterns = { | local patterns = { | ||
'^10.1101/%d%d%d%d%d%d$', -- simple 6-digit identifier (before 2019-12-11) | '^10%.1101/%d%d%d%d%d%d$', -- simple 6-digit identifier (before 2019-12-11) | ||
'^10.1101/(20 | '^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11) | ||
'^10.1101/(20 | '^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11) | ||
} | } | ||
Line 453: | Line 480: | ||
if m then -- m is nil when id is the six-digit form | if m then -- m is nil when id is the six-digit form | ||
if not | if not is_valid_rxiv_date (y, m, d, 'b') then -- validate the encoded date; 'b' for biorxiv limit | ||
break; -- date fail; break out early so we don't unset the error message | break; -- date fail; break out early so we don't unset the error message | ||
end | end | ||
end | end | ||
err_msg = nil; -- we found a match so unset the error message | |||
break; -- and done | break; -- and done | ||
end | end | ||
end -- err_cat remains set here when no match | end -- err_cat remains set here when no match | ||
if err_msg then | |||
options.coins_list_t['BIORXIV'] = nil; -- when error, unset so not included in COinS | |||
set_message ('err_bad_biorxiv'); -- and set the error message | |||
end | |||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, | prefix = handler.prefix, id = id, separator = handler.separator, | ||
encode = handler.encode, access = handler.access} | encode = handler.encode, access = handler.access}); | ||
end | end | ||
Line 473: | Line 505: | ||
The description of the structure of this identifier can be found at Help_talk:Citation_Style_1/Archive_26#CiteSeerX_id_structure | The description of the structure of this identifier can be found at Help_talk:Citation_Style_1/Archive_26#CiteSeerX_id_structure | ||
]] | ]] | ||
Line 479: | Line 512: | ||
local handler = options.handler; | local handler = options.handler; | ||
local matched; | local matched; | ||
local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, | ||
Line 486: | Line 519: | ||
matched = id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$"); | matched = id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$"); | ||
if not matched then | if not matched then | ||
set_message ('err_bad_citeseerx' ); | |||
options.coins_list_t['CITESEERX'] = nil; -- when error, unset so not included in COinS | |||
end | end | ||
return text; | return text; | ||
end | end | ||
Line 506: | Line 541: | ||
and terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely | and terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely | ||
if ever used in DOI names. | if ever used in DOI names. | ||
https://www.doi.org/doi_handbook/2_Numbering.html -- 2.2 Syntax of a DOI name | |||
https://www.doi.org/doi_handbook/2_Numbering.html#2.2.2 -- 2.2.2 DOI prefix | |||
]] | ]] | ||
Line 515: | Line 553: | ||
local ignore_invalid = options.accept; | local ignore_invalid = options.accept; | ||
local handler = options.handler; | local handler = options.handler; | ||
local | local err_flag; | ||
local text; | local text; | ||
if is_set (inactive) then | if is_set (inactive) then | ||
local inactive_year = inactive:match("%d%d%d%d") | local inactive_year = inactive:match("%d%d%d%d"); -- try to get the year portion from the inactive date | ||
local inactive_month, good; | local inactive_month, good; | ||
Line 530: | Line 568: | ||
end | end | ||
end | end | ||
end -- otherwise, |doi-broken-date= has something but it isn't a date | |||
if is_set (inactive_year) and is_set (inactive_month) then | if is_set (inactive_year) and is_set (inactive_month) then | ||
Line 544: | Line 580: | ||
end | end | ||
local registrant = | local registrant = mw.ustring.match (id, '^10%.([^/]+)/[^%s–]-[^%.,]$'); -- registrant set when DOI has the proper basic form | ||
local registrant_err_patterns = { -- these patterns are for code ranges that are not supported | local registrant_err_patterns = { -- these patterns are for code ranges that are not supported | ||
'^[^1-3]%d%d%d%d%.%d | '^[^1-3]%d%d%d%d%.%d+$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999 | ||
'^[^1- | '^[^1-6]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 60000+); accepts: 10000–69999 | ||
'^[^1-9]%d%d%d%.%d | '^[^1-9]%d%d%d%.%d+$', -- 4 digits with subcode (0xxx); accepts: 1000–9999 | ||
'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999 | '^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999 | ||
'^%d%d%d%d%d%d+', -- 6 or more digits | '^%d%d%d%d%d%d+', -- 6 or more digits | ||
'^%d%d?%d?$', -- less than 4 digits without subcode (with subcode is legitimate) | '^%d%d?%d?$', -- less than 4 digits without subcode (3 digits with subcode is legitimate) | ||
'^%d%d?%.[%d%.]+', -- 1 or 2 digits with subcode | |||
'^5555$', -- test registrant will never resolve | '^5555$', -- test registrant will never resolve | ||
'[^%d%.]', -- any character that isn't a digit or a dot | '[^%d%.]', -- any character that isn't a digit or a dot | ||
Line 558: | Line 595: | ||
if not ignore_invalid then | if not ignore_invalid then | ||
if registrant then -- when DOI has proper form | if registrant then -- when DOI has proper form | ||
for i, pattern in ipairs (registrant_err_patterns) do -- spin through error patterns | for i, pattern in ipairs (registrant_err_patterns) do -- spin through error patterns | ||
if registrant:match (pattern) then -- to validate registrant codes | if registrant:match (pattern) then -- to validate registrant codes | ||
err_flag = set_message ('err_bad_doi'); -- when found, mark this DOI as bad | |||
break; -- and done | break; -- and done | ||
end | end | ||
end | end | ||
else | else | ||
err_flag = set_message ('err_bad_doi'); -- invalid directory or malformed | |||
end | end | ||
else | else | ||
Line 572: | Line 609: | ||
end | end | ||
if err_flag then | |||
options.coins_list_t['DOI'] = nil; -- when error, unset so not included in COinS | |||
end | |||
text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access, | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access, | ||
auto_link = not ( | auto_link = not (err_flag or is_set (inactive) or ignore_invalid) and 'doi' or nil -- do not auto-link when |doi-broken-date= has a value or when there is a DOI error or (to play it safe, after all, auto-linking is not essential) when invalid DOIs are ignored | ||
}) .. (inactive or ''); | }) .. (inactive or ''); | ||
return text | return text; | ||
end | end | ||
Line 596: | Line 637: | ||
if ever used in HDLs. | if ever used in HDLs. | ||
Query string parameters are named here: | Query string parameters are named here: https://www.handle.net/proxy_servlet.html. query strings are not displayed | ||
but since '?' is an allowed character in an HDL, '?' followed by one of the query parameters is the only way we | but since '?' is an allowed character in an HDL, '?' followed by one of the query parameters is the only way we | ||
have to detect the query string so that it isn't URL-encoded with the rest of the identifier. | have to detect the query string so that it isn't URL-encoded with the rest of the identifier. | ||
Line 606: | Line 647: | ||
local access = options.access; | local access = options.access; | ||
local handler = options.handler; | local handler = options.handler; | ||
local query_params = { -- list of known query parameters from | local query_params = { -- list of known query parameters from https://www.handle.net/proxy_servlet.html | ||
'noredirect', | 'noredirect', | ||
'ignore_aliases', | 'ignore_aliases', | ||
Line 640: | Line 681: | ||
if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- HDL must contain a forward slash, must not contain spaces, endashes, and must not end with period or comma | if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- HDL must contain a forward slash, must not contain spaces, endashes, and must not end with period or comma | ||
set_message ('err_bad_hdl' ); | |||
options.coins_list_t['HDL'] = nil; -- when error, unset so not included in COinS | |||
end | end | ||
return text; | return text; | ||
end | end | ||
Line 664: | Line 707: | ||
else -- here when not ignoring | else -- here when not ignoring | ||
if not check then -- and there is an error | if not check then -- and there is an error | ||
options.coins_list_t['ISBN'] = nil; -- when error, unset so not included in COinS | |||
set_message ('err_bad_isbn', err_type); -- set an error message | |||
return ISBN; -- return id text | |||
end | end | ||
end | end | ||
return ISBN; | return ISBN; -- return id text | ||
end | end | ||
Line 686: | Line 731: | ||
return return_result (false, cfg.err_msg_supl.form); | return return_result (false, cfg.err_msg_supl.form); | ||
end | end | ||
return return_result ( | if not is_valid_isxn (id, 10) then -- test isbn-10 for numerical validity | ||
return return_result (false, cfg.err_msg_supl.check); -- fail if isbn-10 is not numerically valid | |||
end | |||
if id:find ('^63[01]') then -- 630xxxxxxx and 631xxxxxxx are (apparently) not valid isbn group ids but are used by amazon as numeric identifiers (asin) | |||
return return_result (false, cfg.err_msg_supl.group); -- fail if isbn-10 begins with 630/1 | |||
end | |||
return return_result (true, cfg.err_msg_supl.check); -- pass if isbn-10 is numerically valid | |||
else | else | ||
if id:match ('^%d+$') == nil then | if id:match ('^%d+$') == nil then | ||
Line 709: | Line 760: | ||
Error message if not 10 characters, if not ISBN-10, if mixed and first character is a digit. | Error message if not 10 characters, if not ISBN-10, if mixed and first character is a digit. | ||
|asin=630....... | |asin=630....... and |asin=631....... are (apparently) not legitimate ISBNs though they checksum as ISBNs; these | ||
function to emit the maint_asin message | do not cause this function to emit the maint_asin message | ||
This function is positioned here because it calls isbn() | This function is positioned here because it calls isbn() | ||
Line 720: | Line 771: | ||
local domain = options.ASINTLD; | local domain = options.ASINTLD; | ||
local | local err_flag; | ||
if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then | if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then | ||
err_flag = set_message ('err_bad_asin'); -- ASIN is not a mix of 10 uppercase alpha and numeric characters | |||
else | else | ||
if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X) | if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X) | ||
if | if is_valid_isxn (id, 10) then -- see if ASIN value is or validates as ISBN-10 | ||
if not id:find ('^ | if not id:find ('^63[01]') then -- 630xxxxxxx and 631xxxxxxx are (apparently) not a valid isbn prefixes but are used by amazon as a numeric identifier | ||
set_message (' | err_flag = set_message ('err_bad_asin'); -- ASIN has ISBN-10 form but begins with something other than 630/1 so probably an isbn | ||
end | end | ||
elseif not is_set ( | elseif not is_set (err_flag) then | ||
err_flag = set_message ('err_bad_asin'); -- ASIN is not ISBN-10 | |||
end | end | ||
elseif not id:match("^%u[%d%u]+$") then | elseif not id:match("^%u[%d%u]+$") then | ||
err_flag = set_message ('err_bad_asin'); -- asin doesn't begin with uppercase alpha | |||
end | end | ||
end | end | ||
if not is_set (domain) then | if (not is_set (domain)) or in_array (domain, {'us'}) then -- default: United States | ||
domain = "com"; | domain = "com"; | ||
elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom | elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom | ||
domain = "co." .. domain; | domain = "co." .. domain; | ||
elseif in_array (domain, {'au', 'br', 'mx'}) then | elseif in_array (domain, {'z.cn'}) then -- China | ||
domain = "cn"; | |||
elseif in_array (domain, {'au', 'br', 'mx', 'sg', 'tr'}) then -- Australia, Brazil, Mexico, Singapore, Turkey | |||
domain = "com." .. domain; | domain = "com." .. domain; | ||
elseif not in_array (domain, {'ae', 'ca', 'cn', 'de', 'es', 'fr', 'in', 'it', 'nl', 'pl', 'sa', 'se', 'co.jp', 'co.uk', 'com', 'com.au', 'com.br', 'com.mx', 'com.sg', 'com.tr'}) then -- United Arab Emirates, Canada, China, Germany, Spain, France, India, Italy, Netherlands, Poland, Saudi Arabia, Sweden (as of 2021-03 Austria (.at), Liechtenstein (.li) and Switzerland (.ch) still redirect to the German site (.de) with special settings, so don't maintain local ASINs for them) | |||
err_flag = set_message ('err_bad_asin_tld'); -- unsupported asin-tld value | |||
end | end | ||
local handler = options.handler; | |||
if not is_set (err_flag) then | |||
options.coins_list_t['ASIN'] = handler.prefix .. domain .. "/dp/" .. id; -- asin for coins | |||
else | |||
options.coins_list_t['ASIN'] = nil; -- when error, unset so not included in COinS | |||
end | |||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix .. domain .. "/dp/", | prefix = handler.prefix .. domain .. "/dp/", | ||
id = id, encode = handler.encode, separator = handler.separator}) | id = id, encode = handler.encode, separator = handler.separator}) | ||
end | end | ||
Line 755: | Line 816: | ||
Determines whether an ISMN string is valid. Similar to ISBN-13, ISMN is 13 digits beginning 979-0-... and uses the | Determines whether an ISMN string is valid. Similar to ISBN-13, ISMN is 13 digits beginning 979-0-... and uses the | ||
same check digit calculations. See | same check digit calculations. See https://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf | ||
section 2, pages 9–12. | section 2, pages 9–12. | ||
ismn value not made part of COinS metadata because we don't have a url or isn't a COinS-defined identifier (rft.xxx) | |||
or an identifier registered at info-uri.info (info:) | |||
]] | ]] | ||
Line 787: | Line 851: | ||
if false == valid_ismn then | if false == valid_ismn then | ||
options.coins_list_t['ISMN'] = nil; -- when error, unset so not included in COinS; not really necessary here because ismn not made part of COinS | |||
set_message ('err_bad_ismn'); -- create an error message if the ISMN is invalid | |||
end | end | ||
Line 800: | Line 865: | ||
like this: | like this: | ||
|issn=0819 4327 gives: [ | |issn=0819 4327 gives: [https://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link | ||
This code now prevents that by inserting a hyphen at the ISSN midpoint. It also validates the ISSN for length | This code now prevents that by inserting a hyphen at the ISSN midpoint. It also validates the ISSN for length | ||
Line 839: | Line 904: | ||
else | else | ||
if false == valid_issn then | if false == valid_issn then | ||
options.coins_list_t['ISSN'] = nil; -- when error, unset so not included in COinS | |||
set_message ('err_bad_issn', (options.hkey == 'EISSN') and 'e' or ''); -- create an error message if the ISSN is invalid | |||
end | end | ||
end | end | ||
return text | return text; | ||
end | end | ||
Line 857: | Line 923: | ||
local handler = options.handler; | local handler = options.handler; | ||
local id_num; | local id_num; | ||
id_num = id:match ('^[Jj][Ff][Mm](.*)$'); -- identifier with jfm prefix; extract identifier | id_num = id:match ('^[Jj][Ff][Mm](.*)$'); -- identifier with jfm prefix; extract identifier | ||
Line 870: | Line 935: | ||
id = id_num; -- jfm matches pattern | id = id_num; -- jfm matches pattern | ||
else | else | ||
set_message ('err_bad_jfm' ); -- set an error message | |||
options.coins_list_t['JFM'] = nil; -- when error, unset so not included in COinS | |||
end | end | ||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}); | ||
end | end | ||
Line 888: | Line 954: | ||
local access = options.access; | local access = options.access; | ||
local handler = options.handler; | local handler = options.handler; | ||
if id:find ('[Jj][Ss][Tt][Oo][Rr]') or id:find ('^https?://') or id:find ('%s') then | if id:find ('[Jj][Ss][Tt][Oo][Rr]') or id:find ('^https?://') or id:find ('%s') then | ||
set_message ('err_bad_jstor'); -- set an error message | |||
options.coins_list_t['JSTOR'] = nil; -- when error, unset so not included in COinS | |||
end | end | ||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access}) | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access}); | ||
end | end | ||
Line 903: | Line 969: | ||
Format LCCN link and do simple error checking. LCCN is a character string 8-12 characters long. The length of | Format LCCN link and do simple error checking. LCCN is a character string 8-12 characters long. The length of | ||
the LCCN dictates the character type of the first 1-3 characters; the rightmost eight are always digits. | the LCCN dictates the character type of the first 1-3 characters; the rightmost eight are always digits. | ||
https://oclc-research.github.io/infoURI-Frozen/info-uri.info/info:lccn/reg.html | |||
length = 8 then all digits | length = 8 then all digits | ||
Line 916: | Line 982: | ||
local lccn = options.id; | local lccn = options.id; | ||
local handler = options.handler; | local handler = options.handler; | ||
local | local err_flag; -- presume that LCCN is valid | ||
local id = lccn; -- local copy of the LCCN | local id = lccn; -- local copy of the LCCN | ||
Line 924: | Line 990: | ||
if 8 == len then | if 8 == len then | ||
if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) | if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) | ||
err_flag = set_message ('err_bad_lccn'); -- set an error message | |||
end | end | ||
elseif 9 == len then -- LCCN should be adddddddd | elseif 9 == len then -- LCCN should be adddddddd | ||
if nil == id:match("%l%d%d%d%d%d%d%d%d") then -- does it match our pattern? | if nil == id:match("%l%d%d%d%d%d%d%d%d") then -- does it match our pattern? | ||
err_flag = set_message ('err_bad_lccn'); -- set an error message | |||
end | end | ||
elseif 10 == len then -- LCCN should be aadddddddd or dddddddddd | elseif 10 == len then -- LCCN should be aadddddddd or dddddddddd | ||
if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) ... | if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) ... | ||
if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then -- ... see if it matches our pattern | if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then -- ... see if it matches our pattern | ||
err_flag = set_message ('err_bad_lccn'); -- no match, set an error message | |||
end | end | ||
end | end | ||
elseif 11 == len then -- LCCN should be aaadddddddd or adddddddddd | elseif 11 == len then -- LCCN should be aaadddddddd or adddddddddd | ||
if not (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d")) then -- see if it matches one of our patterns | if not (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d")) then -- see if it matches one of our patterns | ||
err_flag = set_message ('err_bad_lccn'); -- no match, set an error message | |||
end | end | ||
elseif 12 == len then -- LCCN should be aadddddddddd | elseif 12 == len then -- LCCN should be aadddddddddd | ||
if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then -- see if it matches our pattern | if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then -- see if it matches our pattern | ||
err_flag = set_message ('err_bad_lccn'); -- no match, set an error message | |||
end | end | ||
else | else | ||
err_flag = set_message ('err_bad_lccn'); -- wrong length, set an error message | |||
end | end | ||
if not is_set ( | if not is_set (err_flag) and nil ~= lccn:find ('%s') then | ||
err_flag = set_message ('err_bad_lccn'); -- lccn contains a space, set an error message | |||
end | |||
if is_set (err_flag) then | |||
options.coins_list_t['LCCN'] = nil; -- when error, unset so not included in COinS | |||
end | end | ||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode}) | prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode}); | ||
end | end | ||
--[[--------------------------< M R >----------------------------------------------------------------- | --[[--------------------------< M E D R X I V >----------------------------------------------------------------- | ||
Format medRxiv ID and do simple error checking. Similar to later bioRxiv IDs, medRxiv IDs are prefixed with a | |||
yyyy.mm.dd. date and suffixed with an optional version identifier. Earliest date accepted is 2020.01.01
The medRxiv ID is a date followed by an eight-digit number followed by an optional version indicator 'v' and one or more digits: | |||
https://www.medrxiv.org/content/10.1101/2020.11.16.20232009v2 -> 10.1101/2020.11.16.20232009v2 | |||
]] | ]] | ||
local function medrxiv (options)
	-- Validate a |medrxiv= identifier and render its external link.
	--   options.id            identifier text to validate and link
	--   options.handler       handler table supplying link, label, q, redirect, prefix, separator, encode, access
	--   options.coins_list_t  metadata table; the 'MEDRXIV' entry is removed when the identifier is invalid
	-- Returns whatever external_link_id() returns; the link is rendered even when the identifier is flagged as bad.
	local id = options.id;
	local handler = options.handler;
	local err_msg_flag = true;											-- flag; assume that there will be an error

	local patterns = {
		'^%d%d%d%d%d%d%d%d$',											-- simple 8-digit identifier; anchored at both ends so that the dated forms
																		-- (which also end in eight digits) fall through to the patterns below and get their date checked
		'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d%d%dv%d+$',	-- y.m.d. date + 8-digit identifier + version (2020-01-01 and later)
		'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d%d%d$',		-- y.m.d. date + 8-digit identifier (2020-01-01 and later)
		}

	for _, pattern in ipairs (patterns) do								-- spin through the patterns looking for a match
		local y, m, d = id:match (pattern);								-- <y> is the whole match for the capture-less 8-digit form; <m> and <d> are then nil
		if y then														-- found a match
			-- NOTE(review): the selector 'b' is passed through unchanged from the original code; confirm that
			-- is_valid_rxiv_date() applies the medRxiv (2020-01-01) lower limit for this value.
			if not m or is_valid_rxiv_date (y, m, d, 'b') then			-- no embedded date to check, or the embedded date is valid
				err_msg_flag = nil;										-- good identifier so unset the error flag
			end
			break;														-- first matching pattern decides; on date failure the flag stays set
		end
	end																	-- <err_msg_flag> remains set here when no match

	if err_msg_flag then
		options.coins_list_t['MEDRXIV'] = nil;							-- when error, unset so not included in COinS
		set_message ('err_bad_medrxiv');								-- and set the error message
	end

	return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
		prefix = handler.prefix, id = id, separator = handler.separator,
		encode = handler.encode, access = handler.access});
end
--[[--------------------------< | --[[--------------------------< M R >-------------------------------------------------------------------------- | ||
A seven digit number; if not seven digits, zero-fill leading digits to make seven digits. | |||
]] | ]] | ||
local function mr (options)
	-- Validate a |mr= identifier: one to seven digits, optionally preceded by an 'MR' prefix (any letter case).
	-- A valid value is left-padded with zeros to seven digits before it is linked.
	local handler = options.handler;
	local id = options.id;

	local digits = id:match ('^[Mm][Rr](%d+)$');						-- identifier with mr prefix
	if is_set (digits) then
		set_message ('maint_mr_format');								-- add maint cat
	else
		digits = id:match ('^%d+$');									-- plain number without mr prefix; nil when id is not all digits
	end

	local digit_count = digits and digits:len() or 0;					-- zero when neither form matched
	if 0 == digit_count or 7 < digit_count then
		set_message ('err_bad_mr');										-- set an error message
		options.coins_list_t['MR'] = nil;								-- when error, unset so not included in COinS
	else
		id = string.rep ('0', 7 - digit_count) .. digits;				-- zero-fill leading digits
	end

	return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
		prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode});
end
number = id:match('ocn(%d+)'); -- get the number | |||
elseif id:match('^on%d%d%d%d%d%d%d%d%d%d+$') then -- on prefix and 10 or more digits; 001 field (12 characters) | |||
--[[--------------------------< O C L C >---------------------------------------------------------------------- | |||
Validate and format an OCLC ID. https://www.oclc.org/batchload/controlnumber.en.html {{dead link}} | |||
archived at: https://web.archive.org/web/20161228233804/https://www.oclc.org/batchload/controlnumber.en.html | |||
]] | |||
local function oclc (options) | |||
local id = options.id; | |||
local handler = options.handler; | |||
local number; | |||
if id:match('^ocm%d%d%d%d%d%d%d%d$') then -- ocm prefix and 8 digits; 001 field (12 characters) | |||
number = id:match('ocm(%d+)'); -- get the number | |||
elseif id:match('^ocn%d%d%d%d%d%d%d%d%d$') then -- ocn prefix and 9 digits; 001 field (12 characters) | |||
number = id:match('ocn(%d+)'); -- get the number | |||
elseif id:match('^on%d%d%d%d%d%d%d%d%d%d+$') then -- on prefix and 10 or more digits; 001 field (12 characters) | |||
number = id:match('^on(%d%d%d%d%d%d%d%d%d%d+)$'); -- get the number | number = id:match('^on(%d%d%d%d%d%d%d%d%d%d+)$'); -- get the number | ||
elseif id:match('^%(OCoLC%)[1-9]%d*$') then -- (OCoLC) prefix and variable number digits; no leading zeros; 035 field | elseif id:match('^%(OCoLC%)[1-9]%d*$') then -- (OCoLC) prefix and variable number digits; no leading zeros; 035 field | ||
Line 1,016: | Line 1,131: | ||
elseif id:match('^%d+$') then -- no prefix | elseif id:match('^%d+$') then -- no prefix | ||
number = id; -- get the number | number = id; -- get the number | ||
if | if tonumber (id) > handler.id_limit then | ||
number = nil; -- | number = nil; -- unset when id value exceeds the limit | ||
end | end | ||
end | end | ||
Line 1,024: | Line 1,139: | ||
id = number; -- exclude prefix, if any, from external link | id = number; -- exclude prefix, if any, from external link | ||
else | else | ||
set_message ('err_bad_oclc') -- add an error message if the id is malformed | |||
options.coins_list_t['OCLC'] = nil; -- when error, unset so not included in COinS | |||
end | end | ||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | |||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}); | ||
end | end | ||
Line 1,045: | Line 1,159: | ||
local handler = options.handler; | local handler = options.handler; | ||
local ident, code = id:gsub('^OL', ''):match("^(%d+([AMW]))$"); -- strip optional OL prefix followed immediately by digits followed by 'A', 'M', or 'W'; | local ident, code = id:gsub('^OL', ''):match("^(%d+([AMW]))$"); -- strip optional OL prefix followed immediately by digits followed by 'A', 'M', or 'W'; | ||
local | local err_flag; | ||
local prefix = { -- these are appended to the handler.prefix according to code | local prefix = { -- these are appended to the handler.prefix according to code | ||
['A']='authors/OL', | ['A']='authors/OL', | ||
Line 1,056: | Line 1,170: | ||
code = 'X'; -- no code or id completely invalid | code = 'X'; -- no code or id completely invalid | ||
ident = id; -- copy id to ident so that we display the flawed identifier | ident = id; -- copy id to ident so that we display the flawed identifier | ||
err_flag = set_message ('err_bad_ol'); | |||
end | end | ||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | if not is_set (err_flag) then | ||
options.coins_list_t['OL'] = handler.prefix .. prefix[code] .. ident; -- experiment for ol coins | |||
else | |||
options.coins_list_t['OL'] = nil; -- when error, unset so not included in COinS | |||
end | |||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | |||
prefix = handler.prefix .. prefix[code], | prefix = handler.prefix .. prefix[code], | ||
id = ident, separator = handler.separator, encode = handler.encode, | id = ident, separator = handler.separator, encode = handler.encode, | ||
access = access}) | access = access}); | ||
end | end | ||
Line 1,080: | Line 1,200: | ||
local access = options.access; | local access = options.access; | ||
local handler = options.handler; | local handler = options.handler; | ||
if id:match("[^%d]") then -- if OSTI has anything but digits | if id:match("[^%d]") then -- if OSTI has anything but digits | ||
set_message ('err_bad_osti'); -- set an error message | |||
options.coins_list_t['OSTI'] = nil; -- when error, unset so not included in COinS | |||
else -- OSTI is only digits | else -- OSTI is only digits | ||
local id_num = tonumber (id); -- convert id to a number for range testing | local id_num = tonumber (id); -- convert id to a number for range testing | ||
if 1018 > id_num or handler.id_limit < id_num then -- if OSTI is outside test limit boundaries | if 1018 > id_num or handler.id_limit < id_num then -- if OSTI is outside test limit boundaries | ||
set_message ('err_bad_osti'); -- set an error message | |||
options.coins_list_t['OSTI'] = nil; -- when error, unset so not included in COinS | |||
end | end | ||
end | end | ||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access}) | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access}); | ||
end | end | ||
Line 1,117: | Line 1,238: | ||
local embargo = options.Embargo; -- TODO: lowercase? | local embargo = options.Embargo; -- TODO: lowercase? | ||
local handler = options.handler; | local handler = options.handler; | ||
local | local err_flag; | ||
local id_num; | local id_num; | ||
local text; | local text; | ||
Line 1,132: | Line 1,253: | ||
id_num = tonumber (id_num); -- convert id_num to a number for range testing | id_num = tonumber (id_num); -- convert id_num to a number for range testing | ||
if 1 > id_num or handler.id_limit < id_num then -- if PMC is outside test limit boundaries | if 1 > id_num or handler.id_limit < id_num then -- if PMC is outside test limit boundaries | ||
err_flag = set_message ('err_bad_pmc'); -- set an error message | |||
else | else | ||
id = tostring (id_num); -- make sure id is a string | id = tostring (id_num); -- make sure id is a string | ||
end | end | ||
else -- when id format incorrect | else -- when id format incorrect | ||
err_flag = set_message ('err_bad_pmc'); -- set an error message | |||
end | end | ||
Line 1,146: | Line 1,267: | ||
handler.separator, | handler.separator, | ||
id, | id, | ||
}); | }); | ||
else | else | ||
text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, -- no embargo date or embargo has expired, ok to link to article | text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, -- no embargo date or embargo has expired, ok to link to article | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access, | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access, | ||
auto_link = not | auto_link = not err_flag and 'pmc' or nil -- do not auto-link when PMC has error | ||
} | }); | ||
end | end | ||
if err_flag then | |||
options.coins_list_t['PMC'] = nil; -- when error, unset so not included in COinS | |||
end | |||
return text; | return text; | ||
end | end | ||
Line 1,169: | Line 1,294: | ||
local id = options.id; | local id = options.id; | ||
local handler = options.handler; | local handler = options.handler; | ||
if id:match("[^%d]") then -- if PMID has anything but digits | if id:match("[^%d]") then -- if PMID has anything but digits | ||
set_message ('err_bad_pmid'); -- set an error message | |||
options.coins_list_t['PMID'] = nil; -- when error, unset so not included in COinS | |||
else -- PMID is only digits | else -- PMID is only digits | ||
local id_num = tonumber (id); -- convert id to a number for range testing | local id_num = tonumber (id); -- convert id to a number for range testing | ||
if 1 > id_num or handler.id_limit < id_num then -- if PMID is outside test limit boundaries | if 1 > id_num or handler.id_limit < id_num then -- if PMID is outside test limit boundaries | ||
set_message ('err_bad_pmid'); -- set an error message | |||
options.coins_list_t['PMID'] = nil; -- when error, unset so not included in COinS | |||
end | end | ||
end | end | ||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}); | ||
end | end | ||
Line 1,198: | Line 1,324: | ||
local id = options.id; | local id = options.id; | ||
local handler = options.handler; | local handler = options.handler; | ||
if id:match("[^%d]") then -- if RFC has anything but digits | if id:match("[^%d]") then -- if RFC has anything but digits | ||
set_message ('err_bad_rfc'); -- set an error message | |||
options.coins_list_t['RFC'] = nil; -- when error, unset so not included in COinS | |||
else -- RFC is only digits | else -- RFC is only digits | ||
local id_num = tonumber (id); -- convert id to a number for range testing | local id_num = tonumber (id); -- convert id to a number for range testing | ||
if 1 > id_num or handler.id_limit < id_num then -- if RFC is outside test limit boundaries | if 1 > id_num or handler.id_limit < id_num then -- if RFC is outside test limit boundaries | ||
set_message ('err_bad_rfc'); -- set an error message | |||
options.coins_list_t['RFC'] = nil; -- when error, unset so not included in COinS | |||
end | end | ||
end | end | ||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access}) | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access}); | ||
end | end | ||
Line 1,228: | Line 1,355: | ||
local access = options.access; | local access = options.access; | ||
local handler = options.handler; | local handler = options.handler; | ||
local id_num; | local id_num; | ||
local text; | local text; | ||
Line 1,237: | Line 1,363: | ||
id_num = tonumber (id_num); -- convert id_num to a number for range testing | id_num = tonumber (id_num); -- convert id_num to a number for range testing | ||
if handler.id_limit < id_num then -- if S2CID is outside test limit boundaries | if handler.id_limit < id_num then -- if S2CID is outside test limit boundaries | ||
set_message ('err_bad_s2cid'); -- set an error message | |||
options.coins_list_t['S2CID'] = nil; -- when error, unset so not included in COinS | |||
end | end | ||
else -- when id format incorrect | else -- when id format incorrect | ||
set_message ('err_bad_s2cid'); -- set an error message | |||
options.coins_list_t['S2CID'] = nil; -- when error, unset so not included in COinS | |||
end | end | ||
text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access}) | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access}); | ||
return text; | return text; | ||
Line 1,254: | Line 1,381: | ||
9-digit form of ISBN-10; uses same check-digit validation when SBN is prefixed with an additional '0' to make 10 digits | 9-digit form of ISBN-10; uses same check-digit validation when SBN is prefixed with an additional '0' to make 10 digits | ||
sbn value not made part of COinS metadata because we don't have a url or isn't a COinS-defined identifier (rft.xxx) | |||
or an identifier registered at info-uri.info (info:) | |||
]] | ]] | ||
Line 1,261: | Line 1,391: | ||
local ignore_invalid = options.accept; | local ignore_invalid = options.accept; | ||
local handler = options.handler; | local handler = options.handler; | ||
local function return_result (check, err_type) -- local function to handle the various returns | local function return_result (check, err_type) -- local function to handle the various returns | ||
local SBN = internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect, | local SBN = internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator}); | prefix = handler.prefix, id = id, separator = handler.separator}); | ||
if not ignore_invalid then -- if not ignoring SBN errors | if not ignore_invalid then -- if not ignoring SBN errors | ||
if not check then | if not check then | ||
options.coins_list_t['SBN'] = nil; -- when error, unset so not included in COinS; not really necessary here because sbn not made part of COinS | |||
set_message ('err_bad_sbn', {err_type}); -- display an error message | |||
return SBN; | |||
end | end | ||
else | else | ||
Line 1,278: | Line 1,410: | ||
end | end | ||
local ident = id:gsub ('[%s-]', ''); | local ident = id:gsub ('[%s-]', ''); -- remove hyphens and whitespace; they interfere with the rest of the tests | ||
if 9 ~= ident:len() then | if 9 ~= ident:len() then | ||
Line 1,305: | Line 1,437: | ||
local id = options.id; | local id = options.id; | ||
local handler = options.handler; | local handler = options.handler; | ||
local id_num; | local id_num; | ||
local text; | local text; | ||
Line 1,314: | Line 1,445: | ||
id_num = tonumber (id_num); -- convert id_num to a number for range testing | id_num = tonumber (id_num); -- convert id_num to a number for range testing | ||
if 100 > id_num or handler.id_limit < id_num then -- if SSRN is outside test limit boundaries | if 100 > id_num or handler.id_limit < id_num then -- if SSRN is outside test limit boundaries | ||
set_message ('err_bad_ssrn'); -- set an error message | |||
options.coins_list_t['SSRN'] = nil; -- when error, unset so not included in COinS | |||
end | end | ||
else -- when id format incorrect | else -- when id format incorrect | ||
set_message ('err_bad_ssrn'); -- set an error message | |||
options.coins_list_t['SSRN'] = nil; -- when error, unset so not included in COinS | |||
end | end | ||
text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = options.access}); | ||
return text; | return text; | ||
Line 1,342: | Line 1,475: | ||
if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$') then -- doesn't have '@' or has one or first or last character is '< or '>' | if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$') then -- doesn't have '@' or has one or first or last character is '< or '>' | ||
set_message ('err_bad_usenet_id') -- add an error message if the message id is invalid | |||
options.coins_list_t['USENETID'] = nil; -- when error, unset so not included in COinS | |||
end | end | ||
return text | return text; | ||
end | end | ||
Line 1,362: | Line 1,496: | ||
local id = options.id; | local id = options.id; | ||
local handler = options.handler; | local handler = options.handler; | ||
if id:match('^%d%d%d%d%d%d%d%d$') then -- is this identifier using temporary format? | if id:match('^%d%d%d%d%d%d%d%d$') then -- is this identifier using temporary format? | ||
set_message ('maint_zbl'); -- yes, add maint cat | set_message ('maint_zbl'); -- yes, add maint cat | ||
elseif not id:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then -- not temporary, is it normal format? | elseif not id:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then -- not temporary, is it normal format? | ||
set_message ('err_bad_zbl'); -- no, set an error message | |||
options.coins_list_t['ZBL'] = nil; -- when error, unset so not included in COinS | |||
end | end | ||
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) | prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}); | ||
end | end | ||
Line 1,414: | Line 1,548: | ||
if is_set (access_level) then | if is_set (access_level) then | ||
if not in_array (access_level, cfg.keywords_lists['id-access']) then -- exact match required | if not in_array (access_level, cfg.keywords_lists['id-access']) then -- exact match required | ||
set_message ('err_invalid_param_val', {access_param, access_level}); | |||
access_level = nil; -- invalid so unset | access_level = nil; -- invalid so unset | ||
end | end | ||
if not is_set (id_list[k]) then -- identifier access-level must have a matching identifier | if not is_set (id_list[k]) then -- identifier access-level must have a matching identifier | ||
set_message ('err_param_access_requires_param', {k:lower()}); -- parameter name is uppercase in cfg.id_handlers (k); lowercase for error message | |||
end | end | ||
id_accesses_list[k] = cfg.keywords_xlate[access_level]; -- get translated keyword | id_accesses_list[k] = cfg.keywords_xlate[access_level]; -- get translated keyword | ||
Line 1,432: | Line 1,566: | ||
render the identifiers into a sorted sequence table | render the identifiers into a sorted sequence table | ||
< | <ID_list_coins_t> is a table of k/v pairs where k is same as key in cfg.id_handlers and v is the assigned value | ||
<options_t> | <options_t> is a table of various k/v option pairs provided in the call to new_build_id_list(); | ||
modified by this function and passed to all identifier rendering functions | modified by this function and passed to all identifier rendering functions | ||
<access_levels_t> is a table of k/v pairs where k is same as key in cfg.id_handlers and v is the assigned value (if valid) | <access_levels_t> is a table of k/v pairs where k is same as key in cfg.id_handlers and v is the assigned value (if valid) | ||
returns a sequence table of sorted (by hkey) rendered identifier strings | returns a sequence table of sorted (by hkey - 'handler' key) rendered identifier strings | ||
]] | ]] | ||
local function build_id_list ( | local function build_id_list (ID_list_coins_t, options_t, access_levels_t) | ||
local | local ID_list_t = {}; | ||
local accept; | local accept; | ||
local func_map = { --function map points to functions associated with hkey identifier | local func_map = { --function map points to functions associated with hkey identifier | ||
Line 1,459: | Line 1,593: | ||
['JSTOR'] = jstor, | ['JSTOR'] = jstor, | ||
['LCCN'] = lccn, | ['LCCN'] = lccn, | ||
['MEDRXIV'] = medrxiv, | |||
['MR'] = mr, | ['MR'] = mr, | ||
['OCLC'] = oclc, | ['OCLC'] = oclc, | ||
Line 1,465: | Line 1,600: | ||
['PMC'] = pmc, | ['PMC'] = pmc, | ||
['PMID'] = pmid, | ['PMID'] = pmid, | ||
['RFC'] = rfc, | ['RFC'] = rfc, | ||
['S2CID'] = s2cid, | ['S2CID'] = s2cid, | ||
['SBN'] = sbn, | ['SBN'] = sbn, | ||
['SSRN'] = ssrn, | ['SSRN'] = ssrn, | ||
['USENETID'] = usenet_id, | ['USENETID'] = usenet_id, | ||
['ZBL'] = zbl, | ['ZBL'] = zbl, | ||
} | } | ||
for hkey, v in pairs ( | for hkey, v in pairs (ID_list_coins_t) do | ||
v, accept = has_accept_as_written (v); -- remove accept-as-written markup if present; accept is boolean true when markup removed; false else | v, accept = has_accept_as_written (v); -- remove accept-as-written markup if present; accept is boolean true when markup removed; false else | ||
-- every function gets the options table with value v and accept boolean | -- every function gets the options table with value v and accept boolean | ||
options_t.hkey = hkey; -- ~/Configuration handler key | options_t.hkey = hkey; -- ~/Configuration handler key | ||
options_t.id = v; -- add that identifier value to the options table | options_t.id = v; -- add that identifier value to the options table | ||
options_t.accept = accept; -- add the accept boolean flag | options_t.accept = accept; -- add the accept boolean flag | ||
options_t.access = access_levels_t[hkey]; -- add the access level for those that have an |<identifier-access= parameter | options_t.access = access_levels_t[hkey]; -- add the access level for those that have an |<identifier-access= parameter | ||
options_t.handler = cfg.id_handlers[hkey]; | options_t.handler = cfg.id_handlers[hkey]; | ||
if func_map[hkey] then | options_t.coins_list_t = ID_list_coins_t; -- pointer to ID_list_coins_t; for |asin= and |ol=; also to keep erroneous values out of the citation's metadata | ||
table.insert ( | options_t.coins_list_t[hkey] = v; -- id value without accept-as-written markup for metadata | ||
if options_t.handler.access and not in_array (options_t.handler.access, cfg.keywords_lists['id-access']) then | |||
error (cfg.messages['unknown_ID_access'] .. options_t.handler.access); -- here when handler access key set to a value not listed in list of allowed id access keywords | |||
end | |||
if func_map[hkey] then | |||
local id_text = func_map[hkey] (options_t); -- call the function to get identifier text and any error message | |||
table.insert (ID_list_t, {hkey, id_text}); -- add identifier text to the output sequence table | |||
else | |||
error (cfg.messages['unknown_ID_key'] .. hkey); -- here when func_map doesn't have a function for hkey | |||
end | |||
end | |||
local function comp (a, b) -- used by following table.sort() | |||
return a[1]:lower() < b[1]:lower(); -- sort by hkey | |||
end | |||
table.sort (ID_list_t, comp); -- sequence table of tables sort | |||
for k, v in ipairs (ID_list_t) do -- convert sequence table of tables to simple sequence table of strings | |||
ID_list_t[k] = v[2]; -- v[2] is the identifier rendering from the call to the various functions in func_map{} | |||
end | |||
return ID_list_t; | |||
end | |||
--[[--------------------------< O P T I O N S _ C H E C K >---------------------------------------------------- | |||
check that certain option parameters have their associated identifier parameters with values | |||
<ID_list_coins_t> is a table of k/v pairs where k is same as key in cfg.id_handlers and v is the assigned value | |||
<ID_support_t> is a sequence table of tables created in citation0() where each subtable has four elements: | |||
[1] is the support parameter's assigned value; empty string if not set | |||
[2] is a text string same as key in cfg.id_handlers | |||
[3] is cfg.error_conditions key used to create error message | |||
[4] is original ID support parameter name used to create error message | |||
returns nothing; on error emits an appropriate error message | |||
]] | |||
local function options_check (ID_list_coins_t, ID_support_t)
	-- For each support-parameter record, emit that record's error message when the support
	-- parameter has a value but ID_list_coins_t has no entry for the associated identifier key.
	for _, support_t in ipairs (ID_support_t) do
		local support_value = support_t[1];								-- support parameter's assigned value
		local handler_key = support_t[2];								-- key used to look up the identifier in ID_list_coins_t
		local identifier_missing = not ID_list_coins_t[handler_key];

		if is_set (support_value) and identifier_missing then
			set_message (support_t[3], support_t[4]);					-- error-condition key and the original parameter name
		end
	end
end
Line 1,512: | Line 1,676: | ||
]] | ]] | ||
local function identifier_lists_get ( | local function identifier_lists_get (args_t, options_t, ID_support_t) | ||
local ID_list_coins_t = extract_ids ( | local ID_list_coins_t = extract_ids (args_t); -- get a table of identifiers and their values for use locally and for use in COinS | ||
local ID_access_levels_t = extract_id_access_levels ( | options_check (ID_list_coins_t, ID_support_t); -- ID support parameters must have matching identifier parameters | ||
local ID_access_levels_t = extract_id_access_levels (args_t, ID_list_coins_t); -- get a table of identifier access levels | |||
local ID_list_t = build_id_list (ID_list_coins_t, options_t, ID_access_levels_t); -- get a sequence table of rendered identifier strings | local ID_list_t = build_id_list (ID_list_coins_t, options_t, ID_access_levels_t); -- get a sequence table of rendered identifier strings | ||
return ID_list_t, ID_list_coins_t; -- return the tables | return ID_list_t, ID_list_coins_t; -- return the tables | ||
Line 1,550: | Line 1,713: | ||
auto_link_urls = auto_link_urls, -- table of identifier URLs to be used when auto-linking |title= | auto_link_urls = auto_link_urls, -- table of identifier URLs to be used when auto-linking |title= | ||
identifier_lists_get = identifier_lists_get, -- experiment to replace individual calls to build_id_list, extract_ids, extract_id_access_levels | identifier_lists_get = identifier_lists_get, -- experiment to replace individual calls to build_id_list(), extract_ids, extract_id_access_levels | ||
is_embargoed = is_embargoed; | is_embargoed = is_embargoed; | ||
set_selected_modules = set_selected_modules; | set_selected_modules = set_selected_modules; | ||
} | } |
edits