User:Js/urldecoder.js: Difference between revisions

Content deleted Content added
Js (talk | contribs)
corrections
Js (talk | contribs)
code restructuring, fixes and improvements
Line 5:
if (!tlb) return
var bt = document.createElement('input')
bt.type = 'button'; bt.onclick = urlDecoderRun; bt.id = 'urlDecoder'
bt.value = '→[\[]]'; bt.id = 'urlDecoder'
bt.title = 'Decode URL before cursor or all URLs in selected text'
bt.style.cssText = 'background:#adbede; height:22px; vertical-align:top; padding:0'
Line 16:
}
if (wgAction=='edit' || wgAction=='submit') addOnloadHook(urlDecoderButton)
 
 
 
Line 23 ⟶ 22:
var httpRegExp = '(https?:\\/\\/[^\\]\\[\\n\\r<>" ]+)' // except []<>"
var beforeCursor = new RegExp('(\\[{0,2})'+httpRegExp+'( +[^\\]\n]+)?\\]{0,2}$', 'i')
var localPrefix = getPrefixesWMPrefixes(wgServer.substringunSecure(7wgServer+wgScript))
if (wgServer == 'https://secure.wikimedia.org')
localPrefix = getPrefixes(wgScript.split('/')[2]+'.'+wgScript.split('/')[1]+'.org')
var newText, linkSize, txtarea = document.editform.wpTextbox1
var isBeforeCursor = false
Line 38 ⟶ 35:
}else { //no selection
if (!(rr=range.duplicate())) return
rr.moveStart('character', - 10001500)
linkSize = processBeforeCursor(rr.text)
if (!linkSize) return
Line 59 ⟶ 56:
newText = processSelText(txt.substring(startPos, endPos))
}else{ //no selection
linkSize = processBeforeCursor(txt.substring((endPos-10001500>0?endPos-10001500:0), endPos))
if (!linkSize) return
startPos = endPos - linkSize //select matched
Line 73 ⟶ 70:
 
 
function processBeforeCursor(str){ //finds http:.* in string, returns its length and also sets newText var
	//side effects: sets global isBeforeCursor flag and global newText (the decoded replacement)
	isBeforeCursor = true
	var pos = str.lastIndexOf('http://') //to cut out possible preceding http links
	if (pos == -1) pos = str.lastIndexOf('https://')
	if (pos == -1) return 0
	else if (pos >= 2) str = str.substring(pos-2) //cut chars before http, move left to include leading [s
	var ma = str.match(beforeCursor) // result: (whole string), '[', 'http:...', ' name]'
	if (!ma) return 0
	if (ma[3]) //link with name: automatically add brackets just in case
		newText = simplifyMatched(ma[0], '[', ma[2], ma[3]+']')
	else //just url: add closing bracket only if there is leading bracket
		newText = simplifyMatched(ma[0], ma[1], ma[2], ma[1]?']':'')
	return ma[0].length
}
Line 90 ⟶ 87:
function processSelText(txt){ //decodes all external links in txt; optionally also internal [[links]]
	//external links: optional leading [[, the url itself, optional ' name]]' tail
	txt = txt.replace(RegExp('(\\[{0,2})' + httpRegExp + '([^\\]\\[\\n\\r]*?\\]\\]?)?', 'ig'),
		simplifyMatched)
	//internal links are only touched when the user opted in via urlDecoderIntLinks
	if (window.urlDecoderIntLinks) txt = txt.replace(/\[\[[^\]\|\n]+/g,
		function(lnk){//skip user_talk
			return /^\[\[user_talk:[^#]+$/i.test(lnk) ? lnk : decodeAnchor(lnk)
		})
	return txt
}
 
function decodeMatchedLink(str, bracket, url, rest){//gets: (whole string), '[', url, ' name]'
	//bare url (no leading bracket): decode it and keep whatever text followed it
	if (!bracket) return decodeLink(url) + str.substring(url.length)
	//leading bracket but no matching closing part: leave the text untouched
	if (!rest) return str
	var name = rest.replace(/^ +|\]+$/g,'') //strip leading spaces and trailing brackets
	var decoded = decodeLink(url, name)
	//decodeLink returns a [[wikilink]] when it can; otherwise restore the single brackets
	return decoded.charAt(0) == '[' ? decoded : '[' + decoded + ']'
}
 
function simplifyMatched(str, bracket, url, rest){//arguments: (whole string), '[', url, ' name]'; calls decodeUrl
	//a double ' is not allowed inside urls — clip the url at the first occurrence
	var quotePos = url.indexOf("''")
	if (quotePos != -1) url = url.substring(0, quotePos)
	if (bracket)
		//bracketed: decode when the closing part is present, else assume broken wikicode and keep as-is
		return rest ? decodeUrl(url, rest.replace(/\]+$|^ +| +$/g,'')) : str
	//no brackets, just url: move trailing punctuation out of it, per Parser.php
	var trailClass = '[,;\\\\\.:!\\?' + (!/\(/.test(url) ? '\\)' : '') + ']+$' //no-matching ) counts too
	var trail = url.match(RegExp(trailClass))
	if (trail) url = url.substring(0, url.length - trail[0].length)
	if (/(\}\}|\|)$/.test(url)) return str //trailing | or }} can be a part of template, skip to be safe
	return decodeUrl(url) + str.substring(url.length)
}
 
function decodeUrl(url, name){ //url -> %-decoded -> [[link|name]] (if possible); name is optional
	url = unSecure(url) //secure Wikimedia gateway link into non-secure project link
	//decode %
	if (url.indexOf('%') != -1) try { url = decodeURI(url) } catch(e){} //decodeURI throws on malformed %-sequences
	url = url.replace(/%(3B|2F|2C|3A)/g, function(s){return decodeURIComponent(s)}) //decode ;/,: (reserved, kept by decodeURI)
	//encode back some chars not allowed by MediaWiki
	url = url.replace(/[ <>"\[\]]/g, function(s){return encodeURIComponent(s)}) //" disallowed chars
	if (isBeforeCursor)
		for (var n in window.urlDecoderEngNames) //to eng keywords
			url = url.replace(RegExp('(title=|wiki\/)('+urlDecoderEngNames[n]+':)'), '$1' + n + ':')
	var link = toWikilink(url)
	if (link){ //usually no need for leading colon in articles (e.g. inserting interwiki or category)
		if (wgNamespaceNumber==0 || wgNamespaceNumber==14) link=link.replace(/^:/,'')
		return '[\[' + link + (name?'|'+name:'') + ']]'
	}else if (typeof name == 'string') return '[' + url + (name?' '+name:'') + ']' //empty name
	else return url //return unchanged if cannot convert into wiki link
}
//convert to wiki link
function toWikilink(url){//url -> wikilink, otherwise null
	//try bugzilla and user-defined prefixes
	if (!window.urlDecoderPrefixes) urlDecoderPrefixes = {}
	urlDecoderPrefixes['https://bugzilla.wikimedia.org/show_bug.cgi?id=']='mediazilla'
	for (var key in urlDecoderPrefixes)
		if (url.toLowerCase().indexOf(key)!=-1)
			return urlDecoderPrefixes[key]+':'+ url.substring(url.indexOf(key)+key.length)
	//try WM prefixes: only plain /wiki/ urls without a query string can become wikilinks
	var parts = url.substring(7).split('/')
	if (parts[1]!='wiki' || url.indexOf('?')!=-1) return null
	var linkPrefix = WMPrefixes(url.toLowerCase()), prefixes = ''
	if (!linkPrefix) return null
	var title = url.substring(parts[0].length + parts[1].length + 9) //get part after /wiki/
	title = decodeAnchor(title)
	//build the interwiki prefix, omitting parts equal to the local wiki's own prefix
	if (linkPrefix[0] && (linkPrefix[0] != localPrefix[0])) prefixes = linkPrefix[0]
	if (linkPrefix[1] && (linkPrefix[1] != localPrefix[1])) prefixes += ':' + linkPrefix[1]
	if (prefixes || isColonNeeded(title)) prefixes += ':' //dividing colon or cat/file leading colon
	return prefixes + title
}
 
 
function decodeAnchor(link){//simplify internal link: replace %20 and _ then decode anchor
	link = link.replace(/(_|%20)/g, ' ').replace(/^ +| +$/g, '')
	var parts = link.split('#')
	if (parts.length != 2) return link //no anchor
	var anchor = parts[1], hidIdx = -1, hidden = []
	//decode 4, 3 and 2-byte: http://en.wikipedia.org/wiki/UTF-8
	anchor = anchor.replace(/\.F[0-4]\.[89AB][\dA-F]\.[89AB][\dA-F]\.[89AB][\dA-F]/g, deChar)
	anchor = anchor.replace(/\.E[\dA-F]\.[89AB][\dA-F]\.[89AB][\dA-F]/g, deChar)
	anchor = anchor.replace(/\.[CD][\dA-F]\.[89AB][\dA-F]/g, deChar)
	anchor = anchor.replace( //hide IPs so the 1-byte pass below cannot eat their dotted octets
		/(?:^|[^0-9A-F\.])(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)/,
		function(s){ hidden[++hidIdx] = s; return '\x01' + hidIdx + '\x02' }
	)
	//decode 1-byte chars: all symbols except -.:_ and []{} prohibited in links
	anchor = anchor.replace(/\.[2-7][0-9A-F]/g, function(hhh){
		var ch = deChar(hhh)
		if ('!"#$%&\'()*+,/;<=>?@\\^`~'.indexOf(ch) >= 0) return ch; else return hhh
	})
	function deChar(str){ return tryDecode(str.replace(/\.([0-9A-F][0-9A-F])/g, '%$1')) } //'.D0.90' -> '%D0%90' -> char
	function tryDecode(s){ try {s = decodeURIComponent(s)} catch(e){}; return s } //malformed sequences stay as-is
	//unhide IPs
	for (var i=hidIdx; i>=0; i--) anchor = anchor.replace('\x01'+i+'\x02', hidden[i])
	if (anchor.indexOf("''") != -1) return link //cannot have double '' in link
	else return parts[0] + '#' + anchor
}
 
function WMPrefixes(url){ // http://en.wikipedia.org/wiki/... -> ['w', 'en']; null for non-Wikimedia urls
	var dd = url.substring(7).split('/')[0].split('.') // -> ['en','wikipedia','org']
	if (dd.pop() != 'org') return null
	var proj='', lang = '', part = dd.pop()
	//single-language projects first (note: deliberate assignment-in-condition, empty then-branch)
	if (proj = {'mediawiki':'mw','wikimediafoundation':'foundation'}[part]);
	else if (proj = {'wikipedia':'w','wikibooks':'b','wikinews':'n','wikiquote':'q',
		'wikisource':'s','wikiversity':'v','wiktionary':'wikt'}[part]){
		//multi-language project: next ___domain label is the language code
		lang = dd.pop()
		if (!lang || lang=='www') lang = ''
		else if (lang=='test') {lang=''; proj='testwiki'}
	}else if (part == 'wikimedia'){
		part = dd.pop()
		if (!part || part=='www') proj = 'foundation'
		else if (/^(meta|commons|incubator|species|strategy)$/.test(part)) proj = part
		else return null
	}else return null
	return [proj, lang]
}
 
function unSecure(url){
	//rewrite a secure.wikimedia.org gateway url as the project's canonical http url
	var secureGateway = /https:\/\/secure\.wikimedia\.org\/(\w+)\/(\w+)\/([^\]\|\n\r ]+)/
	return url.replace(secureGateway, 'http://$2.$1.org/$3')
}