MediaWiki

Difference between revisions of "LexemeQueriesGenerator.js"

m
 
(7 intermediate revisions by the same user not shown)
Line 1: Line 1:
/* TODO
+
/* ************************************************************************** */
 +
/* Lexeme Queries Generator (lqg) ******************************************* */
 +
// Description: Generates a form to search for words in Wikidata Lexemes and DBnary, a Wiktionary-based wikibase. Returns a SPARQL query OR opens that query.
 +
// Usage: [[Template:LexemeQueriesGenerator]]
 +
// Usage: open [[Help:SPARQL 2#Tools]] > fill the form > click "Generate" or "Run !"
 +
// Hack pad:https://jsfiddle.net/hugolpz/9jqg72u8/
 +
 
 +
/* ************************************************************************** */
 +
// Author: Sina Ahmadi @sinaahmadi
 +
// Code: https://github.com/sinaahmadi/sinaahmadi.github.io/blob/master/_posts/2021-10-04-sparql-query-generator-for-lexicographical-data.md
 +
// Revamp: @hugolpz
 +
// LICENSE: MIT
 +
 
 +
/* ************************************************************************** */
 +
/* TODO ********************************************************************* * /
 
1) List of languages with recordings AND number of speakers
 
1) List of languages with recordings AND number of speakers
 
See base : https://lingualibre.org/wiki/LinguaLibre:List_of_languages
 
See base : https://lingualibre.org/wiki/LinguaLibre:List_of_languages
Line 10: Line 24:
 
[x] Run/Open Query
 
[x] Run/Open Query
  
Improve Query with federate SPARQL including LinguaLibre
+
Improve  
1) Explore
+
[ ] Query with federate SPARQL including LinguaLibre
 +
[ ] Factorize SPARQL codes.
  
 
Add to LinguaLibre
 
Add to LinguaLibre
1) Add to [[User:Yug/OOUI]]
+
[x] Add to [[User:Yug/OOUI]], [[Help:SPARQL 2]]
2) Migrate to general space.
+
[x] Migrate to general space. : MediaWiki:LexemeQueriesGenerator.js ; MediaWiki:Common.js
  
 
Translate
 
Translate
1) Find a what to translate ?
+
[ ] Find a way to translate ?
  
 
MediaWiki Extension ?
 
MediaWiki Extension ?
1) Explore
+
- [x] Explore -> Nope.
 
*/
 
*/
  

Latest revision as of 11:15, 17 January 2022

/* ************************************************************************** */
/* Lexeme Queries Generator (lqg) ******************************************* */
// Description: Generates a form to search for words in Wikidata Lexemes and DBnary, a Wiktionary-based wikibase. Returns a SPARQL query OR opens that query.
// Usage: [[Template:LexemeQueriesGenerator]]
// Usage: open [[Help:SPARQL 2#Tools]] > fill the form > click "Generate" or "Run !"
// Hack pad:https://jsfiddle.net/hugolpz/9jqg72u8/

/* ************************************************************************** */
// Author: Sina Ahmadi @sinaahmadi
// Code: https://github.com/sinaahmadi/sinaahmadi.github.io/blob/master/_posts/2021-10-04-sparql-query-generator-for-lexicographical-data.md
// Revamp: @hugolpz 
// LICENSE: MIT

/* ************************************************************************** */
/* TODO ********************************************************************* * /
1) List of languages with recordings AND number of speakers
See base : https://lingualibre.org/wiki/LinguaLibre:List_of_languages
See nbSp : https://w.wiki/4ZB3

Finish hacking based on https://jsfiddle.net/hugolpz/rygo9s5b/
[x] getData from elements
[x] Identify suitable query
[x] Edit suitable query
[x] Run/Open Query

Improve 
[ ] Query with federate SPARQL including LinguaLibre
[ ] Factorize SPARQL codes.

Add to LinguaLibre
[x] Add to [[User:Yug/OOUI]], [[Help:SPARQL 2]]
[x] Migrate to general space. : MediaWiki:LexemeQueriesGenerator.js ; MediaWiki:Common.js

Translate
[ ] Find a way to translate ?

MediaWiki Extension ?
- [x] Explore -> Nope.
*/


// Toolbox

/* ********************************************************************** */
/* QUERIES BASIS ******************************************************** */
// Current: Wikidata, Dbnary.
 /**
  * SPARQL query templates, keyed first by endpoint (`wikidata`, `dbnary`)
  * and then by query type.
  *
  * The templates are plain strings containing upper-case placeholders that
  * `generateQuery` substitutes before display:
  *   - VALUESTOBEADDEDHERE : replaced by the endpoint's `values` snippet
  *                           (or by a "# no lemma provided" comment).
  *   - 'book' (in `values`): replaced by the word typed in the form.
  *   - LNGCDE              : value of the `languages` combo box.
  *   - LNGCDETRG           : value of the `translations` combo box.
  *   - GLWSSA              : `iso1` field of the selected language.
  *   - POSTAG              : data of the selected part-of-speech option.
  *   - POSTAGNM            : label of the selected part-of-speech option.
  *
  * Whitespace inside the template literals is part of the generated query
  * text and is therefore left untouched.
  */
 var queries = {
 // Queries for Wikidata
  wikidata : 
    { 
      // VALUES clause injected in place of VALUESTOBEADDEDHERE when a word is provided.
      values :`VALUES ?word {'book'@GLWSSA}`,
      // Lexical entries matching the word, language and part-of-speech.
      // NOTE: the trailing "#test" below is a SPARQL comment that ends up in the generated query.
      basic: `SELECT * WHERE {
      VALUESTOBEADDEDHERE
      ?l a ontolex:LexicalEntry ;
        dct:language wd:LNGCDE ;
        ontolex:lexicalForm ?form ; #test
        wikibase:lexicalCategory wd:POSTAG ;
        wikibase:lemma ?lemma .
      ?form ontolex:representation ?word .
    }`,
    // Same as `basic`, additionally binding ?sense.
    senses: `SELECT * WHERE {
      VALUESTOBEADDEDHERE
      ?l a ontolex:LexicalEntry ;
        dct:language wd:LNGCDE ;
        ontolex:lexicalForm ?form ;
        wikibase:lexicalCategory wd:POSTAG ;
        wikibase:lemma ?lemma ;
        ontolex:sense ?sense .
      ?form ontolex:representation ?word .
    }`,
  // Senses plus their definitions (?gloss), filtered on the GLWSSA language tag.
  // Unlike `basic`/`senses`, the lexical category is left as a variable (?category).
  sensesDefinitions : `SELECT * WHERE {
  VALUESTOBEADDEDHERE
	?l a ontolex:LexicalEntry ;
		dct:language wd:LNGCDE ;
		wikibase:lemma ?lemma ;
		ontolex:lexicalForm ?form ;
		wikibase:lexicalCategory ?category ;
		ontolex:sense ?sense .
	?form ontolex:representation ?word .
	?language wdt:P218 "GLWSSA" .
	?sense skos:definition ?gloss .
	FILTER EXISTS {?l ontolex:sense ?sense }
	FILTER(LANG(?gloss) = "GLWSSA")
}`,
  // Same as `sensesDefinitions`, with an OPTIONAL block binding ?example from P5831 statements.
  sensesDefinitionsExamples: `SELECT * WHERE {
  VALUESTOBEADDEDHERE
	?l a ontolex:LexicalEntry ;
		dct:language wd:LNGCDE ;
		wikibase:lemma ?lemma ;
		ontolex:lexicalForm ?form ;
		wikibase:lexicalCategory ?category ;
		ontolex:sense ?sense .
		?language wdt:P218 "GLWSSA" .
	?form ontolex:representation ?word .
	?sense skos:definition ?gloss .
	OPTIONAL{
		?l p:P5831 ?statement .
		?statement ps:P5831 ?example .
	}
	FILTER EXISTS {?l ontolex:sense ?sense }
	FILTER(LANG(?gloss) = "GLWSSA")
}`,
  // Pairs of source/target lemmas whose senses share the same P5137 value.
  // Has no VALUESTOBEADDEDHERE placeholder: selected by generateQuery when no word is provided.
  translation: `SELECT DISTINCT * WHERE {
	?source dct:language wd:LNGCDE;
		wikibase:lemma ?sourceLemma;
		ontolex:sense [ wdt:P5137 ?sense ].
	?target dct:language wd:LNGCDETRG;
		wikibase:lemma ?targetLemma;
		ontolex:sense [ wdt:P5137 ?sense ].
} ORDER BY ASC(UCASE(str(?sourceLemma)))`,
  // Same as `translation`, restricted to the provided word and part-of-speech.
  translationLemma : `SELECT DISTINCT * WHERE {
  VALUESTOBEADDEDHERE
	?source dct:language wd:LNGCDE;
		wikibase:lemma ?sourceLemma;
		ontolex:lexicalForm ?form ;
		wikibase:lexicalCategory wd:POSTAG ;
		ontolex:sense [ wdt:P5137 ?sense ].
	?target dct:language wd:LNGCDETRG;
		wikibase:lemma ?targetLemma;
		ontolex:sense [ wdt:P5137 ?sense ].
	?form ontolex:representation ?word .
} ORDER BY ASC(UCASE(str(?sourceLemma)))`
},
 // Queries for Dbnary
dbnary : { 
 // VALUES clauses injected in place of VALUESTOBEADDEDHERE: the word and the part-of-speech IRI.
 values: `VALUES ?label {'book'@GLWSSA}
 VALUES ?pos {<http://www.lexinfo.net/ontology/2.0/lexinfo#POSTAGNM>}`,
 // Lexical entries matching the label, filtered on the GLWSSA language code.
 basic: `SELECT * WHERE {VALUESTOBEADDEDHERE
   ?lexeme a ontolex:LexicalEntry ;
     rdfs:label ?label ;
     ontolex:canonicalForm ?form ;
     lime:language ?lang ;
     lexinfo:partOfSpeech   ?pos .
  FILTER(?lang = "GLWSSA")
}`,
  // Same as `basic`, additionally binding ?sense.
  senses: `SELECT * WHERE {
  VALUESTOBEADDEDHERE
   ?lexeme a ontolex:LexicalEntry ;
     rdfs:label ?label ;
     ontolex:canonicalForm ?form ;
     lime:language ?lang ;
     lexinfo:partOfSpeech   ?pos ;
     ontolex:sense  ?sense .
 FILTER(?lang = "GLWSSA")
}`,
  // Definitions of the senses returned by an inner sub-select equivalent to `senses`.
  sensesDefinitions: `SELECT ?lexeme ?label ?pos ?sense ?definition
WHERE {
   ?sense a ontolex:LexicalSense ;
     skos:definition ?def .
   ?def rdf:value ?definition .
   FILTER(lang(?definition) = "GLWSSA")
   {
      SELECT * WHERE {VALUESTOBEADDEDHERE
         ?lexeme a ontolex:LexicalEntry ;
         rdfs:label ?label ;
         ontolex:canonicalForm ?form ;
         lime:language ?lang ;
         lexinfo:partOfSpeech   ?pos ;
         ontolex:sense  ?sense .
      FILTER(?lang = "GLWSSA")
      } 
   }
}`,
  // Not written yet: the three templates below are empty strings.
  // generateQuery falls back from sensesDefinitionsExamples to sensesDefinitions for this endpoint.
  sensesDefinitionsExamples: ``,
  translation: ``,
  translationLemma: ``
  }
 };
 
/* ********************************************************************** */
/* OOJS / OOUI ********************************************************** */
// Data & options
// Option lists feeding the form widgets.
//   data  : identifier substituted into the queries (a Q-id, or a key of `queries`)
//   ll    : secondary identifier, empty for most entries
//           NOTE(review): presumably the LinguaLibre item id; not read anywhere in this file.
//   iso1  : short language code substituted for the GLWSSA placeholder
//   label : text shown to the user
var baseLanguages = [ // Major languages: cmn, spa, eng, fra, ara, rus, hin, swa
      // data https://w.wiki/4ZB3
      { data: 'Q13955', ll: '', label: 'ara — Arabic' },
      { data: 'Q9610', ll: '', label: 'ben — Bengali' },
      { data: 'Q9192', ll: 'Q113', label: 'cmn — MandarinChinese' },
      { data: 'Q188', ll: '', label: 'deu — German' },
      { data: 'Q1860', ll: '', label: 'eng — English' },
      { data: 'Q150', ll: '', label: 'fra — French' },
      { data: 'Q1568', ll: '', label: 'hin — Hindi' },
      { data: 'Q5287', ll: '', label: 'jpn — Japanese' },
      { data: 'Q1571', ll: 'Q34', label: 'mar — Marathi' },
      { data: 'Q58635', ll: '', label: 'pan — Punjabi' },
      { data: 'Q5146', ll: '', label: 'por — Portuguese' },
      { data: 'Q7737', ll: '', label: 'rus — Russian' },
      { data: 'Q1321', ll: '', label: 'spa — Spanish' },
    ],
    availableLanguages = [
       { data: 'Q13955', ll: '', iso1: 'ar', label: 'ara — Arabic' },
       { data: 'Q9610', ll: '', iso1: 'bn', label: 'ben — Bengali' },
       { data: 'Q12107', ll: 'Q209', iso1: 'br', label: 'bre: Breton' }, // lili
       { data: 'Q9192', ll: 'Q113', iso1: 'zh', label: 'cmn — MandarinChinese' },
       { data: 'Q188', ll: '', iso1: 'de', label: 'deu — German' },
       { data: 'Q1860', ll: '', iso1: 'en', label: 'eng — English' },
       { data: 'Q150', ll: '', iso1: 'fr', label: 'fra — French' },
       { data: 'Q33454', ll: '', iso1: 'ff', label: 'ful — Fula' },
       { data: 'Q56475', ll: '', iso1: 'ha', label: 'hau — Hausa' },
       { data: 'Q1568', ll: '', iso1: 'hi', label: 'hin — Hindi' },
       { data: 'Q9240', ll: '', iso1: 'id', label: 'ind — Indonesian' },
       { data: 'Q652', ll: '', iso1: 'it', label: 'ita — Italian' },
       { data: 'Q33549', ll: '', iso1: 'jv', label: 'jav — Javanese' },
       { data: 'Q5287', ll: '', iso1: 'ja', label: 'jpn — Japanese' },
       { data: 'Q9176', ll: '', iso1: 'ko', label: 'kor — Korean' },
       { data: 'Q1571', ll: '', iso1: 'mr', label: 'mar — Marathi' },
       { data: 'Q9237', ll: '', iso1: 'ms', label: 'msa — Malay' },
       { data: 'Q58635', ll: '', iso1: 'pa', label: 'pan — Punjabi' },
       { data: 'Q5146', ll: '', iso1: 'pt', label: 'por — Portuguese' },
       { data: 'Q7737', ll: '', iso1: 'ru', label: 'rus — Russian' },
       { data: 'Q1321', ll: '', iso1: 'es', label: 'spa — Spanish' },
       { data: 'Q5885', ll: '', iso1: 'ta', label: 'tam — Tamil' },
       { data: 'Q8097', ll: '', iso1: 'te', label: 'tel — Telugu' },
       { data: 'Q256', ll: '', iso1: 'tr', label: 'tur — Turkish' },
       { data: 'Q1617', ll: '', iso1: 'ur', label: 'urd — Urdu' },
       { data: 'Q9199', ll: '', iso1: 'vi', label: 'vie — Vietnamese' },
    ],
    // Options of the "languages" combo box (shallow copy of availableLanguages).
    languagesArray = [ ...availableLanguages ],
    // Part-of-speech options, already wrapped as OOUI menu options.
    posArray = [
      { data: 'Q1084', label: 'noun' },
      { data: 'Q36224', label: 'pronoun' },
      { data: 'Q24905', label: 'verb' },
      { data: 'Q380057', label: 'adverb' },
      { data: 'Q34698', label: 'adjective' },
      { data: 'Q21087400', label: 'quantitative' },
      { data: 'Q83034', label: 'interjection' },
      { data: 'Q4833830', label: 'preposition' },
      { data: 'Q103184', label: 'article' },
      { data: 'Q36484', label: 'conjunction' },
    ].map(o => new OO.ui.MenuOptionWidget({ data: o.data, label: o.label })),
    // Query types; `data` must be a key of queries.<endpoint>.
    infoTypesArray = [
      { data: 'basic', label: 'Just look !' },
      { data: 'senses', label: 'Sense' },
      { data: 'sensesDefinitions', label: 'Sense, definition' },
      { data: 'sensesDefinitionsExamples', label: 'Sense, definition and examples' },
    ].map(o => new OO.ui.MenuOptionWidget({ data: o.data, label: o.label })),
    // Options of the "translations" combo box (shallow copy of baseLanguages).
    languagesArrayT = [ ...baseLanguages, ],
    // Endpoints; `data` is used verbatim as a key of `queries`, so it must be
    // spelled exactly like those keys ('wikidata', 'dbnary' — all lower-case).
    endpointsArray = [
      { data: 'wikidata', label: 'Wikidata', selected: true },
      { data: 'dbnary', label: 'Dbnary' },
      //{ data: 'LinguaLibre', label: 'LinguaLibre' },
    ];

// Elements
// Free-text input for the word to look up.
var lemma = new OO.ui.TextInputWidget({
  id: 'lqg-lemma',
  icon: 'search',
  placeholder: 'books',
  value: 'books',
  label: 'Word to search',
});

// Language of the searched word: combo box filtered on substring as the user types.
var languages = new OO.ui.ComboBoxInputWidget({
  id: 'lqg-languages',
  placeholder: 'Target language',
  options: languagesArray,
  menu: { filterFromInput: true, filterMode: 'substring' },
  // value: 'Q34',
});

// Part-of-speech selector.
var pos = new OO.ui.DropdownWidget({
  id: 'lqg-pos',
  label: 'Part-of-speech',
  menu: { items: posArray },
});

// Kind of information the query should return.
var infoTypes = new OO.ui.DropdownWidget({
  id: 'lqg-infoTypes',
  label: 'Information to fetch ... (if any)',
  menu: { items: infoTypesArray },
});

// Language to fetch translations in: combo box filtered on substring as the user types.
var translations = new OO.ui.ComboBoxInputWidget({
  id: 'lqg-translations',
  placeholder: 'Translations to fetch ... (if any)',
  options: languagesArrayT,
  menu: { filterFromInput: true, filterMode: 'substring' },
});

// Endpoint the query is written for.
var endpoints = new OO.ui.RadioSelectInputWidget({
  id: 'lqg-endpoints',
  label: 'Radios buttons',
  options: endpointsArray,
});

// "LIMIT 100" toggle and its caption.
var limit = new OO.ui.CheckboxInputWidget({ id: 'lqg-limit', selected: true });
var limitLabel = new OO.ui.LabelWidget({ label: 'Limit to 100 (faster)' });

// Text area receiving the generated query.
var queryDisplay = new OO.ui.MultilineTextInputWidget({
  id: 'lqg-queryDisplay',
  //value: `# Query will appear here.\n\n\n\n\n\n`,
  placeholder: `# Query will appear here.\n\n\n\n\n\n`,
  multiline: true,
  autosize: true,
  minRows: 10,
  maxRows: 20,
});

// Action buttons; their click handlers are attached by element id elsewhere in the file.
var generate = new OO.ui.ButtonWidget({
  id: 'lqg-generate',
  label: 'Generate',
});
var run = new OO.ui.ButtonWidget({
  id: 'lqg-run',
  label: 'Run !',
});
  
// An example of a fieldset with horizontal layout.
// Form container: one field holding two horizontal rows (inputs, then actions)
// followed by the query text area. Mounted into the #lqg element of the page.
var fieldset = new OO.ui.FieldsetLayout({
  label: 'Queries Generator for Wikidata Lexemes',
});

fieldset.addItems([
  (function () {
    // Row 1: search criteria.
    var criteriaRow = new OO.ui.HorizontalLayout({
      items: [lemma, pos, languages, infoTypes, translations],
    });
    // Row 2: actions and query options.
    var actionsRow = new OO.ui.HorizontalLayout({
      items: [generate, run, limit, limitLabel, endpoints],
    });
    var formBody = new OO.ui.Widget({
      content: [criteriaRow, actionsRow, queryDisplay],
    });
    return new OO.ui.FieldLayout(formBody, {
      label: 'Select the suitable values to build your Wikidata Lexemes Query.',
      align: 'top',
    });
  })(),
]);

$('#lqg').append(fieldset.$element);

// Check data
/**
 * Debug helper: logs what a widget exposes through its three accessors.
 *
 * @param {*} identity - Prefix prepended to each log line (callers pass a number).
 * @param {Object} element - Object providing getElementId(), getData() and getValue().
 */
var checkAvailableData = function (identity, element) {
  // [log suffix, accessor name] — logged in this order, one console line each.
  var probes = [
    ['a getElementId(): ', 'getElementId'],
    ['b getData(): ', 'getData'],
    ['c getValue(): ', 'getValue'],
  ];
  probes.forEach(function (probe) {
    console.log(identity + probe[0], element[probe[1]]());
  });
};

// Debug wiring: dump widget state whenever the word or language input loses focus,
// and once right away for the language combo box.
$('#lqg-lemma').on('focusout', () => {
  checkAvailableData(1, lemma);
});
$('#lqg-languages').on('focusout', () => {
  checkAvailableData(2, languages);
});
checkAvailableData(2, languages);



/* ********************************************************************** */
/* GENERATE QUERY STRING ************************************************ */
// Current: Wikidata, Dbnary.
 /**
  * Builds a SPARQL query from the current form state and writes it into
  * `queryDisplay`.
  *
  * Steps:
  *   1. read the widgets (lemma, pos, languages, infoTypes, translations,
  *      limit, endpoints);
  *   2. pick the template in `queries[endpoint]`;
  *   3. substitute the placeholders (LNGCDE, LNGCDETRG, GLWSSA, POSTAG,
  *      POSTAGNM, WORD, VALUESTOBEADDEDHERE);
  *   4. append "LIMIT 100" when the limit checkbox is ticked.
  *
  * When no usable template exists for the selection, a "#" comment is
  * displayed instead of a query.
  */
 var generateQuery = function () {
   // Data of the item currently selected in a DropdownWidget, or null.
   var selectedDropdown = function (group) {
     var items = group.getMenu().items.filter(item => item.selected == true);
     return items[0] ? items[0].data : null;
   };
   // Looks up `field` (default 'label') of the entry of `arr` whose data is `qid`, or null.
   var qid2value = function (arr, qid, field) {
     field = field || 'label';
     var match = arr.filter(item => item.data == qid);
     return match[0] ? match[0][field] : null;
   };
   // Escapes a user string for use inside a single-quoted SPARQL literal.
   var escapeLiteral = function (text) {
     return String(text).replace(/\\/g, '\\\\').replace(/'/g, "\\'");
   };

   // getData from elements
   var posQid = selectedDropdown(pos);
   var languageQid = languages.getValue();
   var form = {
     lemma: lemma.getValue(), // string
     pos: posQid, // Qid
     posLabel: qid2value(posArray, posQid), // noun
     languages: languageQid, // Qid
     languagesISO1: qid2value(availableLanguages, languageQid, 'iso1'), // en
     infoTypes: selectedDropdown(infoTypes), // string
     translations: translations.getValue(), // Qid
     limit: limit.isSelected(), // boolean
     // Lower-cased because the value is used as a key of `queries`.
     endpoints: String(endpoints.getValue()).toLowerCase(), // wikidata|dbnary
   };

   // Identify suitable query.
   // An untouched combo box yields an empty string, so test for emptiness
   // (the literal "null" is still accepted as "nothing selected").
   var hasTranslation = Boolean(form.translations) && form.translations !== 'null';
   if (form.endpoints === 'wikidata' && hasTranslation) {
     form.infoTypes = form.lemma ? 'translationLemma' : 'translation';
   }
   if (form.endpoints === 'dbnary' && form.infoTypes === 'sensesDefinitionsExamples') {
     form.infoTypes = 'sensesDefinitions';
   }
   if (!form.infoTypes) {
     form.infoTypes = 'basic'; // nothing picked in the dropdown
   }
   var endpointQueries = queries[form.endpoints] || {};
   var template = endpointQueries[form.infoTypes];
   if (!template) {
     queryDisplay.setValue('# No query template available for this endpoint and these options.');
     return;
   }

   // Edit suitable query.
   // Replacement callbacks are used so that "$" sequences in the inserted
   // values are never interpreted as replacement patterns.
   // LNGCDETRG goes first because LNGCDE is a prefix of it.
   var safeLemma = escapeLiteral(form.lemma);
   var fillPlaceholders = function (text) {
     return text
       .replaceAll('LNGCDETRG', () => form.translations) // Q
       .replaceAll('LNGCDE', () => form.languages) // Q
       .replaceAll('GLWSSA', () => form.languagesISO1) // en
       .replaceAll('POSTAGNM', () => form.posLabel) // noun
       .replaceAll('POSTAG', () => form.pos) // Q
       .replaceAll('WORD', () => safeLemma);
   };
   // The user's word is inserted last so that it can never collide with a placeholder.
   var values = form.lemma
     ? fillPlaceholders(endpointQueries.values).replace('book', () => safeLemma)
     : '# no lemma provided';
   var query = fillPlaceholders(template)
     .replace('VALUESTOBEADDEDHERE', () => values)
     .concat(form.limit ? '\nLIMIT 100' : '');

   // Inject query
   queryDisplay.setValue(query);
 };

/* ********************************************************************** */
/* OPENS EXTERNAL QUERY SERVICE ***************************************** */
// Current: Wikidata, Dbnary. Broken: lingualibre.
/**
 * Opens the query currently shown in `queryDisplay` in a new browser tab,
 * on the query service of the selected endpoint. Does nothing when the
 * text area is empty.
 */
var runQuery = function () {
  var selectedEndpoint = endpoints.getValue();
  var sparql = queryDisplay.getValue();
  if (!sparql) {
    return;
  }

  // Any endpoint other than 'wikidata' is sent to the Dbnary SPARQL service.
  var serviceUrl;
  if (selectedEndpoint == 'wikidata') {
    serviceUrl = 'https://query.wikidata.org/#';
  } else {
    serviceUrl = 'http://kaiko.getalp.org/sparql?default-graph-uri=&query=';
  }

  window.open(serviceUrl.concat(encodeURIComponent(sparql)), '_blank');
};
 // Button wiring: "Run !" opens the query service, "Generate" fills the text area.
 $('#lqg-run').on('click', () => {
   runQuery();
 });
 $('#lqg-generate').on('click', () => {
   generateQuery();
 });