Skip to content

Uses the reticulate package to import the flair.datasets module from the Flair Python library, making Flair's datasets available in an R environment.

Usage

flair_datasets()

Value

A Python module (flair.datasets) from Flair, which can be used for NLP tasks.

References

Python equivalent:


from flair.datasets import UD_ENGLISH
corpus = UD_ENGLISH().downsample(0.1)

See also

https://github.com/flairNLP/flair for additional information on Flair's capabilities and datasets in NLP.

Examples

if (FALSE) { # \dontrun{
UD_ENGLISH <- flair_datasets()$UD_ENGLISH
corpus <- UD_ENGLISH()$downsample(0.1)} # }

# print the names of everything exported by flair.datasets (datasets, corpus classes, and submodules)
names(flair_datasets())
#>   [1] "AGNEWS"                          
#>   [2] "AMAZON_REVIEWS"                  
#>   [3] "ANAT_EM"                         
#>   [4] "AZDZ"                            
#>   [5] "BC2GM"                           
#>   [6] "BIOBERT_CHEMICAL_BC4CHEMD"       
#>   [7] "BIOBERT_CHEMICAL_BC5CDR"         
#>   [8] "BIOBERT_DISEASE_BC5CDR"          
#>   [9] "BIOBERT_DISEASE_NCBI"            
#>  [10] "BIOBERT_GENE_BC2GM"              
#>  [11] "BIOBERT_GENE_JNLPBA"             
#>  [12] "BIOBERT_SPECIES_LINNAEUS"        
#>  [13] "BIOBERT_SPECIES_S800"            
#>  [14] "BIONLP2013_CG"                   
#>  [15] "BIONLP2013_PC"                   
#>  [16] "BIOSCOPE"                        
#>  [17] "BIOSEMANTICS"                    
#>  [18] "BIO_INFER"                       
#>  [19] "CDR"                             
#>  [20] "CELL_FINDER"                     
#>  [21] "CEMP"                            
#>  [22] "CHEMDNER"                        
#>  [23] "CLL"                             
#>  [24] "COMMUNICATIVE_FUNCTIONS"         
#>  [25] "CONLL_03"                        
#>  [26] "CONLL_03_DUTCH"                  
#>  [27] "CONLL_03_GERMAN"                 
#>  [28] "CONLL_03_SPANISH"                
#>  [29] "CONLL_2000"                      
#>  [30] "CRAFT"                           
#>  [31] "CRAFT_V4"                        
#>  [32] "CSVClassificationCorpus"         
#>  [33] "CSVClassificationDataset"        
#>  [34] "CTD_CHEMICALS_DICTIONARY"        
#>  [35] "CTD_DISEASES_DICTIONARY"         
#>  [36] "ClassificationCorpus"            
#>  [37] "ClassificationDataset"           
#>  [38] "ColumnCorpus"                    
#>  [39] "ColumnDataset"                   
#>  [40] "DECA"                            
#>  [41] "DataLoader"                      
#>  [42] "DataPairCorpus"                  
#>  [43] "DataPairDataset"                 
#>  [44] "DataTripleCorpus"                
#>  [45] "DataTripleDataset"               
#>  [46] "EntityLinkingDictionary"         
#>  [47] "FEWNERD"                         
#>  [48] "FSU"                             
#>  [49] "FeideggerCorpus"                 
#>  [50] "FeideggerDataset"                
#>  [51] "FlairDatapointDataset"           
#>  [52] "GELLUS"                          
#>  [53] "GERMEVAL_2018_OFFENSIVE_LANGUAGE"
#>  [54] "GLUE_COLA"                       
#>  [55] "GLUE_MNLI"                       
#>  [56] "GLUE_MRPC"                       
#>  [57] "GLUE_QNLI"                       
#>  [58] "GLUE_QQP"                        
#>  [59] "GLUE_RTE"                        
#>  [60] "GLUE_SST2"                       
#>  [61] "GLUE_STSB"                       
#>  [62] "GLUE_WNLI"                       
#>  [63] "GO_EMOTIONS"                     
#>  [64] "GPRO"                            
#>  [65] "HUNER_CELL_LINE"                 
#>  [66] "HUNER_CELL_LINE_CELL_FINDER"     
#>  [67] "HUNER_CELL_LINE_CLL"             
#>  [68] "HUNER_CELL_LINE_GELLUS"          
#>  [69] "HUNER_CELL_LINE_JNLPBA"          
#>  [70] "HUNER_CHEMICAL"                  
#>  [71] "HUNER_CHEMICAL_CDR"              
#>  [72] "HUNER_CHEMICAL_CEMP"             
#>  [73] "HUNER_CHEMICAL_CHEBI"            
#>  [74] "HUNER_CHEMICAL_CHEMDNER"         
#>  [75] "HUNER_CHEMICAL_CRAFT_V4"         
#>  [76] "HUNER_CHEMICAL_SCAI"             
#>  [77] "HUNER_DISEASE"                   
#>  [78] "HUNER_DISEASE_CDR"               
#>  [79] "HUNER_DISEASE_MIRNA"             
#>  [80] "HUNER_DISEASE_NCBI"              
#>  [81] "HUNER_DISEASE_PDR"               
#>  [82] "HUNER_DISEASE_SCAI"              
#>  [83] "HUNER_DISEASE_VARIOME"           
#>  [84] "HUNER_GENE"                      
#>  [85] "HUNER_GENE_BC2GM"                
#>  [86] "HUNER_GENE_BIO_INFER"            
#>  [87] "HUNER_GENE_CELL_FINDER"          
#>  [88] "HUNER_GENE_CHEBI"                
#>  [89] "HUNER_GENE_CRAFT_V4"             
#>  [90] "HUNER_GENE_DECA"                 
#>  [91] "HUNER_GENE_FSU"                  
#>  [92] "HUNER_GENE_GPRO"                 
#>  [93] "HUNER_GENE_IEPA"                 
#>  [94] "HUNER_GENE_JNLPBA"               
#>  [95] "HUNER_GENE_LOCTEXT"              
#>  [96] "HUNER_GENE_MIRNA"                
#>  [97] "HUNER_GENE_OSIRIS"               
#>  [98] "HUNER_GENE_VARIOME"              
#>  [99] "HUNER_SPECIES"                   
#> [100] "HUNER_SPECIES_CELL_FINDER"       
#> [101] "HUNER_SPECIES_CHEBI"             
#> [102] "HUNER_SPECIES_CRAFT_V4"          
#> [103] "HUNER_SPECIES_LINNEAUS"          
#> [104] "HUNER_SPECIES_LOCTEXT"           
#> [105] "HUNER_SPECIES_MIRNA"             
#> [106] "HUNER_SPECIES_S800"              
#> [107] "HUNER_SPECIES_VARIOME"           
#> [108] "HunerEntityLinkingDictionary"    
#> [109] "IEPA"                            
#> [110] "IMDB"                            
#> [111] "JNLPBA"                          
#> [112] "KEYPHRASE_INSPEC"                
#> [113] "KEYPHRASE_SEMEVAL2010"           
#> [114] "KEYPHRASE_SEMEVAL2017"           
#> [115] "LINNEAUS"                        
#> [116] "LOCTEXT"                         
#> [117] "MASAKHA_POS"                     
#> [118] "MIRNA"                           
#> [119] "MongoDataset"                    
#> [120] "NCBI_DISEASE"                    
#> [121] "NCBI_GENE_HUMAN_DICTIONARY"      
#> [122] "NCBI_TAXONOMY_DICTIONARY"        
#> [123] "NEL_ENGLISH_AIDA"                
#> [124] "NEL_ENGLISH_AQUAINT"             
#> [125] "NEL_ENGLISH_IITB"                
#> [126] "NEL_ENGLISH_REDDIT"              
#> [127] "NEL_ENGLISH_TWEEKI"              
#> [128] "NEL_GERMAN_HIPE"                 
#> [129] "NER_ARABIC_ANER"                 
#> [130] "NER_ARABIC_AQMAR"                
#> [131] "NER_BASQUE"                      
#> [132] "NER_CHINESE_WEIBO"               
#> [133] "NER_DANISH_DANE"                 
#> [134] "NER_ENGLISH_MOVIE_COMPLEX"       
#> [135] "NER_ENGLISH_MOVIE_SIMPLE"        
#> [136] "NER_ENGLISH_PERSON"              
#> [137] "NER_ENGLISH_RESTAURANT"          
#> [138] "NER_ENGLISH_SEC_FILLINGS"        
#> [139] "NER_ENGLISH_STACKOVERFLOW"       
#> [140] "NER_ENGLISH_TWITTER"             
#> [141] "NER_ENGLISH_WEBPAGES"            
#> [142] "NER_ENGLISH_WIKIGOLD"            
#> [143] "NER_ENGLISH_WNUT_2020"           
#> [144] "NER_ESTONIAN_NOISY"              
#> [145] "NER_FINNISH"                     
#> [146] "NER_GERMAN_BIOFID"               
#> [147] "NER_GERMAN_EUROPARL"             
#> [148] "NER_GERMAN_GERMEVAL"             
#> [149] "NER_GERMAN_LEGAL"                
#> [150] "NER_GERMAN_MOBIE"                
#> [151] "NER_GERMAN_POLITICS"             
#> [152] "NER_HIPE_2022"                   
#> [153] "NER_HUNGARIAN"                   
#> [154] "NER_ICDAR_EUROPEANA"             
#> [155] "NER_ICELANDIC"                   
#> [156] "NER_JAPANESE"                    
#> [157] "NER_MASAKHANE"                   
#> [158] "NER_MULTI_CONER"                 
#> [159] "NER_MULTI_CONER_V2"              
#> [160] "NER_MULTI_WIKIANN"               
#> [161] "NER_MULTI_WIKINER"               
#> [162] "NER_MULTI_XTREME"                
#> [163] "NER_NERMUD"                      
#> [164] "NER_SWEDISH"                     
#> [165] "NER_TURKU"                       
#> [166] "NER_UKRAINIAN"                   
#> [167] "NEWSGROUPS"                      
#> [168] "ONTONOTES"                       
#> [169] "OSIRIS"                          
#> [170] "OcrJsonDataset"                  
#> [171] "OpusParallelCorpus"              
#> [172] "PDR"                             
#> [173] "ParallelTextCorpus"              
#> [174] "ParallelTextDataset"             
#> [175] "RE_ENGLISH_CONLL04"              
#> [176] "RE_ENGLISH_DRUGPROT"             
#> [177] "RE_ENGLISH_SEMEVAL2010"          
#> [178] "RE_ENGLISH_TACRED"               
#> [179] "S800"                            
#> [180] "SCAI_CHEMICALS"                  
#> [181] "SCAI_DISEASE"                    
#> [182] "SENTEVAL_CR"                     
#> [183] "SENTEVAL_MPQA"                   
#> [184] "SENTEVAL_MR"                     
#> [185] "SENTEVAL_SST_BINARY"             
#> [186] "SENTEVAL_SST_GRANULAR"           
#> [187] "SENTEVAL_SUBJ"                   
#> [188] "SENTIMENT_140"                   
#> [189] "SROIE"                           
#> [190] "STACKOVERFLOW"                   
#> [191] "SUPERGLUE_RTE"                   
#> [192] "SentenceDataset"                 
#> [193] "StringDataset"                   
#> [194] "TREC_50"                         
#> [195] "TREC_6"                          
#> [196] "UD_AFRIKAANS"                    
#> [197] "UD_ANCIENT_GREEK"                
#> [198] "UD_ARABIC"                       
#> [199] "UD_ARMENIAN"                     
#> [200] "UD_BASQUE"                       
#> [201] "UD_BAVARIAN_MAIBAAM"             
#> [202] "UD_BELARUSIAN"                   
#> [203] "UD_BULGARIAN"                    
#> [204] "UD_BURYAT"                       
#> [205] "UD_CATALAN"                      
#> [206] "UD_CHINESE"                      
#> [207] "UD_CHINESE_KYOTO"                
#> [208] "UD_COPTIC"                       
#> [209] "UD_CROATIAN"                     
#> [210] "UD_CZECH"                        
#> [211] "UD_DANISH"                       
#> [212] "UD_DUTCH"                        
#> [213] "UD_ENGLISH"                      
#> [214] "UD_ESTONIAN"                     
#> [215] "UD_FAROESE"                      
#> [216] "UD_FINNISH"                      
#> [217] "UD_FRENCH"                       
#> [218] "UD_GALICIAN"                     
#> [219] "UD_GERMAN"                       
#> [220] "UD_GERMAN_HDT"                   
#> [221] "UD_GOTHIC"                       
#> [222] "UD_GREEK"                        
#> [223] "UD_HEBREW"                       
#> [224] "UD_HINDI"                        
#> [225] "UD_INDONESIAN"                   
#> [226] "UD_IRISH"                        
#> [227] "UD_ITALIAN"                      
#> [228] "UD_JAPANESE"                     
#> [229] "UD_KAZAKH"                       
#> [230] "UD_KOREAN"                       
#> [231] "UD_LATIN"                        
#> [232] "UD_LATVIAN"                      
#> [233] "UD_LITHUANIAN"                   
#> [234] "UD_LIVVI"                        
#> [235] "UD_MALTESE"                      
#> [236] "UD_MARATHI"                      
#> [237] "UD_NAIJA"                        
#> [238] "UD_NORTH_SAMI"                   
#> [239] "UD_NORWEGIAN"                    
#> [240] "UD_OLD_CHURCH_SLAVONIC"          
#> [241] "UD_OLD_FRENCH"                   
#> [242] "UD_PERSIAN"                      
#> [243] "UD_POLISH"                       
#> [244] "UD_PORTUGUESE"                   
#> [245] "UD_ROMANIAN"                     
#> [246] "UD_RUSSIAN"                      
#> [247] "UD_SERBIAN"                      
#> [248] "UD_SLOVAK"                       
#> [249] "UD_SLOVENIAN"                    
#> [250] "UD_SPANISH"                      
#> [251] "UD_SWEDISH"                      
#> [252] "UD_TURKISH"                      
#> [253] "UD_UKRAINIAN"                    
#> [254] "UD_WOLOF"                        
#> [255] "UP_CHINESE"                      
#> [256] "UP_ENGLISH"                      
#> [257] "UP_FINNISH"                      
#> [258] "UP_FRENCH"                       
#> [259] "UP_GERMAN"                       
#> [260] "UP_ITALIAN"                      
#> [261] "UP_SPANISH"                      
#> [262] "UP_SPANISH_ANCORA"               
#> [263] "UniversalDependenciesCorpus"     
#> [264] "UniversalDependenciesDataset"    
#> [265] "VARIOME"                         
#> [266] "WASSA_ANGER"                     
#> [267] "WASSA_FEAR"                      
#> [268] "WASSA_JOY"                       
#> [269] "WASSA_SADNESS"                   
#> [270] "WNUT_17"                         
#> [271] "WSD_MASC"                        
#> [272] "WSD_OMSTI"                       
#> [273] "WSD_RAGANATO_ALL"                
#> [274] "WSD_SEMCOR"                      
#> [275] "WSD_TRAINOMATIC"                 
#> [276] "WSD_UFSAC"                       
#> [277] "WSD_WORDNET_GLOSS_TAGGED"        
#> [278] "YAHOO_ANSWERS"                   
#> [279] "ZELDA"                           
#> [280] "base"                            
#> [281] "biomedical"                      
#> [282] "document_classification"         
#> [283] "entity_linking"                  
#> [284] "ocr"                             
#> [285] "relation_extraction"             
#> [286] "sequence_labeling"               
#> [287] "text_image"                      
#> [288] "text_text"                       
#> [289] "treebanks"