{"id":"https://openalex.org/W2337116603","doi":"https://doi.org/10.1145/2889311","title":"Automatic Discovery of Abnormal Values in Large Textual Databases","display_name":"Automatic Discovery of Abnormal Values in Large Textual Databases","publication_year":2016,"publication_date":"2016-04-19","ids":{"openalex":"https://openalex.org/W2337116603","doi":"https://doi.org/10.1145/2889311","mag":"2337116603"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/2889311","pdf_url":null,"source":{"id":"https://openalex.org/S110189822","display_name":"Journal of Data and Information Quality","issn_l":"1936-1963","issn":["1936-1963","1936-1955"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022945960","display_name":"Peter Christen","orcid":"https://orcid.org/0000-0003-3435-2015"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Peter Christen","raw_affiliation_strings":["The Australian National University, Acton, Australia"],"affiliations":[{"raw_affiliation_string":"The Australian National University, Acton, Australia","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080206968","display_name":"Ross W. Gayler","orcid":"https://orcid.org/0000-0003-4679-585X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ross W. Gayler","raw_affiliation_strings":["Veda#TAB#"],"affiliations":[{"raw_affiliation_string":"Veda#TAB#","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013716682","display_name":"Nguyen Khoi Tran","orcid":"https://orcid.org/0000-0002-9538-7476"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Khoi-Nguyen Tran","raw_affiliation_strings":["The Australian National University, Acton, Australia"],"affiliations":[{"raw_affiliation_string":"The Australian National University, Acton, Australia","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065586720","display_name":"Jeffrey Fisher","orcid":null},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jeffrey Fisher","raw_affiliation_strings":["The Australian National University, Acton, Australia"],"affiliations":[{"raw_affiliation_string":"The Australian National University, Acton, Australia","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079324810","display_name":"Dinusha Vatsalan","orcid":"https://orcid.org/0000-0001-6713-7667"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Dinusha Vatsalan","raw_affiliation_strings":["The Australian National University, Acton, Australia"],"affiliations":[{"raw_affiliation_string":"The Australian National University, Acton, Australia","institution_ids":["https://openalex.org/I118347636"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.203,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":10,"citation_normalized_percentile":{"value":0.89961,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":86,"max":87},"biblio":{"volume":"7","issue":"1-2","first_page":"1","last_page":"31"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9991,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9991,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.9939,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9912,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.86889315},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.543599},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.48867825},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.46733817},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4546301},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.40282083},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34124842},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.34012294},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.32286575},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/2889311","pdf_url":null,"source":{"id":"https://openalex.org/S110189822","display_name":"Journal of Data and Information Quality","issn_l":"1936-1963","issn":["1936-1963","1936-1955"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[{"funder":"https://openalex.org/F4320334704","funder_display_name":"Australian Research Council","award_id":"LP100200079"}],"datasets":[],"versions":[],"referenced_works_count":37,"referenced_works":["https://openalex.org/W1488893975","https://openalex.org/W1518977884","https://openalex.org/W1520377376","https://openalex.org/W1547612978","https://openalex.org/W1823976324","https://openalex.org/W188224872","https://openalex.org/W1917350224","https://openalex.org/W1985005465","https://openalex.org/W1999563429","https://openalex.org/W2014459399","https://openalex.org/W2029873015","https://openalex.org/W2031101249","https://openalex.org/W2038819732","https://openalex.org/W2045778260","https://openalex.org/W2045812729","https://openalex.org/W2063918473","https://openalex.org/W2087615914","https://openalex.org/W2106950427","https://openalex.org/W2122646361","https://openalex.org/W2125270091","https://openalex.org/W2131904035","https://openalex.org/W2132870739","https://openalex.org/W2134532107","https://openalex.org/W2135779000","https://openalex.org/W2137130182","https://openalex.org/W2146022760","https://openalex.org/W2147505890","https://openalex.org/W2149706766","https://openalex.org/W2153635508","https://openalex.org/W2158195707","https://openalex.org/W2161920802","https://openalex.org/W2162774438","https://openalex.org/W2561675875","https://openalex.org/W2755088640","https://openalex.org/W2997591727","https://openalex.org/W3004248957","https://openalex.org/W54330468"],"related_works":["https://openalex.org/W4290792893","https://openalex.org/W2748952813","https://openalex.org/W2384888906","https://openalex.org/W2376314740","https://openalex.org/W2366644548","https://openalex.org/W2357241418","https://openalex.org/W2144190808","https://openalex.org/W2128719260","https://openalex.org/W1575659177","https://openalex.org/W1509467138"],"abstract_inverted_index":{"Textual":[0],"databases":[1,98,104,289,341],"are":[2,30,105,161,178,238],"ubiquitous":[3],"in":[4,108,119,144,151,166,250],"many":[5,50],"application":[6],"domains.":[7],"Examples":[8],"of":[9,17,184,245,253,298,306,338,345],"textual":[10,146,324,340],"data":[11,122,255,266,270,332],"range":[12],"from":[13,282,308],"names":[14,80],"and":[15,23,53,180,214,234,241,257,301,320,334],"addresses":[16],"customers":[18],"to":[19,33,58,71,136,221,232,329],"social":[20,51,309],"media":[21],"posts":[22],"bibliographic":[24,97,299],"records.":[25],"With":[26],"online":[27,43,63,96,101],"services,":[28,48],"individuals":[29],"increasingly":[31,106],"required":[32],"enter":[34,72],"their":[35,339],"personal":[36,291],"details":[37],"for":[38,46,69,267],"example":[39],"when":[40],"purchasing":[41],"products":[42],"or":[44,74,87,99,141,261],"registering":[45],"government":[47],"while":[49,169],"network":[52],"e-commerce":[54],"sites":[55,64],"allow":[56],"users":[57],"post":[59],"short":[60],"comments.":[61],"Many":[62],"leave":[65],"open":[66],"the":[67,182,205,215,251,293,302,336,343],"possibility":[68],"people":[70],"unintended":[73],"malicious":[75],"abnormal":[76,139,172,186,323],"values,":[77,84,325],"such":[78,94],"as":[79,95,188,264],"with":[81],"errors,":[82],"bogus":[83],"profane":[85],"comments,":[86],"random":[88],"character":[89],"sequences.":[90],"In":[91,129],"other":[92],"applications,":[93],"comparative":[100],"shopping":[102],"sites,":[103],"populated":[107],"(semi-)":[109],"automatic":[110],"ways":[111],"through":[112],"Web":[113],"crawls.":[114],"This":[115],"practice":[116],"can":[117,248,318],"result":[118],"low":[120],"quality":[121,337],"being":[123],"added":[124],"automatically":[125,137,321],"into":[126],"a":[127,167,197,210,223],"database.":[128],"this":[130],"article,":[131],"we":[132],"develop":[133,235],"three":[134],"techniques":[135,177,236,247,276,317],"discover":[138,322],"(unexpected":[140],"unusual)":[142],"values":[143,160,187],"large":[145,279],"databases.":[147],"Following":[148],"recent":[149],"work":[150],"categorical":[152],"outlier":[153,190],"detection,":[154],"our":[155,246,275,316],"assumption":[156],"is":[157,174,196,207,231],"that":[158,163,237,315],"\u201cnormal\u201d":[159],"those":[162],"occur":[164],"frequently":[165],"database,":[168],"an":[170,189,327],"individual":[171],"value":[173],"rare.":[175],"Our":[176,193,229,312],"unsupervised":[179],"address":[181],"challenge":[183],"discovering":[185],"detection":[191],"problem.":[192],"first":[194],"technique":[195],"basic":[198],"but":[199],"efficient":[200,331],"q-gram":[201],"set":[202],"based":[203,208],"technique,":[204],"second":[206],"on":[209,277],"probabilistic":[211],"language":[212],"model,":[213],"third":[216],"employs":[217],"morphological":[218],"word":[219],"features":[220],"train":[222],"one-class":[224],"support":[225],"vector":[226],"machine":[227],"classifier.":[228],"aim":[230],"investigate":[233],"fast,":[239],"efficient,":[240],"automatic.":[242],"The":[243],"output":[244],"help":[249],"development":[252],"rule-based":[254],"cleaning":[256,271],"information":[258],"extraction":[259],"systems,":[260],"be":[262],"used":[263],"training":[265,348],"further":[268],"supervised":[269],"procedures.":[272],"We":[273],"evaluate":[274],"four":[278],"real-world":[280],"datasets":[281],"different":[283],"domains:":[284],"two":[285],"US":[286],"voter":[287],"registration":[288],"containing":[290],"details,":[292],"2013":[294],"KDD":[295],"Cup":[296],"dataset":[297,305],"records,":[300],"SNAP":[303],"Memetracker":[304],"phrases":[307],"networking":[310],"sites.":[311],"results":[313],"show":[314],"efficiently":[319],"allowing":[326],"organization":[328],"conduct":[330],"exploration,":[333],"improve":[335],"without":[342],"need":[344],"requiring":[346],"explicit":[347],"data.":[349]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2337116603","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2}],"updated_date":"2024-12-13T23:41:31.613641","created_date":"2016-06-24"}
  NODES
Association 4
USERS 1