{"id":"https://openalex.org/W4387929320","doi":"https://doi.org/10.48550/arxiv.2310.14616","title":"Rethinking SIGN Training: Provable Nonconvex Acceleration without First- and Second-Order Gradient Lipschitz","display_name":"Rethinking SIGN Training: Provable Nonconvex Acceleration without First- and Second-Order Gradient Lipschitz","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4387929320","doi":"https://doi.org/10.48550/arxiv.2310.14616"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2310.14616","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2310.14616","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057518342","display_name":"Tao Sun","orcid":"https://orcid.org/0000-0001-5277-8699"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Tao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027220244","display_name":"Congliang Chen","orcid":"https://orcid.org/0000-0002-9795-4200"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Congliang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058476148","display_name":"Peng Qiao","orcid":"https://orcid.org/0000-0001-6752-7892"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiao, Peng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100333320","display_name":"Li Shen","orcid":"https://orcid.org/0000-0002-5443-0503"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101727888","display_name":"Xinwang Liu","orcid":"https://orcid.org/0000-0001-9066-1475"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Xinwang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100440903","display_name":"Dongsheng Li","orcid":"https://orcid.org/0000-0001-9743-2034"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Dongsheng","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":67},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9981,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9895,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/smoothness","display_name":"Smoothness","score":0.6676752},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.532324}],"concepts":[{"id":"https://openalex.org/C22324862","wikidata":"https://www.wikidata.org/wiki/Q652707","display_name":"Lipschitz continuity","level":2,"score":0.8383881},{"id":"https://openalex.org/C139676723","wikidata":"https://www.wikidata.org/wiki/Q1193832","display_name":"Sign (mathematics)","level":2,"score":0.7304356},{"id":"https://openalex.org/C102634674","wikidata":"https://www.wikidata.org/wiki/Q868473","display_name":"Smoothness","level":2,"score":0.6676752},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5888963},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.5881568},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5514563},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.532324},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.44127113},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.43699133},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.43345025},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3926984},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.38842455},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21194723},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.16070119},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2310.14616","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.14616","pdf_url":"http://arxiv.org/pdf/2310.14616","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2310.14616","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2310.14616","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W98480971","https://openalex.org/W2597809628","https://openalex.org/W2164382479","https://openalex.org/W2157978810","https://openalex.org/W2150291671","https://openalex.org/W2146343568","https://openalex.org/W2058965144","https://openalex.org/W2027972911","https://openalex.org/W2013643406","https://openalex.org/W192619126"],"abstract_inverted_index":{"Sign-based":[0],"stochastic":[1],"methods":[2,30,66],"have":[3],"gained":[4],"attention":[5],"due":[6],"to":[7,10,181],"their":[8,69],"ability":[9],"achieve":[11],"robust":[12],"performance":[13],"despite":[14],"using":[15],"only":[16],"the":[17,24,33,83,86,95,123,127,146,175],"sign":[18],"information":[19],"for":[20,108],"parameter":[21],"updates.":[22],"However,":[23],"current":[25],"convergence":[26,70,84],"analysis":[27],"of":[28,36,75,85,126,148,159,178,185],"sign-based":[29,65,87,112,179],"relies":[31],"on":[32,115],"strong":[34],"assumptions":[35,74],"first-order":[37,91,97],"gradient":[38,42],"Lipschitz":[39],"and":[40,67,77],"second-order":[41,78,103],"Lipschitz,":[43,98],"which":[44],"may":[45],"not":[46],"hold":[47],"in":[48,111,145,164],"practical":[49],"tasks":[50],"like":[51],"deep":[52],"neural":[53],"network":[54],"training":[55],"that":[56,105,137,165],"involve":[57],"high":[58],"non-smoothness.":[59],"In":[60,132],"this":[61,138],"paper,":[62],"we":[63,99,119,135],"revisit":[64],"analyze":[68],"under":[71,89,169],"more":[72],"realistic":[73],"first-":[76],"smoothness.":[79],"We":[80],"first":[81],"establish":[82],"method":[88],"weak":[90,96],"Lipschitz.":[92],"Motivated":[93],"by":[94],"propose":[100],"a":[101,182],"relaxed":[102],"condition":[104],"still":[106],"allows":[107],"nonconvex":[109,139],"acceleration":[110,140],"methods.":[113],"Based":[114],"our":[116,160],"theoretical":[117,161],"results,":[118],"gain":[120],"insights":[121],"into":[122],"computational":[124],"advantages":[125],"recently":[128],"developed":[129],"LION":[130],"algorithm.":[131],"distributed":[133],"settings,":[134],"prove":[136],"persists":[141],"with":[142],"linear":[143],"speedup":[144],"number":[147],"nodes,":[149],"when":[150],"utilizing":[151],"fast":[152],"communication":[153],"compression":[154],"gossip":[155],"protocols.":[156],"The":[157],"novelty":[158],"results":[162],"lies":[163],"they":[166],"are":[167],"derived":[168],"much":[170],"weaker":[171],"assumptions,":[172],"thereby":[173],"expanding":[174],"provable":[176],"applicability":[177],"algorithms":[180],"wider":[183],"range":[184],"problems.":[186]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4387929320","counts_by_year":[],"updated_date":"2025-01-01T21:56:49.140367","created_date":"2023-10-25"}
  NODES