diff --git "a/data/users/ddowney/seedset-ddowney-maple.json" "b/data/users/ddowney/seedset-ddowney-maple.json" --- "a/data/users/ddowney/seedset-ddowney-maple.json" +++ "b/data/users/ddowney/seedset-ddowney-maple.json" @@ -15,12 +15,48 @@ "We structure this paper around challenges scholars and the public face when reading research papers -- Discovery, Efficiency, Comprehension, Synthesis, and Accessibility -- and present an overview of our progress and remaining open challenges." ] }, + { + "title": "S2abEL: A Dataset for Entity Linking from Scientific Tables", + "abstract": [ + "Entity linking (EL) is the task of linking a textual mention to its corresponding entry in a knowledge base, and is critical for many knowledge-intensive NLP applications.", + "When applied to tables in scientific papers, EL is a step toward large-scale scientific knowledge bases that could enable advanced scientific question answering and analytics.", + "We present the first dataset for EL in scientific tables.", + "EL for scientific tables is especially challenging because scientific knowledge bases can be very incomplete, and disambiguating table mentions typically requires understanding the papers's tet in addition to the table.", + "Our dataset, S2abEL, focuses on EL in machine learning results tables and includes hand-labeled cell types, attributed sources, and entity links from the PaperswithCode taxonomy for 8,429 cells from 732 tables.", + "We introduce a neural baseline method designed for EL on scientific tables containing many out-of-knowledge-base mentions, and show that it significantly outperforms a state-of-the-art generic table EL method.", + "The best baselines fall below human performance, and our analysis highlights avenues for improvement." + ] + }, + { + "title": "Learning to Generate Novel Scientific Directions with Contextualized Literature-based Discovery", + "abstract": [ + "Literature-Based Discovery (LBD) aims to discover new scientific knowledge by mining papers and generating hypotheses.", + "Standard LBD is limited to predicting pairwise relations between discrete concepts (e.g., drug-disease links), and ignores critical contexts like experimental settings (e.g., a specific patient population where a drug is evaluated) and background motivations (e.g., to find drugs without specific side effects).", + "We address these limitations with a novel formulation of contextualized-LBD (C-LBD): generating scientific hypotheses in natural language, while grounding them in a context that controls the hypothesis search space.", + "We present a modeling framework using retrieval of ``inspirations'' from past scientific papers.", + "Our evaluations reveal that GPT-4 tends to generate ideas with overall low technical depth and novelty, while our inspiration prompting approaches partially mitigate this issue.", + "Our work represents a first step toward building language models that generate new ideas derived from scientific literature." + ] + }, + { + "title": "Are Layout-Infused Language Models Robust to Layout Distribution Shifts? 
A Case Study with Scientific Documents", + "abstract": [ + "Recent work has shown that infusing layout features into language models (LMs) improves processing of visually-rich documents such as scientific papers.", + "Layout-infused LMs are often evaluated on documents with familiar layout features (e.g., papers from the same publisher), but in practice models encounter documents with unfamiliar distributions of layout features, such as new combinations of text sizes and styles, or new spatial configurations of textual elements.", + "In this work we test whether layout-infused LMs are robust to layout distribution shifts.", + "As a case study we use the task of scientific document structure recovery, segmenting a scientific paper into its structural categories (e.g.,\"title\",\"caption\",\"reference\").", + "To emulate distribution shifts that occur in practice we re-partition the GROTOAP2 dataset.", + "We find that under layout distribution shifts model performance degrades by up to 20 F1.", + "Simple training strategies, such as increasing training diversity, can reduce this degradation by over 35% relative F1; however, models fail to reach in-distribution performance in any tested out-of-distribution conditions.", + "This work highlights the need to consider layout distribution shifts during model evaluation, and presents a methodology for conducting such evaluations." + ] + }, { "title": "CiteSee: Augmenting Citations in Scientific Papers with Persistent and Personalized Historical Context", "abstract": [ "When reading a scholarly article, inline citations help researchers contextualize the current article and discover relevant prior work.", "However, it can be challenging to prioritize and make sense of the hundreds of citations encountered during literature reviews.", - "This paper introduces CiteSee, a paper reading tool that leverages a user's publishing, reading, and saving activities to provide personalized visual augmentations and context around citations.", + "This paper introduces CiteSee, a paper reading tool that leverages a user\u2019s publishing, reading, and saving activities to provide personalized visual augmentations and context around citations.", "First, CiteSee connects the current paper to familiar contexts by surfacing known citations a user had cited or opened.", "Second, CiteSee helps users prioritize their exploration by highlighting relevant but unknown citations based on saving and reading history.", "We conducted a lab study that suggests CiteSee is significantly more effective for paper discovery than three baselines.", @@ -32,7 +68,7 @@ "abstract": [ "Scholars who want to research a scientific topic must take time to read, extract meaning, and identify connections across many papers.", "As scientific literature grows, this becomes increasingly challenging.", - "Meanwhile, authors summarize prior research in papers' related work sections, though this is scoped to support a single paper.", + "Meanwhile, authors summarize prior research in papers\u2019 related work sections, though this is scoped to support a single paper.", "A formative study found that while reading multiple related work paragraphs helps overview a topic, it is hard to navigate overlapping and diverging references and research foci.", "In this work, we design a system, Relatedly, that scaffolds exploring and reading multiple related work paragraphs on a topic, with features including dynamic re-ranking and highlighting to spotlight unexplored dissimilar information, auto-generated 
descriptive paragraph headings, and low-lighting of redundant information.", "From a within-subjects user study (n=15), we found that scholars generate more coherent, insightful, and comprehensive topic outlines using Relatedly compared to a baseline paper list." @@ -49,6 +85,14 @@ "We will update this living document to reflect changes as we add new data offerings and improve existing services." ] }, + { + "title": "Perspective: Large Language Models in Applied Mechanics", + "abstract": [ + "Large language models (LLMs), such as ChatGPT and PaLM, are able to perform sophisticated text comprehension and generation tasks with little or no training.", + "Alongside their broader societal impacts, these capabilities carry great promise for the physical sciences, including applied mechanics.", + "We present a summary of recent developments in these models, their application to mechanics and adjacent fields, and a perspective on their future use in applied mechanics, taking into account their limitations and the unique challenges of the field." + ] + }, { "title": "Beyond Summarization: Designing AI Support for Real-World Expository Writing Tasks", "abstract": [ @@ -61,6 +105,31 @@ "We sketch three components for AI support design and discuss considerations for future research." ] }, + { + "title": "ARIES: A Corpus of Scientific Paper Edits Made in Response to Peer Reviews", + "abstract": [ + "Revising scientific papers based on peer feedback is a challenging task that requires not only deep scientific knowledge and reasoning, but also the ability to recognize the implicit requests in high-level feedback and to choose the best of many possible ways to update the manuscript in response.", + "We introduce this task for large language models and release ARIES, a dataset of review comments and their corresponding paper edits, to enable training and evaluating models.", + "We study two versions of the task: comment-edit alignment and edit generation, and evaluate several baselines, including GPT-4.", + "We find that models struggle even to identify the edits that correspond to a comment, especially in cases where the comment is phrased in an indirect way or where the edit addresses the spirit of a comment but not the precise request.", + "When tasked with generating edits, GPT-4 often succeeds in addressing comments on a surface level, but it rigidly follows the wording of the feedback rather than the underlying intent, and includes fewer technical details than human-written edits.", + "We hope that our formalization, dataset, and analysis will form a foundation for future work in this area." 
+ ] + }, + { + "title": "SciRepEval: A Multi-Format Benchmark for Scientific Document Representations", + "abstract": [ + "Learned representations of scientific documents can serve as valuable input features for downstream tasks without further fine-tuning.", + "However, existing benchmarks for evaluating these representations fail to capture the diversity of relevant tasks.", + "In response, we introduce SciRepEval, the first comprehensive benchmark for training and evaluating scientific document representations.", + "It includes 24 challenging and realistic tasks, 8 of which are new, across four formats: classification, regression, ranking and search.", + "We then use this benchmark to study and improve the generalization ability of scientific document representation models.", + "We show how state-of-the-art models like SPECTER and SciNCL struggle to generalize across the task formats, and that simple multi-task training fails to improve them.", + "However, a new approach that learns multiple embeddings per document, each tailored to a different format, can improve performance.", + "We experiment with task-format-specific control codes and adapters and find they outperform the existing single-embedding state-of-the-art by over 2 points absolute.", + "We release the resulting family of multi-format models, called SPECTER2, for the community to use and build on." + ] + }, { "title": "FeedLens: Polymorphic Lenses for Personalizing Exploratory Search over Knowledge Graphs", "abstract": [ @@ -76,17 +145,6 @@ "Our qualitative results also highlight people\u2019s preference for this more effective exploratory search experience enabled by FeedLens." ] }, - { - "title": "Don\u2019t Say What You Don\u2019t Know: Improving the Consistency of Abstractive Summarization by Constraining Beam Search", - "abstract": [ - "Abstractive summarization systems today produce fluent and relevant output, but often \u201challucinate\u201d statements not supported by the source text.", - "We analyze the connection between hallucinations and training data, and find evidence that models hallucinate because they train on target summaries that are unsupported by the source.", - "Based on our findings, we present PINOCCHIO, a new decoding method that improves the consistency of a transformer-based abstractive summarizer by constraining beam search to avoid hallucinations.", - "Given the model states and outputs at a given step, PINOCCHIO detects likely model hallucinations based on various measures of attribution to the source text.", - "PINOCCHIO backtracks to find more consistent output, and can opt to produce no summary at all when no consistent generation can be found.", - "In experiments, we find that PINOCCHIO improves the consistency of generation by an average of 67% on two abstractive summarization datasets, without hurting recall." 
- ] - }, { "title": "ACCoRD: A Multi-Document Approach to Generating Diverse Descriptions of Scientific Concepts", "abstract": [ @@ -99,17 +157,9 @@ ] }, { - "title": "I2D2: Inductive Knowledge Distillation with NeuroLogic and Self-Imitation", + "title": "A Computational Inflection for Scientific Discovery", "abstract": [ - "Pre-trained language models, despite their rapid advancements powered by scale, still fall short of robust commonsense capabilities.", - "And yet, scale appears to be the winning recipe; after all, the largest models seem to have acquired the largest amount of commonsense capabilities.", - "Or is it?", - "In this paper, we investigate the possibility of a seemingly impossible match: can smaller language models with dismal commonsense capabilities (i.e., GPT-2), ever win over models that are orders of magnitude larger and better (i.e., GPT-3), if the smaller models are powered with novel commonsense distillation algorithms?", - "The key intellectual question we ask here is whether it is possible, if at all, to design a learning algorithm that does not benefit from scale, yet leads to a competitive level of commonsense acquisition.", - "In this work, we study the generative models of commonsense knowledge, focusing on the task of generating generics, statements of commonsense facts about everyday concepts, e.g., birds can fly.", - "We introduce a novel commonsense distillation framework, I2D2, that loosely follows the Symbolic Knowledge Distillation of West et al. but breaks the dependence on the extreme-scale models as the teacher model by two innovations: (1) the novel adaptation of NeuroLogic Decoding to enhance the generation quality of the weak, off-the-shelf language models, and (2) self-imitation learning to iteratively learn from the model's own enhanced commonsense acquisition capabilities.", - "Empirical results suggest that scale is not the only way, as novel algorithms can be a promising alternative.", - "Moreover, our study leads to a new corpus of generics, Gen-A-Tomic, that is of the largest and highest quality available to date." + "Enabling researchers to leverage systems to overcome the limits of human cognitive capacity." ] }, { @@ -137,6 +187,29 @@ "Compared with chain of thought prompting, CFT performs at least as well using LMs only 7.4% of the size, and is moreover applicable to task domains for which data are not available during pretraining." ] }, + { + "title": "I2D2: Inductive Knowledge Distillation with NeuroLogic and Self-Imitation", + "abstract": [ + "Commonsense capabilities of pre-trained language models dramatically improve with scale, leading many to believe that scale is the only winning recipe.", + "But is it?", + "Here, we investigate an alternative that a priori seems impossible: can smaller language models (e.g., GPT-2) win over models that are orders of magnitude larger and better (e.g., GPT-3), if powered with novel commonsense distillation algorithms? The key intellectual challenge is to design a learning algorithm that achieves a competitive level of commonsense acquisition, without relying on the benefits of scale.", + "In particular, we study generative models of commonsense knowledge, focusing on the task of generating generics, statements of commonsense facts about everyday concepts, e.g., birds can fly.", + "We introduce I2D2, a novel commonsense distillation framework that loosely follows the Symbolic Knowledge Distillation of West et al. 
but breaks the dependence on the extreme-scale teacher model with two innovations: (1) the novel adaptation of NeuroLogic Decoding to enhance the generation quality of the weak, off-the-shelf language models, and (2) self-imitation learning to iteratively learn from the model\u2019s own enhanced commonsense acquisition capabilities.", + "Empirical results suggest that scale is not the only way, as novel algorithms can be a promising alternative.", + "Moreover, our study leads to a new corpus of generics, Gen-A-tomic, that is the largest and highest quality available to date." + ] + }, + { + "title": "Don\u2019t Say What You Don\u2019t Know: Improving the Consistency of Abstractive Summarization by Constraining Beam Search", + "abstract": [ + "Abstractive summarization systems today produce fluent and relevant output, but often \u201challucinate\u201d statements not supported by the source text.", + "We analyze the connection between hallucinations and training data, and find evidence that models hallucinate because they train on target summaries that are unsupported by the source.", + "Based on our findings, we present PINOCCHIO, a new decoding method that improves the consistency of a transformer-based abstractive summarizer by constraining beam search to avoid hallucinations.", + "Given the model states and outputs at a given step, PINOCCHIO detects likely model hallucinations based on various measures of attribution to the source text.", + "PINOCCHIO backtracks to find more consistent output, and can opt to produce no summary at all when no consistent generation can be found.", + "In experiments, we find that PINOCCHIO improves the consistency of generation by an average of 67% on two abstractive summarization datasets, without hurting recall." + ] + }, { "title": "Building a Shared Conceptual Model of Complex, Heterogeneous Data Systems: A Demonstration", "abstract": [ @@ -161,19 +234,6 @@ "We release Multi-LexSum for further research in summarization methods as well as to facilitate development of applications to assist in the CRLC's mission at https://multilexsum.github.io." ] }, - { - "title": "SciRepEval: A Multi-Format Benchmark for Scientific Document Representations", - "abstract": [ - "Learned representations of scientific documents can serve as valuable input features for downstream tasks, without the need for further fine-tuning.", - "However, existing benchmarks for evaluating these representations fail to capture the diversity of relevant tasks.", - "In response, we introduce SciRepEval, the first comprehensive benchmark for training and evaluating scientific document representations.", - "It includes 25 challenging and realistic tasks, 11 of which are new, across four formats: classification, regression, ranking and search.", - "We then use the benchmark to study and improve the generalization ability of scientific document representation models.", - "We show how state-of-the-art models struggle to generalize across task formats, and that simple multi-task training fails to improve them.", - "However, a new approach that learns multiple embeddings per document, each tailored to a different format, can improve performance.", - "We experiment with task-format-specific control codes and adapters in a multi-task setting and find that they outperform the existing single-embedding state-of-the-art by up to 1.5 points absolute." 
- ] - }, { "title": "From Who You Know to What You Read: Augmenting Scientific Recommendations with Implicit Social Networks", "abstract": [ @@ -199,21 +259,6 @@ "Code and pretrained models are available at https://github.com/tsafavi/cascader." ] }, - { - "title": "A Computational Inflection for Scientific Discovery", - "abstract": [ - "We stand at the foot of a significant inflection in the trajectory of scientific discovery.", - "As society continues on its fast-paced digital transformation, so does humankind's collective scientific knowledge and discourse.", - "We now read and write papers in digitized form, and a great deal of the formal and informal processes of science are captured digitally -- including papers, preprints and books, code and datasets, conference presentations, and interactions in social networks and communication platforms.", - "The transition has led to the growth of a tremendous amount of information, opening exciting opportunities for computational models and systems that analyze and harness it.", - "In parallel, exponential growth in data processing power has fueled remarkable advances in AI, including self-supervised neural models capable of learning powerful representations from large-scale unstructured text without costly human supervision.", - "The confluence of societal and computational trends suggests that computer science is poised to ignite a revolution in the scientific process itself.", - "However, the explosion of scientific data, results and publications stands in stark contrast to the constancy of human cognitive capacity.", - "While scientific knowledge is expanding with rapidity, our minds have remained static, with severe limitations on the capacity for finding, assimilating and manipulating information.", - "We propose a research agenda of task-guided knowledge retrieval, in which systems counter humans' bounded capacity by ingesting corpora of scientific knowledge and retrieving inspirations, explanations, solutions and evidence synthesized to directly augment human performance on salient tasks in scientific endeavors.", - "We present initial progress on methods and prototypes, and lay out important opportunities and challenges ahead with computational approaches that have the potential to revolutionize science." 
- ] - }, { "title": "Infrastructure for Rapid Open Knowledge Network Development", "abstract": [ @@ -226,11 +271,11 @@ ] }, { - "title": "Penguins Don't Fly: Reasoning about Generics through Instantiations and Exceptions", + "title": "Penguins Don\u2019t Fly: Reasoning about Generics through Instantiations and Exceptions", "abstract": [ "Generics express generalizations about the world (e.g., birds can fly) that are not universally true (e.g., newborn birds and penguins cannot fly).", "Commonsense knowledge bases, used extensively in NLP, encode some generic knowledge but rarely enumerate such exceptions and knowing when a generic statement holds or does not hold true is crucial for developing a comprehensive understanding of generics.", - "We present a novel framework informed by linguistic theory to generate exemplars -- specific cases when a generic holds true or false.", + "We present a novel framework informed by linguistic theory to generate exemplars\u2014specific cases when a generic holds true or false.", "We generate ~19k exemplars for ~650 generics and show that our framework outperforms a strong GPT-3 baseline by 12.8 precision points.", "Our analysis highlights the importance of linguistic theory-based controllability for generating exemplars, the insufficiency of knowledge bases as a source of exemplars, and the challenges exemplars pose for the task of natural language inference." ] @@ -244,31 +289,1010 @@ "While multiple ER techniques have been proposed, their practical effectiveness is still unknown because existing evaluations consider very few models and do not adequately account for overhead costs.", "We perform an extensive evaluation of ER across eight different models (17 to 900 million parameters) and fourteen tasks in English.", "We show how a simple ER technique that caches activations from an intermediate layer of a pretrained model, and learns task-specific adapters on the later layers, is broadly effective.", - "For the best-performing baseline in our experiments (DeBERTa-v2 XL), adding a precomputed cache results in a>90% speedup during training and 87-91% speedup for inference, with negligible impact on accuracy.", + "For the best-performing baseline in our experiments (DeBERTa-v2 XL), adding a precomputed cache results in a >90% speedup during training and 87-91% speedup for inference, with negligible impact on accuracy.", "Our analysis reveals important areas of future work." ] + }, + { + "title": "SciCo: Hierarchical Cross-Document Coreference for Scientific Concepts", + "abstract": [ + "Determining coreference of concept mentions across multiple documents is a fundamental task in natural language understanding.", + "Previous work on cross-document coreference resolution (CDCR) typically considers mentions of events in the news, which seldom involve abstract technical concepts that are prevalent in science and technology.", + "These complex concepts take diverse or ambiguous forms and have many hierarchical levels of granularity (e.g., tasks and subtasks), posing challenges for CDCR.", + "We present a new task of Hierarchical CDCR (H-CDCR) with the goal of jointly inferring coreference clusters and hierarchy between them.", + "We create SciCo, an expert-annotated dataset for H-CDCR in scientific papers, 3X larger than the prominent ECB+ resource.", + "We study strong baseline models that we customize for H-CDCR, and highlight challenges for future work." 
+ ] + }, + { + "title": "Simplified Data Wrangling with ir_datasets", + "abstract": [ + "Managing the data for Information Retrieval (IR) experiments can be challenging.", + "Dataset documentation is scattered across the Internet and once one obtains a copy of the data, there are numerous different data formats to work with.", + "Even basic formats can have subtle dataset-specific nuances that need to be considered for proper use.", + "To help mitigate these challenges, we introduce a new robust and lightweight tool (ir_datasets) for acquiring, managing, and performing typical operations over datasets used in IR.", + "We primarily focus on textual datasets used for ad-hoc search.", + "This tool provides both a Python and command line interface to numerous IR datasets and benchmarks.", + "To our knowledge, this is the most extensive tool of its kind.", + "Integrations with popular IR indexing and experimentation toolkits demonstrate the tool's utility.", + "We also provide documentation of these datasets through the ir_datasets catalog: https://ir-datasets.com/. The catalog acts as a hub for information on datasets used in IR, providing core information about what data each benchmark provides as well as links to more detailed information.", + "We welcome community contributions and intend to continue to maintain and grow this tool." + ] + }, + { + "title": "S2AND: A Benchmark and Evaluation System for Author Name Disambiguation", + "abstract": [ + "Author Name Disambiguation (AND) is the task of resolving which author mentions in a bibliographic database refer to the same real-world person, and is a critical ingredient of digital library applications such as search and citation analysis.", + "While many AND algorithms have been proposed, comparing them is difficult because they often employ distinct features and are evaluated on different datasets.", + "In response to this challenge, we present S2AND, a unified benchmark dataset for AND on scholarly papers, as well as an open-source reference model implementation.", + "Our dataset harmonizes eight disparate AND datasets into a uniform format, with a single rich feature set drawn from the Semantic Scholar (S2) database.", + "Our evaluation suite for S2AND reports performance split by facets like publication year and number of papers, allowing researchers to track both global performance and measures of fairness across facet values.", + "Our experiments show that because previous datasets tend to cover idiosyncratic and biased slices of the literature, algorithms trained to perform well on one of them may generalize poorly to others.", + "By contrast, we show how training on a union of datasets in S2AND results in more robust models that perform well even on datasets unseen in training.", + "The resulting AND model also substantially improves over the production algorithm in S2, reducing error by over 50% in terms of B^3 F1.", + "We release our unified dataset, model code, trained models, and evaluation suite to the research community. https://github.com/allenai/S2AND/" + ] + }, + { + "title": "Incorporating Visual Layout Structures for Scientific Text Classification", + "abstract": [ + "Classifying the core textual components of a scientific paper\u2014title, author, body text, etc.\u2014is a critical first step in automated scientific document understanding.", + "Previous work has shown how using elementary layout information, i.e., each token\u2019s 2D position on the page, leads to more accurate classification.", + "We introduce new methods 
for incorporating VIsual LAyout (VILA) structures, e.g., the grouping of page texts into text lines or text blocks, into language models to further improve performance.", + "We show that the I-VILA approach, which simply adds special tokens denoting the boundaries of layout structures into model inputs, can lead to 1.9% Macro F1 improvements for token classification.", + "Moreover, we design a hierarchical model, H-VILA, that encodes the text based on layout structures and record up to a 47% inference time reduction with less than 1.5% Macro F1 loss for the text classification models.", + "Experiments are conducted on a newly curated evaluation suite, S2-VLUE, with a novel metric measuring classification uniformity within visual groups and a new dataset of gold annotations covering papers from 19 scientific disciplines.", + "Pre-trained weights, benchmark datasets, and source code will be available at https://github.com/allenai/VILA." + ] + }, + { + "title": "Who\u2019s on First?: Probing the Learning and Representation Capabilities of Language Models on Deterministic Closed Domains", + "abstract": [ + "The capabilities of today\u2019s natural language processing systems are typically evaluated using large datasets of curated questions and answers.", + "While these are critical benchmarks of progress, they also suffer from weaknesses due to artificial distributions and incomplete knowledge.", + "Artifacts arising from artificial distributions can overstate language model performance, while incomplete knowledge limits fine-grained analysis.", + "In this work, we introduce a complementary benchmarking approach based on SimPlified Language Activity Traces (SPLAT).", + "SPLATs are corpora of language encodings of activity in some closed domain (we study traces from chess and baseball games in this work).", + "SPLAT datasets use naturally-arising distributions, allow the generation of question-answer pairs at scale, and afford complete knowledge in their closed domains.", + "We show that language models of three different architectures can answer questions about world states using only verb-like encodings of activity.", + "Our approach is extensible to new language models and additional question-answering tasks." + ] + }, + { + "title": "Few-Shot Self-Rationalization with Natural Language Prompts", + "abstract": [ + "Self-rationalization models that predict task labels and generate free-text elaborations for their predictions could enable more intuitive interaction with NLP systems.", + "These models are, however, currently trained with a large amount of human-written free-text explanations for each task which hinders their broader usage.", + "We propose to study a more realistic setting of self-rationalization using few training examples.", + "We present FEB -- a standardized collection of four existing English-language datasets and associated metrics.", + "We identify the right prompting approach by extensively exploring natural language prompts on FEB.", + "Then, by using this prompt and scaling the model size, we demonstrate that making progress on few-shot self-rationalization is possible.", + "We show there is still ample room for improvement in this task: the average plausibility of generated explanations assessed by human annotators is at most 51% (with GPT-3), while plausibility of human explanations is 76%.", + "We hope that FEB and our proposed approach will spur the community to take on the few-shot self-rationalization challenge." 
+ ] + }, + { + "title": "Towards Personalized Descriptions of Scientific Concepts", + "abstract": [ + "A single scientific concept can be described in many different ways, and the most informative description depends on the audience.", + "In this paper, we propose generating personalized scientific concept descriptions that are tailored to the user\u2019s expertise and context.", + "We outline a complete architecture for the task and release an expert-annotated resource, ACCoRD, which includes 2,360 labeled extractions and 1,309 hand-authored concept descriptions for the key first step of extracting and generating multiple distinct descriptions of a concept in terms of different reference concepts.", + "Our results show that existing models are not suitable for our task and that our extractive model substantially outperforms these baselines." + ] + }, + { + "title": "\u201cIt doesn\u2019t look good for a date\u201d: Transforming Critiques into Preferences for Conversational Recommendation Systems", + "abstract": [ + "Conversations aimed at determining good recommendations are iterative in nature.", + "People often express their preferences in terms of a critique of the current recommendation (e.g., \u201cIt doesn\u2019t look good for a date\u201d), requiring some degree of common sense for a preference to be inferred.", + "In this work, we present a method for transforming a user critique into a positive preference (e.g., \u201cI prefer more romantic\u201d) in order to retrieve reviews pertaining to potentially better recommendations (e.g., \u201cPerfect for a romantic dinner\u201d).", + "We leverage a large neural language model (LM) in a few-shot setting to perform critique-to-preference transformation, and we test two methods for retrieving recommendations: one that matches embeddings, and another that fine-tunes an LM for the task.", + "We instantiate this approach in the restaurant domain and evaluate it using a new dataset of restaurant critiques.", + "In an ablation study, we show that utilizing critique-to-preference transformation improves recommendations, and that there are at least three general cases that explain this improved performance." + ] + }, + { + "title": "CODE: Compiler-based Neuron-aware Ensemble training", + "abstract": [ + "Deep Neural Networks (DNNs) are redefining the state-of-the-art performance in a variety of tasks like speech recognition and image classification.", + "These impressive results are often enabled by ensembling many DNNs together.", + "Surprisingly, ensembling is often done by training several DNN instances from scratch and combining them.", + "This paper shows that there is significant redundancy in today\u2019s way of ensembling.", + "The novelty we propose is CODE, a compiler approach designed to automatically generate DNN ensembles while avoiding unnecessary retraining among its DNNs.", + "For this purpose, CODE introduces neuron-level analyses and transformations aimed at identifying and removing redundant computation from the networks that compose the ensemble.", + "Removing redundancy enables CODE to train large DNN ensembles in a fraction of the time and memory footprint needed by current techniques.", + "These savings can be leveraged by CODE to increase the output quality of its ensembles." 
+ ] + }, + { + "title": "Exploring the Role of Local and Global Explanations in Recommender Systems", + "abstract": [ + "Explanations are well-known to improve recommender systems\u2019 transparency.", + "These explanations may be local, explaining individual recommendations, or global, explaining the recommender model overall.", + "Despite their widespread use, there has been little investigation into the relative benefits of the two explanation approaches.", + "We conducted a 30-participant exploratory study and a 30-participant controlled user study with a research-paper recommender to analyze how providing local, global, or both explanations influences user understanding of system behavior.", + "Our results provide evidence suggesting that both are more helpful than either alone for explaining how to improve recommendations, yet both appeared less helpful than global alone for efficiently identifying false positive and negative recommendations.", + "However, we note that the two explanation approaches may be better compared in a higher-stakes or more opaque domain." + ] + }, + { + "title": "LIMEADE: A General Framework for Explanation-Based Human Tuning of Opaque Machine Learners", + "abstract": [ + "Research in human-centered AI has shown the benefits of systems that can explain their predictions.", + "Methods that allow humans to tune a model in response to the explanations are similarly useful.", + "While both capabilities are well-developed for transparent learning models (e.g., linear models and GAMs), and recent techniques (e.g., LIME and SHAP) can generate explanations for opaque models, no method for tuning opaque models in response to explanations has been user-tested to date.", + "This paper introduces LIMEADE, a general framework for tuning an arbitrary machine learning model based on an explanation of the model\u2019s prediction.", + "We demonstrate the generality of our approach with two case studies.", + "First, we successfully utilize LIMEADE for the human tuning of opaque image classifiers.", + "Second, we apply our framework to a neural recommender system for scientific papers on a public website and report on a user study showing that our framework leads to significantly higher perceived user control, trust, and satisfaction.", + "Analyzing 300 user logs from our publicly-deployed website, we uncover a tradeoff between canonical greedy explanations and diverse explanations that better facilitate human tuning." 
+ ] + }, + { + "title": "VILA: Improving Structured Content Extraction from Scientific PDFs Using Visual Layout Groups", + "abstract": [ + "Accurately extracting structured content from PDFs is a critical first step for NLP over scientific papers.", + "Recent work has improved extraction accuracy by incorporating elementary layout information, for example, each token\u2019s 2D position on the page, into language model pretraining.", + "We introduce new methods that explicitly model VIsual LAyout (VILA) groups, that is, text lines or text blocks, to further improve performance.", + "In our I-VILA approach, we show that simply inserting special tokens denoting layout group boundaries into model inputs can lead to a 1.9% Macro F1 improvement in token classification.", + "In the H-VILA approach, we show that hierarchical encoding of layout-groups can result in up to 47% inference time reduction with less than 0.8% Macro F1 loss.", + "Unlike prior layout-aware approaches, our methods do not require expensive additional pretraining, only fine-tuning, which we show can reduce training cost by up to 95%.", + "Experiments are conducted on a newly curated evaluation suite, S2-VLUE, that unifies existing automatically labeled datasets and includes a new dataset of manual annotations covering diverse papers from 19 scientific disciplines.", + "Pre-trained weights, benchmark datasets, and source code are available at https://github.com/allenai/VILA." + ] + }, + { + "title": "Stolen Probability: A Structural Weakness of Neural Language Models", + "abstract": [ + "Neural Network Language Models (NNLMs) generate probability distributions by applying a softmax function to a distance metric formed by taking the dot product of a prediction vector with all word vectors in a high-dimensional embedding space.", + "The dot-product distance metric forms part of the inductive bias of NNLMs.", + "Although NNLMs optimize well with this inductive bias, we show that this results in a sub-optimal ordering of the embedding space that structurally impoverishes some words at the expense of others when assigning probability.", + "We present numerical, theoretical and empirical analyses which show that words on the interior of the convex hull in the embedding space have their probability bounded by the probabilities of the words on the hull." 
+ ] + }, + { + "title": "G-DAug: Generative Data Augmentation for Commonsense Reasoning", + "abstract": [ + "Recent advances in commonsense reasoning depend on large-scale human-annotated training sets to achieve peak performance.", + "However, manual curation of training sets is expensive and has been shown to introduce annotation artifacts that neural models can readily exploit and overfit to.", + "We propose a novel generative data augmentation technique, G-DAUG^C, that aims to achieve more accurate and robust learning in a low-resource setting.", + "Our approach generates synthetic examples using pretrained language models and selects the most informative and diverse set of examples for data augmentation.", + "In experiments with multiple commonsense reasoning benchmarks, G-DAUG^C consistently outperforms existing data augmentation methods based on back-translation, establishing a new state-of-the-art on WinoGrande, CODAH, and CommonsenseQA, and also enhances out-of-distribution generalization, proving to be robust against adversaries or perturbations.", + "Our analysis demonstrates that G-DAUG^C produces a diverse set of fluent training examples, and that its selection and training approaches are important for performance." + ] + }, + { + "title": "Practical Methods for Semi-automated Peer Grading in a Classroom Setting", + "abstract": [ + "Peer grading, in which students grade each other's work, can provide an educational opportunity for students and reduce grading effort for instructors.", + "A variety of methods have been proposed for synthesizing peer-assigned grades into accurate submission grades.", + "However, when the assumptions behind these methods are not met, they may underperform a simple baseline of averaging the peer grades.", + "We introduce SABTXT, which improves over previous work through two mechanisms.", + "First, SABTXT uses a limited amount of historical instructor ground truth to model and correct for each peer's grading bias.", + "Second, SABTXT models the thoroughness of a peer review based on its textual content, and puts more weight on the more thorough peer reviews when computing submission grades.", + "In our experiments with over ten thousand peer reviews collected over four courses, we show that SABTXT outperforms existing approaches on our collected data, and achieves a mean squared error that is 6% lower than the strongest baseline on average." 
+ ] + }, + { + "title": "ABNIRML: Analyzing the Behavior of Neural IR Models", + "abstract": [ + "Pretrained contextualized language models such as BERT and T5 have established a new state-of-the-art for ad-hoc search.", + "However, it is not yet well understood why these methods are so effective, what makes some variants more effective than others, and what pitfalls they may have.", + "We present a new comprehensive framework for Analyzing the Behavior of Neural IR ModeLs (ABNIRML), which includes new types of diagnostic probes that allow us to test several characteristics\u2014such as writing styles, factuality, sensitivity to paraphrasing and word order\u2014that are not addressed by previous techniques.", + "To demonstrate the value of the framework, we conduct an extensive empirical study that yields insights into the factors that contribute to the neural model\u2019s gains, and identify potential unintended biases the models exhibit.", + "Some of our results confirm conventional wisdom, for example, that recent neural ranking models rely less on exact term overlap with the query, and instead leverage richer linguistic information, evidenced by their higher sensitivity to word and sentence order.", + "Other results are more surprising, such as that some models (e.g., T5 and ColBERT) are biased towards factually correct (rather than simply relevant) texts.", + "Further, some characteristics vary even for the same base language model, and other characteristics can appear due to random variations during model training." + ] + }, + { + "title": "Explanation-Based Tuning of Opaque Machine Learners with Application to Paper Recommendation", + "abstract": [ + "Research in human-centered AI has shown the benefits of machine-learning systems that can explain their predictions.", + "Methods that allow users to tune a model in response to the explanations are similarly useful.", + "While both capabilities are well-developed for transparent learning models (e.g., linear models and GA2Ms), and recent techniques (e.g., LIME and SHAP) can generate explanations for opaque models, no method currently exists for tuning of opaque models in response to explanations.", + "This paper introduces LIMEADE, a general framework for tuning an arbitrary machine learning model based on an explanation of the model's prediction.", + "We apply our framework to Semantic Sanity, a neural recommender system for scientific papers, and report on a detailed user study, showing that our framework leads to significantly higher perceived user control, trust, and satisfaction." 
+ ] + }, + { + "title": "High-Precision Extraction of Emerging Concepts from Scientific Literature", + "abstract": [ + "Identification of new concepts in scientific literature can help power faceted search, scientific trend analysis, knowledge-base construction, and more, but current methods are lacking.", + "Manual identification can't keep up with the torrent of new publications, while the precision of existing automatic techniques is too low for many applications.", + "We present an unsupervised concept extraction method for scientific literature that achieves much higher precision than previous work.", + "Our approach relies on a simple but novel intuition: each scientific concept is likely to be introduced or popularized by a single paper that is disproportionately cited by subsequent papers mentioning the concept.", + "From a corpus of computer science papers on arXiv, we find that our method achieves a Precision@1000 of 99%, compared to 86% for prior work, and a substantially better precision-yield trade-off across the top 15,000 extractions.", + "To stimulate research in this area, we release our code and data." + ] + }, + { + "title": "LIMEADE: From AI Explanations to Advice Taking", + "abstract": [ + "Research in human-centered AI has shown the benefits of systems that can explain their predictions.", + "Methods that allow AI to take advice from humans in response to explanations are similarly useful.", + "While both capabilities are well developed for transparent learning models (e.g., linear models and GA2Ms) and recent techniques (e.g., LIME and SHAP) can generate explanations for opaque models, little attention has been given to advice methods for opaque models.", + "This article introduces LIMEADE, the first general framework that translates both positive and negative advice (expressed using high-level vocabulary such as that employed by post hoc explanations) into an update to an arbitrary, underlying opaque model.", + "We demonstrate the generality of our approach with case studies on 70 real-world models across two broad domains: image classification and text recommendation.", + "We show that our method improves accuracy compared to a rigorous baseline on the image classification domains.", + "For the text modality, we apply our framework to a neural recommender system for scientific papers on a public website; our user study shows that our framework leads to significantly higher perceived user control, trust, and satisfaction." 
+ ] + }, + { + "title": "SPECTER: Document-level Representation Learning using Citation-informed Transformers", + "abstract": [ + "Representation learning is a critical ingredient for natural language processing systems.", + "Recent Transformer language models like BERT learn powerful textual representations, but these models are targeted towards token- and sentence-level training objectives and do not leverage information on inter-document relatedness, which limits their document-level representation power.", + "For applications on scientific documents, such as classification and recommendation, accurate embeddings of documents are a necessity.", + "We propose SPECTER, a new method to generate document-level embedding of scientific papers based on pretraining a Transformer language model on a powerful signal of document-level relatedness: the citation graph.", + "Unlike existing pretrained language models, Specter can be easily applied to downstream applications without task-specific fine-tuning.", + "Additionally, to encourage further research on document-level models, we introduce SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from citation prediction, to document classification and recommendation.", + "We show that Specter outperforms a variety of competitive baselines on the benchmark." + ] + }, + { + "title": "Don\u2019t Stop Pretraining: Adapt Language Models to Domains and Tasks", + "abstract": [ + "Language models pretrained on text from a wide variety of sources form the foundation of today\u2019s NLP.", + "In light of the success of these broad-coverage models, we investigate whether it is still helpful to tailor a pretrained model to the domain of a target task.", + "We present a study across four domains (biomedical and computer science publications, news, and reviews) and eight classification tasks, showing that a second phase of pretraining in-domain (domain-adaptive pretraining) leads to performance gains, under both high- and low-resource settings.", + "Moreover, adapting to the task\u2019s unlabeled data (task-adaptive pretraining) improves performance even after domain-adaptive pretraining.", + "Finally, we show that adapting to a task corpus augmented using simple data selection strategies is an effective alternative, especially when resources for domain-adaptive pretraining might be unavailable.", + "Overall, we consistently find that multi-phase adaptive pretraining offers large gains in task performance." + ] + }, + { + "title": "Supplementary Material for the manuscript: \u201cA new evaluation framework for topic modeling algorithms based on synthetic corpora\u201d", + "abstract": [ + "LDA requires hyperparameter values for the topic-document distribution, a K_a-dimensional vector \u03b1\u20d7 = (\u03b1_j)_{j=1,...,K_a} (where K_a is the assumed number of topics), and the word-topic distribution, a V-dimensional vector \u03b2\u20d7 = (\u03b2_w)_{w=1,...,V} (where V is the size of the vocabulary).", + "We assume symmetric priors, i.e. \u03b1_j = \u03b1 and \u03b2_w = \u03b2, such that the hyperparameters are fully determined by the scalar parameters \u03b1 and \u03b2.", + "For LDA_VB we use the default values of the gensim implementation.", + "For LDA_GS we use the default values of the gensim wrapper of the mallet implementation." 
+ ] + }, + { + "title": "Just Add Functions: A Neural-Symbolic Language Model", + "abstract": [ + "Neural network language models (NNLMs) have achieved ever-improving accuracy due to more sophisticated architectures and increasing amounts of training data.", + "However, the inductive bias of these models (formed by the distributional hypothesis of language), while ideally suited to modeling most running text, results in key limitations for today's models.", + "In particular, the models often struggle to learn certain spatial, temporal, or quantitative relationships, which are commonplace in text and are second-nature for human readers.", + "Yet, in many cases, these relationships can be encoded with simple mathematical or logical expressions.", + "How can we augment today's neural models with such encodings? In this paper, we propose a general methodology to enhance the inductive bias of NNLMs by incorporating simple functions into a neural architecture to form a hierarchical neural-symbolic language model (NSLM).", + "These functions explicitly encode symbolic deterministic relationships to form probability distributions over words.", + "We explore the effectiveness of this approach on numbers and geographic locations, and show that NSLMs significantly reduce perplexity in small-corpus language modeling, and that the performance improvement persists for rare tokens even on much larger corpora.", + "The approach is simple and general, and we discuss how it can be applied to other word classes beyond numbers and geography." + ] + }, + { + "title": "A Semantic Cover Approach for Topic Modeling", + "abstract": [ + "We introduce a novel topic modeling approach based on constructing a semantic set cover for clusters of similar documents.", + "Specifically, our approach first clusters documents using their Tf-Idf representation, and then covers each cluster with a set of topic words based on semantic similarity, defined in terms of a word embedding.", + "Computing a topic cover amounts to solving a minimum set cover problem.", + "Our evaluation compares our topic modeling approach to Latent Dirichlet Allocation (LDA) on three metrics: 1) qualitative topic match, measured using evaluations by Amazon Mechanical Turk (MTurk) workers, 2) performance on classification tasks using each topic model as a sparse feature representation, and 3) topic coherence.", + "We find that qualitative judgments significantly favor our approach, the method outperforms LDA on topic coherence, and is comparable to LDA on document classification tasks." + ] + }, + { + "title": "Multi-sense Definition Modeling using Word Sense Decompositions", + "abstract": [ + "Word embeddings capture syntactic and semantic information about words.", + "Definition modeling aims to make the semantic content in each embedding explicit, by outputting a natural language definition based on the embedding.", + "However, existing definition models are limited in their ability to generate accurate definitions for different senses of the same word.", + "In this paper, we introduce a new method that enables definition modeling for multiple senses.", + "We show how a Gumbel-Softmax approach outperforms baselines at matching sense-specific embeddings to definitions during training.", + "In experiments, our multi-sense definition model improves recall over a state-of-the-art single-sense definition model by a factor of three, without harming precision." 
+ ] + }, + { + "title": "Using Large Corpus N-gram Statistics to Improve Recurrent Neural Language Models", + "abstract": [ + "Recurrent neural network language models (RNNLM) form a valuable foundation for many NLP systems, but training the models can be computationally expensive, and may take days to train on a large corpus.", + "We explore a technique that uses large corpus n-gram statistics as a regularizer for training a neural network LM on a smaller corpus.", + "In experiments with the Billion-Word and Wikitext corpora, we show that the technique is effective, and more time-efficient than simply training on a larger sequential corpus.", + "We also introduce new strategies for selecting the most informative n-grams, and show that these boost efficiency." + ] + }, + { + "title": "Machine learning mortality classification in clinical documentation with increased accuracy in visual\u2010based analyses", + "abstract": [ + "The role of machine learning on clinical documentation for predictive outcomes remains undefined.", + "We aimed to compare three neural networks on inpatient providers\u2019 notes to predict mortality in neonatal hypoxic\u2010ischaemic encephalopathy (HIE)." + ] + }, + { + "title": "Abductive Commonsense Reasoning", + "abstract": [ + "Abductive reasoning is inference to the most plausible explanation.", + "For example, if Jenny finds her house in a mess when she returns from work, and remembers that she left a window open, she can hypothesize that a thief broke into her house and caused the mess, as the most plausible explanation.", + "While abduction has long been considered to be at the core of how people interpret and read between the lines in natural language (Hobbs et al., 1988), there has been relatively little research in support of abductive natural language inference and generation.", + "We present the first study that investigates the viability of language-based abductive reasoning.", + "We introduce a challenge dataset, ART, that consists of over 20k commonsense narrative contexts and 200k explanations.", + "Based on this dataset, we conceptualize two new tasks -- (i) Abductive NLI: a multiple-choice question answering task for choosing the more likely explanation, and (ii) Abductive NLG: a conditional generation task for explaining given observations in natural language.", + "On Abductive NLI, the best model achieves 68.9% accuracy, well below human performance of 91.4%.", + "On Abductive NLG, the current best language generators struggle even more, as they lack reasoning capabilities that are trivial for humans.", + "Our analysis leads to new insights into the types of reasoning that deep pre-trained language models fail to perform--despite their strong performance on the related but more narrowly defined task of entailment NLI--pointing to interesting avenues for future research." 
+      ]
+    },
+    {
+      "title": "A new evaluation framework for topic modeling algorithms based on synthetic corpora",
+      "abstract": [
+        "Topic models are in widespread use in natural language processing and beyond.",
+        "Here, we propose a new framework for the evaluation of topic modeling algorithms based on synthetic corpora containing an unambiguously defined ground truth topic structure.",
+        "The major innovation of our approach is the ability to quantify the agreement between the planted and inferred topic structures by comparing the assigned topic labels at the level of the tokens.",
+        "In experiments, our approach yields novel insights about the relative strengths of topic models as corpus characteristics vary, and the first evidence of an ``undetectable phase'' for topic models when the planted structure is weak.",
+        "We also establish the practical relevance of the insights gained for synthetic corpora by predicting the performance of topic modeling algorithms in classification tasks in real-world corpora."
+      ]
+    },
+    {
+      "title": "Estimating Marginal Probabilities of n-grams for Recurrent Neural Language Models",
+      "abstract": [
+        "Recurrent neural network language models (RNNLMs) are the current standard-bearer for statistical language modeling.",
+        "However, RNNLMs only estimate probabilities for complete sequences of text, whereas some applications require context-independent phrase probabilities instead.",
+        "In this paper, we study how to compute an RNNLM\u2019s marginal probability: the probability that the model assigns to a short sequence of text when the preceding context is not known.",
+        "We introduce a simple method of altering the RNNLM training to make the model more accurate at marginal estimation.",
+        "Our experiments demonstrate that the technique is effective compared to baselines including the traditional RNNLM probability and an importance sampling approach.",
+        "Finally, we show how we can use the marginal estimation to improve an RNNLM by training the marginals to match n-gram probabilities from a larger corpus."
+      ]
+    },
+    {
+      "title": "Associations in Electronic Data Warehouse Elements of Neonatal Hypoxic-ischemic Encephalopathy and Length of Stay",
+      "abstract": [
+        "Background: Neonatal hypoxic-ischemic encephalopathy (HIE) is often accompanied by physiologic derangements and organ dysfunction; however, the natural history of these perturbations and their relationship with length of stay (LOS) remains uncertain.",
+        "Objective: To describe daily markers of physiologic and organ dysfunction in infants with HIE and test their association with LOS in the neonatal intensive care unit.",
+        "Methods: Eligible subjects were infants with HIE born \u226536 weeks' gestation from 2010-16 who received therapeutic hypothermia.",
+        "Those with major congenital anomalies were excluded.",
+        "From our institution\u2019s electronic data warehouse (EDW), we \u2026"
+      ]
+    },
+    {
+      "title": "Construction of the Literature Graph in Semantic Scholar",
+      "abstract": [
+        "We describe a deployed scalable system for organizing published scientific literature into a heterogeneous graph to facilitate algorithmic manipulation and discovery.",
+        "The resulting literature graph consists of more than 280M nodes, representing papers, authors, entities and various interactions between them (e.g., authorships, citations, entity mentions).",
+        "We reduce literature graph construction into familiar NLP tasks (e.g., entity extraction and linking), point out research challenges due to differences from standard formulations of these tasks, and report empirical results for each task.",
+        "The methods described in this paper are used to enable semantic features in www.semanticscholar.org."
+      ]
+    },
+    {
+      "title": "Controlling Global Statistics in Recurrent Neural Network Text Generation",
+      "abstract": [
+        "Recurrent neural network language models (RNNLMs) are an essential component for many language generation tasks such as machine translation, summarization, and automated conversation.",
+        "Often, we would like to subject the text generated by the RNNLM to constraints, in order to overcome systemic errors (e.g. word repetition) or achieve application-specific goals (e.g.
more positive sentiment).",
+        "In this paper, we present a method for training RNNLMs to simultaneously optimize likelihood and follow a given set of statistical constraints on text generation.",
+        "The problem is challenging because the statistical constraints are defined over aggregate model behavior, rather than model parameters, meaning that a straightforward parameter regularization approach is insufficient.",
+        "We solve this problem using a dynamic regularizer that updates as training proceeds, based on the generative behavior of the RNNLMs.",
+        "Our experiments show that the dynamic regularizer outperforms both generic training and a static regularization baseline.",
+        "The approach is successful at improving word-level repetition statistics by a factor of four in RNNLMs on a definition modeling task.",
+        "It also improves model perplexity when the statistical constraints are $n$-gram statistics taken from a large corpus."
+      ]
+    },
+    {
+      "title": "Extracting Commonsense Properties from Embeddings with Limited Human Guidance",
+      "abstract": [
+        "Intelligent systems require common sense, but automatically extracting this knowledge from text can be difficult.",
+        "We propose and assess methods for extracting one type of commonsense knowledge, object-property comparisons, from pre-trained embeddings.",
+        "In experiments, we show that our approach exceeds the accuracy of previous work but requires substantially less hand-annotated knowledge.",
+        "Further, we show that an active learning approach that synthesizes common-sense queries can boost accuracy."
+      ]
+    },
+    {
+      "title": "OTyper: A Neural Architecture for Open Named Entity Typing",
+      "abstract": [
+        "Named Entity Typing (NET) is valuable for many natural language processing tasks, such as relation extraction, question answering, knowledge base population, and co-reference resolution.",
+        "Classical NET targeted a few coarse-grained types, but the task has expanded to sets of hundreds of types in recent years.",
+        "Existing work in NET assumes that the target types are specified in advance, and that hand-labeled examples of each type are available.",
+        "In this work, we introduce the task of Open Named Entity Typing (ONET), which is NET when the set of target types is not known in advance.",
+        "We propose a neural network architecture for ONET, called OTyper, and evaluate its ability to tag entities with types not seen in training.",
+        "On the benchmark FIGER(GOLD) dataset, OTyper achieves a weighted AUC-ROC score of 0.870 on unseen types, substantially outperforming pattern- and embedding-based baselines."
+      ]
+    },
+    {
+      "title": "Sampling Informative Training Data for RNN Language Models",
+      "abstract": [
+        "We propose an unsupervised importance sampling approach to selecting training data for recurrent neural network (RNN) language models.",
+        "To increase the information content of the training set, our approach preferentially samples high perplexity sentences, as determined by an easily queryable n-gram language model.",
+        "We experimentally evaluate the held-out perplexity of models trained with our various importance sampling distributions.",
+        "We show that language models trained on data sampled using our proposed approach outperform models trained over randomly sampled subsets of both the Billion Word (Chelba et al., 2014) and Wikitext-103 (Merity et al., 2016) benchmark corpora."
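The "Sampling Informative Training Data" abstract above describes preferentially sampling high-perplexity sentences, scored by an n-gram model. The paper's exact sampling distribution is not given here, so the sketch below simply samples sentences in proportion to per-token n-gram perplexity; `ngram_logprob` is a hypothetical stand-in for the "easily queryable n-gram language model".

```python
import math
import random

def sample_informative(sentences, ngram_logprob, m):
    """Sample m training sentences (with replacement) with probability
    proportional to their per-token perplexity under an n-gram model.
    `ngram_logprob(tokens)` is assumed to return the total natural-log
    probability of a tokenized sentence."""
    weights = []
    for s in sentences:
        tokens = s.split()
        # Per-token perplexity: exp(-log P(sentence) / length).
        weights.append(math.exp(-ngram_logprob(tokens) / max(len(tokens), 1)))
    return random.choices(sentences, weights=weights, k=m)
```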
+      ]
+    },
+    {
+      "title": "2204: Evaluations of physiologic perturbations and their relationship with length of stay in neonatal hypoxic-ischemic encephalopathy",
+      "abstract": [
+        "OBJECTIVES/SPECIFIC AIMS: Neonatal hypoxic-ischemic encephalopathy (HIE) is frequently accompanied by physiologic perturbations and organ dysfunction.",
+        "Markers of these perturbations and their associations with length of stay (LOS) are uncertain.",
+        "To estimate the association between changes in selected physiologic and/or laboratory values and LOS in newborns with HIE.",
+        "METHODS/STUDY POPULATION: Using the Children\u2019s Hospitals Neonatal Database (CHND), we identified neonates with HIE at our center born \u226536 weeks\u2019 gestation from 2010 to 2016.",
+        "Those with major congenital anomalies were omitted.",
+        "Infants uniformly received therapeutic hypothermia for 72 hours unless death occurred sooner.",
+        "Inpatient vital signs and selected laboratory markers were collected from our institution\u2019s health informatics electronic data warehouse (EDW) and then matched to records in CHND.",
+        "With severity of HIE, gender, and confirmed seizures, each marker\u2019s association with LOS was calculated using multivariable Cox proportional hazards regression equations.",
+        "These analyses were stratified by mortality.",
+        "Candidate markers were vital signs, pulse oximetry, creatinine, acidosis (pH), international normalized ratio (INR), and supplemental oxygen (FiO2).",
+        "RESULTS/ANTICIPATED RESULTS: There were 66 eligible infants (38 males) and 1741 patient-days identified; severe HIE (48%) and mortality (n=21, 32%) were common.",
+        "Overall, the median length of stay (mLOS) was 20.5 days (25th\u201375th centile: 10\u201331 days), although shorter for nonsurvivors [nonsurvivors mLOS=8 days (5, 20); survivors mLOS=24 days (14, 31), p<0.001].",
+        "Median birthweight and gestational age were 3.3 kg and 39.4 weeks, respectively.",
+        "In survivors (n=45, 1290 days), regression analyses demonstrated that none of the selected parameters were associated with LOS.",
+        "Among nonsurvivors (n=21, 451 days), diastolic blood pressure changes [hazard ratio (HR)=0.93, 95% confidence interval (CI)=0.88, 0.97, p=0.04] were related to longer survival time; conversely, temperature (HR=2.0, 95% CI=1.24, 3.26, p=0.005) was related to shorter survival.",
+        "Creatinine, pH, INR, FiO2, and other vital signs were unrelated to time-to-death in nonsurvivors.",
+        "DISCUSSION/SIGNIFICANCE OF IMPACT: In a pilot study of neonatal HIE, changes in physiologic values were related to duration of survival in nonsurvivors, while neither physiologic nor laboratory values were related to survivors\u2019 mLOS.",
+        "These results both exemplify novel uses of EDWs for disease-specific, exposure-outcome relationships and incorporate the software functionality required to extract, clean, and report from clinical information captured in electronic health records.",
+        "We anticipate that text mining with techniques such as natural language processing will augment associations and/or predictions of short-term outcomes."
+      ]
+    },
+    {
+      "title": "VecShare: A Framework for Sharing Word Representation Vectors",
+      "abstract": [
+        "Many Natural Language Processing (NLP) models rely on distributed vector representations of words.",
+        "Because the process of training word vectors can require large amounts of data and computation, NLP researchers and practitioners often utilize pre-trained embeddings downloaded from the Web.",
+        "However, finding the best embeddings for a given task is difficult, and can be computationally prohibitive.",
+        "We present a framework, called VecShare, that makes it easy to share and retrieve word embeddings on the Web.",
+        "The framework leverages a public data-sharing infrastructure to host embedding sets, and provides automated mechanisms for retrieving the embeddings most similar to a given corpus.",
+        "We perform an experimental evaluation of VecShare\u2019s similarity strategies, and show that they are effective at efficiently retrieving embeddings that boost accuracy in a document classification task.",
+        "Finally, we provide an open-source Python library for using the VecShare framework."
+      ]
+    },
+    {
+      "title": "PAG2ADMG: A Novel Methodology to Enumerate Causal Graph Structures",
+      "abstract": [
+        "Causal graphs, such as directed acyclic graphs (DAGs) and partial ancestral graphs (PAGs), represent causal relationships among variables in a model.",
+        "Methods exist for learning DAGs and PAGs from data and for converting DAGs to PAGs.",
+        "However, these methods only output a single causal graph consistent with the independencies/dependencies (the Markov equivalence class M) estimated from the data.",
+        "Yet, many distinct graphs may be consistent with M, and a data modeler may wish to select among these using domain knowledge.",
+        "In this paper, we present a method that makes this possible.",
+        "We introduce PAG2ADMG, the first method for enumerating all causal graphs consistent with M, under certain assumptions.",
+        "PAG2ADMG converts a given PAG into a set of acyclic directed mixed graphs (ADMGs).",
+        "We prove the correctness of the approach and demonstrate its efficiency relative to brute-force enumeration."
+      ]
+    },
+    {
+      "title": "Learning Hierarchically Decomposable Concepts with Active Over-Labeling",
+      "abstract": [
+        "Many classification tasks target high-level concepts that can be decomposed into a hierarchy of finer-grained sub-concepts.",
+        "For example, some string entities that are Locations are also Attractions, some Attractions are Museums, etc.",
+        "Such hierarchies are common in named entity recognition (NER), document classification, and biological sequence analysis.",
+        "We present a new approach for learning hierarchically decomposable concepts.",
+        "The approach learns a high-level classifier (e.g., location vs. non-location) by separately learning multiple finer-grained classifiers (e.g., museum vs.
non-museum), and then combining the results.",
+        "Soliciting labels at a finer level of granularity than that of the target concept is a new approach to active learning, which we term active over-labeling.",
+        "In experiments in NER and document classification tasks, we show that active over-labeling substantially improves area under the precision-recall curve when compared with standard passive or active learning.",
+        "Finally, because finer-grained labels may be more expensive to obtain, we also present a cost-sensitive active learner that uses a multi-armed bandit approach to dynamically choose the label granularity to target, and show that the bandit-based learner is robust to differences in label cost and labeling budget."
+      ]
+    },
+    {
+      "title": "Beating the Artificial Chaos: Fighting OSN Spam Using Its Own Templates",
+      "abstract": [
+        "Online social networks (OSNs) are extremely popular among Internet users.",
+        "However, spam originating from friends and acquaintances not only reduces the joy of Internet surfing but also causes damage to less security-savvy users.",
+        "Prior countermeasures combat OSN spam from different angles.",
+        "Due to the diversity of spam, there is hardly any existing method that can independently detect the majority or most of OSN spam.",
+        "In this paper, we empirically analyze the textual pattern of a large collection of OSN spam.",
+        "An inspiring finding is that the majority (e.g., 76.4% in 2015) of the collected spam is generated with underlying templates.",
+        "Based on the analysis, we propose Tangram, an OSN spam filtering system that performs online inspection on the stream of user-generated messages.",
+        "Tangram extracts the templates of spam detected by existing methods and then matches messages against the templates for accurate and fast spam detection.",
+        "It automatically divides the OSN spam into segments and uses the segments to construct templates to filter future spam.",
+        "Experimental results on Twitter and Facebook data sets show that Tangram is highly accurate and can rapidly generate templates to throttle newly emerged campaigns.",
+        "Furthermore, we analyze the behavior of detected OSN spammers.",
+        "We find a series of spammer properties\u2014such as spamming accounts being created in bursts and a single active organization orchestrating more spam than all other spammers combined\u2014that promise more comprehensive spam countermeasures."
+      ]
+    },
+    {
+      "title": "Definition Modeling: Learning to Define Word Embeddings in Natural Language",
+      "abstract": [
+        "Distributed representations of words have been shown to capture lexical semantics, based on their effectiveness in word similarity and analogical relation tasks.",
+        "But, these tasks only evaluate lexical semantics indirectly.",
+        "In this paper, we study whether it is possible to utilize distributed representations to generate dictionary definitions of words, as a more direct and transparent representation of the embeddings' semantics.",
+        "We introduce definition modeling, the task of generating a definition for a given word and its embedding.",
+        "We present different definition model architectures based on recurrent neural networks, and experiment with the models over multiple data sets.",
+        "Our results show that a model that controls dependencies between the word being defined and the definition words performs significantly better, and that a character-level convolution layer that leverages morphology can complement word-level embeddings.",
+        "Our analysis reveals which components of our models contribute to accuracy.",
+        "Finally, the errors made by a definition model may provide insight into the shortcomings of word embeddings."
+      ]
+    },
+    {
+      "title": "The Cool Story Behind Snow",
+      "abstract": [
+        "Escape from the cold and embrace the avalanche of information about snow in this fact-tastic nonfiction Level 3 Ready-to-Read, part of a series about the science of fun stuff!",
+        "Did you ever wonder how snow is formed?",
+        "Did you know that no two snowflakes are exactly alike?",
+        "Learn about the polar vortex, different kinds of snow, how meteorologists predict snowstorms, and much more as you become a Science of Fun Stuff Expert on snow!",
+        "Amaze your friends with all you\u2019ve learned in this engaging, fact-filled Level 3 Ready-to-Read!",
+        "A special section at the back of the book includes Common Core vetted extras on subjects like social studies and math, and there\u2019s even a fun quiz so readers can test themselves to see what they\u2019ve learned!",
+        "Learning science has never been so much fun!"
+      ]
+    },
+    {
+      "title": "Efficient Methods for Inferring Large Sparse Topic Hierarchies",
+      "abstract": [
+        "Latent variable topic models such as Latent Dirichlet Allocation (LDA) can discover topics from text in an unsupervised fashion.",
+        "However, scaling the models up to the many distinct topics exhibited in modern corpora is challenging.",
+        "\u201cFlat\u201d topic models like LDA have difficulty modeling sparsely expressed topics, and richer hierarchical models become computationally intractable as the number of topics increases.",
+        "In this paper, we introduce efficient methods for inferring large topic hierarchies.",
+        "Our approach is built upon the Sparse Backoff Tree (SBT), a new prior for latent topic distributions that organizes the latent topics as leaves in a tree.",
+        "We show how a document model based on SBTs can effectively infer accurate topic spaces of over a million topics.",
+        "We introduce a collapsed sampler for the model that exploits sparsity and the tree structure in order to make inference efficient.",
+        "In experiments with multiple data sets, we show that scaling to large topic spaces results in much more accurate models, and that SBT document models make use of large topic spaces more effectively than flat LDA."
+      ]
+    },
+    {
+      "title": "Efficient Methods for Incorporating Knowledge into Topic Models",
+      "abstract": [
+        "Latent Dirichlet allocation (LDA) is a popular topic modeling technique for exploring hidden topics in text corpora.",
+        "Increasingly, topic modeling needs to scale to larger topic spaces and use richer forms of prior knowledge, such as word correlations or document labels.",
+        "However, inference is cumbersome for LDA models with prior knowledge.",
+        "As a result, LDA models that use prior knowledge only work in small-scale scenarios.",
+        "In this work, we propose a factor graph framework, Sparse Constrained LDA (SC-LDA), for efficiently incorporating prior knowledge into LDA.",
+        "We evaluate SC-LDA\u2019s ability to incorporate word correlation knowledge and document label knowledge on three benchmark datasets.",
+        "Compared to several baseline methods, SC-LDA achieves comparable performance but is significantly faster."
+      ]
+    },
+    {
+      "title": "Efficient Methods for Incorporating Knowledge into Topic Models",
+      "abstract": [
+        "Latent Dirichlet allocation (LDA) is a popular topic modeling technique for exploring hidden topics in text corpora.",
+        "Increasingly, topic modeling needs to scale to larger topic spaces and use richer forms of prior knowledge, such as word correlations or document labels.",
+        "However, inference is cumbersome for LDA models with prior knowledge.",
+        "As a result, LDA models that use prior knowledge only work in small-scale scenarios.",
+        "In this work, we propose a factor graph framework, Sparse Constrained LDA (SC-LDA), for efficiently incorporating prior knowledge into LDA.",
+        "We evaluate SC-LDA\u2019s ability to incorporate word correlation knowledge and document label knowledge on three benchmark datasets.",
+        "Compared to several baseline methods, SC-LDA achieves comparable performance but is significantly faster.",
+        "1 Challenge: Leveraging Prior Knowledge in Large-scale Topic Models",
+        "Topic models, such as Latent Dirichlet Allocation (Blei et al., 2003, LDA), have been successfully used for discovering hidden topics in text collections.",
+        "LDA is an unsupervised model\u2014it requires no annotation\u2014and discovers, without any supervision, the thematic trends in a text collection.",
+        "However, LDA\u2019s lack of supervision can lead to disappointing results.",
+        "Often, the hidden topics learned by LDA fail to make sense to end users.",
+        "Part of the problem is that the objective function of topic models does not always correlate with human judgments of topic quality (Chang et al., 2009).",
+        "Therefore, it\u2019s often necessary to incorporate prior knowledge into topic models to improve the model\u2019s performance.",
+        "Recent work has also shown that interactive human feedback can improve the quality and stability of topics (Hu and Boyd-Graber, 2012; Yang et al., 2015).",
+        "Information about documents (Ramage et al., 2009) or words (Boyd-Graber et al., 2007) can improve LDA\u2019s topics.",
+        "In addition to its occasional inscrutability, scalability can also hamper LDA\u2019s adoption.",
+        "Conventional Gibbs sampling\u2014the most widely used inference for LDA\u2014scales linearly with the number of topics.",
+        "Moreover, accurate training usually takes many sampling passes over the dataset.",
+        "Therefore, for large datasets with millions or even billions of tokens, conventional Gibbs sampling takes too long to finish.",
+        "For standard LDA, recently introduced fast sampling methods (Yao et al., 2009; Li et al., 2014; Yuan
et al., 2015) enable industrial applications of topic modeling to search engines and online advertising, where capturing the \u201clong tail\u201d of infrequently used topics requires large topic spaces.",
+        "For example, while typical LDA models in academic papers have up to $10^3$ topics, industrial applications with $10^5$\u2013$10^6$ topics are common (Wang et al., 2014).",
+        "Moreover, scaling topic models to many topics can also reveal the hierarchical structure of topics (Downey et al., 2015).",
+        "Thus, there is a need for topic models that can both benefit from rich prior information and that can scale to large datasets.",
+        "However, existing methods for improving scalability focus on topic models without prior information.",
+        "To rectify this, we propose a factor graph model that encodes a potential function over the hidden topic variables, encouraging topics consistent with prior knowledge.",
+        "The factor model representation admits an efficient sampling algorithm that takes advantage of the model\u2019s sparsity.",
+        "We show that our method achieves comparable performance but runs significantly faster than baseline methods, enabling discovery of models with many topics enriched by prior knowledge.",
+        "2 Efficient Algorithm for Incorporating Knowledge into LDA",
+        "In this section, we introduce the factor model for incorporating prior knowledge and show how to efficiently use Gibbs sampling for inference.",
+        "2.1 Background: LDA and SparseLDA",
+        "A statistical topic model represents words in documents in a collection D as mixtures of T topics, which are multinomials over a vocabulary of size V.",
+        "In LDA, each document d is associated with a multinomial distribution over topics, \u03b8_d.",
+        "The probability of a word type w given topic z is \u03c6_{w|z}.",
+        "The multinomial distributions \u03b8_d and \u03c6_z are drawn from Dirichlet distributions: \u03b1 and \u03b2 are the hyperparameters for \u03b8 and \u03c6.",
+        "We represent the document collection D as a sequence of words w, and topic assignments as z. We use symmetric priors \u03b1 and \u03b2 in the model and experiment, but asymmetric priors are easily encoded in the models (Wallach et al., 2009).",
+        "Discovering the latent topic assignments z from observed words w requires inferring the posterior distribution P(z|w).",
+        "Griffiths and Steyvers (2004) propose using collapsed Gibbs sampling.",
+        "The probability of a topic assignment z = t in document d, given an observed word type w and the other topic assignments z_\u2212, is P(z = t | z_\u2212, w) \u221d (n_{d,t} + \u03b1)(n_{w,t} + \u03b2)/(n_t + V\u03b2), where n_{d,t} is the number of tokens in d assigned to topic t, n_{w,t} is the number of assignments of word type w to topic t, and n_t is the total number of tokens assigned to topic t."
+      ]
+    },
+    {
+      "title": "Adding High-Precision Links to Wikipedia",
+      "abstract": [
+        "Wikipedia\u2019s link structure is a valuable resource for natural language processing tasks, but only a fraction of the concepts mentioned in each article are annotated with hyperlinks.",
+        "In this paper, we study how to augment Wikipedia with additional high-precision links.",
+        "We present 3W, a system that identifies concept mentions in Wikipedia text, and links each mention to its referent page.",
+        "3W leverages rich semantic information present in Wikipedia to achieve high precision.",
+        "Our experiments demonstrate that 3W can add an average of seven new links to each Wikipedia article, at a precision of 0.98."
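The collapsed Gibbs update reconstructed in the second "Efficient Methods for Incorporating Knowledge into Topic Models" entry above translates directly into code. A minimal sketch for standard LDA follows, assuming count arrays `n_dt` (document-topic), `n_wt` (word-topic), and `n_t` (topic totals) that include all current assignments; SC-LDA would additionally multiply in its knowledge-potential factor, which is omitted here.

```python
import numpy as np

def resample_token(w, d, z_old, n_dt, n_wt, n_t, alpha, beta, V, rng):
    """One collapsed Gibbs step for standard LDA, implementing
    P(z = t | z_-, w) ∝ (n_dt + α)(n_wt + β)/(n_t + Vβ)."""
    # Remove the current token's assignment before computing the conditional.
    n_dt[d, z_old] -= 1
    n_wt[w, z_old] -= 1
    n_t[z_old] -= 1
    # Unnormalized conditional over all T topics, vectorized over t.
    p = (n_dt[d] + alpha) * (n_wt[w] + beta) / (n_t + V * beta)
    t = rng.choice(len(p), p=p / p.sum())
    # Record the counts for the newly sampled topic.
    n_dt[d, t] += 1
    n_wt[w, t] += 1
    n_t[t] += 1
    return t

# Usage sketch: rng = np.random.default_rng(); call resample_token for
# every token position, repeating for several passes over the corpus.
```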
+      ]
+    },
+    {
+      "title": "Analyzing the content emphasis of web search engines",
+      "abstract": [
+        "Millions of people search the Web each day.",
+        "As a consequence, the ranking algorithms employed by Web search engines have a profound influence on which pages users visit.",
+        "Characterizing this influence, and informing users when different engines favor certain sites or points of view, enables more transparent access to the Web's information.",
+        "We present PAWS, a platform for analyzing differences among Web search engines.",
+        "PAWS measures content emphasis: the degree to which differences across search engines' rankings correlate with features of the ranked content, including point of view (e.g., positive or negative orientation toward their company's products) and advertisements.",
+        "We propose an approach for identifying the orientations in search results at scale, through a novel technique that minimizes the expected number of human judgments required.",
+        "We apply PAWS to news search on Google and Bing, and find no evidence that the engines emphasize results that express positive orientation toward the engine company's products.",
+        "We do find that the engines emphasize particular news sites, and that they also favor pages containing their company's advertisements, as opposed to competitor advertisements."
+      ]
+    },
+    {
+      "title": "Active Learning with Constrained Topic Model",
+      "abstract": [
+        "Latent Dirichlet Allocation (LDA) is a topic modeling tool that automatically discovers topics from a large collection of documents.",
+        "It is one of the most popular text analysis tools currently in use.",
+        "In practice however, the topics discovered by LDA do not always make sense to end users.",
+        "In this extended abstract, we propose an active learning framework that interactively and iteratively acquires user feedback to improve the quality of learned topics.",
+        "We conduct experiments to demonstrate its effectiveness with simulated user input on a benchmark dataset."
+      ]
+    },
+    {
+      "title": "TextJoiner: On-demand Information Extraction with Multi-Pattern Queries",
+      "abstract": [
+        "Web Information Extraction (WIE) is the task of automatically extracting knowledge from Web content.",
+        "On-demand WIE systems such as KNOWITNOW [1] and TEXTRUNNER [2] allow users to query the Web for a textual context that indicates a desired relation.",
+        "For example, the context \u201c$x invented $y\u201d indicates the Invented(x, y) relation.",
+        "The WIE system is tasked with responding in real time with a list of argument tuples of the query relation, extracted from the Web."
+      ]
+    },
+    {
+      "title": "Learning Representations for Weakly Supervised Natural Language Processing Tasks",
+      "abstract": [
+        "Finding the right representations for words is critical for building accurate NLP systems when domain-specific labeled data for the task is scarce.",
+        "This article investigates novel techniques for extracting features from n-gram models, Hidden Markov Models, and other statistical language models, including a novel Partial Lattice Markov Random Field model.",
+        "Experiments on part-of-speech tagging and information extraction, among other tasks, indicate that features taken from statistical language models, in combination with more traditional features, outperform traditional representations alone, and that graphical model representations outperform n-gram models, especially on sparse and polysemous words."
+      ]
+    },
+    {
+      "title": "WebSAIL wikifier at ERD 2014",
+      "abstract": [
+        "In this paper, we report on our participation in Entity Recognition and Disambiguation Challenge 2014.",
+        "We present WebSAIL Wikifier, an entity recognition and disambiguation system that identifies and links textual mentions to their referent entities in Wikipedia and then maps them to the ERD set of target entities.",
+        "WebSAIL Wikifier can handle both short text and long text queries.",
+        "The system uses hybrid rule-based NER to discover mentions and a supervised machine learning approach to disambiguate their entities.",
+        "The system achieves F1 scores of 0.641 and 0.687 on the short and long tracks, respectively."
+      ]
+    },
+    {
+      "title": "Spam ain't as diverse as it seems: throttling OSN spam with templates underneath",
+      "abstract": [
+        "In online social networks (OSNs), spam originating from friends and acquaintances not only reduces the joy of Internet surfing but also causes damage to less security-savvy users.",
+        "Prior countermeasures combat OSN spam from different angles.",
+        "Due to the diversity of spam, there is hardly any existing method that can independently detect the majority or most of OSN spam.",
+        "In this paper, we empirically analyze the textual pattern of a large collection of OSN spam.",
+        "An inspiring finding is that the majority (63.0%) of the collected spam is generated with underlying templates.",
+        "We therefore propose extracting templates of spam detected by existing methods and then matching messages against the templates for accurate and fast spam detection.",
+        "We implement this insight through Tangram, an OSN spam filtering system that performs online inspection on the stream of user-generated messages.",
+        "Tangram automatically divides OSN spam into segments and uses the segments to construct templates to filter future spam.",
+        "Experimental results show that Tangram is highly accurate and can rapidly generate templates to throttle newly emerged campaigns.",
+        "Specifically, Tangram detects the most prevalent template-based spam with 95.7% true positive rate, whereas the existing template generation approach detects only 32.3%.",
+        "The integration of Tangram and its auxiliary spam filter achieves an overall accuracy of 85.4% true positive rate and 0.33% false positive rate."
+      ]
+    },
+    {
+      "title": "Using natural language to integrate, evaluate, and optimize extracted knowledge bases",
+      "abstract": [
+        "Web Information Extraction (WIE) systems extract billions of unique facts, but integrating the assertions into a coherent knowledge base and evaluating across different WIE techniques remains a challenge.",
+        "We propose a framework that utilizes natural language to integrate and evaluate extracted knowledge bases (KBs).",
+        "In the framework, KBs are integrated by exchanging probability distributions over natural language, and evaluated by how well the output distributions predict held-out text.",
+        "We describe the advantages of the approach, and detail remaining research challenges."
+      ]
+    },
+    {
+      "title": "Tablet-based video modeling and prompting in the workplace for individuals with autism",
+      "abstract": [
+        "The current study involved preliminary job-site testing of computer software, i.e., VideoTote, delivered via a computer tablet and designed to provide video modeling and prompting for young adults with an autism spectrum disorder (ASD) across a range of employment settings.",
+        "A multiple baseline design was used to assess changes in rates of completion with a complex, 104-step shipping task by four participants diagnosed with ASD.",
+        "Baseline data were collected on accuracy of task completion after exposure to typical job-training involving instruction, modeling, and practice.",
+        "The intervention involved video modeling and prompting with a 13 minute video depicting an individual completing job responsibilities that entailed checking to make sure materials were in working order, replacing defective items, packing materials in a container, entering information into a computer, and attaching a label to a container.",
+        "Results suggested that video modeling and prompting were effective in helping individuals with autism complete a multi-step shipping task.",
+        "Participants and their parents gave the device and software high ratings as an acceptable treatment for adults with autism to use in the workplace and as an intervention that complies with universal design principles.",
+        "Implications for competitive job opportunities for individuals with autism are discussed."
+      ]
+    },
+    {
+      "title": "A probabilistic graphical model for brand reputation assessment in social networks",
+      "abstract": [
+        "Social media has become a popular platform that connects people who share information, in particular personal opinions.",
+        "Through such a fast information exchange mechanism, reputation of individuals, consumer products, or business companies can be quickly built up within a social network.",
+        "Recently, applications mining social network data have started emerging to find the communities sharing the same interests for marketing purposes.",
+        "Knowing the reputation of social network entities, such as celebrities or business companies, can help develop better strategies for election campaigns or new product advertisements.",
+        "In this paper, we propose a probabilistic graphical model to collectively measure reputations of entities in social networks.",
+        "By collecting and analyzing large amounts of user activity on Facebook, our model can effectively and efficiently rank entities, such as presidential candidates, professional sport teams, musician bands, and companies, based on their social reputation.",
+        "The proposed model produces results largely consistent with the two publicly available systems\u2014movie ranking in the Internet Movie Database and business school ranking by the US News & World Report\u2014with correlation coefficients of 0.75 and -0.71, respectively."
+      ]
+    },
+    {
+      "title": "WebSAIL Wikifier: English Entity Linking at TAC 2013",
+      "abstract": [
+        "In this paper, we report on our participation in the English Entity Linking task at TAC 2013.",
+        "We present the WebSAIL Wikifier system, an entity disambiguation system that links textual mentions to their referent entities in Wikipedia.",
+        "The system uses a supervised machine learning approach and a string-matching clustering method, and scores 58.1% B\u00b3+ F1 on the TAC 2013 test set."
+ ] + }, + { + "title": "Overcoming the Memory Bottleneck in Distributed Training of Latent Variable Models of Text", + "abstract": [ + "Large unsupervised latent variable models (LVMs) of text, such as Latent Dirichlet Allocation models or Hidden Markov Models (HMMs), are constructed using parallel training algorithms on computational clusters.", + "The memory required to hold LVM parameters forms a bottleneck in training more powerful models.", + "In this paper, we show how the memory required for parallel LVM training can be reduced by partitioning the training corpus to minimize the number of unique words on any computational node.", + "We present a greedy document partitioning technique for the task.", + "For large corpora, our approach reduces memory consumption by over 50%, and trains the same models up to three times faster, when compared with existing approaches for parallel LVM training." + ] + }, + { + "title": "Methods for exploring and mining tables on Wikipedia", + "abstract": [ + "Knowledge bases extracted automatically from the Web present new opportunities for data mining and exploration.", + "Given a large, heterogeneous set of extracted relations, new tools are needed for searching the knowledge and uncovering relationships of interest.", + "We present WikiTables, a Web application that enables users to interactively explore tabular knowledge extracted from Wikipedia.", + "In experiments, we show that WikiTables substantially outperforms baselines on the novel task of automatically joining together disparate tables to uncover \"interesting\" relationships between table columns.", + "We find that a \"Semantic Relatedness\" measure that leverages the Wikipedia link structure accounts for a majority of this improvement.", + "Further, on the task of keyword search for tables, we show that WikiTables performs comparably to Google Fusion Tables despite using an order of magnitude fewer tables.", + "Our work also includes the release of a number of public resources, including over 15 million tuples of extracted tabular data, manually annotated evaluation sets, and public APIs." + ] + }, + { + "title": "Scaling Semi-supervised Naive Bayes with Feature Marginals", + "abstract": [ + "Semi-supervised learning (SSL) methods augment standard machine learning (ML) techniques to leverage unlabeled data.", + "SSL techniques are often effective in text classification, where labeled data is scarce but large unlabeled corpora are readily available.", + "However, existing SSL techniques typically require multiple passes over the entirety of the unlabeled data, meaning the techniques are not applicable to large corpora being produced today.", + "In this paper, we show that improving marginal word frequency estimates using unlabeled data can enable semi-supervised text classification that scales to massive unlabeled data sets.", + "We present a novel learning algorithm, which optimizes a Naive Bayes model to accord with statistics calculated from the unlabeled corpus.", + "In experiments with text topic classification and sentiment analysis, we show that our method is both more scalable and more accurate than SSL techniques from previous work." 
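The "Overcoming the Memory Bottleneck" entry above partitions a corpus so that each computational node holds few unique words, using a greedy document partitioner. The paper's exact scoring and balancing rules are not given here; the sketch below uses one plausible greedy rule (largest documents first, each assigned to the node that must add the fewest new word types, under a near-even load cap).

```python
def greedy_partition(docs, k):
    """Greedily assign documents (lists of tokens) to k nodes so that
    each node's vocabulary of unique word types stays small."""
    vocab = [set() for _ in range(k)]   # word types held by each node
    load = [0] * k                      # documents assigned to each node
    cap = -(-len(docs) // k)            # ceil(len(docs) / k), keeps sizes even
    assignment = {}
    # Larger documents first, so early choices shape node vocabularies.
    for i in sorted(range(len(docs)), key=lambda i: -len(docs[i])):
        words = set(docs[i])
        # Among nodes with spare capacity, pick the one that must add
        # the fewest new word types for this document.
        best = min((n for n in range(k) if load[n] < cap),
                   key=lambda n: len(words - vocab[n]))
        vocab[best] |= words
        load[best] += 1
        assignment[i] = best
    return assignment
```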
+ ] + }, + { + "title": "Sentiment identification by incorporating syntax, semantics and context information", + "abstract": [ + "This paper proposes a method based on conditional random fields to incorporate sentence structure (syntax and semantics) and context information to identify sentiments of sentences within a document.", + "It also proposes and evaluates two different active learning strategies for labeling sentiment data.", + "The experiments with the proposed approach demonstrate a 5-15% improvement in accuracy on Amazon customer reviews compared to existing supervised learning and rule-based methods." + ] + }, + { + "title": "Explanatory semantic relatedness and explicit spatialization for exploratory search", + "abstract": [ + "Exploratory search, in which a user investigates complex concepts, is cumbersome with today's search engines.", + "We present a new exploratory search approach that generates interactive visualizations of query concepts using thematic cartography (e.g. choropleth maps, heat maps).", + "We show how the approach can be applied broadly across both geographic and non-geographic contexts through explicit spatialization, a novel method that leverages any figure or diagram -- from a periodic table, to a parliamentary seating chart, to a world map -- as a spatial search environment.", + "We enable this capability by introducing explanatory semantic relatedness measures.", + "These measures extend frequently-used semantic relatedness measures to not only estimate the degree of relatedness between two concepts, but also generate human-readable explanations for their estimates by mining Wikipedia's text, hyperlinks, and category structure.", + "We implement our approach in a system called Atlasify, evaluate its key components, and present several use cases." + ] + }, + { + "title": "Language Models as Representations for Weakly Supervised NLP Tasks", + "abstract": [ + "Finding the right representation for words is critical for building accurate NLP systems when domain-specific labeled data for the task is scarce.", + "This paper investigates language model representations, in which language models trained on unlabeled corpora are used to generate real-valued feature vectors for words.", + "We investigate ngram models and probabilistic graphical models, including a novel lattice-structured Markov Random Field.", + "Experiments indicate that language model representations outperform traditional representations, and that graphical model representations outperform ngram models, especially on sparse and polysemous words." 
+ ] + }, + { + "title": "Local and Global Algorithms for Disambiguation to Wikipedia", + "abstract": [ + "Disambiguating concepts and entities in a context sensitive way is a fundamental problem in natural language processing.", + "The comprehensiveness of Wikipedia has made the online encyclopedia an increasingly popular target for disambiguation.", + "Disambiguation to Wikipedia is similar to a traditional Word Sense Disambiguation task, but distinct in that the Wikipedia link structure provides additional information about which disambiguations are compatible.", + "In this work we analyze approaches that utilize this information to arrive at coherent sets of disambiguations for a given document (which we call \"global\" approaches), and compare them to more traditional (local) approaches.", + "We show that previous approaches for global disambiguation can be improved, but even then the local disambiguation provides a baseline which is very hard to beat." + ] + }, + { + "title": "Improved Extraction Assessment through Better Language Models", + "abstract": [ + "A variety of information extraction techniques rely on the fact that instances of the same relation are \"distributionally similar,\" in that they tend to appear in similar textual contexts.", + "We demonstrate that extraction accuracy depends heavily on the accuracy of the language model utilized to estimate distributional similarity.", + "An unsupervised model selection technique based on this observation is shown to reduce extraction and type-checking error by 26% over previous results, in experiments with Hidden Markov Models.", + "The results suggest that optimizing statistical language models over unlabeled data is a promising direction for improving weakly supervised and unsupervised information extraction." + ] + }, + { + "title": "Learning to Integrate Relational Databases with Wikipedia", + "abstract": [ + "Wikipedia is a general encyclopedia of unprecedented breadth and popularity.", + "However, much of the Web\u2019s factual information still lies within relational databases, each focused on a specific topic.", + "While many database entities are described by corresponding Wikipedia pages, in general this correspondence is unknown unless it has been manually specified.", + "As a result, Web databases cannot leverage the relevant rich descriptions and interrelationships captured in Wikipedia, and Wikipedia readers miss the extensive coverage that a database typically provides on its specific topic.", + "In this paper, we present ETOW, a system that automatically integrates relational databases with Wikipedia.", + "ETOW uses machine learning techniques to identify the correspondences between database entities and Wikipedia pages.", + "In experiments with two distinct Web databases, we demonstrate that ETOW outperforms baseline techniques, reducing error overall by an average of 19%, and reducing false positive rate by 50%.", + "In one experiment, ETOW is able to identify approximately 13,000 correct matches at a precision of 0.97.", + "We also present evidence suggesting that ETOW can substantially improve the coverage and utility of both the relational databases and Wikipedia." 
+      ]
+    },
+    {
+      "title": "Look Ma, No Hands: Analyzing the Monotonic Feature Abstraction for Text Classification",
+      "abstract": [
+        "Is accurate classification possible in the absence of hand-labeled data?",
+        "This paper introduces the Monotonic Feature (MF) abstraction\u2014where the probability of class membership increases monotonically with the MF's value.",
+        "The paper proves that when an MF is given, PAC learning is possible with no hand-labeled data under certain assumptions.",
+        "We argue that MFs arise naturally in a broad range of textual classification applications.",
+        "On the classic \"20 Newsgroups\" data set, a learner given an MF and unlabeled data achieves classification accuracy equal to that of a state-of-the-art semi-supervised learner relying on 160 hand-labeled examples.",
+        "Even when MFs are not given as input, their presence or absence can be determined from a small amount of hand-labeled data, which yields a new semi-supervised learning method that reduces error by 15% on the 20 Newsgroups data."
+      ]
+    },
+    {
+      "title": "Understanding the relationship between searchers' queries and information goals",
+      "abstract": [
+        "We describe results from Web search log studies aimed at elucidating user behaviors associated with queries and destination URLs that appear with different frequencies.",
+        "We note the diversity of information goals that searchers have and the differing ways that goals are specified.",
+        "We examine rare and common information goals that are specified using rare or common queries.",
+        "We identify several significant differences in user behavior depending on the rarity of the query and the destination URL.",
+        "We find that searchers are more likely to be successful when the frequencies of the query and destination URL are similar.",
+        "We also establish that the behavioral differences observed for queries and goals of varying rarity persist even after accounting for potential confounding variables, including query length, search engine ranking, session duration, and task difficulty.",
+        "Finally, using an information-theoretic measure of search difficulty, we show that the benefits obtained by search and navigation actions depend on the frequency of the information goal."
+      ]
+    },
+    {
+      "title": "Redundancy in web-scaled information extraction: probabilistic model and experimental results",
+      "abstract": [
+        "Information Extraction (IE) is the task of automatically extracting knowledge from text.",
+        "The massive body of text now available on the World Wide Web presents an unprecedented opportunity for IE.",
+        "IE systems promise to encode vast quantities of Web content into machine-processable knowledge bases, presenting a new approach to a fundamental challenge for artificial intelligence: the automatic acquisition of massive bodies of knowledge.",
+        "Such knowledge bases would dramatically extend the capabilities of Web applications.",
+        "Future Web search engines, for example, could query the knowledge bases to answer complicated questions that require synthesizing information across multiple Web pages.",
+        "However, IE on the Web is challenging due to the enormous variety of distinct concepts expressed.",
+        "All extraction techniques make errors, and the standard error-detection strategy used in previous, small-corpus extraction systems\u2014hand-labeling examples of each concept to be extracted, then training a classifier using the labeled examples\u2014is intractable on the Web.",
+        "How can we automatically identify correct extractions for arbitrary target concepts, without hand-labeled examples?",
+        "This thesis shows how IE on the Web is made possible through the KnowItAll hypothesis, which states that extractions that occur more frequently in distinct sentences in a corpus are more likely to be correct.",
+        "The KnowItAll hypothesis holds on the Web, and can be used to identify many correct extractions because the Web is highly redundant: individual facts are often repeated many times, and in many different ways.",
+        "In this thesis, we show that a probabilistic model of the KnowItAll hypothesis, coupled with the redundancy of the Web, can power effective IE for arbitrary target concepts without hand-labeled data.",
+        "In experiments with IE on the Web, we show that the probabilities produced by our model are 15 times better, on average, when compared with techniques from previous work.",
+        "We also prove formally that under the assumptions of the model, \"Probably Approximately Correct\" IE can be attained from only unlabeled data."
+      ]
+    },
+    {
+      "title": "It\u2019s a Contradiction \u2013 no, it\u2019s not: A Case Study using Functional Relations",
+      "abstract": [
+        "Contradiction Detection (CD) in text is a difficult NLP task.",
+        "We investigate CD over functions (e.g., BornIn(Person)=Place), and present a domain-independent algorithm that automatically discovers phrases denoting functions with high precision.",
+        "Previous work on CD has investigated hand-chosen sentence pairs.",
+        "In contrast, we automatically harvested from the Web pairs of sentences that appear contradictory, but were surprised to find that most pairs are in fact consistent.",
+        "For example, \"Mozart was born in Salzburg\" does not contradict \"Mozart was born in Austria\" despite the functional nature of the phrase \"was born in\".",
+        "We show that background knowledge about meronyms (e.g., Salzburg is in Austria), synonyms, functions, and more is essential for success in the CD task."
+      ]
+    },
+    {
+      "title": "In situ transmissiometer measurements for real-time monitoring of dust discharge during orchard nut harvesting.",
+      "abstract": [
+        "Rapid assessments of the effects of operating conditions and field preparation on dust discharge from nut harvesters are needed to guide improved equipment design and grower practices for dust reduction.",
+        "An opacity sensor, typically used for industrial stack monitoring, was adapted for use on a nut harvester to measure relative dust intensity during nut pick-up operations in almond orchards.",
+        "Due to the high volume of discharge air and the presence of large debris such as leaves, additional components were coupled with the sensor to enable subsampling of the air.",
+        "Pre-harvest windrow preparation conditions were evaluated.",
+        "Results indicated that relative dust intensity decreased by 32% during harvest activities after windrow preparation with proper nut sweeper adjustment.",
+        "Conventional harvesting results indicated that under typical operating conditions, reducing the separation fan speed could reduce relative dust intensity by 54%.",
+        "Ground speed also had a strong effect; reducing speed from 4.8 to 2.4 km h\u207b\u00b9 reduced opacity of discharged air by 50%.",
+        "The measurement system was also mounted on a separate vehicle and used as a tool for comparing modifications in harvest machine designs where direct measurement of discharge may not be feasible due to mechanical constraints.",
+        "A comparison between a conventional harvester and one modification in the harvester design found that the machine modification decreased relative dust intensity by 73%.",
+        "The measurement tools described in this work can be used to provide rapid feedback on harvester operating conditions, orchard cultural practices, and machine design modifications."
+      ]
+    },
+    {
+      "title": "Models of Searching and Browsing: Languages, Studies, and Application",
+      "abstract": [
+        "We describe the formulation, construction, and evaluation of predictive models of human information seeking from a large dataset of Web search activities.",
+        "We first introduce an expressive language for describing searching and browsing behavior, and use this language to characterize several prior studies of search behavior.",
+        "Then, we focus on the construction of predictive models from the data.",
+        "We review several analyses, including an exploration of the properties of users, queries, and search sessions that are most predictive of future behavior.",
+        "We also investigate the influence of temporal delay on user actions, and representational tradeoffs with varying the number of steps of user activity considered.",
+        "Finally, we discuss applications of the predictive models, and focus on the example of performing principled prefetching of content."
+      ]
+    },
+    {
+      "title": "Sparse Information Extraction: Unsupervised Language Models to the Rescue",
+      "abstract": [
+        "Even in a massive corpus such as the Web, a substantial fraction of extractions appear infrequently.",
+        "This paper shows how to assess the correctness of sparse extractions by utilizing unsupervised language models.",
+        "The REALM system, which combines HMM-based and n-gram-based language models, ranks candidate extractions by the likelihood that they are correct.",
+        "Our experiments show that REALM reduces extraction error by 39%, on average, when compared with previous work.",
+        "Because REALM pre-computes language models based on its corpus and does not require any hand-tagged seeds, it is far more scalable than approaches that learn models for each individual relation from hand-tagged data.",
+        "Thus, REALM is ideally suited for open information extraction where the relations of interest are not specified in advance and their number is potentially vast."
+      ]
+    },
+    {
+      "title": "Heads and tails: studies of web search with common and rare queries",
+      "abstract": [
+        "A large fraction of queries submitted to Web search engines occur very infrequently.",
+        "We describe search log studies aimed at elucidating behaviors associated with rare and common queries.",
+        "We present several analyses and discuss research directions."
+      ]
+    },
+    {
+      "title": "Locating Complex Named Entities in Web Text",
+      "abstract": [
+        "Named Entity Recognition (NER) is the task of locating and classifying names in text.",
+        "In previous work, NER was limited to a small number of pre-defined entity classes (e.g., people, locations, and organizations).",
+        "However, NER on the Web is a far more challenging problem.",
+        "Complex names (e.g., film or book titles) can be very difficult to pick out precisely from text.",
+        "Further, the Web contains a wide variety of entity classes, which are not known in advance.",
+        "Thus, hand-tagging examples of each entity class is impractical.",
+        "This paper investigates a novel approach to the first step in Web NER: locating complex named entities in Web text.",
+        "Our key observation is that named entities can be viewed as a species of multiword units, which can be detected by accumulating n-gram statistics over the Web corpus.",
+        "We show that this statistical method's F1 score is 50% higher than that of supervised techniques including Conditional Random Fields (CRFs) and Conditional Markov Models (CMMs) when applied to complex names.",
+        "The method also outperforms CMMs and CRFs by 117% on entity classes absent from the training data.",
+        "Finally, our method outperforms a semi-supervised CRF by 73%."
+      ]
+    },
+    {
+      "title": "A Probabilistic Model of Redundancy in Information Extraction",
+      "abstract": [
+        "Unsupervised Information Extraction (UIE) is the task of extracting knowledge from text without using hand-tagged training examples.",
+        "A fundamental problem for both UIE and supervised IE is assessing the probability that extracted information is correct.",
+        "In massive corpora such as the Web, the same extraction is found repeatedly in different documents.",
+        "How does this redundancy impact the probability of correctness?",
+        "This paper introduces a combinatorial \"balls-and-urns\" model that computes the impact of sample size, redundancy, and corroboration from multiple distinct extraction rules on the probability that an extraction is correct.",
+        "We describe methods for estimating the model's parameters in practice and demonstrate experimentally that for UIE the model's log likelihoods are 15 times better, on average, than those obtained by Pointwise Mutual Information (PMI) and the noisy-or model used in previous work.",
+        "For supervised IE, the model's performance is comparable to that of Support Vector Machines and Logistic Regression."
+      ]
+    },
+    {
+      "title": "KnowItNow: Fast, Scalable Information Extraction from the Web",
+      "abstract": [
+        "Numerous NLP applications rely on search-engine queries, both to extract information from and to compute statistics over the Web corpus.",
+        "But search engines often limit the number of available queries.",
+        "As a result, query-intensive NLP applications such as Information Extraction (IE) distribute their query load over several days, making IE a slow, offline process.",
+        "This paper introduces a novel architecture for IE that obviates queries to commercial search engines.",
+        "The architecture is embodied in a system called KnowItNow that performs high-precision IE in minutes instead of days.",
+        "We compare KnowItNow experimentally with the previously-published KnowItAll system, and quantify the tradeoff between recall and speed.",
+        "KnowItNow's extraction rate is two to three orders of magnitude higher than KnowItAll's."
+      ]
+    },
+    {
+      "title": "Methods for Domain-Independent Information Extraction from the Web: An Experimental Comparison",
+      "abstract": [
+        "Our KNOWITALL system aims to automate the tedious process of extracting large collections of facts (e.g., names of scientists or politicians) from the Web in an autonomous, domain-independent, and scalable manner.",
+        "In its first major run, KNOWITALL extracted over 50,000 facts with high precision, but suggested a challenge: How can we improve KNOWITALL's recall and extraction rate without sacrificing precision?",
+        "This paper presents three distinct ways to address this challenge and evaluates their performance.",
+        "Rule Learning learns domain-specific extraction rules.",
+        "Subclass Extraction automatically identifies sub-classes in order to boost recall.",
+        "List Extraction locates lists of class instances, learns a \"wrapper\" for each list, and extracts elements of each list.",
+        "Since each method bootstraps from KNOWITALL's domain-independent methods, no hand-labeled training examples are required.",
+        "Experiments show the relative coverage of each method and demonstrate their synergy.",
+        "In concert, our methods gave KNOWITALL a 4-fold to 19-fold increase in recall, while maintaining high precision, and discovered 10,300 cities missing from the Tipster Gazetteer."
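The "Probabilistic Model of Redundancy" entry above evaluates its balls-and-urns model against the noisy-or model from prior work. The noisy-or baseline is simple enough to state exactly; the per-extraction correctness probability `p` is an assumed, relation-specific parameter.

```python
def noisy_or(k: int, p: float) -> float:
    """Noisy-or baseline: if each of k independent extractions of a fact
    is correct with probability p, the fact is wrong only if every
    single extraction erred."""
    return 1.0 - (1.0 - p) ** k

# E.g., noisy_or(3, 0.8) ≈ 0.992. The urns model instead also accounts
# for sample size and corroboration across distinct extraction rules,
# which the abstract reports yields far better-calibrated likelihoods.
```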
+ ] + }, + { + "title": "Web-scale information extraction in knowitall: (preliminary results)", + "abstract": [ + "Manually querying search engines in order to accumulate a large body of factual information is a tedious, error-prone process of piecemeal search.", + "Search engines retrieve and rank potentially relevant documents for human perusal, but do not extract facts, assess confidence, or fuse information from multiple documents.", + "This paper introduces KnowItAll, a system that aims to automate the tedious process of extracting large collections of facts from the web in an autonomous, domain-independent, and scalable manner.", + "The paper describes preliminary experiments in which an instance of KnowItAll, running for four days on a single machine, was able to automatically extract 54,753 facts.", + "KnowItAll associates a probability with each fact, enabling it to trade off precision and recall.", + "The paper analyzes KnowItAll's architecture and reports on lessons learned for the design of large-scale information extraction systems." + ] + }, + { + "title": "Learning knowledge bases for information extraction from multiple text based Web sites", + "abstract": [ + "We describe a learning approach to automatically building knowledge bases for information extraction from multiple text-based Web pages.", + "A frame-based representation is introduced to represent domain knowledge as knowledge unit frames.", + "A frame learning algorithm is developed to automatically learn knowledge unit frames from training examples.", + "Some training examples can be obtained by automatically parsing a number of tabular Web pages in the same domain, which greatly reduces the time-consuming manual work.", + "This approach was investigated on ten Web sites of real estate advertisements and car advertisements, and nearly all the information was successfully extracted with very few false alarms.", + "These results suggest that both the knowledge unit frame representation and the frame learning algorithm work well, that a domain-specific knowledge base can be learned from training examples, and that the learned knowledge base can be used for information extraction from flexible, text-based, semi-structured Web pages on multiple Web sites." + ] + }, + { + "title": "Learning Text Patterns for Web Information Extraction and Assessment", + "abstract": [ + "Learning text patterns that suggest a desired type of information is a common strategy for extracting information from unstructured text on the Web.", + "In this paper, we introduce the idea that learned patterns can be used as both extractors (to generate new information) and discriminators (to assess the truth of extracted information).", + "We demonstrate experimentally that a Web information extraction system (KnowItAll) can be improved (in terms of coverage and accuracy) through the addition of a simple pattern-learning algorithm.", + "By using learned patterns as extractors, we are able to boost recall by 50% to 80%; and by using such patterns as discriminators, we are able to reduce classification errors by 28% to 35%.", + "In addition, the paper reports theoretical results on optimally selecting and ordering discriminators, and shows that this theory yields a heuristic that further reduces classification errors by an additional 19% to 35% \u2013 giving an overall error reduction of 47% to 53%."
+ ] + }, + { + "title": "Further Experiments in the Evolution of Minimally Cognitive Behavior: From Perceiving Affordances to Selective Attention", + "abstract": [ + "In this paper, we extend previous work on the evolution of continuous-time recurrent neural networks for minimally cognitive behavior (the simplest behavior that raises issues of genuine cognitive interest).", + "Previously, we evolved dynamical \u201cnervous systems\u201d for orientation, reaching, and discrimination.", + "Here we evolve agents that can judge the passability of openings relative to their own body size, discriminate between visible parts of themselves and other objects in their environment, predict and remember the future location of objects in order to catch them blind, and switch their attention between multiple distal objects." + ] } ], "user_kps": [ - "biomedical literature mining", - "citation context analysis", "citation network", - "co-authorship networks", - "coherent summaries", - "collaborative writing", - "conceptual data models", - "conceptual tool", + "cross-document coreference resolution", "document representations", - "e-science", + "entity recognition", + "explainable artificial intelligence", + "explainable recommendation", "exploratory searches", - "human readers", + "fetal hypoxia", "knowledge graph", - "knowledge graph embedding", + "latent dirichlet allocation model", "literature-based discovery", - "machine comprehension", + "named entity recognition", + "neural language model", "neural language models", "neural ranking models", - "terminology extraction", - "textual summaries" + "pre-trained word vectors", + "recurrent neural network language model", + "spammer detection", + "web information extraction", + "word models" ] } \ No newline at end of file