@article{fortier-davyMultiDimensionalApproachDrugInduced2020,
  title        = {The {{Multi-Dimensional Approach}} to {{Drug-Induced States}}: {{A Commentary}} on {{Bayne}} and {{Carter}}'s ``{{Dimensions}} of {{Consciousness}} and the {{Psychedelic State}}''},
  shorttitle   = {The Multi-Dimensional Approach to Drug-Induced States},
  author       = {{Fortier-Davy}, Martin and Milli{\`e}re, Rapha{\"e}l},
  year         = 2020,
  month        = jan,
  journal      = {Neuroscience of Consciousness},
  publisher    = {Oxford Academic},
  volume       = 2020,
  number       = 1,
  doi          = {10.1093/nc/niaa004},
  urldate      = {2020-05-04},
  copyright    = {All rights reserved},
  abstract     = {Bayne and Carter argue that the mode of consciousness induced by psychedelic drugs does not fit squarely within the traditional account of modes as levels of consciousness, and favors instead a multi-dimensional account according to which modes of consciousness differ along several dimensions---none of which warrants a linear ordering of modes. We discuss the assumption that psychedelic drugs induce a single or paradigmatic mode of consciousness, as well as conceptual issues related to Bayne and Carter's main argument against the traditional account. Finally, we raise a set of questions about the individuation of dimensions selected to differentiate modes of consciousness that could be addressed in future discussions of the multi-dimensional account.},
  langid       = {english},
  keywords     = {Altered States,Consciousness}
}
@incollection{mandelbaumAssociationistTheoriesThought2025,
  title        = {Associationist {{Theories}} of {{Thought}}},
  author       = {Mandelbaum, Eric and Milli{\`e}re, Rapha{\"e}l},
  year         = 2025,
  booktitle    = {The {{Stanford Encyclopedia}} of {{Philosophy}}},
  publisher    = {Metaphysics Research Lab, Stanford University},
  url          = {https://plato.stanford.edu/entries/associationist-thought/},
  urldate      = {2025-07-29},
  editor       = {Zalta, Edward N. and Nodelman, Uri},
  edition      = {Fall 2025},
  abstract     = {Associationism is one of the oldest, and, in some form or another, most widely held theories of thought. Associationism has been the engine behind empiricism for centuries, from the British Empiricists through the Behaviorists and modern day Connectionists. Nevertheless, ``associationism'' does not refer to one particular theory of cognition per se, but rather a constellation of related though separable theses. What ties these theses together is a commitment to a certain arationality of thought: a creature's mental states are associated because of some facts about its causal history, and having these mental states associated entails that bringing one of a pair of associates to mind will, ceteris paribus, ensure that the other also becomes activated.},
  keywords     = {Cognitive Modeling,History of Philosophy}
}
@article{Milliere2020varieties,
  title        = {The {{Varieties}} of {{Selflessness}}},
  author       = {Milli{\`e}re, Rapha{\"e}l},
  year         = 2020,
  journal      = {Philosophy and the Mind Sciences},
  volume       = 1,
  number       = {I},
  pages        = 8,
  doi          = {10.33735/phimisci.2020.I.48},
  copyright    = {All rights reserved},
  abstract     = {Many authors argue that conscious experience involves a sense of self or self-consciousness. According to the strongest version of this claim, there can be no selfless states of consciousness, namely states of consciousness that lack self-consciousness altogether. Disagreements about this claim are likely to remain merely verbal as long as the target notion of self-consciousness is not adequately specified. After distinguishing six notions of self-consciousness commonly discussed in the literature, I argue that none of the corresponding features is necessary for consciousness, because there are states of consciousness in which each of them is plausibly missing. Such states can be said to be at least partially selfless, since they lack at least one of the ways in which one could be self-conscious. Furthermore, I argue that there is also preliminary empirical evidence that some states of consciousness lack all of these six putative forms of self-consciousness. Such states might be totally selfless, insofar as they lack all the ways in which one could be self-conscious. I conclude by addressing four objections to the possibility and reportability of totally selfless states of consciousness.},
  keywords     = {Altered States,Consciousness,Self-Consciousness}
}
@misc{milliereAdversarialAttacksImage2022,
  title        = {Adversarial {{Attacks}} on {{Image Generation With Made-Up Words}}},
  author       = {Milli{\`e}re, Rapha{\"e}l},
  year         = 2022,
  month        = aug,
  publisher    = {arXiv},
  number       = {arXiv:2208.04135},
  doi          = {10.48550/arXiv.2208.04135},
  urldate      = {2022-08-24},
  copyright    = {All rights reserved},
  eprint       = {2208.04135},
  primaryclass = {cs},
  abstract     = {Text-guided image generation models can be prompted to generate images using nonce words adversarially designed to robustly evoke specific visual concepts. Two approaches for such generation are introduced: macaronic prompting, which involves designing cryptic hybrid words by concatenating subword units from different languages; and evocative prompting, which involves designing nonce words whose broad morphological features are similar enough to that of existing words to trigger robust visual associations. The two methods can also be combined to generate images associated with more specific visual concepts. The implications of these techniques for the circumvention of existing approaches to content moderation, and particularly the generation of offensive or harmful images, are discussed.},
  archiveprefix = {arXiv},
  keywords     = {Adversarial Robustness,AI Safety,Generative AI,Multimodal AI}
}
@misc{milliereAlignmentProblemContext2023,
  title        = {The {{Alignment Problem}} in {{Context}}},
  author       = {Milli{\`e}re, Rapha{\"e}l},
  year         = 2023,
  month        = nov,
  publisher    = {arXiv},
  number       = {arXiv:2311.02147},
  doi          = {10.48550/arXiv.2311.02147},
  urldate      = {2023-11-26},
  copyright    = {All rights reserved},
  eprint       = {2311.02147},
  primaryclass = {cs},
  abstract     = {A core challenge in the development of increasingly capable AI systems is to make them safe and reliable by ensuring their behaviour is consistent with human values. This challenge, known as the alignment problem, does not merely apply to hypothetical future AI systems that may pose catastrophic risks; it already applies to current systems, such as Large Language Models, whose potential for harm is rapidly increasing. In this paper, I assess whether we are on track to solve the alignment problem for Large Language Models, and what that means for the safety of future AI systems. I argue that existing strategies for alignment are insufficient, because Large Language Models remain vulnerable to adversarial attacks that can reliably elicit unsafe behaviour. I offer an explanation of this lingering vulnerability on which it is not simply a contingent limitation of current language models, but has deep technical ties to a crucial aspect of what makes these models useful and versatile in the first place -- namely, their remarkable aptitude to learn ``in context'' directly from user instructions. It follows that the alignment problem is not only unsolved for current AI systems, but may be intrinsically difficult to solve without severely undermining their capabilities. Furthermore, this assessment raises concerns about the prospect of ensuring the safety of future and more capable AI systems.},
  archiveprefix = {arXiv},
  keywords     = {Adversarial Robustness,AI Alignment,AI Safety,In-Context Learning,Large Language Models}
}
@article{milliereAnthropocentricBiasLanguage2025a,
  title        = {Anthropocentric {{Bias}} in {{Language Model Evaluation}}},
  author       = {Milli{\`e}re, Rapha{\"e}l and Rathkopf, Charles},
  year         = 2026,
  month        = mar,
  journal      = {Computational Linguistics},
  volume       = 52,
  number       = 1,
  pages        = {379--388},
  doi          = {10.1162/COLI.a.582},
  issn         = {0891-2017},
  urldate      = {2026-06-02},
  url          = {https://direct.mit.edu/coli/article/52/1/379/134269/Anthropocentric-Bias-in-Language-Model-Evaluation},
  abstract     = {Evaluating the cognitive capacities of large language models (LLMs) requires overcoming not only anthropomorphic but also anthropocentric biases. This article identifies two types of anthropocentric bias that have been neglected: overlooking how auxiliary factors can impede LLM performance despite competence (auxiliary oversight), and dismissing LLM mechanistic strategies that differ from those of humans as not genuinely competent (mechanistic chauvinism). Mitigating these biases requires an empirical, iterative approach to mapping cognitive tasks to LLM-specific capacities and mechanisms, achieved by supplementing behavioral experiments with mechanistic studies.},
  langid       = {english},
  keywords     = {Cognitive Modeling,Large Language Models,LLM Evaluation,Mechanistic Interpretability,Reasoning}
}

@article{milliereAreThereDegrees2019,
  title        = {Are {{There Degrees}} of {{Self-Consciousness}}?},
  author       = {Milli{\`e}re, Rapha{\"e}l},
  year         = 2019,
  journal      = {Journal of Consciousness Studies},
  volume       = 26,
  number       = {3--4},
  pages        = {252--276},
  issn         = {1355-8250},
  copyright    = {All rights reserved},
  url          = {https://psycnet.apa.org/record/2020-14359-012},
  abstract     = {It is widely assumed that ordinary conscious experience involves some form of sense of self or consciousness of oneself. Moreover, this claim is often restricted to a `thin' or `minimal' notion of self-consciousness, or even `the simplest form of self-consciousness', as opposed to more sophisticated forms of self-consciousness which are not deemed ubiquitous in ordinary experience. These formulations suggest that self-consciousness comes in degrees, and that individual subjects may differ with respect to the degree of self-consciousness they exhibit at a given time. In this article, I critically examine this assumption. I consider what the claim that self-consciousness comes in degrees may mean, raise some challenges against the different versions of the claim, and conclude that none of them is both coherent and particularly plausible.},
  keywords     = {Consciousness,Self-Consciousness}
}
@article{milliereConstitutiveSelfConsciousness2025,
  title        = {Constitutive {{Self-Consciousness}}},
  author       = {Milli{\`e}re, Rapha{\"e}l},
  year         = 2025,
  month        = jul,
  journal      = {Australasian Journal of Philosophy},
  publisher    = {Routledge},
  volume       = 103,
  number       = 3,
  pages        = {617--637},
  doi          = {10.1080/00048402.2024.2417750},
  issn         = {0004-8402},
  urldate      = {2026-01-21},
  abstract     = {The claim that consciousness constitutively involves self-consciousness has a long philosophical history, and has received renewed support in recent years. My aim in this paper is to argue that this surprisingly enduring idea is misleading at best, and insufficiently supported at worst. I start by offering an elucidatory account of consciousness, and outlining a number of foundational claims that plausibly follow from it. I subsequently distinguish two notions of self-consciousness: consciousness of oneself and consciousness of one's experience. While `self-consciousness' is often taken to refer to the former notion, the most common variant of the constitutive claim, on which I focus here, targets the latter. This claim can be further interpreted in two ways: on a deflationary reading, it falls within the scope of foundational claims about consciousness, while on an inflationary reading, it points to determinate aspects of phenomenology that are not acknowledged by the foundational claims as being aspects of all conscious mental states. I argue that the deflationary reading of the constitutive claim is plausible, but should be formulated without using a term as polysemous and suggestive as `self-consciousness'; by contrast, the inflationary reading is not adequately supported, and ultimately rests on contentious intuitions about phenomenology. I conclude that we should abandon the idea that self-consciousness is constitutive of consciousness.},
  keywords     = {Consciousness,Self-Consciousness}
}
@article{milliereDeepLearningSynthetic2022,
  title        = {Deep {{Learning}} and {{Synthetic Media}}},
  author       = {Milli{\`e}re, Rapha{\"e}l},
  year         = 2022,
  month        = may,
  journal      = {Synthese},
  volume       = 200,
  number       = 3,
  pages        = 231,
  doi          = {10.1007/s11229-022-03739-2},
  issn         = {1573-0964},
  urldate      = {2022-06-03},
  copyright    = {All rights reserved},
  abstract     = {Deep learning algorithms are rapidly changing the way in which audiovisual media can be produced. Synthetic audiovisual media generated with deep learning -- often subsumed colloquially under the label ``deepfakes'' -- have a number of impressive characteristics; they are increasingly trivial to produce, and can be indistinguishable from real sounds and images recorded with a sensor. Much attention has been dedicated to ethical concerns raised by this technological development. Here, I focus instead on a set of issues related to the notion of synthetic audiovisual media, its place within a broader taxonomy of audiovisual media, and how deep learning techniques differ from more traditional approaches to media synthesis. After reviewing important etiological features of deep learning pipelines for media manipulation and generation, I argue that ``deepfakes'' and related synthetic media produced with such pipelines do not merely offer incremental improvements over previous methods, but challenge traditional taxonomical distinctions, and pave the way for genuinely novel kinds of audiovisual media.},
  langid       = {english},
  keywords     = {Generative AI}
}
@incollection{milliereDruginducedAlterationsBodily2022,
  title        = {Drug-Induced {{Alterations}} of {{Bodily Awareness}}},
  author       = {Milli{\`e}re, Rapha{\"e}l},
  year         = 2022,
  booktitle    = {The {{Routledge Handbook}} of {{Bodily Awareness}}},
  publisher    = {Routledge},
  address      = {London ; New York},
  pages        = {503--521},
  isbn         = {978-0-429-32154-2},
  copyright    = {All rights reserved},
  url          = {https://www.taylorfrancis.com/chapters/edit/10.4324/9780429321542-40/drug-induced-alterations-bodily-awareness-rapha%C3%ABl-milli%C3%A8re},
  abstract     = {Psychoactive compounds can have more or less noticeable effects on conscious experience, through neuropharmacological pathways involving activation (agonism) or blockade (antagonism) of different neurotransmitter receptors. Some of these compounds are specifically known to alter or disrupt bodily awareness in various ways. Philosophical and empirical discussions of bodily awareness have mostly focused so far on bodily disorders -- such as somatoparaphrenia -- and bodily illusions induced in an experimental setting -- such as the rubber hand illusion. However, drug-induced alterations of bodily awareness also include a wide range of conditions that are highly relevant to these discussions. The purpose of this chapter is to provide an overview of these lesser known bodily effects, and to outline some ways in which they can bear on recent debates regarding bodily awareness and bodily ownership.},
  keywords     = {Altered States,Bodily Awareness}
}
@incollection{milliereDrugInducedBodyDisownership2024,
  title        = {Drug-{{Induced Body Disownership}}},
  author       = {Milli{\`e}re, Rapha{\"e}l},
  year         = 2024,
  month        = sep,
  booktitle    = {Philosophical Perspectives on Psychedelic Psychiatry},
  publisher    = {Oxford University Press},
  doi          = {10.1093/oso/9780192898371.003.0002},
  isbn         = {978-0-19-289837-1},
  eprint       = {https://academic.oup.com/book/0/chapter/480350218/chapter-pdf/59014193/oso-9780192898371-chapter-2.pdf},
  abstract     = {This chapter examines the debate on the phenomenology of body ownership---the putative experience of one's body as one's own. Proponents argue that this phenomenology exists and that it explains reports from pathological conditions and bodily illusions, but these reports face interpretative challenges. In this chapter, drug-induced experiences wherein subjects report `disownership' of their body parts or whole body are considered. Unlike patient reports, these are from healthy people, with detailed descriptions obtained in controlled settings. Reports that describe subjective transitions between the experience of owning and disowning one's body provide novel tentative evidence for the view that ordinary experience involves a phenomenology of ownership that can be disrupted. While such evidence is not definitive, the debate could benefit from paying closer attention to drug-induced states. Disentangling the multi-faceted bodily effects of psychoactive compounds could further illuminate whether body ownership is a component of ordinary bodily awareness.},
  keywords     = {Altered States,Bodily Awareness,Self-Consciousness}
}
@incollection{milliereIngardensCombinatorialAnalysis2016,
  title        = {Ingarden's {{Combinatorial Analysis}} of {{The Realism-Idealism Controversy}}},
  author       = {Milli{\`e}re, Rapha{\"e}l},
  year         = 2016,
  booktitle    = {Form(s) and {{Modes}} of {{Being}}: {{The Ontology}} of {{Roman Ingarden}}},
  publisher    = {Peter Lang},
  address      = {Bern},
  pages        = {67--98},
  copyright    = {All rights reserved},
  editor       = {Richard, S{\'e}bastien and Malherbe, Olivier},
  url          = {https://www.perlego.com/book/2941846/formes-et-modes-dtre-forms-and-modes-of-being-lontologie-de-roman-ingarden-the-ontology-of-roman-ingarden-pdf},
  abstract     = {The Controversy over the Existence of the World (henceforth Controversy) is the magnum opus of Polish philosopher Roman Ingarden. Despite the renewed interest for Ingarden's pioneering ontological work within analytic philosophy, little attention has been dedicated to Controversy's main goal, clearly indicated by the very title of the book: finding a solution to the centuries-old philosophical controversy about the ontological status of the external world. There are at least three reasons for this relative indifference. First, even at the time when the book was published, the Controversy was no longer seen as a serious polemical topic, whether it was disqualified as an archaic metaphysical pseudo-problem, or taken to be the last remnant of an antiscientific approach to philosophy culminating in idealism and relativism. Second, Ingarden's reasoning on the matter is highly complex, at times misleading, and even occasionally faulty. Finally, his analysis is not only incomplete -- Controversy being unfinished -- but also arguably aporetic. One may wonder, then, why it is still worth excavating this mammoth treatise to study an issue apparently no longer relevant to contemporary philosophy. Aside from historical and exegetical purposes, which are of course very interesting in their own right, Ingarden's treatment of the Controversy remains one of the most detailed and ambitious ontological undertakings of the twentieth century. Not only does it lay out an incredibly detailed map of possible solutions to the Controversy, but it also tries to show why the latter is a genuine and fundamental problem that owes its hasty disqualification to various oversimplifications over the course of the history of philosophy. In this chapter, I first give an overview of Ingarden's method, which relies mainly on a combinatorial analysis.
Then, I summarize his examination of possible solutions to the Controversy, and determine which ones can be ruled out on ontological grounds. Finally, I explain why this ambitious project ultimately leads to a theoretical impasse, leaving Ingarden unable to come up with a definitive solution to the Controversy -- regardless of the fact that the book is unfinished. I argue that his analysis of the problem yields a more modest but nonetheless valuable result.},
  keywords     = {History of Philosophy}
}
@incollection{milliereInterventionistMethodsInterpreting2025,
  title        = {Interventionist Methods for Interpreting Deep Neural Networks},
  author       = {Milli{\`e}re, Rapha{\"e}l and Buckner, Cameron},
  year         = 2025,
  booktitle    = {Neurocognitive {{Foundations}} of {{Mind}}},
  publisher    = {Routledge},
  isbn         = {978-1-003-45853-1},
  editor       = {Piccinini, Gualtiero},
  url          = {https://www.taylorfrancis.com/chapters/edit/10.4324/9781003458531-10/interventionist-methods-interpreting-deep-neural-networks-rapha%C3%ABl-milli%C3%A8re-cameron-buckner},
  abstract     = {Recent breakthroughs in artificial intelligence (AI) have primarily resulted from training deep neural networks (DNNs) with vast numbers of adjustable parameters on enormous datasets. Due to their complex internal structure, DNNs are frequently characterized as inscrutable ``black boxes,'' making it challenging to interpret the mechanisms underlying their impressive performance. This opacity creates difficulties for explanation, safety assurance, trustworthiness, and comparisons to human cognition, leading to divergent perspectives on these systems. This chapter examines recent developments in interpretability methods for DNNs, with a focus on interventionist approaches inspired by causal explanation in philosophy of science. We argue that these methods offer a promising avenue for understanding how DNNs process information compared to merely behavioral benchmarking and correlational probing. We review key interventionist methods and illustrate their application through practical case studies. These methods allow researchers to identify and manipulate specific computational components within DNNs, providing insights into their causal structure and internal representations. We situate these approaches within the broader framework of causal abstraction, which aims to align low-level neural computations with high-level interpretable models. While acknowledging current limitations, we contend that interventionist methods offer a path toward more rigorous and theoretically grounded interpretability research, potentially informing both AI development and computational cognitive neuroscience.},
  keywords     = {Large Language Models,Mechanistic Interpretability,World Models}
}
@incollection{milliereLanguageModelsModels2026,
  title        = {Language {{Models}} as {{Models}} of {{Language}}},
  author       = {Milli{\`e}re, Rapha{\"e}l},
  year         = 2026,
  booktitle    = {The {{Oxford Handbook}} of the {{Philosophy}} of {{Linguistics}}},
  publisher    = {Oxford University Press},
  address      = {Oxford},
  editor       = {Nefdt, Ryan and Dupre, Gabe and Stanton, Kate},
  url          = {https://global.oup.com/academic/product/the-oxford-handbook-of-philosophy-of-linguistics-9780198879640?cc=gb&lang=en&},
  abstract     = {This chapter critically examines the potential contributions of modern language models to theoretical linguistics. Despite their focus on engineering goals, these models' ability to acquire sophisticated linguistic knowledge from mere exposure to data warrants a careful reassessment of their relevance to linguistic theory. I review a growing body of empirical evidence suggesting that language models can learn hierarchical syntactic structure and exhibit sensitivity to various linguistic phenomena, even when trained on developmentally plausible amounts of data. While the competence/performance distinction has been invoked to dismiss the relevance of such models to linguistic theory, I argue that this assessment may be premature. By carefully controlling learning conditions and making use of causal intervention methods, experiments with language models can potentially constrain hypotheses about language acquisition and competence. I conclude that closer collaboration between theoretical linguists and computational researchers could yield valuable insights, particularly in advancing debates about linguistic nativism.},
  keywords     = {Compositionality,Language & Linguistics,Large Language Models,Mechanistic Interpretability,Transformer Architecture}
}
@article{milliereLookingSelfPhenomenology2017,
  title        = {Looking {{For The Self}}: {{Phenomenology}}, {{Neurophysiology}} and {{Philosophical Significance}} of {{Drug-induced Ego Dissolution}}},
  shorttitle   = {Looking {{For The Self}}},
  author       = {Milli{\`e}re, Rapha{\"e}l},
  year         = 2017,
  journal      = {Frontiers in Human Neuroscience},
  volume       = 11,
  number       = 245,
  pages        = {1--22},
  doi          = {10.3389/fnhum.2017.00245},
  issn         = {1662-5161},
  urldate      = {2017-04-25},
  copyright    = {All rights reserved},
  abstract     = {There is converging evidence that high doses of hallucinogenic drugs can produce significant alterations of self-experience, described as the dissolution of the sense of self and the loss of boundaries between self and world. This article discusses the relevance of this phenomenon, known as `drug-induced ego dissolution', for cognitive neuroscience, psychology and philosophy of mind. Data from self-report questionnaires suggest that three neuropharmacological classes of drugs can induce ego dissolution: classical psychedelics, dissociative anesthetics and agonists of the kappa opioid receptor. While these substances act on different neurotransmitter receptors, they all produce strong subjective effects that can be compared to the symptoms of acute psychosis, including ego dissolution. It has been suggested that neuroimaging of drug-induced ego dissolution can indirectly shed light on the neural correlates of the self. While this line of inquiry is promising, its results must be interpreted with caution. First, neural correlates of ego dissolution might reveal the necessary neurophysiological conditions for the maintenance of the sense of self, but it is more doubtful that this method can reveal its minimally sufficient conditions. Second, it is necessary to define the relevant notion of self at play in the phenomenon of drug-induced ego dissolution. This article suggests that drug-induced ego dissolution consists in the disruption of subpersonal processes underlying the `minimal' or `embodied' self, i.e. the basic experience of being a self rooted in multimodal integration of self-related stimuli. This hypothesis is consistent with Bayesian models of phenomenal selfhood, according to which the subjective structure of conscious experience ultimately results from the optimization of predictions in perception and action. Finally, it is argued that drug-induced ego dissolution is also of particular interest for philosophy of mind. 
On the one hand, it challenges theories according to which consciousness always involves self-awareness. On the other hand, it suggests that ordinary conscious experience might involve a minimal kind of self-awareness rooted in multisensory processing, which is what appears to fade away during drug-induced ego dissolution.},
  langid       = {english},
  keywords     = {Altered States,Consciousness,Self-Consciousness}
}
@article{milliereNormativeConflictsShallow2025,
  title        = {Normative Conflicts and Shallow {{AI}} Alignment},
  author       = {Milli{\`e}re, Rapha{\"e}l},
  year         = 2025,
  month        = jul,
  journal      = {Philosophical Studies},
  volume       = 182,
  number       = 7,
  pages        = {2035--2078},
  doi          = {10.1007/s11098-025-02347-3},
  issn         = {1573-0883},
  urldate      = {2025-08-04},
  abstract     = {The progress of AI systems such as Large Language Models (LLMs) raises increasingly pressing concerns about their safe deployment. This paper examines the value alignment problem for LLMs, arguing that current alignment strategies are fundamentally inadequate to prevent misuse. Despite ongoing efforts to instill norms such as helpfulness, honesty, and harmlessness in LLMs through fine-tuning based on human preferences, they remain vulnerable to adversarial attacks that exploit conflicts between these norms. I argue that this vulnerability reflects a fundamental limitation of existing alignment methods: they reinforce shallow behavioral dispositions rather than endowing LLMs with a genuine capacity for normative deliberation. Drawing on research in moral psychology, I show how humans' ability to engage in deliberative reasoning enhances their resilience against similar adversarial tactics. LLMs, by contrast, lack a robust capacity to detect and rationally resolve normative conflicts, leaving them susceptible to manipulation; even recent advances in reasoning-focused LLMs have not addressed this vulnerability. This ``shallow alignment'' problem carries significant implications for AI safety and regulation, suggesting that current approaches are insufficient for mitigating potential harms posed by increasingly capable AI systems.},
  langid       = {english},
  keywords     = {Adversarial Robustness,AI Alignment,AI Safety,Large Language Models,Reasoning}
}
@misc{millierePhilosophicalIntroductionLanguage2024,
  title        = {A {{Philosophical Introduction}} to {{Language Models}} -- {{Part II}}: {{The Way Forward}}},
  shorttitle   = {A {{Philosophical Introduction}} to {{Language Models}} -- {{Part II}}},
  author       = {Milli{\`e}re, Rapha{\"e}l and Buckner, Cameron},
  year         = 2024,
  month        = may,
  publisher    = {arXiv},
  number       = {arXiv:2405.03207},
  urldate      = {2024-05-07},
  copyright    = {All rights reserved},
  eprint       = {2405.03207},
  primaryclass = {cs},
  abstract     = {In this paper, the second of two companion pieces, we explore novel philosophical questions raised by recent progress in Large Language Models (LLMs) that go beyond the classical debates covered in the first part. We focus particularly on issues related to interpretability, examining evidence from causal intervention methods about the nature of LLMs' internal representations and computations. We also discuss the implications of multimodal and modular extensions of LLMs, recent debates about whether such systems may meet minimal criteria for consciousness, and concerns about secrecy and reproducibility in LLM research. Finally, we discuss whether LLM-like systems may be relevant to modeling aspects of human cognition, if their architectural characteristics and learning scenario are adequately constrained.},
  archiveprefix = {arXiv},
  keywords     = {Cognitive Modeling,Large Language Models,Mechanistic Interpretability,Multimodal AI,World Models}
}
@misc{millierePhilosophicalIntroductionLanguage2024a,
  title        = {A {{Philosophical Introduction}} to {{Language Models}} -- {{Part I}}: {{Continuity With Classic Debates}}},
  shorttitle   = {A {{Philosophical Introduction}} to {{Language Models}} -- {{Part I}}},
  author       = {Milli{\`e}re, Rapha{\"e}l and Buckner, Cameron},
  year         = 2024,
  month        = jan,
  publisher    = {arXiv},
  number       = {arXiv:2401.03910},
  doi          = {10.48550/arXiv.2401.03910},
  urldate      = {2024-01-19},
  copyright    = {All rights reserved},
  eprint       = {2401.03910},
  primaryclass = {cs},
  abstract     = {Large language models like GPT-4 have achieved remarkable proficiency in a broad spectrum of language-based tasks, some of which are traditionally associated with hallmarks of human intelligence. This has prompted ongoing disagreements about the extent to which we can meaningfully ascribe any kind of linguistic or cognitive competence to language models. Such questions have deep philosophical roots, echoing longstanding debates about the status of artificial neural networks as cognitive models. This article -- the first part of two companion papers -- serves both as a primer on language models for philosophers, and as an opinionated survey of their significance in relation to classic debates in the philosophy of cognitive science, artificial intelligence, and linguistics. We cover topics such as compositionality, language acquisition, semantic competence, grounding, world models, and the transmission of cultural knowledge. We argue that the success of language models challenges several long-held assumptions about artificial neural networks. However, we also highlight the need for further empirical investigation to better understand their internal mechanisms. This sets the stage for the companion paper (Part II), which turns to novel empirical methods for probing the inner workings of language models, and new philosophical questions prompted by their latest developments.},
  archiveprefix = {arXiv},
  keywords     = {Cognitive Modeling,Compositionality,Grounding & Meaning,Language & Linguistics,Large Language Models,World Models}
}
@article{millierePhilosophyCognitiveScience2024,
  title        = {Philosophy of {{Cognitive Science}} in the {{Age}} of {{Deep Learning}}},
  author       = {Milli{\`e}re, Rapha{\"e}l},
  year         = 2024,
  journal      = {WIREs Cognitive Science},
  pages        = {e1684},
  doi          = {10.1002/wcs.1684},
  issn         = {1939-5086},
  urldate      = {2024-05-23},
  copyright    = {\copyright{} 2024 The Author(s). WIREs Cognitive Science published by Wiley Periodicals LLC.},
  abstract     = {Deep learning has enabled major advances across most areas of artificial intelligence research. This remarkable progress extends beyond mere engineering achievements and holds significant relevance for the philosophy of cognitive science. Deep neural networks have made significant strides in overcoming the limitations of older connectionist models that once occupied the center stage of philosophical debates about cognition. This development is directly relevant to long-standing theoretical debates in the philosophy of cognitive science. Furthermore, ongoing methodological challenges related to the comparative evaluation of deep neural networks stand to benefit greatly from interdisciplinary collaboration with philosophy and cognitive science. The time is ripe for philosophers to explore foundational issues related to deep learning and cognition; this perspective paper surveys key areas where their contributions can be especially fruitful.},
  langid       = {english},
  keywords     = {Cognitive Modeling,Compositionality,Language & Linguistics,Large Language Models}
}
@article{millierePsychedelicsMeditationSelfConsciousness2018,
  title        = {Psychedelics, {{Meditation}} and {{Self-Consciousness}}},
  author       = {Milli{\`e}re, Rapha{\"e}l and {Carhart-Harris}, Robin L. and Roseman, Leor and Trautwein, Fynn-Mathis and {Berkovich-Ohana}, Aviva},
  year         = 2018,
  journal      = {Frontiers in Psychology},
  volume       = 9,
  number       = 1475,
  pages        = {1--29},
  doi          = {10.3389/fpsyg.2018.01475},
  issn         = {1664-1078},
  urldate      = {2018-08-20},
  copyright    = {All rights reserved},
  abstract     = {In recent years, the scientific study of meditation and psychedelic drugs has seen remarkable developments. The increased focus on meditation in cognitive neuroscience has led to a cross-cultural classification of standard meditation styles validated by functional and structural neuroanatomical data. Meanwhile, the renaissance of psychedelic research has shed light on the neurophysiology of altered states of consciousness induced by classical hallucinogens, such as psilocybin and LSD, whose effects are mainly mediated by agonism of serotonin receptors. Few attempts have been made at bridging these two domains of inquiry, despite intriguing evidence of overlap between the phenomenology and neurophysiology of meditation practice and psychedelic states. In particular, many contemplative traditions explicitly aim at dissolving the sense of self by eliciting altered states of consciousness through meditation, while classical psychedelics are known to produce significant disruptions of self-consciousness, a phenomenon known as drug-induced ego dissolution. In this article, we discuss available evidence regarding convergences and differences between phenomenological and neurophysiological data on meditation practice and psychedelic drug-induced states, with a particular emphasis on alterations of self-experience. While both meditation and psychedelics may disrupt self-consciousness and underlying neural processes, we emphasize that neither meditation nor psychedelic states can be conceived as simple, uniform categories. Moreover, we suggest that there are important phenomenological differences even between conscious states described as experiences of self-loss. As a result, we propose that self-consciousness may be best construed as a multidimensional construct, and that ``self-loss'', far from being an unequivocal phenomenon, can take several forms. Indeed, various aspects of self-consciousness, including narrative aspects linked to autobiographical memory, self-related thoughts and mental time travel, and embodied aspects rooted in multisensory processes, may be differently affected by psychedelics and meditation practices. Finally, we consider long-term outcomes of experiences of self-loss induced by meditation and psychedelics on individual traits and prosocial behavior. We call for caution regarding the problematic conflation of temporary states of self-loss with ``selflessness'' as a behavioral or social trait, although there is preliminary evidence that correlations between short-term experiences of self-loss and long-term trait alterations may exist.},
  langid       = {english},
  keywords     = {Altered States,Consciousness,Self-Consciousness}
}
@article{milliereRadicalDisruptionsSelfConsciousness2020,
  title        = {Radical Disruptions of Self-Consciousness},
  author       = {Milli{\`e}re, Rapha{\"e}l and Metzinger, Thomas},
  year         = 2020,
  month        = mar,
  journal      = {Philosophy and the Mind Sciences},
  volume       = 1,
  number       = {I},
  pages        = {1},
  doi          = {10.33735/phimisci.2020.I.50},
  issn         = {2699-0369},
  urldate      = {2020-04-22},
  copyright    = {Copyright (c) 2020 Raphael Milliere, Thomas Metzinger},
  abstract     = {This special issue is about something most of us might find very hard to conceive: states of consciousness in which self-consciousness is radically disrupted or altogether missing.},
  langid       = {english},
  keywords     = {Consciousness,Self-Consciousness}
}
@article{milliereSelflessMemories2022,
  title        = {Selfless {{Memories}}},
  author       = {Milli{\`e}re, Rapha{\"e}l and Newen, Albert},
  year         = 2022,
  month        = may,
  journal      = {Erkenntnis},
  doi          = {10.1007/s10670-022-00562-6},
  issn         = {1572-8420},
  copyright    = {All rights reserved},
  abstract     = {Many authors claim that being conscious constitutively involves being self-conscious, or conscious of oneself. This claim appears to be threatened by reports of `selfless' episodes, or conscious episodes lacking self-consciousness, recently described in a number of pathological and nonpathological conditions. However, the credibility of these reports has in turn been challenged on the following grounds: remembering and reporting a past conscious episode as an episode that one went through is only possible if one was conscious of oneself while undergoing it. Call this the Memory Challenge. This paper argues that the Memory Challenge fails to undermine the credibility of reports of selfless episodes, because it rests on problematic assumptions about episodic memory. The paper further argues that we should distinguish between several kinds of self-representation that may be involved in the process of episodic remembering, and that once we do so, it is no longer mysterious how one could accurately remember and report a selfless episode as an episode that one went through. Thus, we should take reports of this kind seriously, and view them as credible counter-examples to the claim that consciousness constitutively involves self-consciousness.},
  keywords     = {Consciousness,Self-Consciousness}
}
@phdthesis{milliereSelfMindPluralist2020,
  title        = {Self in {{Mind}}: {{A Pluralist Account}} of {{Self-Consciousness}}},
  author       = {Milli{\`e}re, Rapha{\"e}l},
  year         = 2020,
  address      = {Oxford},
  type         = {{DPhil Thesis}},
  abstract     = {This thesis investigates the relationship between consciousness and self-consciousness. I consider two broad claims about this relationship: a constitutive claim, according to which all conscious experiences constitutively involve self-consciousness; and a typicalist claim, according to which ordinary conscious experiences contingently involve self-consciousness. Both of these claims call for elucidation of the relevant notions of consciousness and self-consciousness. In the first part of the thesis ('The Myth of Constitutive Self-Consciousness'), I critically examine the constitutive claim. I start by offering an elucidatory account of consciousness, and outlining a number of foundational claims that plausibly follow from it. I subsequently distinguish between two concepts of self-consciousness: consciousness of one's experience, and consciousness of oneself (as oneself). Each of these concepts yields a distinct variant of the constitutive claim. In turn, each resulting variant of the constitutive claim can be interpreted in two ways: on a 'minimal' or deflationary reading, they fall within the scope of foundational claims about consciousness, while on a 'strong' or inflationary reading, they point to determinate aspects of phenomenology that are not acknowledged by the foundational claims as being aspects of all conscious mental states. I argue that the deflationary readings of either variant of the constitutive claim are plausible and illuminating, but would ideally be formulated without using a term as polysemous as 'self-consciousness'; by contrast, the inflationary readings of either variant are not adequately supported. In the second part of the thesis ('Self-Consciousness in the Real World'), I focus on the second concept of self-consciousness, or consciousness of oneself as oneself. Drawing upon empirical evidence, I defend a pluralist account of self-consciousness so construed, according to which there are several ways in which one can be conscious of oneself as oneself -- through conscious thoughts, bodily experiences and perceptual experiences -- that make distinct determinate contributions to one's phenomenology. This pluralist account provides us with the resources to vindicate the typicalist claim according to which consciousness of oneself as oneself -- a sense of self -- is pervasive in ordinary conscious experiences, as a matter of contingent empirical fact. It also provides us with the resources to assess the possibility that a subject might be conscious without being conscious of herself as herself in any way.},
  school       = {University of Oxford},
  keywords     = {Consciousness,Self-Consciousness}
}
@article{milliereTransformers2025,
  author       = {Milli{\`e}re, Rapha{\"e}l},
  title        = {Transformers},
  journal      = {Open Encyclopedia of Cognitive Science},
  publisher    = {MIT Press},
  year         = {2025},
  month        = jul,
  doi          = {10.21428/e2759450.d3acfbfb},
  urldate      = {2025-08-04},
  langid       = {english},
  keywords     = {Cognitive Modeling,Large Language Models,Scaling & Emergent Abilities,Transformer Architecture}
}
@misc{molloVectorGroundingProblem2025,
  title        = {The {{Vector Grounding Problem}}},
  author       = {Mollo, Dimitri Coelho and Milli{\`e}re, Rapha{\"e}l},
  year         = 2025,
  month        = jun,
  publisher    = {arXiv},
  number       = {arXiv:2304.01481},
  doi          = {10.48550/arXiv.2304.01481},
  urldate      = {2025-06-23},
  eprint       = {2304.01481},
  primaryclass = {cs},
  abstract     = {The remarkable performance of Large Language Models (LLMs) on complex linguistic tasks has sparked debate about their capabilities. Unlike humans, these models learn language solely from textual data without directly interacting with the world. Yet they generate seemingly meaningful text on diverse topics. This achievement has renewed interest in the classical `Symbol Grounding Problem' -- the question of whether the internal representations and outputs of symbolic AI systems can possess intrinsic meaning that is not parasitic on external interpretation. Although modern LLMs compute over vectors rather than symbols, an analogous problem arises for these systems, which we call the Vector Grounding Problem. This paper has two main goals. First, we distinguish five main notions of grounding that are often conflated in the literature, and argue that only one of them, which we call referential grounding, is relevant to the Vector Grounding Problem. Second, drawing on philosophical theories of representational content, we provide two arguments for the claim that LLMs and related systems can achieve referential grounding: (1) through preference fine-tuning methods that explicitly establish world-involving functions, and (2) through pre-training alone, which in limited domains may select for internal states with world-involving content, as mechanistic interpretability research suggests. Through these pathways, LLMs can establish connections to the world sufficient for intrinsic meaning. One potentially surprising implication of our discussion is that multimodality and embodiment are neither necessary nor sufficient to overcome the Grounding Problem.},
  archiveprefix = {arXiv},
  keywords     = {Grounding & Meaning,Large Language Models,Mechanistic Interpretability,Multimodal AI}
}
@article{muskerLLMsModelsAnalogical2025,
  title        = {{{LLMs}} as Models for Analogical Reasoning},
  author       = {Musker, Sam and Duchnowski, Alex and Milli{\`e}re, Rapha{\"e}l and Pavlick, Ellie},
  year         = 2025,
  month        = dec,
  journal      = {Journal of Memory and Language},
  volume       = 145,
  pages        = 104676,
  doi          = {10.1016/j.jml.2025.104676},
  issn         = {0749-596X},
  urldate      = {2025-08-04},
  eprint       = {2406.13803},
  archiveprefix = {arXiv},
  abstract     = {Analogical reasoning --- the capacity to identify and map structural relationships between different domains --- is fundamental to human cognition and learning. Recent studies have shown that Large Language Models (LLMs) can sometimes match humans in analogical reasoning tasks, opening the possibility that analogical reasoning might emerge from domain-general processes. However, it is still debated whether these emergent capacities are largely superficial and limited to simple relations seen during training or whether they encompass the flexible representational and mapping capabilities which are the focus of leading cognitive models of analogy. In this study, we introduce novel analogical reasoning tasks that require participants to map between semantically contentful words and sequences of letters and other abstract characters. This task necessitates the ability to flexibly re-represent rich semantic information---an ability which is known to be central to human analogy but which is thus far not well-captured by existing cognitive theories and models. We assess the performance of both human participants and LLMs on tasks focusing on reasoning from semantic structure and semantic content, introducing variations that test the robustness of their analogical inferences. Advanced LLMs match human performance across several conditions, though humans and LLMs respond differently to certain task variations and semantic distractors. Our results thus provide new evidence that LLMs might offer a how-possibly explanation of human analogical reasoning in contexts that are not yet well modeled by existing theories, but that even today's best models are unlikely to yield how-actually explanations.},
  keywords     = {Cognitive Modeling,Compositionality,In-Context Learning,Large Language Models,Reasoning}
}
@article{srivastavaImitationGameQuantifying2023,
  title        = {Beyond the {{Imitation Game}}: {{Quantifying}} and Extrapolating the Capabilities of Language Models},
  shorttitle   = {Beyond the {{Imitation Game}}},
  author       = {Srivastava, Aarohi and Rastogi, Abhinav and Rao, Abhishek and Shoeb, Abu Awal Md and Abid, Abubakar and Fisch, Adam and Brown, Adam R. and Santoro, Adam and Gupta, Aditya and {Garriga-Alonso}, Adri{\`a} and Kluska, Agnieszka and Lewkowycz, Aitor and Agarwal, Akshat and Power, Alethea and Ray, Alex and Warstadt, Alex and Kocurek, Alexander W. and Safaya, Ali and Tazarv, Ali and Xiang, Alice and Parrish, Alicia and Nie, Allen and Hussain, Aman and Askell, Amanda and Dsouza, Amanda and Slone, Ambrose and Rahane, Ameet and Iyer, Anantharaman S. and Andreassen, Anders Johan and Madotto, Andrea and Santilli, Andrea and Stuhlm{\"u}ller, Andreas and Dai, Andrew M. and La, Andrew and Lampinen, Andrew and Zou, Andy and Jiang, Angela and Chen, Angelica and Vuong, Anh and Gupta, Animesh and Gottardi, Anna and Norelli, Antonio and Venkatesh, Anu and Gholamidavoodi, Arash and Tabassum, Arfa and Menezes, Arul and Kirubarajan, Arun and Mullokandov, Asher and Sabharwal, Ashish and Herrick, Austin and Efrat, Avia and Erdem, Aykut and Karaka{\c s}, Ayla and Roberts, B. Ryan and Loe, Bao Sheng and Zoph, Barret and Bojanowski, Bart{\l}omiej and {\"O}zyurt, Batuhan and Hedayatnia, Behnam and Neyshabur, Behnam and Inden, Benjamin and Stein, Benno and Ekmekci, Berk and Lin, Bill Yuchen and Howald, Blake and Orinion, Bryan and Diao, Cameron and Dour, Cameron and Stinson, Catherine and Argueta, Cedrick and Ferri, Cesar and Singh, Chandan and Rathkopf, Charles and Meng, Chenlin and Baral, Chitta and Wu, Chiyu and {Callison-Burch}, Chris and Waites, Christopher and Voigt, Christian and Manning, Christopher D. and Potts, Christopher and Ramirez, Cindy and Rivera, Clara E. and Siro, Clemencia and Raffel, Colin and Ashcraft, Courtney and Garbacea, Cristina and Sileo, Damien and Garrette, Dan and Hendrycks, Dan and Kilman, Dan and Roth, Dan and Freeman, C. Daniel and Khashabi, Daniel and Levy, Daniel and Gonz{\'a}lez, Daniel Mosegu{\'i} and Perszyk, Danielle and Hernandez, Danny and Chen, Danqi and Ippolito, Daphne and Gilboa, Dar and Dohan, David and Drakard, David and Jurgens, David and Datta, Debajyoti and Ganguli, Deep and Emelin, Denis and Kleyko, Denis and Yuret, Deniz and Chen, Derek and Tam, Derek and Hupkes, Dieuwke and Misra, Diganta and Buzan, Dilyar and Mollo, Dimitri Coelho and Yang, Diyi and Lee, Dong-Ho and Schrader, Dylan and Shutova, Ekaterina and Cubuk, Ekin Dogus and Segal, Elad and Hagerman, Eleanor and Barnes, Elizabeth and Donoway, Elizabeth and Pavlick, Ellie and Rodol{\`a}, Emanuele and Lam, Emma and Chu, Eric and Tang, Eric and Erdem, Erkut and Chang, Ernie and Chi, Ethan A. and Dyer, Ethan and Jerzak, Ethan and Kim, Ethan and Manyasi, Eunice Engefu and Zheltonozhskii, Evgenii and Xia, Fanyue and Siar, Fatemeh and {Mart{\'i}nez-Plumed}, Fernando and Happ{\'e}, Francesca and Chollet, Francois and Rong, Frieda and Mishra, Gaurav and Winata, Genta Indra and de Melo, Gerard and Kruszewski, Germ{\'a}n and Parascandolo, Giambattista and Mariani, Giorgio and Wang, Gloria Xinyue and {Jaimovitch-Lopez}, Gonzalo and Betz, Gregor and {Gur-Ari}, Guy and Galijasevic, Hana and Kim, Hannah and Rashkin, Hannah and Hajishirzi, Hannaneh and Mehta, Harsh and Bogar, Hayden and Shevlin, Henry Francis Anthony and Schuetze, Hinrich and Yakura, Hiromu and Zhang, Hongming and Wong, Hugh Mee and Ng, Ian and Noble, Isaac and Jumelet, Jaap and Geissinger, Jack and Kernion, Jackson and Hilton, Jacob and Lee, Jaehoon and Fisac, Jaime Fern{\'a}ndez and Simon, James B. and Koppel, James and Zheng, James and Zou, James and Kocon, Jan and Thompson, Jana and Wingfield, Janelle and Kaplan, Jared and Radom, Jarema and {Sohl-Dickstein}, Jascha and Phang, Jason and Wei, Jason and Yosinski, Jason and Novikova, Jekaterina and Bosscher, Jelle and Marsh, Jennifer and Kim, Jeremy and Taal, Jeroen and Engel, Jesse and Alabi, Jesujoba and Xu, Jiacheng and Song, Jiaming and Tang, Jillian and Waweru, Joan and Burden, John and Miller, John and Balis, John U. and Batchelder, Jonathan and Berant, Jonathan and Frohberg, J{\"o}rg and Rozen, Jos and {Hernandez-Orallo}, Jose and Boudeman, Joseph and Guerr, Joseph and Jones, Joseph and Tenenbaum, Joshua B. and Rule, Joshua S. and Chua, Joyce and Kanclerz, Kamil and Livescu, Karen and Krauth, Karl and Gopalakrishnan, Karthik and Ignatyeva, Katerina and Markert, Katja and Dhole, Kaustubh and Gimpel, Kevin and Omondi, Kevin and Mathewson, Kory Wallace and Chiafullo, Kristen and Shkaruta, Ksenia and Shridhar, Kumar and McDonell, Kyle and Richardson, Kyle and Reynolds, Laria and Gao, Leo and Zhang, Li and Dugan, Liam and Qin, Lianhui and {Contreras-Ochando}, Lidia and Morency, Louis-Philippe and Moschella, Luca and Lam, Lucas and Noble, Lucy and Schmidt, Ludwig and He, Luheng and {Oliveros-Col{\'o}n}, Luis and Metz, Luke and Senel, L{\"u}tfi Kerem and Bosma, Maarten and Sap, Maarten and Hoeve, Maartje Ter and Farooqi, Maheen and Faruqui, Manaal and Mazeika, Mantas and Baturan, Marco and Marelli, Marco and Maru, Marco and {Ramirez-Quintana}, Maria Jose and Tolkiehn, Marie and Giulianelli, Mario and Lewis, Martha and Potthast, Martin and Leavitt, Matthew L. and Hagen, Matthias and Schubert, M{\'a}ty{\'a}s and Baitemirova, Medina Orduna and Arnaud, Melody and McElrath, Melvin and Yee, Michael Andrew and Cohen, Michael and Gu, Michael and Ivanitskiy, Michael and Starritt, Michael and Strube, Michael and Sw{\k e}drowski, Micha{\l} and Bevilacqua, Michele and Yasunaga, Michihiro and Kale, Mihir and Cain, Mike and Xu, Mimee and Suzgun, Mirac and Walker, Mitch and Tiwari, Mo and Bansal, Mohit and Aminnaseri, Moin and Geva, Mor and Gheini, Mozhdeh and T, Mukund Varma and Peng, Nanyun and Chi, Nathan Andrew and Lee, Nayeon and Krakover, Neta Gur-Ari and Cameron, Nicholas and Roberts, Nicholas and Doiron, Nick and Martinez, Nicole and Nangia, Nikita and Deckers, Niklas and Muennighoff, Niklas and Keskar, Nitish Shirish and Iyer, Niveditha S. and Constant, Noah and Fiedel, Noah and Wen, Nuan and Zhang, Oliver and Agha, Omar and Elbaghdadi, Omar and Levy, Omer and Evans, Owain and Casares, Pablo Antonio Moreno and Doshi, Parth and Fung, Pascale and Liang, Paul Pu and Vicol, Paul and Alipoormolabashi, Pegah and Liao, Peiyuan and Liang, Percy and Chang, Peter W. and Eckersley, Peter and Htut, Phu Mon and Hwang, Pinyu and Mi{\l}kowski, Piotr and Patil, Piyush and Pezeshkpour, Pouya and Oli, Priti and Mei, Qiaozhu and Lyu, Qing and Chen, Qinlang and Banjade, Rabin and Rudolph, Rachel Etta and Gabriel, Raefer and Habacker, Rahel and Risco, Ramon and Milli{\`e}re, Rapha{\"e}l and Garg, Rhythm and Barnes, Richard and Saurous, Rif A. and Arakawa, Riku and Raymaekers, Robbe and Frank, Robert and Sikand, Rohan and Novak, Roman and Sitelew, Roman and Bras, Ronan Le and Liu, Rosanne and Jacobs, Rowan and Zhang, Rui and Salakhutdinov, Russ and Chi, Ryan Andrew and Lee, Seungjae Ryan and Stovall, Ryan and Teehan, Ryan and Yang, Rylan and Singh, Sahib and Mohammad, Saif M. and Anand, Sajant and Dillavou, Sam and Shleifer, Sam and Wiseman, Sam and Gruetter, Samuel and Bowman, Samuel R. and Schoenholz, Samuel Stern and Han, Sanghyun and Kwatra, Sanjeev and Rous, Sarah A. and Ghazarian, Sarik and Ghosh, Sayan and Casey, Sean and Bischoff, Sebastian and Gehrmann, Sebastian and Schuster, Sebastian and Sadeghi, Sepideh and Hamdan, Shadi and Zhou, Sharon and Srivastava, Shashank and Shi, Sherry and Singh, Shikhar and Asaadi, Shima and Gu, Shixiang Shane and Pachchigar, Shubh and Toshniwal, Shubham and Upadhyay, Shyam and Debnath, Shyamolima Shammie and Shakeri, Siamak and Thormeyer, Simon and Melzi, Simone and Reddy, Siva and Makini, Sneha Priscilla and Lee, Soo-Hwan and Torene, Spencer and Hatwar, Sriharsha and Dehaene, Stanislas and Divic, Stefan and Ermon, Stefano and Biderman, Stella and Lin, Stephanie and Prasad, Stephen and Piantadosi, Steven and Shieber, Stuart and Misherghi, Summer and Kiritchenko, Svetlana and Mishra, Swaroop and Linzen, Tal and Schuster, Tal and Li, Tao and Yu, Tao and Ali, Tariq and Hashimoto, Tatsunori and Wu, Te-Lin and Desbordes, Th{\'e}o and Rothschild, Theodore and Phan, Thomas and Wang, Tianle and Nkinyili, Tiberius and Schick, Timo and Kornev, Timofei and Tunduny, Titus and Gerstenberg, Tobias and Chang, Trenton and Neeraj, Trishala and Khot, Tushar and Shultz, Tyler and Shaham, Uri and Misra, Vedant and Demberg, Vera and Nyamai, Victoria and Raunak, Vikas and Ramasesh, Vinay Venkatesh and Prabhu, Vinay Uday and Padmakumar, Vishakh and Srikumar, Vivek and Fedus, William and Saunders, William and Zhang, William and Vossen, Wout and Ren, Xiang and Tong, Xiaoyu and Zhao, Xinran and Wu, Xinyi and Shen, Xudong and Yaghoobzadeh, Yadollah and Lakretz, Yair and Song, Yangqiu and Bahri, Yasaman and Choi, Yejin and Yang, Yichi and Hao, Yiding and Chen, Yifu and Belinkov, Yonatan and Hou, Yu and Hou, Yufang and Bai, Yuntao and Seid, Zachary and Zhao, Zhuoye and Wang, Zijian and Wang, Zijie J. and Wang, Zirui and Wu, Ziyi},
  year         = 2023,
  month        = jan,
  journal      = {Transactions on Machine Learning Research},
  doi          = {10.48550/arXiv.2206.04615},
  issn         = {2835-8856},
  urldate      = {2023-08-09},
  copyright    = {All rights reserved},
  abstract     = {Language models demonstrate both quantitative improvement and new qualitative capabilities with increasing scale. Despite their potentially transformative impact, these new capabilities are as yet poorly characterized. In order to inform future research, prepare for disruptive new model capabilities, and ameliorate socially harmful effects, it is vital that we understand the present and near-future capabilities and limitations of language models. To address this challenge, we introduce the Beyond the Imitation Game benchmark (BIG-bench). BIG-bench currently consists of 204 tasks, contributed by 450 authors across 132 institutions. Task topics are diverse, drawing problems from linguistics, childhood development, math, common-sense reasoning, biology, physics, social bias, software development, and beyond. BIG-bench focuses on tasks that are believed to be beyond the capabilities of current language models. We evaluate the behavior of OpenAI's GPT models, Google-internal dense transformer architectures, and Switch-style sparse transformers on BIG-bench, across model sizes spanning millions to hundreds of billions of parameters. In addition, a team of human expert raters performed all tasks in order to provide a strong baseline. Findings include: model performance and calibration both improve with scale, but are poor in absolute terms (and when compared with rater performance); performance is remarkably similar across model classes, though with benefits from sparsity; tasks that improve gradually and predictably commonly involve a large knowledge or memorization component, whereas tasks that exhibit ``breakthrough'' behavior at a critical scale often involve multiple steps or components, or brittle metrics; social bias typically increases with scale in settings with ambiguous context, but this can be improved with prompting.},
  langid       = {english},
  keywords     = {Large Language Models,LLM Evaluation,Scaling & Emergent Abilities}
}
@article{timmermannNeuralCorrelatesDMT2019,
  title        = {Neural Correlates of the {{DMT}} Experience Assessed with Multivariate {{EEG}}},
  author       = {Timmermann, Christopher and Roseman, Leor and Schartner, Michael and Milli{\`e}re, Rapha{\"e}l and Williams, Luke T. J. and Erritzoe, David and Muthukumaraswamy, Suresh and Ashton, Michael and Bendrioua, Adam and Kaur, Okdeep and Turton, Samuel and Nour, Matthew M. and Day, Camilla M. and Leech, Robert and Nutt, David J. and {Carhart-Harris}, Robin L.},
  year         = 2019,
  month        = nov,
  journal      = {Scientific Reports},
  volume       = 9,
  number       = 1,
  pages        = {1--13},
  doi          = {10.1038/s41598-019-51974-4},
  issn         = {2045-2322},
  urldate      = {2019-12-17},
  copyright    = {2019 The Author(s)},
  ids          = {timmermannNeuralCorrelatesDMTforthcoming},
  abstract     = {Studying transitions in and out of the altered state of consciousness caused by intravenous (IV) N,N-Dimethyltryptamine (DMT - a fast-acting tryptamine psychedelic) offers a safe and powerful means of advancing knowledge on the neurobiology of conscious states. Here we sought to investigate the effects of IV DMT on the power spectrum and signal diversity of human brain activity (6 female, 7 male) recorded via multivariate EEG, and plot relationships between subjective experience, brain activity and drug plasma concentrations across time. Compared with placebo, DMT markedly reduced oscillatory power in the alpha and beta bands and robustly increased spontaneous signal diversity. Time-referenced and neurophenomenological analyses revealed close relationships between changes in various aspects of subjective experience and changes in brain activity. Importantly, the emergence of oscillatory activity within the delta and theta frequency bands was found to correlate with the peak of the experience - particularly its eyes-closed visual component. These findings highlight marked changes in oscillatory activity and signal diversity with DMT that parallel broad and specific components of the subjective experience, thus advancing our understanding of the neurobiological underpinnings of immersive states of consciousness.},
  langid       = {english},
  keywords     = {Altered States,Consciousness}
}
@inproceedings{wuHowTransformersLearn2025,
  author       = {Wu, Yiwei and Geiger, Atticus and Milli{\`e}re, Rapha{\"e}l},
  title        = {How {{Do Transformers Learn Variable Binding}} in {{Symbolic Programs}}?},
  booktitle    = {Forty-Second {{International Conference}} on {{Machine Learning}}},
  year         = {2025},
  month        = may,
  urldate      = {2025-05-28},
  website      = {https://variablescope.org},
  langid       = {english},
  keywords     = {Compositionality,Large Language Models,Mechanistic Interpretability,Reasoning,Transformer Architecture},
  abstract     = {Variable binding---the ability to associate variables with values---is fundamental to symbolic computation and cognition. Although classical architectures typically implement variable binding via addressable memory, it is not well understood how modern neural networks lacking built-in binding operations may acquire this capacity. We investigate this by training a Transformer to dereference queried variables in symbolic programs where variables are assigned either numerical constants or other variables. Each program requires following chains of variable assignments up to four steps deep to find the queried value, and also contains irrelevant chains of assignments acting as distractors. Our analysis reveals a developmental trajectory with three distinct phases during training: (1) random prediction of numerical constants, (2) a shallow heuristic prioritizing early variable assignments, and (3) the emergence of a systematic mechanism for dereferencing assignment chains. Using causal interventions, we find that the model learns to exploit the residual stream as an addressable memory space, with specialized attention heads routing information across token positions. This mechanism allows the model to dynamically track variable bindings across layers, resulting in accurate dereferencing. Our results show how Transformer models can learn to implement systematic variable binding without explicit architectural support, bridging connectionist and symbolic approaches.}
}
% arXiv preprint (identifier 2310.00313): neuroscience-inspired analysis of in-context learning representations in LLMs.
@misc{yousefiDecodingInContextLearning2024,
  title        = {Decoding {{In-Context Learning}}: {{Neuroscience-inspired Analysis}} of {{Representations}} in {{Large Language Models}}},
  shorttitle   = {Decoding {{In-Context Learning}}},
  author       = {Yousefi, Safoora and Betthauser, Leo and Hasanbeig, Hosein and Milli{\`e}re, Rapha{\"e}l and Momennejad, Ida},
  year         = 2024,
  month        = feb,
  publisher    = {arXiv},
  number       = {arXiv:2310.00313},
  doi          = {10.48550/arXiv.2310.00313},
  urldate      = {2024-02-13},
  copyright    = {All rights reserved},
  eprint       = {2310.00313},
  archiveprefix = {arXiv},
  primaryclass = {cs},
  abstract     = {Large language models (LLMs) exhibit remarkable performance improvement through In-Context Learning (ICL) by leveraging task-specific examples in the input. However, the mechanisms behind this improvement remain elusive. In this work, we investigate how LLM embeddings and attention representations change following in-context-learning, and how these changes mediate improvement in behavior. We employ neuroscience-inspired techniques such as representational similarity analysis (RSA) and propose novel methods for parameterized probing and measuring ratio of attention to relevant vs. irrelevant information in Llama-2 70B and Vicuna 13B. We designed two tasks with a priori relationships among their conditions: linear regression and reading comprehension. We formed hypotheses about expected similarities in task representations and measured hypothesis alignment of LLM representations before and after ICL as well as changes in attention. Our analyses revealed a meaningful correlation between improvements in behavior after ICL and changes in both embeddings and attention weights across LLM layers. This empirical framework empowers a nuanced understanding of how latent representations shape LLM behavior, offering valuable tools and insights for future research and practical applications.},
  langid       = {english},
  keywords     = {In-Context Learning,Large Language Models,Mechanistic Interpretability}
}
