add files

This commit is contained in:
wangziao 2025-10-11 20:13:32 -07:00
parent 4117ea7b02
commit 4c7bbe60be
5 changed files with 188 additions and 1 deletions

.gitignore vendored (new file, +4 lines)

@@ -0,0 +1,4 @@
*.aux
*.log
*.out
*.synctex.gz


@@ -1,6 +1,6 @@
MIT License
-Copyright (c) 2025 admin
+Copyright (c) 2025 Ziao Wang
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

sample.py (new file, +27 lines)

@@ -0,0 +1,27 @@
import numpy as np
N = 100000
d = 1000
Xs = np.random.normal(size=(N,d))
# We demonstrate that X0 is optimal. Find vector k.
X0 = Xs[0]
X0p = X0.clip(min=0)
L2_X0p = np.linalg.norm(X0p)
X0p_hat = X0p / L2_X0p
X0_score = np.dot(X0p_hat, X0) # is also L2_X0p
print("X0_score: %.4f" % X0_score)
# Find scores
scores = Xs @ X0p_hat
scores = np.partition(scores, -2)  # scores[-1] is the max, scores[-2] the runner-up
print("1st score: %.4f, squared = %.4f ~ d/2 = %.2f" % (scores[-1], scores[-1]**2, d/2))
print("2nd score: %.4f, squared = %.4f < bd, bd ~ 2log(n) = %.4f" % (scores[-2], scores[-2]**2, 2*np.log(N)))
print("If we allow other candidates to remove their negative scores")
Xs = Xs.clip(min=0)
scores = Xs @ X0p_hat
scores = np.partition(scores, -2)
dpart = np.sqrt(d/2)/np.pi
npart = np.sqrt((1-1/np.pi)*np.log(N))
print("2nd score: %.4f ~ sqrt{d/2pi^2} + sqrt{(1-1/pi)ln(n)} = %.4f" % (scores[-2], dpart+npart))

Binary file not shown.


@@ -0,0 +1,156 @@
\documentclass{article}
\title{Probability of points being ``positive linear optimal'' in high dimensionality}
\author{Ziao Wang}
\date{}
\usepackage{indentfirst}
\usepackage{amsmath,amssymb}
\usepackage[margin=1in]{geometry}
\usepackage{hyperref}
\usepackage{graphicx}
\graphicspath{{.}}
\usepackage{listings}
% Optional: Global settings for Python listings
\lstset{
language=Python,
basicstyle=\ttfamily\small, % Monospaced font, small size
showstringspaces=false, % Don't show spaces in strings
frame=single, % Add a frame around the listing
breaklines=true % Allow line breaking
}
\begin{document}
\maketitle
\section*{1. Background}
\section*{Lemma 1}
If $X$ follows a d-dimensional standard Gaussian distribution, and $v$ is a fixed unit vector, then $X \cdot v \sim N(0,1)$.
Justification: the d-dimensional standard Gaussian distribution is rotationally symmetric, like a ball; alternatively, use the property that any linear combination of the components of a multivariate normal vector is itself a univariate normal random variable.
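A quick Monte Carlo check of Lemma 1 (an illustrative sketch only; the sample sizes below are arbitrary and not part of the argument):
\begin{lstlisting}
import numpy as np
d = 1000
v = np.random.normal(size=d)
v = v / np.linalg.norm(v)              # a fixed unit vector
X = np.random.normal(size=(10000, d))  # i.i.d. standard Gaussian samples
proj = X @ v                           # projection of each sample onto v
print("mean = %.4f, var = %.4f" % (proj.mean(), proj.var()))  # ~0 and ~1
\end{lstlisting}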
\section*{Lemma 2}
When $n \gg 1$, the maximum among $n$ i.i.d. standard Gaussians is very unlikely to exceed $\sqrt{2\ln{n}}$ by much.
We rely on Lemma 5.3 in \href{https://psdey.web.illinois.edu/math595fa19/lec05.pdf}{``Concentration for Maximum'', from a University of Illinois lecture by Partha S. Dey (scribe: Hsin-Po Wang)} to support this claim. Copying Lemma 5.3 here:
Suppose $M_n$ is the maximum of n i.i.d. standard normals, then
$$ P[M_n \geq \sqrt{2\log{n}} + \frac{x}{\sqrt{2\log{n}}}] \leq e^{-x} $$
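As a numerical illustration of Lemma 2 (a small sketch; the sample and trial counts are arbitrary):
\begin{lstlisting}
import numpy as np
n, trials = 100000, 50
maxima = np.random.normal(size=(trials, n)).max(axis=1)
bound = np.sqrt(2*np.log(n))
print("average max = %.3f, sqrt(2 ln n) = %.3f" % (maxima.mean(), bound))
# by Lemma 5.3, exceeding bound + x/bound happens with probability at most e^{-x}
x = 1.0
print("fraction exceeding bound + x/bound: %.3f <= e^{-x} = %.3f"
      % (np.mean(maxima > bound + x/bound), np.exp(-x)))
\end{lstlisting}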
\section*{Lemma 3}
If $x$ is standard Gaussian, its mean value given that $x>0$ is $\sqrt{\frac{2}{\pi}}$. That is, $x \sim N(0,1) \rightarrow E[x|x>0] = \sqrt{\frac{2}{\pi}}$.
Derivation: the conditional density of $x$ given $x>0$ is $\sqrt{\frac{2}{\pi}}e^{-x^2/2}$, so $E[x|x>0] = \sqrt{\frac{2}{\pi}} \int_{x=0}^\infty xe^{-x^2/2}dx = \sqrt{\frac{2}{\pi}} \int_{u=0}^\infty e^{-u} du = \sqrt{\frac{2}{\pi}}$, substituting $u = x^2/2$.
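A one-line numerical check of Lemma 3 (illustrative only; the sample size is arbitrary):
\begin{lstlisting}
import numpy as np
x = np.random.normal(size=1000000)
# conditional mean of a standard Gaussian given x > 0
print(x[x > 0].mean(), np.sqrt(2/np.pi))   # both approximately 0.7979
\end{lstlisting}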
\section*{2. Problem}
Let $X_1,...,X_n$ be i.i.d. samples from a d-dimensional standard Gaussian. We say sample $X_i$ is ``positive linear optimal'' (PLO) if there exists a d-dimensional vector $k$ whose elements are non-negative such that $X_i \cdot k > X_j \cdot k, \forall j\neq i$. Suppose $n \gg d \gg \ln{n}$. As $n\rightarrow \infty$, is every point almost always, with some probability, or almost never positive linear optimal?
\section*{2.1 Lower bound}
If $X_i$ is greatest in some dimension $r$, then $X_i$ is certainly PLO: just choose $k=e_r$. The probability of being greatest in a given dimension is $\frac{1}{n}$, so the probability of being greatest in at least one dimension is $1-(1-\frac{1}{n})^d \sim \frac{d}{n} \rightarrow 0$; this lower bound is too loose.
\section*{2.2 Upper bound}
If $X_i$ is PLO, then $X_i$ must be Pareto optimal: there must be no $j$ such that $X_j > X_i$ in every dimension (i.e. such that $X_i$ is dominated by $X_j$). So the probability of being PLO is upper bounded by the probability of being Pareto optimal. The latter can be estimated as follows: the probability of $X_i$ being dominated by a particular $X_j$ is the product over all dimensions, $\frac{1}{2^d}$. Assume the event $X_j > X_i$ is roughly independent of $X_m > X_i$ for $m \neq j$. Then the probability of $X_i$ being Pareto optimal is about $1-\frac{n}{2^d} \rightarrow 1$, an upper bound that is also too loose.
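The estimate above can be probed numerically (a minimal sketch under the same independence heuristic; the values of $n$ and $d$ are arbitrary and chosen small enough that domination is still observable):
\begin{lstlisting}
import numpy as np
n, d = 1000, 15
X = np.random.normal(size=(n, d))
dominated = 0
for i in range(n):
    others = np.delete(X, i, axis=0)
    # X[i] is dominated if some other sample beats it in every dimension
    if np.any(np.all(others > X[i], axis=1)):
        dominated += 1
print("dominated fraction: %.4f, n/2^d = %.4f" % (dominated/n, n/2**d))
\end{lstlisting}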
\section*{3. Construction}
Claim: Every sample is almost always PLO.
Construction: For sample $X_i$, we choose $k$ to be $\frac{X_i^{+}}{||X_i^{+}||}$, where $X_i^{+}$ is the vector formed by keeping the components of $X_i$ that are positive and setting the non-positive components to 0. Note that $k$ is a unit vector.
\section*{3.1. The Candidate's score}
Every component of $X_i^+$ follows the ``clipped'' standard Gaussian, where the probability mass for negative values is moved to 0 (which happens with probability 50\%). Therefore the ``score'' $S$ of $X_i$ (we use ``score'' to refer to the dot product of a sample with $k$) is
\begin{align*}
S &= X_i \cdot k = X_i^{+} \cdot k = X_i^{+} \cdot \frac{X_i^+}{||X_i^+||} = ||X_i^+|| \\
S^2 &= X_i^+ \cdot X_i^+ = \Sigma_{r=1}^d (X_{i,r}^+)^2 \sim d/2
\end{align*}
(By the law of large numbers, since $E[(X_{i,r}^+)^2] = \frac{1}{2}E[X_{i,r}^2] = \frac{1}{2}$.)
\section*{3.2 The Competitor's score}
$X_i$ must compete against the rest of the samples. For $X_j, j\neq i$, since $X_j$ is independent of $k$ and $k$ is a unit vector, we apply Lemma 1 and see $X_j \cdot k \sim N(0,1)$.
Among the n-1 competitors, the strongest competitor's score $T$ is unlikely to exceed $\sqrt{2\ln{n}}$ by much, according to Lemma 2. So $T^2 \sim 2\ln{n}$.
\section*{3.3 Comparison}
Since $d \gg \ln{n}$ in the problem statement, we have $S^2 \gg T^2$. Therefore every point is almost always PLO.
\section*{3.4 Variant: Allow competitors to exclude negative scores}
Consider what happens if competitors are allowed to exclude negative components from the dot product. Equivalently, $X_1^+,...,X_n^+$ are sampled from a clipped d-dimensional standard Gaussian. The candidate's score wouldn't change; will the competitors' scores increase so much as to surpass the candidate? For each competitor $j$, its score is $ T_j = X_j^+ \cdot k = \frac{1}{||X_i^+||} \Sigma_{r=1}^d X_{jr}^+ \cdot X_{ir}^+ $
Applying Lemma 3, we see that $E[X_{jr}^+] = \frac{1}{2}*0 + \frac{1}{2}E[x|x>0,x\sim N(0,1)] = \sqrt{\frac{1}{2\pi}}$. We also know that $E[(X_{jr}^+)^2] = \frac{1}{2}$, thus $Var[X_{jr}^+] = \frac{1}{2}-\frac{1}{2\pi}$.
To first order,
$$ E[||X_i^+||*T_j] = \sqrt{\frac{1}{2\pi}} \Sigma_{r=1}^d X_{ir}^+ := \sqrt{\frac{1}{2\pi}} s_1$$
For the variance,
$$ Var[||X_i^+||*T_j] = \Sigma_{r=1}^d (\frac{1}{2}-\frac{1}{2\pi}) * (X_{ir}^+)^2 := (\frac{1}{2}-\frac{1}{2\pi}) s_2$$
Since $T_j$ is a sum of many independent terms, applying the central limit theorem across the component dimension (assuming $d \gg 1$), $T_j$ is approximately normally distributed:
$$ ||X_i^+||*T_j \sim N(\sqrt{\frac{1}{2\pi}} s_1,\ (\frac{1}{2}-\frac{1}{2\pi}) s_2 ) := N(\mu, \sigma^2) $$
Applying Lemma 2, among the n competitors
$$ ||X_i^+||*T_{max} \sim \mu + \sqrt{2\ln{n}}*\sigma $$
Recall that $||X_i^+|| = \sqrt{s_2}$
$$ T_{max} \sim \sqrt{\frac{1}{2\pi}} \frac{s_1}{\sqrt{s_2}} + \sqrt{(1-\frac{1}{\pi})\ln{n}} $$
Considering that $s_1 \sim d\sqrt{\frac{1}{2\pi}}$, $s_2 \sim \frac{d}{2}$,
$$ T_{max} \sim \frac{\sqrt{d/2}}{\pi} + \sqrt{(1-\frac{1}{\pi})\ln{n}} $$
When $d \gg \ln{n}$, $T_{max} \sim \frac{1}{\pi} \sqrt{d/2}$ while $S \sim \sqrt{d/2}$. The strongest competitor is no longer orders of magnitude smaller than $S$ (thanks to the exclusion of negative components), but instead a constant fraction (specifically, $\frac{1}{\pi}$) of $S$. That said, the conclusion doesn't change: $X_i$ almost always wins.
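Before the full sanity check below, the normal approximation for a single competitor's score can be verified in isolation (a small sketch; the sample sizes are arbitrary and the variable names are ad hoc, chosen to mirror $s_1$ and $s_2$):
\begin{lstlisting}
import numpy as np
N, d = 10000, 1000
X0 = np.random.normal(size=d)
X0p = X0.clip(min=0)
s1, s2 = X0p.sum(), (X0p**2).sum()
k = X0p / np.sqrt(s2)
# scores of N independent clipped competitors against k
T = np.random.normal(size=(N, d)).clip(min=0) @ k
mu = np.sqrt(1/(2*np.pi)) * s1 / np.sqrt(s2)
var = 0.5 - 1/(2*np.pi)
print("empirical mean %.4f vs predicted %.4f" % (T.mean(), mu))
print("empirical var  %.4f vs predicted %.4f" % (T.var(), var))
\end{lstlisting}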
\section*{4. Numerical Sanity Check}
\begin{lstlisting}
import numpy as np
N = 100000
d = 1000
Xs = np.random.normal(size=(N,d))
# Find the metric vector
X0 = Xs[0] # We demonstrate that X0 is optimal
X0p = X0.clip(min=0)
L2_X0p = np.linalg.norm(X0p)
X0p_hat = X0p / L2_X0p
X0_score = np.dot(X0p_hat, X0) # is also L2_X0p
print("X0_score: %.4f" % X0_score)
# Find scores
scores = Xs @ X0p_hat
scores = np.partition(scores, -2)  # scores[-1] is the max, scores[-2] the runner-up
print("1st score: %.4f, squared = %.4f ~ d/2 = %.2f" % (scores[-1], scores[-1]**2, d/2))
print("2nd score: %.4f, squared = %.4f < bd, bd ~ 2log(n) = %.4f" % (scores[-2], scores[-2]**2, 2*np.log(N)))
print("If we allow other candidates to remove their negative scores")
Xs = Xs.clip(min=0)
scores = Xs @ X0p_hat
scores = np.partition(scores, -2)
dpart = np.sqrt(d/2)/np.pi
npart = np.sqrt((1-1/np.pi)*np.log(N))
print("2nd score: %.4f ~ sqrt{d/2pi^2} + sqrt{(1-1/pi)ln(n)} = %.4f" % (scores[-2], dpart+npart))
\end{lstlisting}
Example Result:
\begin{lstlisting}
X0_score: 21.2264
1st score: 21.2264, squared = 450.5582 ~ d/2 = 500.00
2nd score: 4.3707, squared = 19.1034 < bd, bd ~ 2log(n) = 23.0259
If we allow other candidates to remove their negative scores
2nd score: 9.6617 ~ sqrt{d/2pi^2} + sqrt{(1-1/pi)ln(n)} = 9.9191
\end{lstlisting}
\end{document}